Source code for qiskit_experiments.curve_analysis.curve_analysis
# This code is part of Qiskit.
#
# (C) Copyright IBM 2021.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""Analysis class for curve fitting."""
import warnings

# pylint: disable=invalid-name
from typing import Dict, List, Tuple, Union, Optional
from functools import partial
from copy import deepcopy

import lmfit
import numpy as np
import pandas as pd
from uncertainties import unumpy as unp

from qiskit_experiments.framework import (
    ExperimentData,
    AnalysisResultData,
)
from qiskit_experiments.framework.containers import FigureType, ArtifactData
from qiskit_experiments.data_processing.exceptions import DataProcessorError
from qiskit_experiments.visualization import PlotStyle

from .base_curve_analysis import BaseCurveAnalysis, PARAMS_ENTRY_PREFIX
from .curve_data import FitOptions, CurveFitResult
from .scatter_table import ScatterTable
from .utils import (
    eval_with_uncertainties,
    convert_lmfit_result,
    shot_weighted_average,
    inverse_weighted_variance,
    sample_average,
)
class CurveAnalysis(BaseCurveAnalysis):
    """Base class for curve analysis with a single curve group.

    The fit parameters from the series defined under the analysis class are all shared
    and the analysis performs a single multi-objective function optimization.

    A subclass may override these methods to customize the fit workflow.

    .. rubric:: _run_data_processing

    This method performs data processing and returns the processed dataset.
    By default, it internally calls the :class:`.DataProcessor` instance from
    the `data_processor` analysis option and processes the experiment data payload
    to create Y data with uncertainty.
    X data and other metadata are generated within this method by inspecting the
    circuit metadata. The series classification is also performed based upon the
    matching of circuit metadata.

    .. rubric:: _format_data

    This method consumes the processed dataset and outputs the formatted dataset.
    By default, this method takes the average of y values over
    the same x values and then sorts the entire data by x values.

    .. rubric:: _generate_fit_guesses

    This method creates initial guesses for the fit parameters.
    See :ref:`curve_analysis_init_guess` for details.

    .. rubric:: _run_curve_fit

    This method performs the fitting with the predefined fit models and the formatted dataset.
    This method internally calls the :meth:`_generate_fit_guesses` method.
    Note that this is a core functionality of the :meth:`_run_analysis` method,
    which creates fit result objects from the formatted dataset.

    .. rubric:: _evaluate_quality

    This method evaluates the quality of the fit based on the fit result.
    This returns "good" when the reduced chi-squared is less than 3.0.
    Usually it returns the string "good" or "bad" according to the evaluation.

    .. rubric:: _create_analysis_results

    This method creates analysis results for important fit parameters
    that might be defined by the ``result_parameters`` analysis option.

    .. rubric:: _create_figures

    This method creates figures by consuming the scatter table data.
    Figures are created when the analysis option ``plot`` is ``True``.

    .. rubric:: _initialize

    This method initializes analysis options against the input experiment data.
    Usually this method is called before other methods are called.
    """

    def __init__(
        self,
        models: Optional[List[lmfit.Model]] = None,
        name: Optional[str] = None,
    ):
        """Initialize data fields that are privately accessed by methods.

        Args:
            models: List of LMFIT ``Model`` classes defining the fitting functions and
                parameters. If multiple models are provided, the analysis performs
                multi-objective optimization where parameters with the same name
                are shared among the provided models. When multiple models are provided,
                the user must specify the ``data_subfit_map`` value in the analysis options
                to allocate experimental results to a particular fit model.
            name: Optional. Name of this analysis.
        """
        super().__init__()

        self._models = models or []
        self._name = name or self.__class__.__name__
        self._plot_config_cache = {}

    @property
    def name(self) -> str:
        """Return the name of this analysis."""
        return self._name

    @property
    def parameters(self) -> List[str]:
        """Return the parameters of this curve analysis."""
        unite_params = []
        for model in self._models:
            for name in model.param_names:
                if name not in unite_params and name not in self.options.fixed_parameters:
                    unite_params.append(name)
        return unite_params

    @property
    def models(self) -> List[lmfit.Model]:
        """Return the fit models."""
        return self._models
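    # Illustrative note (a hedged sketch, not part of the original class): with multiple
    # models, the ``data_subfit_map`` analysis option ties each model name to the circuit
    # metadata that selects its data, e.g.
    #
    #   analysis = CurveAnalysis(
    #       models=[
    #           lmfit.models.ExpressionModel("amp * cos(2 * pi * freq * x) + base", name="X"),
    #           lmfit.models.ExpressionModel("amp * sin(2 * pi * freq * x) + base", name="Y"),
    #       ]
    #   )
    #   analysis.set_options(data_subfit_map={"X": {"series": "X"}, "Y": {"series": "Y"}})
    #
    # The metadata keys and values above are assumptions for illustration; both models then
    # share ``amp``, ``freq``, and ``base`` in a single multi-objective optimization.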
    def model_names(self) -> List[str]:
        """Return model names."""
        return [getattr(m, "_name", f"model-{i}") for i, m in enumerate(self._models)]
    def set_options(self, **fields):
        """Set the analysis options for the :meth:`run` method.

        Args:
            fields: The fields to update the options.

        Raises:
            KeyError: When the removed option ``curve_fitter`` is set.
        """
        if fields.get("plot_residuals") and not self.options.get("plot_residuals"):
            # Check that the figure has no subplots to prevent a collision in subplot indices.
            if self.plotter.options.get("subplots") != (1, 1):
                warnings.warn(
                    "Residuals plotting is currently supported for analysis with 1 subplot.",
                    UserWarning,
                    stacklevel=2,
                )
                fields["plot_residuals"] = False
            else:
                self._add_residuals_plot_config()
        if not fields.get("plot_residuals", True) and self.options.get("plot_residuals"):
            self._remove_residuals_plot_config()

        super().set_options(**fields)
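    # Illustrative usage (an assumption for clarity, not part of this class): residual
    # plotting is toggled through this method on an analysis instance, e.g.
    #
    #   analysis.set_options(plot_residuals=True)   # adds a second canvas for residuals
    #   analysis.set_options(plot_residuals=False)  # restores the cached single-plot layout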
    def _add_residuals_plot_config(self):
        """Configure plotter options for the residuals plot."""
        # Check that we have a model to fit.
        residual_plot_y_axis_size = 3
        if self.models:
            # Cache figure options.
            self._plot_config_cache["figure_options"] = {}
            self._plot_config_cache["figure_options"]["ylabel"] = self.plotter.figure_options.get(
                "ylabel"
            )
            self._plot_config_cache["figure_options"]["series_params"] = deepcopy(
                self.plotter.figure_options.get("series_params")
            )
            self._plot_config_cache["figure_options"]["sharey"] = self.plotter.figure_options.get(
                "sharey"
            )

            self.plotter.set_figure_options(
                ylabel=[
                    self.plotter.figure_options.get("ylabel", ""),
                    "Residuals",
                ],
            )

            model_names = self.model_names()
            series_params = self.plotter.figure_options["series_params"]
            for model_name in model_names:
                if series_params.get(model_name):
                    series_params[model_name]["canvas"] = 0
                else:
                    series_params[model_name] = {"canvas": 0}
                series_params[model_name + "_residuals"] = series_params[model_name].copy()
                series_params[model_name + "_residuals"]["canvas"] = 1
            self.plotter.set_figure_options(sharey=False, series_params=series_params)

            # Cache plotter options.
            self._plot_config_cache["plotter"] = {}
            self._plot_config_cache["plotter"]["subplots"] = self.plotter.options.get("subplots")
            self._plot_config_cache["plotter"]["style"] = deepcopy(
                self.plotter.options.get("style", PlotStyle({}))
            )

            # Remove the name from the plotter style so it will not clash with the new name.
            previous_plotter_style = self._plot_config_cache["plotter"]["style"].copy()
            previous_plotter_style.pop("style_name", "")

            # Create the new figure size based on the previous size.
            new_figsize = self.plotter.drawer.options.get("figsize", (8, 5))
            new_figsize = (new_figsize[0], new_figsize[1] + residual_plot_y_axis_size)

            # Add the configuration for the residuals plot.
            self.plotter.set_options(
                subplots=(2, 1),
                style=PlotStyle.merge(
                    PlotStyle(
                        {
                            "figsize": new_figsize,
                            "textbox_rel_pos": (0.28, -0.10),
                            "sub_plot_heights_list": [7 / 10, 3 / 10],
                            "sub_plot_widths_list": [1],
                            "style_name": "residuals",
                        }
                    ),
                    previous_plotter_style,
                ),
            )

    def _remove_residuals_plot_config(self):
        """Set options for a single plot back to their cached values."""
        if self.models:
            self.plotter.set_figure_options(
                ylabel=self._plot_config_cache["figure_options"]["ylabel"],
                sharey=self._plot_config_cache["figure_options"]["sharey"],
                series_params=self._plot_config_cache["figure_options"]["series_params"],
            )

            # Add the style_name so the plotter will know not to print the residual data.
            self.plotter.set_options(
                subplots=self._plot_config_cache["plotter"]["subplots"],
                style=PlotStyle.merge(
                    self._plot_config_cache["plotter"]["style"],
                    PlotStyle({"style_name": "canceled_residuals"}),
                ),
            )

        self._plot_config_cache = {}

    def _run_data_processing(
        self,
        raw_data: List[Dict],
        category: str = "raw",
    ) -> ScatterTable:
        """Perform data processing from the experiment result payload.

        Args:
            raw_data: Payload in the experiment data.
            category: Category string of the output dataset.

        Returns:
            Processed data that will be sent to the formatter method.

        Raises:
            DataProcessorError: When the key for x values is not found in the metadata.
            ValueError: When the data processor is not provided.
        """
        opt = self.options

        # Create the table
        if opt.filter_data:
            to_process = [d for d in raw_data if opt.filter_data.items() <= d["metadata"].items()]
        else:
            to_process = raw_data

        # Compute the y values
        if not self.options.data_processor:
            raise ValueError(
                f"Data processor is not set for the {self.__class__.__name__} instance. "
""Initialize the instance with the experiment data, or set the ""data_processor analysis options.")processed=self.options.data_processor(to_process)yvals=unp.nominal_values(processed).flatten()withnp.errstate(invalid="ignore"):# For averaged data, the processed std dev will be NaN.# Setting std_devs to NaN will trigger floating point exceptions# which we can ignore. See https://stackoverflow.com/q/75656026yerrs=unp.std_devs(processed).flatten()# Prepare circuit metadata to data class mapper from data_subfit_map value.iflen(self._models)==1:classifier={self.model_names()[0]:{}}else:classifier=self.options.data_subfit_maptable=ScatterTable()fordatum,yval,yerrinzip(to_process,yvals,yerrs):metadata=datum["metadata"]try:xval=metadata[opt.x_key]exceptKeyErrorasex:raiseDataProcessorError(f"X value key {opt.x_key} is not defined in the circuit metadata.")fromex# Assign series name and series idforseries_id,(series_name,spec)inenumerate(classifier.items()):ifspec.items()<=metadata.items():breakelse:# This is unclassified data.series_name=pd.NAseries_id=pd.NAtable.add_row(xval=xval,yval=yval,yerr=yerr,series_name=series_name,series_id=series_id,category=category,shots=datum.get("shots",pd.NA),analysis=self.name,)returntabledef_format_data(self,curve_data:ScatterTable,category:str="formatted",)->ScatterTable:"""Postprocessing for preparing the fitting data. Args: curve_data: Processed dataset created from experiment results. category: Category string of the output dataset. Returns: New scatter table instance including data to fit. """averaging_methods={"shots_weighted":shot_weighted_average,"iwv":inverse_weighted_variance,"sample":sample_average,}average=averaging_methods[self.options.average_method]model_names=self.model_names()for(series_name,xval),sub_dataincurve_data.iter_groups("series_name","xval"):avg_yval,avg_yerr,shots=average(sub_data.y,sub_data.y_err,sub_data.shots,)try:series_id=model_names.index(series_name)exceptValueError:series_id=pd.NAcurve_data.add_row(xval=xval,yval=avg_yval,yerr=avg_yerr,series_name=series_name,series_id=series_id,category=category,shots=shots,analysis=self.name,)returncurve_datadef_generate_fit_guesses(self,user_opt:FitOptions,curve_data:ScatterTable,# pylint: disable=unused-argument)->Union[FitOptions,List[FitOptions]]:"""Create algorithmic initial fit guess from analysis options and curve data. Args: user_opt: Fit options filled with user provided guess and bounds. curve_data: Formatted data collection to fit. Returns: List of fit options that are passed to the fitter function. """returnuser_optdef_run_curve_fit(self,curve_data:ScatterTable,)->CurveFitResult:"""Perform curve fitting on given data collection and fit models. Args: curve_data: Formatted data to fit. Returns: The best fitting outcome with minimum reduced chi-squared value. """unite_parameter_names=[]formodelinself._models:# Seems like this is not efficient looping, but using set operation sometimes# yields bad fit. Not sure if this is an edge case, but# `TestRamseyXY` unittest failed due to the significant chisq value# in which the least_square fitter terminates with `xtol` rather than `ftol`# condition, i.e. `ftol` condition indicates termination by cost function.# This code respects the ordering of parameters so that it matches with# the signature of fit function and it is backward compatible.# In principle this should not matter since LMFIT maps them with names# rather than index. 
            # Need more careful investigation.
            for name in model.param_names:
                if name not in unite_parameter_names:
                    unite_parameter_names.append(name)

        default_fit_opt = FitOptions(
            parameters=unite_parameter_names,
            default_p0=self.options.p0,
            default_bounds=self.options.bounds,
            **self.options.lmfit_options,
        )

        # Bind fixed parameters if not empty
        if self.options.fixed_parameters:
            fixed_parameters = {
                k: v
                for k, v in self.options.fixed_parameters.items()
                if k in unite_parameter_names
            }
            default_fit_opt.p0.set_if_empty(**fixed_parameters)
        else:
            fixed_parameters = {}

        fit_options = self._generate_fit_guesses(default_fit_opt, curve_data)
        if isinstance(fit_options, FitOptions):
            fit_options = [fit_options]

        # Create a convenient function to compute the residual of the models.
        partial_weighted_residuals = []
        valid_uncertainty = np.all(np.isfinite(curve_data.y_err))

        # Create storage for residual plotting
        if self.options.get("plot_residuals"):
            residual_weights_list = []

        for idx, sub_data in curve_data.iter_by_series_id():
            if valid_uncertainty:
                nonzero_yerr = np.where(
                    np.isclose(sub_data.y_err, 0.0),
                    np.finfo(float).eps,
                    sub_data.y_err,
                )
                raw_weights = 1 / nonzero_yerr
                # Remove outliers. When all sample values are the same with the sample average,
                # or the sampling error is zero with the shot-weighted average,
                # some yerr values might be very close to zero, yielding significant weights.
                # With such an outlier, the fit doesn't sense the residual of other data points.
                maximum_weight = np.percentile(raw_weights, 90)
                weights_list = np.clip(raw_weights, 0.0, maximum_weight)
            else:
                weights_list = None
            model_weighted_residual = partial(
                self._models[idx]._residual,
                data=sub_data.y,
                weights=weights_list,
                x=sub_data.x,
            )
            partial_weighted_residuals.append(model_weighted_residual)

            # Add the weights for the residuals plot
            if self.options.get("plot_residuals"):
                if weights_list is None:
                    residual_weights_list.append(None)
                else:
                    residual_weights_list.append(weights_list)

        # Run the fit for each configuration
        res = None
        for fit_option in fit_options:
            # Set up the parameter configuration, i.e. initial values and bounds
            guess_params = lmfit.Parameters()
            for name in unite_parameter_names:
                bounds = fit_option.bounds[name] or (-np.inf, np.inf)
                guess_params.add(
                    name=name,
                    value=fit_option.p0[name],
                    min=bounds[0],
                    max=bounds[1],
                    vary=name not in fixed_parameters,
                )

            try:
                with np.errstate(all="ignore"):
                    new = lmfit.minimize(
                        fcn=lambda x: np.concatenate([p(x) for p in partial_weighted_residuals]),
                        params=guess_params,
                        method=self.options.fit_method,
                        scale_covar=not valid_uncertainty,
                        nan_policy="omit",
                        **fit_option.fitter_opts,
                    )
            except Exception:  # pylint: disable=broad-except
                continue

            if res is None or not res.success:
                res = new
                continue
            if new.success and res.redchi > new.redchi:
                res = new

        # If `plot_residuals` is ``False``, keep `residuals_model` as None to emphasize
        # that it was not calculated.
        residuals_model = [] if self.options.get("plot_residuals") else None

        if res and res.success and self.options.get("plot_residuals"):
            for weights in residual_weights_list:
                if weights is None:
                    residuals_model.append(res.residual)
                else:
                    residuals_model.append(
                        [
                            weighted_res / np.abs(weight)
                            for weighted_res, weight in zip(res.residual, weights)
                        ]
                    )
        if residuals_model is not None:
            residuals_model = np.array(residuals_model)

        return convert_lmfit_result(
            res,
            self._models,
            curve_data.x,
            curve_data.y,
            residuals_model,
        )

    def _create_figures(
        self,
        curve_data: ScatterTable,
    ) -> List["matplotlib.figure.Figure"]:
        """Create a list of figures from the curve data.

        Args:
            curve_data: Scatter data table containing all data points.

        Returns:
            A list of figures.
"""forseries_id,sub_dataincurve_data.iter_by_series_id():model_name=self.model_names()[series_id]# Plot raw data scattersifself.options.plot_raw_data:raw_data=sub_data.filter(category="raw")self.plotter.set_series_data(series_name=model_name,x=raw_data.x,y=raw_data.y,)# Plot formatted data scattersformatted_data=sub_data.filter(category=self.options.fit_category)self.plotter.set_series_data(series_name=model_name,x_formatted=formatted_data.x,y_formatted=formatted_data.y,y_formatted_err=formatted_data.y_err,)# Plot fit linesline_data=sub_data.filter(category="fitted")iflen(line_data)==0:continueself.plotter.set_series_data(series_name=model_name,x_interp=line_data.x,y_interp=line_data.y,)fit_stdev=line_data.y_errifnp.isfinite(fit_stdev).all():self.plotter.set_series_data(series_name=model_name,y_interp_err=fit_stdev,)ifself.options.get("plot_residuals"):residuals_data=sub_data.filter(category="residuals")self.plotter.set_series_data(series_name=model_name,x_residuals=residuals_data.x,y_residuals=residuals_data.y,)return[self.plotter.figure()]def_run_analysis(self,experiment_data:ExperimentData,)->Tuple[List[Union[AnalysisResultData,ArtifactData]],List[FigureType]]:figures:List[FigureType]=[]result_data:List[Union[AnalysisResultData,ArtifactData]]=[]artifacts:list[ArtifactData]=[]# Flag for plotting can be "always", "never", or "selective"# the analysis option overrides self._generate_figures if setifself.options.get("plot",None):plot="always"elifself.options.get("plot",None)isFalse:plot="never"else:plot=getattr(self,"_generate_figures","always")# Prepare for fittingself._initialize(experiment_data)table=self._format_data(self._run_data_processing(experiment_data.data()))formatted_subset=table.filter(category=self.options.fit_category)fit_data=self._run_curve_fit(formatted_subset)iffit_data.success:quality=self._evaluate_quality(fit_data)else:quality="bad"# After the quality is determined, plot can become a boolean flag for whether# to generate the figureplot_bool=plot=="always"or(plot=="selective"andquality=="bad")ifself.options.return_fit_parameters:# Store fit status overview entry regardless of success.# This is sometime useful when debugging the fitting code.overview=AnalysisResultData(name=PARAMS_ENTRY_PREFIX+self.name,value=fit_data,quality=quality,extra=self.options.extra,)result_data.append(overview)iffit_data.success:# Add fit data to curve data tablemodel_names=self.model_names()forseries_id,sub_datainformatted_subset.iter_by_series_id():xval=sub_data.xiflen(xval)==0:# If data is empty, skip drawing this model.# This is the case when fit model exist but no data to fit is provided.continue# Compute X, Y values with fit parameters.xval_arr_fit=np.linspace(np.min(xval),np.max(xval),num=100,dtype=float)uval_arr_fit=eval_with_uncertainties(x=xval_arr_fit,model=self._models[series_id],params=fit_data.ufloat_params,)yval_arr_fit=unp.nominal_values(uval_arr_fit)iffit_data.covarisnotNone:yerr_arr_fit=unp.std_devs(uval_arr_fit)else:yerr_arr_fit=np.zeros_like(xval_arr_fit)forxval,yval,yerrinzip(xval_arr_fit,yval_arr_fit,yerr_arr_fit):table.add_row(xval=xval,yval=yval,yerr=yerr,series_name=model_names[series_id],series_id=series_id,category="fitted",analysis=self.name,)ifself.options.get("plot_residuals"):# need to add here the residuals 
plot.xval_residual=sub_data.xyval_residuals=unp.nominal_values(fit_data.residuals[series_id])forxval,yvalinzip(xval_residual,yval_residuals):table.add_row(xval=xval,yval=yval,series_name=model_names[series_id],series_id=series_id,category="residuals",analysis=self.name,)result_data.extend(self._create_analysis_results(fit_data=fit_data,quality=quality,**self.options.extra.copy(),))artifacts.append(ArtifactData(name="curve_data",data=table,))artifacts.append(ArtifactData(name="fit_summary",data=fit_data,))ifplot_bool:iffit_data.success:self.plotter.set_supplementary_data(fit_red_chi=fit_data.reduced_chisq,primary_results=[rforrinresult_dataifnotr.name.startswith("@")],)figures.extend(self._create_figures(curve_data=table))returnresult_data+artifacts,figuresdef__getstate__(self):state=self.__dict__.copy()# Convert models into JSON str.# This object includes local function and cannot be pickled.source=[m.dumps()forminstate["_models"]]state["_models"]=sourcereturnstatedef__setstate__(self,state):model_objs=[]forsourceinstate.pop("_models"):tmp_mod=lmfit.Model(func=None)mod=tmp_mod.loads(s=source)model_objs.append(mod)self.__dict__.update(state)self._models=model_objs
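The sketch below is a minimal, hedged illustration of how ``CurveAnalysis`` is typically subclassed; it is not part of the module above. The exponential-decay expression, parameter names, initial guesses, and the ``result_parameters`` entry are illustrative assumptions rather than anything fixed by this file.

import lmfit

from qiskit_experiments.curve_analysis import CurveAnalysis, ParameterRepr


class DecayAnalysis(CurveAnalysis):
    """Fit y = amp * exp(-x / tau) + base to a single data series (illustrative example)."""

    def __init__(self):
        super().__init__(
            models=[
                lmfit.models.ExpressionModel(
                    expr="amp * exp(-x / tau) + base",
                    name="decay",
                )
            ]
        )

    @classmethod
    def _default_options(cls):
        options = super()._default_options()
        # Report ``tau`` as an analysis result named "T1" in seconds (an illustrative choice).
        options.result_parameters = [ParameterRepr("tau", "T1", "s")]
        return options


# Hedged usage on an existing ExperimentData instance:
#
#   analysis = DecayAnalysis()
#   analysis.set_options(p0={"amp": 1.0, "tau": 50e-6, "base": 0.0})
#   experiment_data = analysis.run(experiment_data).block_for_results()

Because all models in a ``CurveAnalysis`` share parameters with the same name, adding a second ``ExpressionModel`` here, together with the ``data_subfit_map`` analysis option, would turn the same class into a multi-objective fit.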