Source code for qiskit_experiments.curve_analysis.curve_analysis
# This code is part of Qiskit.## (C) Copyright IBM 2021.## This code is licensed under the Apache License, Version 2.0. You may# obtain a copy of this license in the LICENSE.txt file in the root directory# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.## Any modifications or derivative works of this code must retain this# copyright notice, and modified files need to carry a notice indicating# that they have been altered from the originals."""Analysis class for curve fitting."""# pylint: disable=invalid-namefromtypingimportDict,List,Tuple,Union,Optionalimportlmfitimportnumpyasnpfromuncertaintiesimportunumpyasunpfromqiskit_experiments.frameworkimportExperimentData,AnalysisResultDatafromqiskit_experiments.data_processing.exceptionsimportDataProcessorErrorfrom.base_curve_analysisimportBaseCurveAnalysis,PARAMS_ENTRY_PREFIXfrom.curve_dataimportCurveData,FitOptions,CurveFitResultfrom.utilsimporteval_with_uncertainties,convert_lmfit_result,multi_mean_xy_data,data_sort
class CurveAnalysis(BaseCurveAnalysis):
    """Base class for curve analysis with single curve group.

    The fit parameters from the series defined under the analysis class are all shared
    and the analysis performs a single multi-objective function optimization.

    A subclass may override these methods to customize the fit workflow.

    .. rubric:: _run_data_processing

    This method performs data processing and returns the processed dataset.
    By default, it internally calls the :class:`.DataProcessor` instance from
    the `data_processor` analysis option and processes the experiment data payload
    to create Y data with uncertainty.
    X data and other metadata are generated within this method by inspecting the
    circuit metadata. The series classification is also performed based upon the
    matching of circuit metadata and the ``data_subfit_map`` analysis option.

    .. rubric:: _format_data

    This method consumes the processed dataset and outputs the formatted dataset.
    By default, this method takes the average of y values over
    the same x values and then sorts the entire data by x values.

    .. rubric:: _generate_fit_guesses

    This method creates initial guesses for the fit parameters.
    See :ref:`curve_analysis_init_guess` for details.

    .. rubric:: _run_curve_fit

    This method performs the fitting with predefined fit models and the formatted dataset.
    This method internally calls the :meth:`_generate_fit_guesses` method.
    Note that this is a core functionality of the :meth:`_run_analysis` method,
    that creates fit result objects from the formatted dataset.

    .. rubric:: _evaluate_quality

    This method evaluates the quality of the fit based on the fit result.
    This returns "good" when reduced chi-squared is less than 3.0.
    Usually it returns string "good" or "bad" according to the evaluation.

    .. rubric:: _create_analysis_results

    This method creates analysis results for important fit parameters
    that might be defined by analysis options ``result_parameters``.

    .. rubric:: _create_curve_data

    This method creates analysis results containing the formatted dataset,
    i.e. data used for the fitting.
    Entries are created when the analysis option ``return_data_points`` is ``True``.
    If analysis consists of multiple series, an analysis result is created for
    each series definition.

    .. rubric:: _initialize

    This method initializes analysis options against input experiment data.
    Usually this method is called before other methods are called.
    """

    def __init__(
        self,
        models: Optional[List[lmfit.Model]] = None,
        name: Optional[str] = None,
    ):
        """Initialize data fields that are privately accessed by methods.

        Args:
            models: List of LMFIT ``Model`` class to define fitting functions and
                parameters. If multiple models are provided, the analysis performs
                multi-objective optimization where the parameters with the same name
                are shared among provided models. When multiple models are provided,
                user must specify the ``data_subfit_map`` value in the analysis options
                to allocate experimental results to a particular fit model.
            name: Optional. Name of this analysis. Defaults to the class name.
        """
        super().__init__()

        self._models = models or []
        self._name = name or self.__class__.__name__

    @property
    def name(self) -> str:
        """Return name of this analysis."""
        return self._name

    @property
    def parameters(self) -> List[str]:
        """Return parameters of this curve analysis.

        Parameter names are collected from all models in first-seen order,
        without duplicates, excluding names listed in the ``fixed_parameters`` option.
        """
        unite_params = []
        for model in self._models:
            for name in model.param_names:
                if name not in unite_params and name not in self.options.fixed_parameters:
                    unite_params.append(name)
        return unite_params

    @property
    def models(self) -> List[lmfit.Model]:
        """Return fit models."""
        return self._models

    def _run_data_processing(
        self,
        raw_data: List[Dict],
        models: List[lmfit.Model],
    ) -> CurveData:
        """Perform data processing from the experiment result payload.

        Args:
            raw_data: Payload in the experiment data.
            models: A list of LMFIT models that provide the model name and
                optionally data sorting keys.

        Returns:
            Processed data that will be sent to the formatter method.

        Raises:
            DataProcessorError: When model is a multi-objective function but
                data sorting option is not provided.
            DataProcessorError: When key for x values is not found in the metadata.
        """

        def _matched(metadata, **filters):
            """Return True when ``metadata`` has every filter key with an equal value."""
            try:
                return all(metadata[key] == val for key, val in filters.items())
            except KeyError:
                # A missing key means this entry cannot match the filter.
                return False

        if not self.options.filter_data:
            analyzed_data = raw_data
        else:
            analyzed_data = [
                d for d in raw_data if _matched(d["metadata"], **self.options.filter_data)
            ]

        x_key = self.options.x_key

        try:
            xdata = np.asarray([datum["metadata"][x_key] for datum in analyzed_data], dtype=float)
        except KeyError as ex:
            raise DataProcessorError(
                f"X value key {x_key} is not defined in circuit metadata."
            ) from ex

        ydata = self.options.data_processor(analyzed_data)
        # Entries without a "shots" field are recorded as NaN.
        shots = np.asarray([datum.get("shots", np.nan) for datum in analyzed_data])

        if len(models) == 1:
            # all data belongs to the single model
            data_allocation = np.full(xdata.size, 0, dtype=int)
        else:
            # -1 marks data points that are not allocated to any model.
            data_allocation = np.full(xdata.size, -1, dtype=int)
            for idx, sub_model in enumerate(models):
                try:
                    tags = self.options.data_subfit_map[sub_model._name]
                except KeyError as ex:
                    raise DataProcessorError(
                        f"Data sort options for model {sub_model._name} is not defined. "
                        "Please provide the 'data_subfit_map' analysis option for this model."
                    ) from ex
                if tags is None:
                    continue
                matched_inds = np.asarray(
                    [_matched(d["metadata"], **tags) for d in analyzed_data], dtype=bool
                )
                data_allocation[matched_inds] = idx

        return CurveData(
            x=xdata,
            y=unp.nominal_values(ydata),
            y_err=unp.std_devs(ydata),
            shots=shots,
            data_allocation=data_allocation,
            labels=[sub_model._name for sub_model in models],
        )

    def _format_data(
        self,
        curve_data: CurveData,
    ) -> CurveData:
        """Postprocessing for the processed dataset.

        Args:
            curve_data: Processed dataset created from experiment results.

        Returns:
            Formatted data.
        """
        # take average over the same x value by keeping sigma
        data_allocation, xdata, ydata, sigma, shots = multi_mean_xy_data(
            series=curve_data.data_allocation,
            xdata=curve_data.x,
            ydata=curve_data.y,
            sigma=curve_data.y_err,
            shots=curve_data.shots,
            method=self.options.average_method,
        )

        # sort by x value in ascending order
        data_allocation, xdata, ydata, sigma, shots = data_sort(
            series=data_allocation,
            xdata=xdata,
            ydata=ydata,
            sigma=sigma,
            shots=shots,
        )

        return CurveData(
            x=xdata,
            y=ydata,
            y_err=sigma,
            shots=shots,
            data_allocation=data_allocation,
            labels=curve_data.labels,
        )

    def _generate_fit_guesses(
        self,
        user_opt: FitOptions,
        curve_data: CurveData,  # pylint: disable=unused-argument
    ) -> Union[FitOptions, List[FitOptions]]:
        """Create algorithmic initial fit guess from analysis options and curve data.

        The default implementation returns ``user_opt`` unchanged.

        Args:
            user_opt: Fit options filled with user provided guess and bounds.
            curve_data: Formatted data collection to fit.

        Returns:
            A single fit option or a list of fit options that are passed
            to the fitter function.
        """
        return user_opt

    def _run_curve_fit(
        self,
        curve_data: CurveData,
        models: List[lmfit.Model],
    ) -> CurveFitResult:
        """Perform curve fitting on given data collection and fit models.

        Args:
            curve_data: Formatted data to fit.
            models: A list of LMFIT models that are used to build a cost function
                for the LMFIT minimizer.

        Returns:
            The best fitting outcome with minimum reduced chi-squared value.
        """
        unite_parameter_names = []
        for model in models:
            # Seems like this is not efficient looping, but using set operation sometimes
            # yields bad fit. Not sure if this is an edge case, but
            # `TestRamseyXY` unittest failed due to the significant chisq value
            # in which the least_square fitter terminates with `xtol` rather than `ftol`
            # condition, i.e. `ftol` condition indicates termination by cost function.
            # This code respects the ordering of parameters so that it matches with
            # the signature of fit function and it is backward compatible.
            # In principle this should not matter since LMFIT maps them with names
            # rather than index. Need more careful investigation.
            for name in model.param_names:
                if name not in unite_parameter_names:
                    unite_parameter_names.append(name)

        default_fit_opt = FitOptions(
            parameters=unite_parameter_names,
            default_p0=self.options.p0,
            default_bounds=self.options.bounds,
            **self.options.lmfit_options,
        )
        # Bind fixed parameters if not empty
        if self.options.fixed_parameters:
            fixed_parameters = {
                k: v
                for k, v in self.options.fixed_parameters.items()
                if k in unite_parameter_names
            }
            default_fit_opt.p0.set_if_empty(**fixed_parameters)
        else:
            fixed_parameters = {}

        fit_options = self._generate_fit_guesses(default_fit_opt, curve_data)
        if isinstance(fit_options, FitOptions):
            # Normalize a single option into a list so the loop below is uniform.
            fit_options = [fit_options]

        valid_uncertainty = np.all(np.isfinite(curve_data.y_err))

        # Weights are only computed when every y error is finite;
        # otherwise models are fit without weights.
        model_weights = {}
        if valid_uncertainty:
            for model in models:
                sub_yerr = curve_data.get_subset_of(model._name).y_err
                if len(sub_yerr) == 0:
                    continue
                # Replace (near-)zero errors with machine epsilon to avoid division by zero.
                nonzero_yerr = np.where(np.isclose(sub_yerr, 0.0), np.finfo(float).eps, sub_yerr)
                raw_weights = 1 / nonzero_yerr
                # Remove outlier. When all sample values are the same with sample average,
                # or sampling error is zero with shot-weighted average,
                # some yerr values might be very close to zero, yielding significant weights.
                # With such outlier, the fit doesn't sense residual of other data points.
                maximum_weight = np.percentile(raw_weights, 90)
                model_weights[model._name] = np.clip(raw_weights, 0.0, maximum_weight)

        # Objective function for minimize. This computes composite residuals of sub models.
        def _objective(_params):
            ys = []
            for model in models:
                sub_data = curve_data.get_subset_of(model._name)
                yi = model._residual(
                    params=_params,
                    data=sub_data.y,
                    weights=model_weights.get(model._name, None),
                    x=sub_data.x,
                )
                ys.append(yi)
            return np.concatenate(ys)

        # Run fit for each configuration
        res = None
        for fit_option in fit_options:
            # Setup parameter configuration, i.e. init value, bounds
            guess_params = lmfit.Parameters()
            for name in unite_parameter_names:
                bounds = fit_option.bounds[name] or (-np.inf, np.inf)
                guess_params.add(
                    name=name,
                    value=fit_option.p0[name],
                    min=bounds[0],
                    max=bounds[1],
                    vary=name not in fixed_parameters,
                )

            try:
                with np.errstate(all="ignore"):
                    new = lmfit.minimize(
                        fcn=_objective,
                        params=guess_params,
                        method=self.options.fit_method,
                        scale_covar=not valid_uncertainty,
                        nan_policy="omit",
                        **fit_option.fitter_opts,
                    )
            except Exception:  # pylint: disable=broad-except
                # Best effort: a failing configuration is skipped so that
                # the remaining fit options can still be tried.
                continue

            if res is None or not res.success:
                # Nothing usable so far; take this attempt whatever its status.
                res = new
                continue

            if new.success and res.redchi > new.redchi:
                res = new

        # NOTE: ``res`` is still None here when every attempt raised (or when
        # no fit option was provided); it is passed through to the converter as is.
        return convert_lmfit_result(res, models, curve_data.x, curve_data.y)

    def _run_analysis(
        self, experiment_data: ExperimentData
    ) -> Tuple[List[AnalysisResultData], List["pyplot.Figure"]]:
        """Run the full curve analysis workflow on the experiment data.

        The workflow initializes the analysis, processes and formats the data,
        runs the curve fit, creates analysis results, and feeds the plotter.

        Args:
            experiment_data: Experiment data to analyze.

        Returns:
            A tuple of the list of analysis results and the list of figures.
            The figure list is empty when the ``plot`` option is disabled.
        """
        # Prepare for fitting
        self._initialize(experiment_data)

        analysis_results = []

        # Run data processing
        processed_data = self._run_data_processing(
            raw_data=experiment_data.data(),
            models=self._models,
        )

        if self.options.plot and self.options.plot_raw_data:
            for model in self._models:
                sub_data = processed_data.get_subset_of(model._name)
                self.plotter.set_series_data(
                    model._name,
                    x=sub_data.x,
                    y=sub_data.y,
                )

        # Format data
        formatted_data = self._format_data(processed_data)
        if self.options.plot:
            for model in self._models:
                sub_data = formatted_data.get_subset_of(model._name)
                self.plotter.set_series_data(
                    model._name,
                    x_formatted=sub_data.x,
                    y_formatted=sub_data.y,
                    y_formatted_err=sub_data.y_err,
                )

        # Run fitting
        fit_data = self._run_curve_fit(
            curve_data=formatted_data,
            models=self._models,
        )

        if fit_data.success:
            quality = self._evaluate_quality(fit_data)
            self.plotter.set_supplementary_data(fit_red_chi=fit_data.reduced_chisq)
        else:
            quality = "bad"

        if self.options.return_fit_parameters:
            # Store fit status overview entry regardless of success.
            # This is sometimes useful when debugging the fitting code.
            overview = AnalysisResultData(
                name=PARAMS_ENTRY_PREFIX + self.name,
                value=fit_data,
                quality=quality,
                extra=self.options.extra,
            )
            analysis_results.append(overview)

        # Create figure and result data
        if fit_data.success:
            # Create analysis results
            primary_results = self._create_analysis_results(
                fit_data=fit_data, quality=quality, **self.options.extra.copy()
            )
            analysis_results.extend(primary_results)
            self.plotter.set_supplementary_data(primary_results=primary_results)

            # Draw fit curves and report
            if self.options.plot:
                for model in self._models:
                    sub_data = formatted_data.get_subset_of(model._name)
                    if sub_data.x.size == 0:
                        # If data is empty, skip drawing this model.
                        # This is the case when fit model exists but no data to fit is provided.
                        # For example, experiment may omit experimenting with some setting.
                        continue
                    x_interp = np.linspace(np.min(sub_data.x), np.max(sub_data.x), num=100)

                    y_data_with_uncertainty = eval_with_uncertainties(
                        x=x_interp,
                        model=model,
                        params=fit_data.ufloat_params,
                    )
                    y_interp = unp.nominal_values(y_data_with_uncertainty)
                    # Add fit line data
                    self.plotter.set_series_data(
                        model._name,
                        x_interp=x_interp,
                        y_interp=y_interp,
                    )
                    if fit_data.covar is not None:
                        # Add confidence interval data
                        y_interp_err = unp.std_devs(y_data_with_uncertainty)
                        if np.isfinite(y_interp_err).all():
                            self.plotter.set_series_data(
                                model._name,
                                y_interp_err=y_interp_err,
                            )

        # Add raw data points
        if self.options.return_data_points:
            analysis_results.extend(
                self._create_curve_data(curve_data=formatted_data, models=self._models)
            )

        # Finalize plot
        if self.options.plot:
            return analysis_results, [self.plotter.figure()]

        return analysis_results, []

    def __getstate__(self):
        """Return the instance state with the fit models serialized to strings."""
        state = self.__dict__.copy()
        # Convert models into JSON str.
        # This object includes local function and cannot be pickled.
        source = [m.dumps() for m in state["_models"]]
        state["_models"] = source
        return state

    def __setstate__(self, state):
        """Restore the instance state, rebuilding the fit models from their dumps."""
        model_objs = []
        for source in state.pop("_models"):
            # A placeholder model is needed only to get access to ``loads``.
            tmp_mod = lmfit.Model(func=None)
            mod = tmp_mod.loads(s=source)
            model_objs.append(mod)
        self.__dict__.update(state)
        self._models = model_objs