Source code for qiskit_experiments.data_processing.data_processor
# This code is part of Qiskit.
#
# (C) Copyright IBM 2021.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

r"""Actions done on the data to bring it into a usable form.

In Qiskit Experiments, uncertainty propagation is offloaded to the ``uncertainties``
package, which offers a Python float- and numpy-array-compatible number
representation that natively supports standard errors and their propagation.

Given values :math:`a` and :math:`b` with finite uncertainties, the error propagation
through a function :math:`f` is computed with derivatives:

.. math::

    \sigma_f^2 \sim \left| \frac{\partial f}{\partial a} \right|^2 \sigma_a^2
        + \left| \frac{\partial f}{\partial b} \right|^2 \sigma_b^2
        + 2 \frac{\partial f}{\partial a} \frac{\partial f}{\partial b} \sigma_{ab},

where :math:`\sigma_a` and :math:`\sigma_b` are the uncertainties of :math:`a` and
:math:`b`, while :math:`\sigma_{ab}` is the correlation between :math:`a` and :math:`b`.
Please refer to the `uncertainties`_ package documentation for additional details.

.. _uncertainties: https://pypi.org/project/uncertainties/
"""

from typing import Dict, List, Set, Tuple, Union, Any

import numpy as np
from uncertainties import unumpy as unp

from qiskit_experiments.data_processing.data_action import DataAction, TrainableDataAction
from qiskit_experiments.data_processing.exceptions import DataProcessorError
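# A minimal sketch of the propagation formula above using the ``uncertainties``
# package directly (the values are illustrative only):
#
#     >>> from uncertainties import ufloat
#     >>> a = ufloat(1.0, 0.1)  # value 1.0 with standard error 0.1
#     >>> b = ufloat(2.0, 0.2)
#     >>> f = a * b  # std_dev of f follows the derivative formula above
#     >>> f.nominal_value, f.std_dev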
class DataProcessor:
    """A DataProcessor defines a sequence of operations to perform on experimental data.

    Calling an instance of DataProcessor applies this sequence on the input argument.
    A DataProcessor is created with a list of DataAction instances. Each DataAction
    applies its ``_process`` method on the data and returns the processed data. The
    nodes in the DataProcessor may also perform data validation and some minor
    formatting. The output of one data action serves as input for the next data action.

    ``DataProcessor.__call__(datum)`` usually takes in an entry from the data property
    of an ExperimentData object (i.e. a dict containing metadata and memory keys and
    possibly counts, like the Result.data property) and produces the formatted data.
    ``DataProcessor.__call__(datum)`` extracts the data from the given datum under
    ``DataProcessor._input_key``, which is specified at initialization.
    """

    def __init__(
        self,
        input_key: str,
        data_actions: List[DataAction] = None,
    ):
        """Create a chain of data processing actions.

        Args:
            input_key: The initial key in the datum Dict[str, Any] under which the data
                processor will find the data to process.
            data_actions: A list of data processing actions to construct this data
                processor with. If nothing is given the processor returns unprocessed data.
        """
        self._input_key = input_key
        self._nodes = data_actions if data_actions else []
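    # A usage sketch of the processor on a single datum. ``Probability`` is a node
    # from ``qiskit_experiments.data_processing.nodes``; the counts are illustrative:
    #
    #     >>> from qiskit_experiments.data_processing.nodes import Probability
    #     >>> processor = DataProcessor("counts", [Probability(outcome="1")])
    #     >>> processor({"counts": {"0": 40, "1": 60}})  # ufloat estimate of P("1")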
    def append(self, node: DataAction):
        """Append a new data action node to this data processor.

        Args:
            node: A DataAction that will process the data.
        """
        self._nodes.append(node)
    @property
    def is_trained(self) -> bool:
        """Return True if all nodes of the data processor have been trained."""
        for node in self._nodes:
            if isinstance(node, TrainableDataAction):
                if not node.is_trained:
                    return False

        return True
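    # A sketch of checking training state. ``SVD`` is a trainable node from
    # ``qiskit_experiments.data_processing.nodes``:
    #
    #     >>> from qiskit_experiments.data_processing.nodes import SVD
    #     >>> processor = DataProcessor("memory", [SVD()])
    #     >>> processor.is_trained  # False until the SVD axes have been learned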
    def __call__(self, data: Union[Dict, List[Dict]], **options) -> np.ndarray:
        """Call self on the given datum.

        This method sequentially calls the stored data actions on the datum.

        Args:
            data: The data, typically from ``ExperimentData.data(...)``, that needs to be
                processed. This dict or list of dicts also contains the metadata of each
                experiment.
            options: Run-time options given as keyword arguments that will be passed to
                the nodes.

        Returns:
            The data processed by the data processor. This is an arbitrary numpy array
            that may contain standard errors as a ufloat object.
        """
        return self._call_internal(data, **options)
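    # Calling on a list of dicts processes all entries at once; a sketch reusing the
    # ``Probability`` processor from above (the counts are illustrative):
    #
    #     >>> data = [{"counts": {"0": 90, "1": 10}}, {"counts": {"0": 10, "1": 90}}]
    #     >>> processor(data)  # array of two ufloat probabilities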
    def call_with_history(
        self, data: Union[Dict, List[Dict]], history_nodes: Set = None
    ) -> Tuple[np.ndarray, List]:
        """Call self on the given datum.

        This method sequentially calls the stored data actions on the datum and also
        returns the history of the processed data.

        Args:
            data: The data, typically from ``ExperimentData.data(...)``, that needs to be
                processed. This dict or list of dicts also contains the metadata of each
                experiment.
            history_nodes: The nodes, specified by index in the data processing chain, to
                include in the history. If None is given then all nodes will be included
                in the history.

        Returns:
            A tuple of (processed data, history), that are the data processed by the
            processor and its intermediate state in each specified node, respectively.
        """
        return self._call_internal(data, True, history_nodes)
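    # A sketch of retrieving intermediate results; restricting ``history_nodes`` to
    # ``{0}`` keeps only the output of the first node in the chain:
    #
    #     >>> processed, history = processor.call_with_history(data, history_nodes={0})
    #     >>> history[0]  # tuple of (node class name, intermediate data, node index)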
    def _call_internal(
        self,
        data: Union[Dict, List[Dict]],
        with_history: bool = False,
        history_nodes: Set = None,
        call_up_to_node: int = None,
    ) -> Union[np.ndarray, Tuple[np.ndarray, List]]:
        """Process the data with or without storing the history of the computation.

        Args:
            data: The data, typically from ExperimentData.data(...), that needs to be
                processed. This dict or list of dicts also contains the metadata of each
                experiment.
            with_history: If True the history is returned, otherwise it is not.
            history_nodes: The nodes, specified by index in the data processing chain, to
                include in the history. If None is given then all nodes will be included
                in the history.
            call_up_to_node: The data processor will use each node in the processing chain
                up to the node indexed by call_up_to_node. If this variable is not
                specified then all nodes in the data processing chain will be called.

        Returns:
            When ``with_history`` is ``False`` it returns a numpy array of processed data.
            Otherwise it returns a tuple of (processed data, history) in which the
            ``history`` is a list of intermediate data at each step.
        """
        if call_up_to_node is None:
            call_up_to_node = len(self._nodes)

        data = self._data_extraction(data)

        history = []
        for index, node in enumerate(self._nodes[:call_up_to_node]):
            data = node(data)

            if with_history and (history_nodes is None or index in history_nodes):
                if data.shape[0] == 1:
                    cache_data = data[0]
                else:
                    cache_data = data
                history.append((node.__class__.__name__, cache_data, index))

        # Return only the first entry if len(data) == 1, e.g. [[0, 1]] -> [0, 1]
        if data.shape[0] == 1:
            out_data = data[0]
        else:
            out_data = data

        if with_history:
            return out_data, history
        else:
            return out_data
    def train(self, data: Union[Dict, List[Dict]]):
        """Train the nodes of the data processor.

        Args:
            data: The data to use to train the data processor.
        """
        for index, node in enumerate(self._nodes):
            if isinstance(node, TrainableDataAction):
                if not node.is_trained:
                    # Process the data up to the untrained node.
                    node.train(self._call_internal(data, call_up_to_node=index))
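    # A training sketch: an ``SVD`` node learns its projection axes from level-1
    # data before the processor can be called. ``calibration_data`` is a
    # hypothetical name for, e.g., entries returned by ``ExperimentData.data()``:
    #
    #     >>> processor = DataProcessor("memory", [SVD()])
    #     >>> processor.train(calibration_data)  # hypothetical input data
    #     >>> processor.is_trained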
    def _data_extraction(self, data: Union[Dict, List[Dict]]) -> np.ndarray:
        """Extract the data on which to run the nodes.

        If the datum is a list of dicts then the data under ``self._input_key`` is
        extracted from each dict and appended to a list which therefore contains all
        the data.

        Args:
            data: A dict or a list of dicts where the data is contained under the key
                ``self._input_key``.

        Returns:
            The data formatted in such a way that it is ready to be processed by the nodes.

        Raises:
            DataProcessorError:
                - If the input datum is not a list or a dict.
                - If the input key of the data processor is not contained in the data.
                - If the data processor receives multiple data with different measurement
                  configurations, i.e. a jagged array.
        """
        if isinstance(data, dict):
            data = [data]

        data_to_process = []
        dims = None
        for datum in data:
            try:
                outcome = datum[self._input_key]
            except TypeError as error:
                raise DataProcessorError(
                    f"{self.__class__.__name__} only extracts data from "
                    f"lists or dicts, received {type(data)}."
                ) from error
            except KeyError as error:
                raise DataProcessorError(
                    f"The input key {self._input_key} was not found in the input datum."
                ) from error

            if self._input_key != "counts":
                outcome = np.asarray(outcome)
                # Validate the data shape.
                if dims is None:
                    dims = outcome.shape
                else:
                    # Each data node creates a full array of all result data.
                    # A jagged array cannot be numerically operated on with numpy.
                    if outcome.shape != dims:
                        raise DataProcessorError(
                            "Input data is likely a mixture of job results with different "
                            "measurement setup. Data processor doesn't support jagged array."
                        )

            data_to_process.append(outcome)

        data_to_process = np.asarray(data_to_process)

        if data_to_process.dtype in (float, int):
            # Likely level1 or below. Return a ufloat array with un-computed std_dev.
            # The output data format is a standard ndarray with dtype=object with
            # arbitrary shape [n_circuits, ...] depending on the measurement setup.
            nominal_values = np.asarray(data_to_process, float)
            with np.errstate(invalid="ignore"):
                # Setting std_devs to NaN will trigger floating point exceptions
                # which we can ignore. See https://stackoverflow.com/q/75656026
                uarray = unp.uarray(
                    nominal_values=nominal_values,
                    std_devs=np.full_like(nominal_values, np.nan, dtype=float),
                )
            return uarray
        else:
            # Likely level2 counts or level2 memory data. Cannot be typecast to ufloat.
            # The output data format is a standard ndarray with dtype=object with
            # shape [n_circuits] or [n_circuits, n_shots]. No error value is bound.
            return np.asarray(data_to_process, dtype=object)

    def __repr__(self):
        """String representation of data processors."""
        names = ", ".join(repr(node) for node in self._nodes)

        return f"{self.__class__.__name__}(input_key={self._input_key}, nodes=[{names}])"

    def __json_encode__(self) -> Dict[str, Any]:
        """Return the config dict for this data processor."""
        return {
            "cls": type(self),
            "input_key": self._input_key,
            "nodes": self._nodes,
        }

    @classmethod
    def __json_decode__(cls, config: Dict[str, Any]) -> "DataProcessor":
        """Initialize a data processor from a config dict."""
        try:
            return cls(input_key=config["input_key"], data_actions=config["nodes"])
        except KeyError as ex:
            raise KeyError("Imperfect configuration data. Cannot load this processor.") from ex
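# A sketch of the extraction behavior on the private ``_data_extraction`` method:
# level-1 (float) memory becomes a ufloat array with NaN standard errors, while
# counts stay as dtype=object (the values are illustrative):
#
#     >>> processor = DataProcessor("memory")
#     >>> extracted = processor._data_extraction([{"memory": [[1.0, 2.0]]}])
#     >>> unp.nominal_values(extracted)  # array([[[1., 2.]]])
#     >>> unp.std_devs(extracted)  # NaN entries until a node computes errors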