Source code for qiskit_experiments.framework.analysis_result_table
# This code is part of Qiskit.## (C) Copyright IBM 2023.## This code is licensed under the Apache License, Version 2.0. You may# obtain a copy of this license in the LICENSE.txt file in the root directory# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.## Any modifications or derivative works of this code must retain this# copyright notice, and modified files need to carry a notice indicating# that they have been altered from the originals."""A table-like dataset for analysis results."""from__future__importannotationsimportreimportthreadingimportuuidimportwarningsfromtypingimportAnyimportnumpyasnpimportpandasaspdfromqiskit_experiments.database_service.exceptionsimportExperimentEntryNotFound
class AnalysisResultTable:
    """A table-like dataset for analysis results.

    Default table columns are defined in the class attribute :attr:`.DEFAULT_COLUMNS`.
    The table is automatically expanded when an extra key is included in the
    input dictionary data. Missing columns in the input data are filled with a null value.

    Table row index (i.e. entry ID) is created by truncating the result_id string which
    is basically a UUID-4 string. A random unique ID is generated when the result_id
    is missing in the input data.

    Any operation on the table value via the instance methods guarantees thread safety.
    """

    # Full UUID-4 layout; the first eight characters become the row index.
    VALID_ID_REGEX = re.compile(r"\A(?P<short_id>\w{8})-\w{4}-\w{4}-\w{4}-\w{12}\Z")

    DEFAULT_COLUMNS = [
        "name",
        "experiment",
        "components",
        "value",
        "quality",
        "experiment_id",
        "result_id",
        "tags",
        "backend",
        "run_time",
        "created_time",
    ]

    def __init__(self):
        """Create new dataset."""
        self._data = pd.DataFrame(columns=self.DEFAULT_COLUMNS)
        # Reentrant lock: public methods call private helpers that lock again.
        self._lock = threading.RLock()

    @classmethod
    def from_dataframe(cls, data: pd.DataFrame) -> "AnalysisResultTable":
        """Create new dataset with existing dataframe.

        Args:
            data: Bare dataframe object.

        Returns:
            A new instance of the class this method is called on.
        """
        # Use ``cls`` so that subclasses get an instance of their own type.
        instance = cls()
        instance._data = pd.concat([instance._data, data])
        return instance

    @property
    def dataframe(self) -> pd.DataFrame:
        """Dataframe object of analysis results (shallow copy)."""
        with self._lock:
            return self._data.copy(deep=False)

    @property
    def result_ids(self) -> list[str]:
        """Result IDs in current dataset."""
        with self._lock:
            return list(self._data.result_id)

    @property
    def columns(self) -> list[str]:
        """All columns in current dataset."""
        with self._lock:
            return list(self._data.columns)

    def add_data(
        self,
        *,
        result_id: str | None = None,
        **data,
    ) -> str:
        """Add new data to this dataset.

        Args:
            result_id: A unique UUID-4 string for this data entry.
                The full string is used to identify the data in the experiment service
                database, and a short ID is created by truncating this string as a
                dataframe index.
            data: Arbitrary key-value pairs representing a single data entry.
                Missing values for default columns are filled with ``None``.

        Returns:
            Assigned analysis result ID (the short table index).

        Raises:
            ValueError: When the short index derived from ``result_id`` already
                exists in the table.
        """
        result_id = result_id or self._create_unique_hash()

        if matched := re.match(self.VALID_ID_REGEX, result_id):
            # Short unique index is generated from result id.
            # Showing full result id unnecessary occupies horizontal space of the html table.
            # This mechanism is inspired by the github commit hash.
            index = matched.group("short_id")
        else:
            warnings.warn(
                f"Result ID of {result_id} is not a valid UUID-4 string. ",
                UserWarning,
            )
            index = result_id[:8]

        with self._lock:
            if index in self._data.index:
                raise ValueError(
                    f"Table entry index {index} already exists. "
                    "Please use another ID to avoid index collision."
                )

            # Add missing columns to the table
            if missing := data.keys() - set(self._data.columns):
                for k in data:
                    # Order sensitive: iterate the input dict, not the unordered set.
                    if k in missing:
                        loc = len(self._data.columns)
                        self._data.insert(loc, k, value=None)

            # A hack to avoid unwanted dtype update. Appending new row with .loc indexer
            # performs enlargement and implicitly changes dtype. This often induces a
            # confusion of NaN (numeric container) and None (object container) for
            # missing values. Filling a row with None values before assigning actual
            # values can keep column dtype, but this behavior might change in future
            # pandas version.
            # https://github.com/pandas-dev/pandas/issues/6485
            # Also see test.framework.test_data_table.TestBaseTable.test_type_*
            self._data.loc[index, :] = [None] * len(self._data.columns)
            template = dict.fromkeys(self.columns, None)
            template["result_id"] = result_id
            template.update(data)
            self._data.loc[index, :] = pd.array(list(template.values()), dtype=object)

        return index

    def get_data(
        self,
        key: str | int | slice | None = None,
        columns: str | list[str] = "default",
    ) -> pd.DataFrame:
        """Get matched entries from this dataset.

        Args:
            key: Identifier of the entry of interest. An integer or slice selects
                rows by position, a string is matched against the table index first
                and then against the ``name`` column. ``None`` selects every row.
            columns: List of names or a policy (default, minimal, all)
                of data columns included in the returned data frame.

        Returns:
            Matched entries in a single data frame.
        """
        if key is None:
            with self._lock:
                out = self._data.copy()
        else:
            uids = self._resolve_key(key)
            with self._lock:
                out = self._data.filter(items=uids, axis=0)
        if columns != "all":
            valid_columns = self._resolve_columns(columns)
            out = out[valid_columns]
        return out

    def del_data(
        self,
        key: str | int,
    ) -> list[str]:
        """Delete matched entries from this dataset.

        Args:
            key: Identifier of the entry of interest.

        Returns:
            Deleted analysis result IDs.
        """
        uids = self._resolve_key(key)
        with self._lock:
            self._data.drop(uids, inplace=True)
        return uids

    def clear(self):
        """Clear all table entries."""
        with self._lock:
            self._data = pd.DataFrame(columns=self.DEFAULT_COLUMNS)

    def copy(self):
        """Create new thread-safe instance with the same data.

        .. note::
            This returns a new object with shallow copied data frame.
        """
        with self._lock:
            # Hold the lock so that no data can be added
            new_instance = self.__class__()
            new_instance._data = self._data.copy(deep=False)
        return new_instance

    def _create_unique_hash(self) -> str:
        """Generate a UUID-4 string whose 8-character prefix is not yet an index.

        Raises:
            RuntimeError: When no unused prefix is found within 1000 trials.
        """
        with self._lock:
            # Bounded retry. The previous ``while n < 1000`` never incremented
            # ``n`` and therefore could loop forever.
            for _ in range(1000):
                tmp_id = str(uuid.uuid4())
                if tmp_id[:8] not in self._data.index:
                    return tmp_id
        raise RuntimeError(
            "Unique result_id string cannot be prepared for this table within 1000 trials. "
            "Reduce number of entries, or manually provide a unique result_id."
        )

    def _resolve_columns(self, columns: str | list[str]):
        """Translate a column policy name or explicit list into column names.

        Columns that are not part of :attr:`DEFAULT_COLUMNS` are always appended
        to the "default" and "minimal" selections. Unknown names in an explicit
        list are dropped with a ``UserWarning``.

        Raises:
            ValueError: When ``columns`` is a string other than "default" or "minimal".
        """
        with self._lock:
            extra_columns = [c for c in self._data.columns if c not in self.DEFAULT_COLUMNS]
            if columns == "default":
                return [
                    "name",
                    "experiment",
                    "components",
                    "value",
                    "quality",
                    "backend",
                    "run_time",
                ] + extra_columns
            if columns == "minimal":
                return [
                    "name",
                    "components",
                    "value",
                    "quality",
                ] + extra_columns
            if not isinstance(columns, str):
                out = []
                for column in columns:
                    if column in self._data.columns:
                        out.append(column)
                    else:
                        warnings.warn(
                            f"Specified column {column} does not exist in this table.",
                            UserWarning,
                        )
                return out
        raise ValueError(
            f"Column group {columns} is not valid name. Use either 'all', 'default', 'minimal'."
        )

    def _resolve_key(self, key: int | slice | str) -> list[str]:
        """Convert a user-facing key into a list of table index values.

        Raises:
            ExperimentEntryNotFound: When nothing in the table matches ``key``.
            TypeError: When ``key`` is not an int, slice, or str.
        """
        with self._lock:
            if isinstance(key, int):
                size = len(self._data)
                # Check both bounds so that an out-of-range negative position is
                # reported the same way as an out-of-range positive one.
                if not -size <= key < size:
                    raise ExperimentEntryNotFound(f"Analysis result {key} not found.")
                return [self._data.index[key]]
            if isinstance(key, slice):
                keys = list(self._data.index)[key]
                if len(keys) == 0:
                    raise ExperimentEntryNotFound(f"Analysis result {key} not found.")
                return keys
            if isinstance(key, str):
                if key in self._data.index:
                    return [key]
                # This key is name of entry
                loc = self._data["name"] == key
                if not any(loc):
                    raise ExperimentEntryNotFound(f"Analysis result {key} not found.")
                return list(self._data.index[loc])

        raise TypeError(f"Invalid key type {type(key)}. The key must be either int, slice, or str.")

    def __len__(self):
        """Number of entries in the table."""
        with self._lock:
            return len(self._data)

    def __contains__(self, item):
        """Whether ``item`` is a row index (short ID) of the table."""
        with self._lock:
            return item in self._data.index

    def __json_encode__(self) -> dict[str, Any]:
        """Return a dictionary with the class tag and the rows keyed by index."""
        with self._lock:
            return {
                "class": "AnalysisResultTable",
                "data": self._data.to_dict(orient="index"),
            }

    @classmethod
    def __json_decode__(cls, value: dict[str, Any]) -> "AnalysisResultTable":
        """Rebuild a table from the dictionary made by :meth:`__json_encode__`.

        Raises:
            ValueError: When the "class" entry of ``value`` is not "AnalysisResultTable".
        """
        if not value.get("class", None) == "AnalysisResultTable":
            raise ValueError("JSON decoded value for AnalysisResultTable is not valid class type.")

        # Bypass __init__ because the data frame is rebuilt from the payload.
        instance = object.__new__(cls)
        instance._lock = threading.RLock()
        instance._data = pd.DataFrame.from_dict(
            data=value.get("data", {}),
            orient="index",
        ).replace({np.nan: None})
        return instance

    def __getstate__(self):
        """Return the instance state without the lock."""
        state = self.__dict__.copy()
        del state["_lock"]
        return state

    def __setstate__(self, state):
        """Restore the instance state and create a fresh lock."""
        self.__dict__.update(state)
        self._lock = threading.RLock()