Source code for qiskit_nature.second_q.drivers.pyscfd.pyscfdriver

# This code is part of a Qiskit project.
# (C) Copyright IBM 2018, 2023.
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""The PySCF Driver."""

from __future__ import annotations

import inspect
import logging
import os
import tempfile
import warnings
from enum import Enum
from typing import Any

import numpy as np
from qiskit_algorithms.utils.validation import validate_min

from qiskit_nature.units import DistanceUnit
from qiskit_nature.exceptions import QiskitNatureError
from qiskit_nature.second_q.formats.molecule_info import MoleculeInfo
from qiskit_nature.second_q.formats.qcschema import QCSchema
from qiskit_nature.second_q.formats.qcschema_translator import qcschema_to_problem
from qiskit_nature.second_q.operators.symmetric_two_body import fold
from qiskit_nature.second_q.problems import ElectronicBasis, ElectronicStructureProblem
import qiskit_nature.optionals as _optionals
from qiskit_nature.utils import get_einsum

from ..electronic_structure_driver import ElectronicStructureDriver, MethodType, _QCSchemaData

logger = logging.getLogger(__name__)

warnings.filterwarnings("ignore", category=DeprecationWarning, module="pyscf")

[docs]class InitialGuess(Enum): """Initial Guess Enum""" MINAO = "minao" HCORE = "1e" ONE_E = "1e" ATOM = "atom"
[docs]@_optionals.HAS_PYSCF.require_in_instance class PySCFDriver(ElectronicStructureDriver): """A Second-Quantization driver for Qiskit Nature using the PySCF library. References: """ def __init__( self, atom: str | list[str] = "H 0.0 0.0 0.0; H 0.0 0.0 0.735", *, unit: DistanceUnit = DistanceUnit.ANGSTROM, charge: int = 0, spin: int = 0, basis: str = "sto3g", method: MethodType = MethodType.RHF, xc_functional: str = "lda,vwn", xcf_library: str = "libxc", conv_tol: float = 1e-9, max_cycle: int = 50, init_guess: InitialGuess = InitialGuess.MINAO, max_memory: int | None = None, chkfile: str | None = None, ) -> None: """ Args: atom: A string (or a list thereof) denoting the elements and coordinates of all atoms in the system. Two formats are allowed; first, the PySCF-style `XYZ` format which is a list of strings formatted as `{element symbol} {x_coord} {y_coord} {z_coord}`. If a single string is given, the list entries should be joined by `;` as in the example: `H 0.0 0.0 0.0; H 0.0 0.0 0.735`. Second, the `Z-Matrix` format which is explained at 1_. The previous example would be written as `H; H 3 0.735`. See also 2_ for more details on geometry specifications supported by PySCF. unit: Denotes the unit of coordinates. Valid values are given by the ``UnitsType`` enum. charge: The charge of the molecule. spin: The spin of the molecule. In accordance with PySCF's definition, the spin equals :math:`2*S`, where :math:`S` is the total spin number of the molecule. basis: A basis set name as recognized by PySCF (3_), e.g. `sto3g` (the default), `321g`, etc. Note, that more advanced configuration options like a Dictionary or custom basis sets are not allowed for the moment. Refer to 4_ for an extensive list of PySCF's valid basis set names. method: The SCF method type to be used for the PySCF calculation. While the name refers to HF methods, the PySCFDriver also supports KS methods. Refer to the ``MethodType`` for a list of the supported methods. xc_functional: One of the predefined Exchange-Correlation functional names as recognized by PySCF (5_). Defaults to PySCF's default: 'lda,vwn'. __Note: this setting only has an effect when a KS method is chosen for `method`.__ xcf_library: The Exchange-Correlation functional library to be used. This can be either 'libxc' (the default) or 'xcfun'. Depending on this value, a different set of values for `xc_functional` will be available. Refer to 5_ for more details. conv_tol: The SCF convergence tolerance. See 6_ for more details. max_cycle: The maximum number of SCF iterations. See 6_ for more details. init_guess: The method to make the initial guess for the SCF starting point. Valid values are given by the ``InitialGuess`` enum. See 6_ for more details. max_memory: The maximum memory that PySCF should use. See 6_ for more details. chkfile: The path to a PySCF checkpoint file from which to load a previously run calculation. The data stored in this file is assumed to be already converged. Refer to 6_ and 7_ for more details. Raises: QiskitNatureError: An invalid input was supplied. .. _1: .. _2: .. _3: .. _4: .. _5: .. _6: .. _7: """ super().__init__() # pylint: disable=import-error from pyscf import gto, scf # First, ensure that PySCF supports the method PySCFDriver.check_method_supported(method) if isinstance(atom, list): atom = ";".join(atom) elif isinstance(atom, str): atom = atom.replace("\n", ";") else: raise QiskitNatureError( f"`atom` must be either a `str` or `list[str]`, but you passed {atom}" ) validate_min("max_cycle", max_cycle, 1) # we use the property-setter to deal with conversion self.atom = atom self._unit = unit self._charge = charge self._spin = spin self._basis = basis self._method = method self._xc_functional = xc_functional self.xcf_library = xcf_library # validate choice in property setter self._conv_tol = conv_tol self._max_cycle = max_cycle self._init_guess = init_guess.value self._max_memory = max_memory self._chkfile = chkfile self._mol: gto.Mole = None self._calc: scf.HF = None @property def atom(self) -> str: """Returns the atom.""" return self._atom @atom.setter def atom(self, atom: str | list[str]) -> None: """Sets the atom.""" if isinstance(atom, list): atom = ";".join(atom) self._atom = atom.replace("\n", ";") @property def unit(self) -> DistanceUnit: """Returns the unit.""" return self._unit @unit.setter def unit(self, unit: DistanceUnit) -> None: """Sets the unit.""" self._unit = unit @property def charge(self) -> int: """Returns the charge.""" return self._charge @charge.setter def charge(self, charge: int) -> None: """Sets the charge.""" self._charge = charge @property def spin(self) -> int: """Returns the spin.""" return self._spin @spin.setter def spin(self, spin: int) -> None: """Sets the spin.""" self._spin = spin @property def basis(self) -> str: """return basis""" return self._basis @basis.setter def basis(self, value: str) -> None: """set basis""" self._basis = value @property def method(self) -> MethodType: """Returns Hartree-Fock/Kohn-Sham method""" return self._method @method.setter def method(self, value: MethodType) -> None: """Sets Hartree-Fock/Kohn-Sham method""" self._method = value @property def xc_functional(self) -> str: """Returns the Exchange-Correlation functional.""" return self._xc_functional @xc_functional.setter def xc_functional(self, xc_functional: str) -> None: """Sets the Exchange-Correlation functional.""" self._xc_functional = xc_functional @property def xcf_library(self) -> str: """Returns the Exchange-Correlation functional library.""" return self._xcf_library @xcf_library.setter def xcf_library(self, xcf_library: str) -> None: """Sets the Exchange-Correlation functional library.""" if xcf_library not in ("libxc", "xcfun"): raise QiskitNatureError( "Invalid XCF library. It can be either 'libxc' or 'xcfun', not " f"'{xcf_library}'" ) self._xcf_library = xcf_library @property def conv_tol(self) -> float: """Returns the SCF convergence tolerance.""" return self._conv_tol @conv_tol.setter def conv_tol(self, conv_tol: float) -> None: """Sets the SCF convergence tolerance.""" self._conv_tol = conv_tol @property def max_cycle(self) -> int: """Returns the maximum number of SCF iterations.""" return self._max_cycle @max_cycle.setter def max_cycle(self, max_cycle: int) -> None: """Sets the maximum number of SCF iterations.""" self._max_cycle = max_cycle @property def init_guess(self) -> str: """Returns the method for the initial guess.""" return self._init_guess @init_guess.setter def init_guess(self, init_guess: str) -> None: """Sets the method for the initial guess.""" self._init_guess = init_guess @property def max_memory(self) -> int: """Returns the maximum memory allowance for the calculation.""" return self._max_memory @max_memory.setter def max_memory(self, max_memory: int) -> None: """Sets the maximum memory allowance for the calculation.""" self._max_memory = max_memory @property def chkfile(self) -> str: """Returns the path to the PySCF checkpoint file.""" return self._chkfile @chkfile.setter def chkfile(self, chkfile: str) -> None: """Sets the path to the PySCF checkpoint file.""" self._chkfile = chkfile
[docs] @staticmethod def from_molecule( molecule: MoleculeInfo, *, basis: str = "sto3g", method: MethodType = MethodType.RHF, driver_kwargs: dict[str, Any] | None = None, ) -> "PySCFDriver": """Creates a driver from a molecule. Args: molecule: the molecular information. basis: the basis set. method: the SCF method type. driver_kwargs: keyword arguments to be passed to driver. Returns: The constructed driver instance. """ PySCFDriver.check_method_supported(method) kwargs = {} if driver_kwargs: args = inspect.signature(PySCFDriver.__init__).parameters.keys() for key, value in driver_kwargs.items(): if key not in ["self"] and key in args: kwargs[key] = value kwargs["atom"] = [ " ".join(map(str, (name, *coord))) for name, coord in zip(molecule.symbols, molecule.coords) ] kwargs["charge"] = molecule.charge kwargs["spin"] = molecule.multiplicity - 1 kwargs["unit"] = molecule.units kwargs["basis"] = PySCFDriver.to_driver_basis(basis) kwargs["method"] = method return PySCFDriver(**kwargs)
[docs] @staticmethod def to_driver_basis(basis: str) -> str: """Converts basis to a driver acceptable basis. Args: basis: The basis set to be used. Returns: A driver acceptable basis. """ return basis
[docs] @staticmethod def check_method_supported(method: MethodType) -> None: """Checks that PySCF supports this method. Args: method: the SCF method type. Raises: UnsupportMethodError: If the method is not supported. """ # supports all methods pass
[docs] def run(self) -> ElectronicStructureProblem: """Runs the driver to produce a result. Returns: ElectronicStructureProblem produced by the run driver. Raises: QiskitNatureError: if an error during the PySCF setup or calculation occurred. """ self.run_pyscf() return self.to_problem()
def _build_molecule(self) -> None: """Builds the PySCF molecule object. Raises: QiskitNatureError: If building the PySCF molecule object failed. """ # Get config from input parameters # molecule is in PySCF atom string format e.g. "H .0 .0 .0; H .0 .0 0.2" # or in Z-Matrix format e.g. "H; O 1 1.08; H 2 1.08 1 107.5" # other parameters are as per PySCF got.Mole format # pylint: disable=import-error from pyscf import gto from pyscf.lib import logger as pylogger from pyscf.lib import param atom = self._check_molecule_format(self.atom) if self._max_memory is None: self._max_memory = param.MAX_MEMORY try: verbose = pylogger.QUIET output = None if logger.isEnabledFor(logging.DEBUG): verbose = pylogger.INFO file, output = tempfile.mkstemp(suffix=".log") os.close(file) self._mol = gto.Mole( atom=atom, unit=self._unit.value, basis=self._basis, max_memory=self._max_memory, verbose=verbose, output=output, ) self._mol.symmetry = False self._mol.charge = self._charge self._mol.spin = self._spin if output is not None: self._process_pyscf_log(output) try: os.remove(output) except Exception: # pylint: disable=broad-except pass except Exception as exc: raise QiskitNatureError("Failed to build the PySCF Molecule object.") from exc @staticmethod def _check_molecule_format(val: str) -> str | list[str]: """Ensures the molecule coordinates are in XYZ format. This utility automatically converts a Z-matrix coordinate format into XYZ coordinates. Args: val: the atomic coordinates. Raises: QiskitNatureError: If the provided coordinate are badly formatted. Returns: The coordinates in XYZ format. """ # pylint: disable=import-error from pyscf import gto atoms = [x.strip() for x in val.split(";")] if atoms is None or len(atoms) < 1: raise QiskitNatureError("Molecule format error: " + val) # An xyz format has 4 parts in each atom, if not then do zmatrix convert # Allows dummy atoms, using symbol 'X' in zmatrix format for coord computation to xyz parts = [x.strip() for x in atoms[0].split()] if len(parts) != 4: try: newval = [] for entry in gto.mole.from_zmatrix(val): if entry[0].upper() != "X": newval.append(entry) return newval except Exception as exc: raise QiskitNatureError("Failed to convert atom string: " + val) from exc return val
[docs] def run_pyscf(self) -> None: """Runs the PySCF calculation. This method is part of the public interface to allow the user to easily overwrite it in a subclass to further tailor the behavior to some specific use case. Raises: QiskitNatureError: If an invalid HF method type was supplied. """ self._build_molecule() # pylint: disable=import-error from pyscf import dft, scf from pyscf.lib import chkfile as lib_chkfile method_name = None method_cls = None try: # attempt to gather the SCF-method class specified by the MethodType method_name = self.method.value.upper() method_cls = getattr(scf, method_name) except AttributeError as exc: raise QiskitNatureError(f"Failed to load {method_name} HF object.") from exc self._calc = method_cls(self._mol) if method_name in ("RKS", "ROKS", "UKS"): self._calc._numint.libxc = getattr(dft, self.xcf_library) self._calc.xc = self.xc_functional if self._chkfile is not None and os.path.exists(self._chkfile): self._calc.__dict__.update(lib_chkfile.load(self._chkfile, "scf"))"PySCF loaded from chkfile e(hf): %s", self._calc.e_tot) else: self._calc.conv_tol = self._conv_tol self._calc.max_cycle = self._max_cycle self._calc.init_guess = self._init_guess self._calc.kernel() "PySCF kernel() converged: %s, e(hf): %s", self._calc.converged, self._calc.e_tot, )
[docs] def to_qcschema(self, *, include_dipole: bool = True) -> QCSchema: # pylint: disable=import-error from pyscf import __version__ as pyscf_version from pyscf import ao2mo, gto from import dump_mat einsum_func, _ = get_einsum() data = _QCSchemaData() data.overlap = self._calc.get_ovlp() data.mo_coeff, data.mo_coeff_b = self._expand_mo_object( self._calc.mo_coeff, array_dimension=3 ) data.mo_energy, data.mo_energy_b = self._expand_mo_object(self._calc.mo_energy) data.mo_occ, data.mo_occ_b = self._expand_mo_object(self._calc.mo_occ) if logger.isEnabledFor(logging.DEBUG): # Add some more to PySCF output... # First analyze() which prints extra information about MO energy and occupation self._mol.stdout.write("\n") self._calc.analyze() # Now labelled orbitals for contributions to the MOs for s,p,d etc of each atom self._mol.stdout.write("\n\n--- Alpha Molecular Orbitals ---\n\n") dump_mat.dump_mo(self._mol, data.mo_coeff, digits=7, start=1) if data.mo_coeff_b is not None: self._mol.stdout.write("\n--- Beta Molecular Orbitals ---\n\n") dump_mat.dump_mo(self._mol, data.mo_coeff_b, digits=7, start=1) self._mol.stdout.flush() data.hij = self._calc.get_hcore() data.hij_mo =, data.hij), data.mo_coeff) if data.mo_coeff_b is not None: data.hij_mo_b =, data.hij), data.mo_coeff_b) data.eri = self._mol.intor("int2e", aosym=8) data.eri_mo = fold(ao2mo.full(self._mol, data.mo_coeff, aosym=4)) if data.mo_coeff_b is not None: data.eri_mo_bb = fold(ao2mo.full(self._mol, data.mo_coeff_b, aosym=4)) data.eri_mo_ba = fold( ao2mo.general( self._mol, [data.mo_coeff_b, data.mo_coeff_b, data.mo_coeff, data.mo_coeff], aosym=4, ) ) data.e_nuc = gto.mole.energy_nuc(self._mol) data.e_ref = self._calc.e_tot data.symbols = [self._mol.atom_pure_symbol(i) for i in range(self._mol.natm)] data.coords = self._mol.atom_coords(unit="Bohr").ravel().tolist() data.multiplicity = self._spin + 1 data.charge = self._charge data.masses = list(self._mol.atom_mass_list()) data.method = self._method.value.upper() data.basis = self._basis data.creator = "PySCF" data.version = pyscf_version data.nbasis = self._mol.nbas data.nmo = self._mol.nao data.nalpha = self._mol.nelec[0] data.nbeta = self._mol.nelec[1] if include_dipole: self._mol.set_common_orig((0, 0, 0)) ao_dip = self._mol.intor_symmetric("int1e_r", comp=3) d_m = self._calc.make_rdm1(self._calc.mo_coeff, self._calc.mo_occ) if not (isinstance(d_m, np.ndarray) and d_m.ndim == 2): d_m = d_m[0] + d_m[1] elec_dip = np.negative(einsum_func("xij,ji->x", ao_dip, d_m).real) elec_dip = np.round(elec_dip, decimals=8) nucl_dip = einsum_func("i,ix->x", self._mol.atom_charges(), self._mol.atom_coords()) nucl_dip = np.round(nucl_dip, decimals=8) ref_dip = nucl_dip + elec_dip"HF Electronic dipole moment: %s", elec_dip)"Nuclear dipole moment: %s", nucl_dip)"Total dipole moment: %s", ref_dip) data.dip_nuc = nucl_dip data.dip_ref = ref_dip data.dip_x = ao_dip[0] data.dip_y = ao_dip[1] data.dip_z = ao_dip[2] data.dip_mo_x_a =, data.dip_x), data.mo_coeff) data.dip_mo_y_a =, data.dip_y), data.mo_coeff) data.dip_mo_z_a =, data.dip_z), data.mo_coeff) if data.mo_coeff_b is not None: data.dip_mo_x_b =, data.dip_x), data.mo_coeff_b) data.dip_mo_y_b =, data.dip_y), data.mo_coeff_b) data.dip_mo_z_b =, data.dip_z), data.mo_coeff_b) return self._to_qcschema(data, include_dipole=include_dipole)
[docs] def to_problem( self, *, basis: ElectronicBasis = ElectronicBasis.MO, include_dipole: bool = True, ) -> ElectronicStructureProblem: qcschema = self.to_qcschema(include_dipole=include_dipole) problem = qcschema_to_problem(qcschema, basis=basis, include_dipole=include_dipole) if include_dipole and is not None: = True return problem
def _expand_mo_object( self, mo_object: tuple[np.ndarray | None, np.ndarray | None] | np.ndarray, array_dimension: int = 2, ) -> tuple[np.ndarray, np.ndarray]: """Expands the molecular orbital object into alpha- and beta-spin components. Since PySCF 1.6.2, the alpha and beta components are no longer stored as a tuple but as a multi-dimensional numpy array. This utility takes care of differentiating these cases. Args: mo_object: the molecular orbital object to expand. array_dimension: This argument specifies the dimension of the numpy array (if a tuple is not encountered). Making this configurable permits this function to be used to expand both, MO coefficients (3D array) and MO energies (2D array). Returns: The (alpha, beta) tuple of MO data. """ if isinstance(mo_object, tuple): return mo_object if len(mo_object.shape) == array_dimension: return mo_object[0], mo_object[1] return mo_object, None def _process_pyscf_log(self, logfile: str) -> None: """Processes a PySCF logfile. Args: logfile: the path of the PySCF logfile. """ with open(logfile, "r", encoding="utf8") as file: contents = file.readlines() for i, content in enumerate(contents): if content.startswith("System:"): contents = contents[i:] break logger.debug("PySCF processing messages log:\n%s", "".join(contents))