# (C) Copyright IBM 2024.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.
"""Utilities for handling bitstrings."""
from __future__ import annotations
import math
from collections.abc import Sequence
from enum import Enum, auto
from typing import cast
import numpy as np
from pyscf.fci import cistring
from typing_extensions import deprecated
[docs]
class BitstringType(Enum):
"""Enumeration for indicating the data type of bitstrings.
String:
["0101", "0110"]
Integer:
[5, 6]
Bit array:
[[False, True, False, True],
[False, True, True, False]]
"""
STRING = auto()
"""String."""
INT = auto()
"""Integer."""
BIT_ARRAY = auto()
"""Bit array."""
[docs]
@deprecated(
"ffsim.indices_to_strings is deprecated. "
"Instead, use ffsim.addresses_to_strings."
)
def indices_to_strings(
indices: Sequence[int] | np.ndarray,
norb: int,
nelec: int | tuple[int, int],
concatenate: bool = True,
bitstring_type: BitstringType = BitstringType.STRING,
):
"""Convert state vector indices to bitstrings.
.. warning::
This function is deprecated. Use :class:`ffsim.addresses_to_strings` instead.
Example:
.. code::
import ffsim
norb = 3
nelec = (2, 1)
dim = ffsim.dim(norb, nelec)
ffsim.indices_to_strings(range(dim), norb, nelec)
# output:
# ['001011',
# '010011',
# '100011',
# '001101',
# '010101',
# '100101',
# '001110',
# '010110',
# '100110']
Args:
indices: The state vector indices to convert to bitstrings.
norb: The number of spatial orbitals.
nelec: Either a single integer representing the number of fermions for a
spinless system, or a pair of integers storing the numbers of spin alpha
and spin beta fermions.
bitstring_type: The desired type of bitstring output.
concatenate: Whether to concatenate the spin-alpha and spin-beta parts of the
bitstrings. If True, then a single list of concatenated bitstrings is
returned. The strings are concatenated in the order :math:`s_b s_a`,
that is, the alpha string appears on the right.
If False, then two lists are returned, ``(strings_a, strings_b)``. Note that
the list of alpha strings appears first, that is, on the left.
In the spinless case (when `nelec` is an integer), this argument is ignored.
"""
if isinstance(nelec, int):
# Spinless case
return convert_bitstring_type(
list(cistring.addrs2str(norb=norb, nelec=nelec, addrs=indices)),
input_type=BitstringType.INT,
output_type=bitstring_type,
length=norb,
)
# Spinful case
n_alpha, n_beta = nelec
dim_b = math.comb(norb, n_beta)
indices_a, indices_b = np.divmod(indices, dim_b)
strings_a = convert_bitstring_type(
list(cistring.addrs2str(norb=norb, nelec=n_alpha, addrs=indices_a)),
input_type=BitstringType.INT,
output_type=bitstring_type,
length=norb,
)
strings_b = convert_bitstring_type(
list(cistring.addrs2str(norb=norb, nelec=n_beta, addrs=indices_b)),
input_type=BitstringType.INT,
output_type=bitstring_type,
length=norb,
)
if concatenate:
return concatenate_bitstrings(
strings_a, strings_b, bitstring_type=bitstring_type, length=norb
)
return strings_a, strings_b
def convert_bitstring_type(
strings: Sequence[str] | Sequence[int] | np.ndarray,
input_type: BitstringType,
output_type: BitstringType,
length: int,
):
"""Convert bitstrings from one type to another.
Args:
strings: The bitstrings.
input_type: The bitstring type of the input.
output_type: The desired bitstring type of the output.
length: The length of a bitstring.
Returns:
The converted bitstrings.
"""
if input_type is output_type:
return strings
if input_type is BitstringType.STRING:
strings = cast(list[str], strings)
if output_type is BitstringType.INT:
return [int(s, base=2) for s in strings]
if output_type is BitstringType.BIT_ARRAY:
if not strings:
return np.empty((0, length), dtype=bool)
return np.array([[b == "1" for b in s] for s in strings])
if input_type is BitstringType.INT:
strings = cast(Sequence[int], strings)
if output_type is BitstringType.STRING:
return [f"{string:0{length}b}" for string in strings]
if output_type is BitstringType.BIT_ARRAY:
if not strings:
return np.empty((0, length), dtype=bool)
return np.array(
[[s >> i & 1 for i in range(length - 1, -1, -1)] for s in strings],
dtype=bool,
)
if input_type is BitstringType.BIT_ARRAY:
strings = cast(np.ndarray, strings)
if output_type is BitstringType.STRING:
return ["".join("1" if bit else "0" for bit in bits) for bits in strings]
if output_type is BitstringType.INT:
return [
sum(bit * (1 << i) for i, bit in enumerate(bits[::-1]))
for bits in strings
]
def restrict_bitstrings(
strings: list[str] | list[int] | np.ndarray,
indices: Sequence[int],
bitstring_type: BitstringType,
):
"""Restrict bitstrings to a subset of bitstring indices.
Args:
strings: The bitstrings.
indices: The indices of the bitstrings to restrict to.
bitstring_type: The bitstring type of the input.
Returns:
The restricted bitstrings.
"""
if bitstring_type is BitstringType.STRING:
return [
"".join(s[-1 - i] for i in indices[::-1]) for s in cast(list[str], strings)
]
if bitstring_type is BitstringType.INT:
return [
sum((s >> index & 1) * (1 << i) for i, index in enumerate(indices))
for s in cast(list[int], strings)
]
if bitstring_type is BitstringType.BIT_ARRAY:
return cast(np.ndarray, strings)[:, [-1 - i for i in indices[::-1]]]
def concatenate_bitstrings(
strings_a: list[str] | list[int] | np.ndarray,
strings_b: list[str] | list[int] | np.ndarray,
bitstring_type: BitstringType,
length: int,
):
"""Concatenate bitstrings.
Bitstrings are concatenated in "little endian" order. That is, a bitstring from
the first list will appear on the right side of the concatenated bitstring.
Args:
strings_a: The first list of bitstrings.
strings_b: The second list of bitstrings.
bitstring_type: The bitstring type of the input.
length: The length of a bitstring in the first list of bitstrings.
Returns:
The concatenated bitstrings.
"""
if bitstring_type is BitstringType.STRING:
return ["".join((s_b, s_a)) for s_a, s_b in zip(strings_a, strings_b)]
if bitstring_type is BitstringType.INT:
return [(s_b << length) + s_a for s_a, s_b in zip(strings_a, strings_b)]
if bitstring_type is BitstringType.BIT_ARRAY:
return np.concatenate([strings_b, strings_a], axis=1)
[docs]
@deprecated(
"ffsim.strings_to_indices is deprecated. "
"Instead, use ffsim.strings_to_addresses."
)
def strings_to_indices(
strings: Sequence[str], norb: int, nelec: int | tuple[int, int]
) -> np.ndarray:
"""Convert bitstrings to state vector indices.
.. warning::
This function is deprecated. Use :class:`ffsim.strings_to_addresses` instead.
Example:
.. code::
import ffsim
norb = 3
nelec = (2, 1)
dim = ffsim.dim(norb, nelec)
ffsim.strings_to_indices(
[
"001011",
"010011",
"100011",
"001101",
"010101",
"100101",
"001110",
"010110",
"100110",
],
norb,
nelec,
)
# output:
# array([0, 1, 2, 3, 4, 5, 6, 7, 8], dtype=int32)
"""
if isinstance(nelec, int):
return cistring.strs2addr(
norb=norb, nelec=nelec, strings=[int(s, base=2) for s in strings]
)
n_alpha, n_beta = nelec
strings_a = [int(s[norb:], base=2) for s in strings]
strings_b = [int(s[:norb], base=2) for s in strings]
addrs_a = cistring.strs2addr(norb=norb, nelec=n_alpha, strings=strings_a)
addrs_b = cistring.strs2addr(norb=norb, nelec=n_beta, strings=strings_b)
dim_b = math.comb(norb, n_beta)
return addrs_a * dim_b + addrs_b
[docs]
def addresses_to_strings(
addresses: Sequence[int] | np.ndarray,
norb: int,
nelec: int | tuple[int, int],
concatenate: bool = True,
bitstring_type: BitstringType = BitstringType.INT,
):
"""Convert state vector addresses to bitstrings.
Example:
.. code::
import ffsim
norb = 3
nelec = (2, 1)
dim = ffsim.dim(norb, nelec)
strings = ffsim.addresses_to_strings(range(5), norb, nelec)
print(strings) # prints [11, 19, 35, 13, 21]
strings = ffsim.addresses_to_strings(
range(5), norb, nelec, bitstring_type=ffsim.BitstringType.STRING
)
print(strings) # prints ['001011', '010011', '100011', '001101', '010101']
strings = ffsim.addresses_to_strings(
range(5), norb, nelec, bitstring_type=ffsim.BitstringType.BIT_ARRAY
)
print(strings)
# prints
# [[False False True False True True]
# [False True False False True True]
# [ True False False False True True]
# [False False True True False True]
# [False True False True False True]]
Args:
addresses: The state vector addresses to convert to bitstrings.
norb: The number of spatial orbitals.
nelec: Either a single integer representing the number of fermions for a
spinless system, or a pair of integers storing the numbers of spin alpha
and spin beta fermions.
bitstring_type: The desired type of bitstring output.
concatenate: Whether to concatenate the spin-alpha and spin-beta parts of the
bitstrings. If True, then a single list of concatenated bitstrings is
returned. The strings are concatenated in the order :math:`s_b s_a`,
that is, the alpha string appears on the right.
If False, then two lists are returned, ``(strings_a, strings_b)``. Note that
the list of alpha strings appears first, that is, on the left.
In the spinless case (when `nelec` is an integer), this argument is ignored.
Returns:
The bitstrings. The type of the output depends on `bitstring_type` and
`concatenate`.
"""
if isinstance(nelec, int):
# Spinless case
return convert_bitstring_type(
[
int(s)
for s in cistring.addrs2str(norb=norb, nelec=nelec, addrs=addresses)
],
input_type=BitstringType.INT,
output_type=bitstring_type,
length=norb,
)
# Spinful case
n_alpha, n_beta = nelec
dim_b = math.comb(norb, n_beta)
indices_a, indices_b = np.divmod(addresses, dim_b)
strings_a = convert_bitstring_type(
[int(s) for s in cistring.addrs2str(norb=norb, nelec=n_alpha, addrs=indices_a)],
input_type=BitstringType.INT,
output_type=bitstring_type,
length=norb,
)
strings_b = convert_bitstring_type(
[int(s) for s in cistring.addrs2str(norb=norb, nelec=n_beta, addrs=indices_b)],
input_type=BitstringType.INT,
output_type=bitstring_type,
length=norb,
)
if concatenate:
return concatenate_bitstrings(
strings_a, strings_b, bitstring_type=bitstring_type, length=norb
)
return strings_a, strings_b
[docs]
def strings_to_addresses(
strings: Sequence[int] | Sequence[str] | np.ndarray,
norb: int,
nelec: int | tuple[int, int],
) -> np.ndarray:
"""Convert bitstrings to state vector addresses.
Example:
.. code::
import numpy as np
import ffsim
norb = 3
nelec = (2, 1)
dim = ffsim.dim(norb, nelec)
addresses = ffsim.strings_to_addresses(
[
0b001011,
0b010101,
0b100101,
0b010110,
0b100110,
],
norb,
nelec,
)
print(addresses) # prints [0 4 5 7 8]
addresses = ffsim.strings_to_addresses(
[
"001011",
"010101",
"100101",
"010110",
"100110",
],
norb,
nelec,
)
print(addresses) # prints [0 4 5 7 8]
addresses = ffsim.strings_to_addresses(
np.array(
[
[False, False, True, False, True, True],
[False, True, False, True, False, True],
[True, False, False, True, False, True],
[False, True, False, True, True, False],
[True, False, False, True, True, False],
]
),
norb,
nelec,
)
print(addresses) # prints [0 4 5 7 8]
Args:
strings: The bitstrings to convert to state vector addresses.
Can be a list of strings, a list of integers, or a Numpy array of bits.
norb: The number of spatial orbitals.
nelec: Either a single integer representing the number of fermions for a
spinless system, or a pair of integers storing the numbers of spin alpha
and spin beta fermions.
Returns:
The state vector addresses, as a Numpy array of integers.
"""
if not len(strings):
return np.array([])
if isinstance(strings, np.ndarray):
bitstring_type = BitstringType.BIT_ARRAY
elif isinstance(strings[0], str):
bitstring_type = BitstringType.STRING
else:
bitstring_type = BitstringType.INT
strings = convert_bitstring_type(
strings, input_type=bitstring_type, output_type=BitstringType.INT, length=norb
)
if isinstance(nelec, int):
return cistring.strs2addr(norb=norb, nelec=nelec, strings=strings)
n_alpha, n_beta = nelec
strings = np.asarray(strings)
strings_a = strings & ((1 << norb) - 1)
strings_b = strings >> norb
addrs_a = cistring.strs2addr(norb=norb, nelec=n_alpha, strings=strings_a)
addrs_b = cistring.strs2addr(norb=norb, nelec=n_beta, strings=strings_b)
dim_b = math.comb(norb, n_beta)
return addrs_a * dim_b + addrs_b