Source code for ffsim.states.bitstring

# (C) Copyright IBM 2024.
#
# This code is licensed under the Apache License, Version 2.0. You may
# obtain a copy of this license in the LICENSE.txt file in the root directory
# of this source tree or at http://www.apache.org/licenses/LICENSE-2.0.
#
# Any modifications or derivative works of this code must retain this
# copyright notice, and modified files need to carry a notice indicating
# that they have been altered from the originals.

"""Utilities for handling bitstrings."""

from __future__ import annotations

import math
from collections.abc import Sequence
from enum import Enum, auto
from typing import cast

import numpy as np
from pyscf.fci import cistring



[docs]
class BitstringType(Enum):
    """Enumeration for indicating the data type of bitstrings.

    String:
        ["0101", "0110"]

    Integer:
        [5, 6]

    Bit array:
        [[False, True, False, True],
         [False, True, True, False]]
    """

    STRING = auto()
    """String."""

    INT = auto()
    """Integer."""

    BIT_ARRAY = auto()
    """Bit array."""



def convert_bitstring_type(
    strings: Sequence[str] | Sequence[int] | np.ndarray,
    input_type: BitstringType,
    output_type: BitstringType,
    length: int,
):
    """Convert bitstrings from one type to another.

    Args:
        strings: The bitstrings.
        input_type: The bitstring type of the input.
        output_type: The desired bitstring type of the output.
        length: The length of a bitstring.

    Returns:
        The converted bitstrings.
    """
    if not isinstance(input_type, BitstringType):
        raise TypeError(
            "input_type must be ffsim.BitstringType.STRING, ffsim.BitstringType.INT, "
            f"or ffsim.BitstringType.BIT_ARRAY. Got {input_type}."
        )
    if not isinstance(output_type, BitstringType):
        raise TypeError(
            "output_type must be ffsim.BitstringType.STRING, ffsim.BitstringType.INT, "
            f"or ffsim.BitstringType.BIT_ARRAY. Got {output_type}."
        )

    if input_type is output_type:
        return strings

    if input_type is BitstringType.STRING:
        strings = cast(list[str], strings)

        if output_type is BitstringType.INT:
            return [int(s, base=2) for s in strings]

        if output_type is BitstringType.BIT_ARRAY:
            if not strings:
                return np.empty((0, length), dtype=bool)
            return np.array([[b == "1" for b in s] for s in strings])

    if input_type is BitstringType.INT:
        strings = cast(Sequence[int], strings)

        if output_type is BitstringType.STRING:
            return [f"{string:0{length}b}" for string in strings]

        if output_type is BitstringType.BIT_ARRAY:
            if not strings:
                return np.empty((0, length), dtype=bool)
            return np.array(
                [[s >> i & 1 for i in range(length - 1, -1, -1)] for s in strings],
                dtype=bool,
            )

    if input_type is BitstringType.BIT_ARRAY:
        strings = cast(np.ndarray, strings)

        if output_type is BitstringType.STRING:
            return ["".join("1" if bit else "0" for bit in bits) for bits in strings]

        if output_type is BitstringType.INT:
            return [
                sum(bit * (1 << i) for i, bit in enumerate(bits[::-1]))
                for bits in strings
            ]


def restrict_bitstrings(
    strings: list[str] | list[int] | np.ndarray,
    indices: Sequence[int],
    bitstring_type: BitstringType,
):
    """Restrict bitstrings to a subset of bitstring indices.

    Args:
        strings: The bitstrings.
        indices: The indices of the bitstrings to restrict to.
        bitstring_type: The bitstring type of the input.

    Returns:
        The restricted bitstrings.
    """
    if bitstring_type is BitstringType.STRING:
        return [
            "".join(s[-1 - i] for i in indices[::-1]) for s in cast(list[str], strings)
        ]
    if bitstring_type is BitstringType.INT:
        return [
            sum((s >> index & 1) * (1 << i) for i, index in enumerate(indices))
            for s in cast(list[int], strings)
        ]
    if bitstring_type is BitstringType.BIT_ARRAY:
        return cast(np.ndarray, strings)[:, [-1 - i for i in indices[::-1]]]


def concatenate_bitstrings(
    strings_a: list[str] | list[int] | np.ndarray,
    strings_b: list[str] | list[int] | np.ndarray,
    bitstring_type: BitstringType,
    length: int,
):
    """Concatenate bitstrings.

    Bitstrings are concatenated in "little endian" order. That is, a bitstring from
    the first list will appear on the right side of the concatenated bitstring.

    Args:
        strings_a: The first list of bitstrings.
        strings_b: The second list of bitstrings.
        bitstring_type: The bitstring type of the input.
        length: The length of a bitstring in the first list of bitstrings.

    Returns:
        The concatenated bitstrings.
    """
    if bitstring_type is BitstringType.STRING:
        return ["".join((s_b, s_a)) for s_a, s_b in zip(strings_a, strings_b)]
    if bitstring_type is BitstringType.INT:
        return [(s_b << length) + s_a for s_a, s_b in zip(strings_a, strings_b)]
    if bitstring_type is BitstringType.BIT_ARRAY:
        return np.concatenate([strings_b, strings_a], axis=1)



[docs]
def addresses_to_strings(
    addresses: Sequence[int] | np.ndarray,
    norb: int,
    nelec: int | tuple[int, int],
    concatenate: bool = True,
    bitstring_type: BitstringType = BitstringType.INT,
):
    """Convert state vector addresses to bitstrings.

    Example:

    .. code::

        import ffsim

        norb = 3
        nelec = (2, 1)
        dim = ffsim.dim(norb, nelec)

        strings = ffsim.addresses_to_strings(range(5), norb, nelec)
        print(strings)  # prints [11, 19, 35, 13, 21]

        strings = ffsim.addresses_to_strings(
            range(5), norb, nelec, bitstring_type=ffsim.BitstringType.STRING
        )
        print(strings)  # prints ['001011', '010011', '100011', '001101', '010101']

        strings = ffsim.addresses_to_strings(
            range(5), norb, nelec, bitstring_type=ffsim.BitstringType.BIT_ARRAY
        )
        print(strings)
        # prints
        # [[False False  True False  True  True]
        #  [False  True False False  True  True]
        #  [ True False False False  True  True]
        #  [False False  True  True False  True]
        #  [False  True False  True False  True]]

    Args:
        addresses: The state vector addresses to convert to bitstrings.
        norb: The number of spatial orbitals.
        nelec: Either a single integer representing the number of fermions for a
            spinless system, or a pair of integers storing the numbers of spin alpha
            and spin beta fermions.
        bitstring_type: The desired type of bitstring output.
        concatenate: Whether to concatenate the spin-alpha and spin-beta parts of the
            bitstrings. If True, then a single list of concatenated bitstrings is
            returned. The strings are concatenated in the order :math:`s_b s_a`,
            that is, the alpha string appears on the right.
            If False, then two lists are returned, ``(strings_a, strings_b)``. Note that
            the list of alpha strings appears first, that is, on the left.
            In the spinless case (when `nelec` is an integer), this argument is ignored.

    Returns:
        The bitstrings. The type of the output depends on `bitstring_type` and
        `concatenate`.
    """
    if isinstance(nelec, int):
        # Spinless case
        return convert_bitstring_type(
            [
                int(s)
                for s in cistring.addrs2str(norb=norb, nelec=nelec, addrs=addresses)
            ],
            input_type=BitstringType.INT,
            output_type=bitstring_type,
            length=norb,
        )

    # Spinful case
    n_alpha, n_beta = nelec
    dim_b = math.comb(norb, n_beta)
    indices_a, indices_b = np.divmod(addresses, dim_b)
    strings_a = convert_bitstring_type(
        [int(s) for s in cistring.addrs2str(norb=norb, nelec=n_alpha, addrs=indices_a)],
        input_type=BitstringType.INT,
        output_type=bitstring_type,
        length=norb,
    )
    strings_b = convert_bitstring_type(
        [int(s) for s in cistring.addrs2str(norb=norb, nelec=n_beta, addrs=indices_b)],
        input_type=BitstringType.INT,
        output_type=bitstring_type,
        length=norb,
    )
    if concatenate:
        return concatenate_bitstrings(
            strings_a, strings_b, bitstring_type=bitstring_type, length=norb
        )
    return strings_a, strings_b




[docs]
def strings_to_addresses(
    strings: Sequence[int] | Sequence[str] | np.ndarray,
    norb: int,
    nelec: int | tuple[int, int],
) -> np.ndarray:
    """Convert bitstrings to state vector addresses.

    Example:

    .. code::

        import numpy as np

        import ffsim

        norb = 3
        nelec = (2, 1)
        dim = ffsim.dim(norb, nelec)

        addresses = ffsim.strings_to_addresses(
            [
                0b001011,
                0b010101,
                0b100101,
                0b010110,
                0b100110,
            ],
            norb,
            nelec,
        )
        print(addresses)  # prints [0 4 5 7 8]

        addresses = ffsim.strings_to_addresses(
            [
                "001011",
                "010101",
                "100101",
                "010110",
                "100110",
            ],
            norb,
            nelec,
        )
        print(addresses)  # prints [0 4 5 7 8]

        addresses = ffsim.strings_to_addresses(
            np.array(
                [
                    [False, False, True, False, True, True],
                    [False, True, False, True, False, True],
                    [True, False, False, True, False, True],
                    [False, True, False, True, True, False],
                    [True, False, False, True, True, False],
                ]
            ),
            norb,
            nelec,
        )
        print(addresses)  # prints [0 4 5 7 8]

    Args:
        strings: The bitstrings to convert to state vector addresses.
            Can be a list of strings, a list of integers, or a Numpy array of bits.
        norb: The number of spatial orbitals.
        nelec: Either a single integer representing the number of fermions for a
            spinless system, or a pair of integers storing the numbers of spin alpha
            and spin beta fermions.

    Returns:
        The state vector addresses, as a Numpy array of integers.
    """
    if not len(strings):
        return np.array([])
    if isinstance(strings, np.ndarray) and strings.ndim == 2:
        bitstring_type = BitstringType.BIT_ARRAY
    elif isinstance(strings[0], str):
        bitstring_type = BitstringType.STRING
    else:
        bitstring_type = BitstringType.INT
    strings = convert_bitstring_type(
        strings, input_type=bitstring_type, output_type=BitstringType.INT, length=norb
    )
    if isinstance(nelec, int):
        return cistring.strs2addr(norb=norb, nelec=nelec, strings=strings)
    n_alpha, n_beta = nelec
    strings = np.asarray(strings)
    strings_a = strings & ((1 << norb) - 1)
    strings_b = strings >> norb
    addrs_a = cistring.strs2addr(norb=norb, nelec=n_alpha, strings=strings_a)
    addrs_b = cistring.strs2addr(norb=norb, nelec=n_beta, strings=strings_b)
    dim_b = math.comb(norb, n_beta)
    return addrs_a * dim_b + addrs_b



def bitstring_to_occupied_orbitals(bitstring: int) -> list[int]:
    """Return a list of bit indices where a bitstring is equal to one."""
    return [i for i in range(bitstring.bit_length()) if bitstring >> i & 1]