Source code for nbed.localizers.occupied.pyscf

"""PySCF Localizer Class."""

import logging
from abc import ABC, abstractmethod

import numpy as np
from numpy.typing import NDArray
from pyscf import lo, scf  # type:ignore
from pyscf.lo import vvo  # type:ignore

from nbed.localizers.system import RestrictedLS

from .base import OccupiedLocalizer

logger = logging.getLogger(__name__)


class PySCFLocalizer(OccupiedLocalizer, ABC):
    """Object used to localise molecular orbitals (MOs) using PySCF localization functions.

    Running localization returns active and environment systems.

    Note:
    The major improvement of IBOs over PM orbitals is that they are based on IAO charges instead of the erratic
    Mulliken charges. As a result, IBOs are always well-defined.  (Ref: J. Chem. Theory Comput. 2013, 9, 4834−4843)


    Args:
        global_scf (gto.Mole): PySCF molecule object
        n_active_atoms (int): Number of active atoms
        occ_cutoff (float): Threshold for selecting occupied active region (only requried if
                                spade localization is NOT used)
        virt_cutoff (float): Threshold for selecting unoccupied (virtual) active region (required for
                                spade approach too!)

    Methods:
        run: Main function to run localization.
    """

    def __init__(
        self,
        global_scf: scf.hf.SCF,
        n_active_atoms: int,
        occ_cutoff: float = 0.95,
        virt_cutoff: float = 0.95,
    ):
        """Initialize PySCF Localizer."""
        self.occ_cutoff = self._valid_threshold(occ_cutoff)
        self.virt_cutoff = self._valid_threshold(virt_cutoff)
        super().__init__(
            global_scf,
            n_active_atoms,
        )

    def _valid_threshold(self, threshold: float):
        """Checks if threshold is within 0-1 range (percentage).

        Args:
            threshold (float): input number between 0 and 1 (inclusive)

        Returns:
            threshold (float): input percentage
        """
        if 0.0 <= threshold <= 1.0:
            logger.debug("Localizer threshold valid.")
            return threshold
        else:
            logger.error("Localizer threshold not valid.")
            raise ValueError(f"threshold: {threshold} is not in range [0,1] inclusive")

    @abstractmethod
    def _pyscf_method(self, c_std_occ) -> NDArray:
        """Abstract method containing the PySCF method to use.

        Args:
            c_std_occ (np.ndarray): Unlocalized C matrix of occupied orbitals.

        Returns:
            np.ndarray: Localized C Matrix
        """
        pass

    def _localize_spin(
        self,
        c_matrix: np.ndarray,
        occupancy: np.ndarray,
        n_mo_overwrite: int | None = None,
    ) -> RestrictedLS:
        """Localize orbitals of one spin using PySCF.

        Args:
            c_matrix (np.ndarray): Unlocalized C matrix of occupied orbitals.
            occupancy (np.ndarray): Occupancy of orbitals.
            n_mo_overwrite (int | None): Overwrite the number of active molecular orbitals.

        Returns:
            np.ndarray: Localized C matrix of occupied orbitals.
        """
        logger.debug("Running PySCF Localization.")
        n_occupied_orbitals = np.count_nonzero(occupancy)
        logger.debug(f"{n_occupied_orbitals=}")
        c_std_occ = c_matrix[:, :n_occupied_orbitals]
        logger.debug(f"{c_std_occ.shape=}")

        c_loc_occ: NDArray = self._pyscf_method(c_std_occ)
        c_loc = np.zeros(c_matrix.shape)
        c_loc[: c_loc_occ.shape[0], : c_loc_occ.shape[1]] += c_loc_occ

        ao_slice_matrix = self._global_scf.mol.aoslice_by_atom()

        # TODO: Check the following:
        # S_ovlp = pyscf_scf.get_ovlp()
        # S_half = sp.linalg.fractional_matrix_power(S_ovlp , 0.5)
        # C_loc_occ_ORTHO = S_half@C_loc_occ_full
        # run numerator_all and denominator_all in ortho basis

        # find indices of AO of active atoms
        ao_active_inds = np.arange(
            ao_slice_matrix[0, 2], ao_slice_matrix[self._n_active_atoms - 1, 3]
        )
        # active AOs coeffs for a given MO j
        numerator_all = np.einsum("ij->j", (c_loc_occ[ao_active_inds, :]) ** 2)

        # all AOs coeffs for a given MO j
        denominator_all = np.einsum("ij->j", c_loc_occ**2)

        mo_active_share = numerator_all / denominator_all

        logger.debug(f"(active_AO^2)/(all_AO^2): {np.around(mo_active_share, 4)}")
        logger.debug(f"threshold for active part: {self.occ_cutoff}")

        active_occ_inds = np.zeros(occupancy.shape[-1], dtype=np.bool)

        all_ao_shares_same_bool = np.allclose(
            np.zeros_like(mo_active_share),
            mo_active_share - mo_active_share.sum() / len(mo_active_share),
        )

        if all_ao_shares_same_bool:
            # case for highly symmetric molecules
            # overlap is the same everywhere hence everything goes into env or act part

            # edge case put half and half!
            logger.warning(
                "AO subsystem selection % same everywhere. Splitting half and half"
            )
            print(f"mo_active_share: {mo_active_share}")
            active_occ_inds[: n_occupied_orbitals // 2] = True
        elif len(active_occ_inds) == 0:
            # if no active indices, then take largest possible overlap
            mo_active_percentage_inshare = mo_active_share.argsort()[::-1]
            active_occ_inds[mo_active_percentage_inshare[:1]] = (
                True  # take first element
            )
            logger.warning("no active AOs - forcing one to be active")
            print(f"active system %: {mo_active_share[active_occ_inds][0]} \n")
        else:
            active_occ_inds[: len(mo_active_share)] = mo_active_share >= self.occ_cutoff

        enviro_occ_inds = np.zeros(active_occ_inds.shape, dtype=np.bool)
        enviro_occ_inds[: len(mo_active_share)] = mo_active_share < self.occ_cutoff
        logger.debug(f"{active_occ_inds=}")
        logger.debug(f"{enviro_occ_inds=}")

        # define active MO orbs and environment
        #    take MO (columns of C_matrix) that have high dependence from active AOs
        c_active = c_loc_occ[:, active_occ_inds[: len(mo_active_share)]]
        dm_active = c_active @ c_active.T

        if len(enviro_occ_inds) == 0:
            # case for when no environement
            logger.warning("No environment electronic density")
            c_enviro = np.zeros((c_active.shape[0], 1))
        else:
            c_enviro = c_loc_occ[:, enviro_occ_inds[: len(mo_active_share)]]
        dm_enviro = c_enviro @ c_enviro.T

        # storing condition used to select env system
        self.enviro_selection_condition = mo_active_share

        logger.debug("PySCF localization complete.")
        return RestrictedLS(
            active_occ_inds, enviro_occ_inds, c_loc, dm_active, dm_enviro
        )

    def _localize_virtual_spin(
        self, c_matrix: np.ndarray, virt_threshold: float
    ) -> np.ndarray:
        """Localise virtual (unoccupied) orbitals using different localization schemes in PySCF.

        Args:
            global_scf (scf.hf.SCF): PySCF molecule object
            c_matrix (np.ndarray): Unlocalized C matrix of occupied orbitals.
            virt_threshold (float): Threshold for selecting unoccupied (virtual) active MOs.

        Returns:
            c_virtual_loc (np.array): C matrix of localized virtual MOs (columns define MOs)
        """
        logger.debug("Localizing virtual orbitals.")
        n_occupied_orbitals = np.count_nonzero(self._global_scf.mo_occ == 2)
        c_std_occ: NDArray
        c_std_occ = self._global_scf.mo_coeff[:, :n_occupied_orbitals]  # type:ignore
        c_std_virt: NDArray
        c_std_virt = self._global_scf.mo_coeff[:, self._global_scf.mo_occ < 2]  # type:ignore

        c_virtual_loc = vvo.vvo(
            self._global_scf.mol, c_std_occ, c_std_virt, iaos=None, s=None, verbose=None
        )

        ao_slice_matrix = self._global_scf.mol.aoslice_by_atom()

        # TODO: Check the following:
        # S_ovlp = global_scf.get_ovlp()
        # S_half = sp.linalg.fractional_matrix_power(S_ovlp , 0.5)
        # C_loc_occ_ORTHO = S_half@C_loc_occ_full
        # run numerator_all and denominator_all in ortho basis

        # find indices of AO of active atoms
        ao_active_inds = np.arange(
            ao_slice_matrix[0, 2], ao_slice_matrix[self._n_active_atoms - 1, 3]
        )

        # active AOs coeffs for a given MO j
        numerator_all = np.einsum("ij->j", (c_virtual_loc[ao_active_inds, :]) ** 2)
        # all AOs coeffs for a given MO j
        denominator_all = np.einsum("ij->j", c_virtual_loc**2)

        active_percentage_MO = numerator_all / denominator_all

        logger.debug("Virtual orbitals localized.")
        logger.debug(f"(active_AO^2)/(all_AO^2): {np.around(active_percentage_MO, 4)}")
        logger.debug(f"threshold for active part: {self.virt_cutoff}")

        # NOT IN USE
        # add constant occupied index
        # active_virtual_occ_inds = (
        #     np.where(active_percentage_MO > self.virt_cutoff)[0] + c_std_occ.shape[1]
        # )
        # enviro_virtual_occ_inds = np.array(
        #     [
        #         i
        #         for i in range(
        #             c_std_occ.shape[1], c_std_occ.shape[1] + c_virtual_loc.shape[1]
        #         )
        #         if i not in active_virtual_occ_inds
        #     ]
        # )

        return c_virtual_loc

    def localize_virtual(self, local_scf: scf.hf.SCF) -> scf.hf.SCF:
        """Localise virtual (unoccupied) obitals using PySCF method.

        [1] D. Claudino and N. J. Mayhall, "Simple and Efficient Truncation of Virtual
        Spaces in Embedded Wave Functions via Concentric Localization", Journal of Chemical
        Theory and Computation, vol. 15, no. 11, pp. 6085-6096, Nov. 2019,
        doi: 10.1021/ACS.JCTC.9B00682.

        Args:
            local_scf (scf.hf.SCF): SCF object with occupied orbitals localized.

        Returns:
            scf.hf.SCF: Fully Localized SCF object.
        """
        raise NotImplementedError(
            "Virtual orbital localization not implemented for PySCF methods."
        )
        return local_scf


[docs] class PMLocalizer(PySCFLocalizer): """Object used to localise molecular orbitals (MOs) using Pipek-Mezey localization. Running localization returns active and environment systems. Args: global_scf (gto.Mole): PySCF molecule object n_active_atoms (int): Number of active atoms occ_cutoff (float): Threshold for selecting occupied active region (only requried if spade localization is NOT used) virt_cutoff (float): Threshold for selecting unoccupied (virtual) active region (required for spade approach too!) Attributes: c_active (np.array): C matrix of localized occupied active MOs (columns define MOs) c_enviro (np.array): C matrix of localized occupied ennironment MOs c_loc_occ_and_virt (np.array): Full localized C_matrix (occpuied and virtual) dm_active (np.array): active system density matrix dm_enviro (np.array): environment system density matrix active_occ_inds (np.array): 1D array of active occupied MO indices enviro_occ_inds (np.array): 1D array of environment occupied MO indices c_loc_occ (np.array): C matrix of localized occupied MOs Methods: run: Main function to run localization. """ def __init__( self, global_scf: scf.hf.SCF, n_active_atoms: int, occ_cutoff: float = 0.95, virt_cutoff: float = 0.95, ): """Initialize Localizer.""" super().__init__( global_scf, n_active_atoms, occ_cutoff=occ_cutoff, virt_cutoff=virt_cutoff, ) def _pyscf_method(self, c_std_occ: np.ndarray) -> np.ndarray: """Abstract method containing the PySCF method to use. Args: c_std_occ (np.ndarray): Unlocalized C matrix of occupied orbitals. """ logger.debug("Using Pipek-Mezey method.") # Localise orbitals using Pipek-Mezey localization scheme. # This maximizes the sum of orbital-dependent partial charges on the nuclei. pipmez = lo.PipekMezey(self._global_scf.mol, c_std_occ) # The atomic population projection scheme. # 'mulliken', 'meta-lowdin', 'iao', 'becke' pipmez.pop_method = "meta-lowdin" # run localization return pipmez.kernel()
[docs] class BOYSLocalizer(PySCFLocalizer): """Object used to localise molecular orbitals (MOs) using BOYS localization. Running localization returns active and environment systems. Args: global_scf (gto.Mole): PySCF molecule object n_active_atoms (int): Number of active atoms localization_method (str): String of orbital localization method (spade, pipekmezey, boys, ibo) occ_cutoff (float): Threshold for selecting occupied active region (only requried if spade localization is NOT used) virt_cutoff (float): Threshold for selecting unoccupied (virtual) active region (required for spade approach too!) Attributes: c_active (np.array): C matrix of localized occupied active MOs (columns define MOs) c_enviro (np.array): C matrix of localized occupied ennironment MOs c_loc_occ_and_virt (np.array): Full localized C_matrix (occpuied and virtual) dm_active (np.array): active system density matrix dm_enviro (np.array): environment system density matrix active_occ_inds (np.array): 1D array of active occupied MO indices enviro_occ_inds (np.array): 1D array of environment occupied MO indices c_loc_occ (np.array): C matrix of localized occupied MOs Methods: run: Main function to run localization. """ def __init__( self, global_scf: scf.hf.SCF, n_active_atoms: int, occ_cutoff: float = 0.95, virt_cutoff: float = 0.95, ): """Initialize Localizer.""" super().__init__( global_scf, n_active_atoms, occ_cutoff=occ_cutoff, virt_cutoff=virt_cutoff, ) def _pyscf_method(self, c_std_occ: np.ndarray) -> np.ndarray: """Abstract method containing the PySCF method to use. Args: c_std_occ (np.ndarray): Unlocalized C matrix of occupied orbitals. """ logger.debug("Using BOYS method.") # Minimizes the spatial extent of the orbitals by minimizing a certain function. boys_SCF = lo.boys.Boys(self._global_scf.mol, c_std_occ) return boys_SCF.kernel()
[docs] class IBOLocalizer(PySCFLocalizer): """Object used to localise molecular orbitals (MOs) using IBO localization. Running localization returns active and environment systems. Args: global_scf (gto.Mole): PySCF molecule object n_active_atoms (int): Number of active atoms occ_cutoff (float): Threshold for selecting occupied active region (only requried if spade localization is NOT used) virt_cutoff (float): Threshold for selecting unoccupied (virtual) active region (required for spade approach too!) Attributes: c_active (np.array): C matrix of localized occupied active MOs (columns define MOs) c_enviro (np.array): C matrix of localized occupied ennironment MOs c_loc_occ_and_virt (np.array): Full localized C_matrix (occpuied and virtual) dm_active (np.array): active system density matrix dm_enviro (np.array): environment system density matrix active_occ_inds (np.array): 1D array of active occupied MO indices enviro_occ_inds (np.array): 1D array of environment occupied MO indices c_loc_occ (np.array): C matrix of localized occupied MOs Methods: run: Main function to run localization. """ def __init__( self, global_scf: scf.hf.SCF, n_active_atoms: int, occ_cutoff: float = 0.95, virt_cutoff: float = 0.95, ): """Initialise Localizer.""" super().__init__( global_scf, n_active_atoms, occ_cutoff=occ_cutoff, virt_cutoff=virt_cutoff, ) def _pyscf_method(self, c_std_occ: np.ndarray) -> np.ndarray: """Abstract method containing the PySCF method to use. Args: c_std_occ (np.ndarray): Unlocalized C matrix of occupied orbitals. """ logger.debug("Using IBO method.") # Intrinsic bonding orbitals. iaos = lo.iao.iao(self._global_scf.mol, c_std_occ) # Orthogonalize IAO iaos = lo.vec_lowdin(iaos, self._global_scf.get_ovlp()) c_loc_occ: NDArray c_loc_occ = lo.ibo.ibo( self._global_scf.mol, c_std_occ, locmethod="IBO", iaos=iaos ) # type:ignore return c_loc_occ