"""PySCF Localizer Class."""
import logging
from abc import ABC, abstractmethod
import numpy as np
from numpy.typing import NDArray
from pyscf import lo, scf # type:ignore
from pyscf.lo import vvo # type:ignore
from nbed.localizers.system import RestrictedLS
from .base import OccupiedLocalizer
logger = logging.getLogger(__name__)
class PySCFLocalizer(OccupiedLocalizer, ABC):
"""Object used to localise molecular orbitals (MOs) using PySCF localization functions.
Running localization returns active and environment systems.
Note:
The major improvement of IBOs over PM orbitals is that they are based on IAO charges instead of the erratic
Mulliken charges. As a result, IBOs are always well-defined. (Ref: J. Chem. Theory Comput. 2013, 9, 4834−4843)
Args:
global_scf (gto.Mole): PySCF molecule object
n_active_atoms (int): Number of active atoms
occ_cutoff (float): Threshold for selecting occupied active region (only requried if
spade localization is NOT used)
virt_cutoff (float): Threshold for selecting unoccupied (virtual) active region (required for
spade approach too!)
Methods:
run: Main function to run localization.
"""
def __init__(
self,
global_scf: scf.hf.SCF,
n_active_atoms: int,
occ_cutoff: float = 0.95,
virt_cutoff: float = 0.95,
):
"""Initialize PySCF Localizer."""
self.occ_cutoff = self._valid_threshold(occ_cutoff)
self.virt_cutoff = self._valid_threshold(virt_cutoff)
super().__init__(
global_scf,
n_active_atoms,
)
def _valid_threshold(self, threshold: float):
"""Checks if threshold is within 0-1 range (percentage).
Args:
threshold (float): input number between 0 and 1 (inclusive)
Returns:
threshold (float): input percentage
"""
if 0.0 <= threshold <= 1.0:
logger.debug("Localizer threshold valid.")
return threshold
else:
logger.error("Localizer threshold not valid.")
raise ValueError(f"threshold: {threshold} is not in range [0,1] inclusive")
@abstractmethod
def _pyscf_method(self, c_std_occ) -> NDArray:
"""Abstract method containing the PySCF method to use.
Args:
c_std_occ (np.ndarray): Unlocalized C matrix of occupied orbitals.
Returns:
np.ndarray: Localized C Matrix
"""
pass
def _localize_spin(
self,
c_matrix: np.ndarray,
occupancy: np.ndarray,
n_mo_overwrite: int | None = None,
) -> RestrictedLS:
"""Localize orbitals of one spin using PySCF.
Args:
c_matrix (np.ndarray): Unlocalized C matrix of occupied orbitals.
occupancy (np.ndarray): Occupancy of orbitals.
n_mo_overwrite (int | None): Overwrite the number of active molecular orbitals.
Returns:
np.ndarray: Localized C matrix of occupied orbitals.
"""
logger.debug("Running PySCF Localization.")
n_occupied_orbitals = np.count_nonzero(occupancy)
logger.debug(f"{n_occupied_orbitals=}")
c_std_occ = c_matrix[:, :n_occupied_orbitals]
logger.debug(f"{c_std_occ.shape=}")
c_loc_occ: NDArray = self._pyscf_method(c_std_occ)
c_loc = np.zeros(c_matrix.shape)
c_loc[: c_loc_occ.shape[0], : c_loc_occ.shape[1]] += c_loc_occ
ao_slice_matrix = self._global_scf.mol.aoslice_by_atom()
# TODO: Check the following:
# S_ovlp = pyscf_scf.get_ovlp()
# S_half = sp.linalg.fractional_matrix_power(S_ovlp , 0.5)
# C_loc_occ_ORTHO = S_half@C_loc_occ_full
# run numerator_all and denominator_all in ortho basis
# find indices of AO of active atoms
ao_active_inds = np.arange(
ao_slice_matrix[0, 2], ao_slice_matrix[self._n_active_atoms - 1, 3]
)
# active AOs coeffs for a given MO j
numerator_all = np.einsum("ij->j", (c_loc_occ[ao_active_inds, :]) ** 2)
# all AOs coeffs for a given MO j
denominator_all = np.einsum("ij->j", c_loc_occ**2)
mo_active_share = numerator_all / denominator_all
logger.debug(f"(active_AO^2)/(all_AO^2): {np.around(mo_active_share, 4)}")
logger.debug(f"threshold for active part: {self.occ_cutoff}")
active_occ_inds = np.zeros(occupancy.shape[-1], dtype=np.bool)
all_ao_shares_same_bool = np.allclose(
np.zeros_like(mo_active_share),
mo_active_share - mo_active_share.sum() / len(mo_active_share),
)
if all_ao_shares_same_bool:
# case for highly symmetric molecules
# overlap is the same everywhere hence everything goes into env or act part
# edge case put half and half!
logger.warning(
"AO subsystem selection % same everywhere. Splitting half and half"
)
print(f"mo_active_share: {mo_active_share}")
active_occ_inds[: n_occupied_orbitals // 2] = True
elif len(active_occ_inds) == 0:
# if no active indices, then take largest possible overlap
mo_active_percentage_inshare = mo_active_share.argsort()[::-1]
active_occ_inds[mo_active_percentage_inshare[:1]] = (
True # take first element
)
logger.warning("no active AOs - forcing one to be active")
print(f"active system %: {mo_active_share[active_occ_inds][0]} \n")
else:
active_occ_inds[: len(mo_active_share)] = mo_active_share >= self.occ_cutoff
enviro_occ_inds = np.zeros(active_occ_inds.shape, dtype=np.bool)
enviro_occ_inds[: len(mo_active_share)] = mo_active_share < self.occ_cutoff
logger.debug(f"{active_occ_inds=}")
logger.debug(f"{enviro_occ_inds=}")
# define active MO orbs and environment
# take MO (columns of C_matrix) that have high dependence from active AOs
c_active = c_loc_occ[:, active_occ_inds[: len(mo_active_share)]]
dm_active = c_active @ c_active.T
if len(enviro_occ_inds) == 0:
# case for when no environement
logger.warning("No environment electronic density")
c_enviro = np.zeros((c_active.shape[0], 1))
else:
c_enviro = c_loc_occ[:, enviro_occ_inds[: len(mo_active_share)]]
dm_enviro = c_enviro @ c_enviro.T
# storing condition used to select env system
self.enviro_selection_condition = mo_active_share
logger.debug("PySCF localization complete.")
return RestrictedLS(
active_occ_inds, enviro_occ_inds, c_loc, dm_active, dm_enviro
)
def _localize_virtual_spin(
self, c_matrix: np.ndarray, virt_threshold: float
) -> np.ndarray:
"""Localise virtual (unoccupied) orbitals using different localization schemes in PySCF.
Args:
global_scf (scf.hf.SCF): PySCF molecule object
c_matrix (np.ndarray): Unlocalized C matrix of occupied orbitals.
virt_threshold (float): Threshold for selecting unoccupied (virtual) active MOs.
Returns:
c_virtual_loc (np.array): C matrix of localized virtual MOs (columns define MOs)
"""
logger.debug("Localizing virtual orbitals.")
n_occupied_orbitals = np.count_nonzero(self._global_scf.mo_occ == 2)
c_std_occ: NDArray
c_std_occ = self._global_scf.mo_coeff[:, :n_occupied_orbitals] # type:ignore
c_std_virt: NDArray
c_std_virt = self._global_scf.mo_coeff[:, self._global_scf.mo_occ < 2] # type:ignore
c_virtual_loc = vvo.vvo(
self._global_scf.mol, c_std_occ, c_std_virt, iaos=None, s=None, verbose=None
)
ao_slice_matrix = self._global_scf.mol.aoslice_by_atom()
# TODO: Check the following:
# S_ovlp = global_scf.get_ovlp()
# S_half = sp.linalg.fractional_matrix_power(S_ovlp , 0.5)
# C_loc_occ_ORTHO = S_half@C_loc_occ_full
# run numerator_all and denominator_all in ortho basis
# find indices of AO of active atoms
ao_active_inds = np.arange(
ao_slice_matrix[0, 2], ao_slice_matrix[self._n_active_atoms - 1, 3]
)
# active AOs coeffs for a given MO j
numerator_all = np.einsum("ij->j", (c_virtual_loc[ao_active_inds, :]) ** 2)
# all AOs coeffs for a given MO j
denominator_all = np.einsum("ij->j", c_virtual_loc**2)
active_percentage_MO = numerator_all / denominator_all
logger.debug("Virtual orbitals localized.")
logger.debug(f"(active_AO^2)/(all_AO^2): {np.around(active_percentage_MO, 4)}")
logger.debug(f"threshold for active part: {self.virt_cutoff}")
# NOT IN USE
# add constant occupied index
# active_virtual_occ_inds = (
# np.where(active_percentage_MO > self.virt_cutoff)[0] + c_std_occ.shape[1]
# )
# enviro_virtual_occ_inds = np.array(
# [
# i
# for i in range(
# c_std_occ.shape[1], c_std_occ.shape[1] + c_virtual_loc.shape[1]
# )
# if i not in active_virtual_occ_inds
# ]
# )
return c_virtual_loc
def localize_virtual(self, local_scf: scf.hf.SCF) -> scf.hf.SCF:
"""Localise virtual (unoccupied) obitals using PySCF method.
[1] D. Claudino and N. J. Mayhall, "Simple and Efficient Truncation of Virtual
Spaces in Embedded Wave Functions via Concentric Localization", Journal of Chemical
Theory and Computation, vol. 15, no. 11, pp. 6085-6096, Nov. 2019,
doi: 10.1021/ACS.JCTC.9B00682.
Args:
local_scf (scf.hf.SCF): SCF object with occupied orbitals localized.
Returns:
scf.hf.SCF: Fully Localized SCF object.
"""
raise NotImplementedError(
"Virtual orbital localization not implemented for PySCF methods."
)
return local_scf
[docs]
class PMLocalizer(PySCFLocalizer):
"""Object used to localise molecular orbitals (MOs) using Pipek-Mezey localization.
Running localization returns active and environment systems.
Args:
global_scf (gto.Mole): PySCF molecule object
n_active_atoms (int): Number of active atoms
occ_cutoff (float): Threshold for selecting occupied active region (only requried if
spade localization is NOT used)
virt_cutoff (float): Threshold for selecting unoccupied (virtual) active region (required for
spade approach too!)
Attributes:
c_active (np.array): C matrix of localized occupied active MOs (columns define MOs)
c_enviro (np.array): C matrix of localized occupied ennironment MOs
c_loc_occ_and_virt (np.array): Full localized C_matrix (occpuied and virtual)
dm_active (np.array): active system density matrix
dm_enviro (np.array): environment system density matrix
active_occ_inds (np.array): 1D array of active occupied MO indices
enviro_occ_inds (np.array): 1D array of environment occupied MO indices
c_loc_occ (np.array): C matrix of localized occupied MOs
Methods:
run: Main function to run localization.
"""
def __init__(
self,
global_scf: scf.hf.SCF,
n_active_atoms: int,
occ_cutoff: float = 0.95,
virt_cutoff: float = 0.95,
):
"""Initialize Localizer."""
super().__init__(
global_scf,
n_active_atoms,
occ_cutoff=occ_cutoff,
virt_cutoff=virt_cutoff,
)
def _pyscf_method(self, c_std_occ: np.ndarray) -> np.ndarray:
"""Abstract method containing the PySCF method to use.
Args:
c_std_occ (np.ndarray): Unlocalized C matrix of occupied orbitals.
"""
logger.debug("Using Pipek-Mezey method.")
# Localise orbitals using Pipek-Mezey localization scheme.
# This maximizes the sum of orbital-dependent partial charges on the nuclei.
pipmez = lo.PipekMezey(self._global_scf.mol, c_std_occ)
# The atomic population projection scheme.
# 'mulliken', 'meta-lowdin', 'iao', 'becke'
pipmez.pop_method = "meta-lowdin"
# run localization
return pipmez.kernel()
[docs]
class BOYSLocalizer(PySCFLocalizer):
"""Object used to localise molecular orbitals (MOs) using BOYS localization.
Running localization returns active and environment systems.
Args:
global_scf (gto.Mole): PySCF molecule object
n_active_atoms (int): Number of active atoms
localization_method (str): String of orbital localization method (spade, pipekmezey, boys, ibo)
occ_cutoff (float): Threshold for selecting occupied active region (only requried if
spade localization is NOT used)
virt_cutoff (float): Threshold for selecting unoccupied (virtual) active region (required for
spade approach too!)
Attributes:
c_active (np.array): C matrix of localized occupied active MOs (columns define MOs)
c_enviro (np.array): C matrix of localized occupied ennironment MOs
c_loc_occ_and_virt (np.array): Full localized C_matrix (occpuied and virtual)
dm_active (np.array): active system density matrix
dm_enviro (np.array): environment system density matrix
active_occ_inds (np.array): 1D array of active occupied MO indices
enviro_occ_inds (np.array): 1D array of environment occupied MO indices
c_loc_occ (np.array): C matrix of localized occupied MOs
Methods:
run: Main function to run localization.
"""
def __init__(
self,
global_scf: scf.hf.SCF,
n_active_atoms: int,
occ_cutoff: float = 0.95,
virt_cutoff: float = 0.95,
):
"""Initialize Localizer."""
super().__init__(
global_scf,
n_active_atoms,
occ_cutoff=occ_cutoff,
virt_cutoff=virt_cutoff,
)
def _pyscf_method(self, c_std_occ: np.ndarray) -> np.ndarray:
"""Abstract method containing the PySCF method to use.
Args:
c_std_occ (np.ndarray): Unlocalized C matrix of occupied orbitals.
"""
logger.debug("Using BOYS method.")
# Minimizes the spatial extent of the orbitals by minimizing a certain function.
boys_SCF = lo.boys.Boys(self._global_scf.mol, c_std_occ)
return boys_SCF.kernel()
[docs]
class IBOLocalizer(PySCFLocalizer):
"""Object used to localise molecular orbitals (MOs) using IBO localization.
Running localization returns active and environment systems.
Args:
global_scf (gto.Mole): PySCF molecule object
n_active_atoms (int): Number of active atoms
occ_cutoff (float): Threshold for selecting occupied active region (only requried if
spade localization is NOT used)
virt_cutoff (float): Threshold for selecting unoccupied (virtual) active region (required for
spade approach too!)
Attributes:
c_active (np.array): C matrix of localized occupied active MOs (columns define MOs)
c_enviro (np.array): C matrix of localized occupied ennironment MOs
c_loc_occ_and_virt (np.array): Full localized C_matrix (occpuied and virtual)
dm_active (np.array): active system density matrix
dm_enviro (np.array): environment system density matrix
active_occ_inds (np.array): 1D array of active occupied MO indices
enviro_occ_inds (np.array): 1D array of environment occupied MO indices
c_loc_occ (np.array): C matrix of localized occupied MOs
Methods:
run: Main function to run localization.
"""
def __init__(
self,
global_scf: scf.hf.SCF,
n_active_atoms: int,
occ_cutoff: float = 0.95,
virt_cutoff: float = 0.95,
):
"""Initialise Localizer."""
super().__init__(
global_scf,
n_active_atoms,
occ_cutoff=occ_cutoff,
virt_cutoff=virt_cutoff,
)
def _pyscf_method(self, c_std_occ: np.ndarray) -> np.ndarray:
"""Abstract method containing the PySCF method to use.
Args:
c_std_occ (np.ndarray): Unlocalized C matrix of occupied orbitals.
"""
logger.debug("Using IBO method.")
# Intrinsic bonding orbitals.
iaos = lo.iao.iao(self._global_scf.mol, c_std_occ)
# Orthogonalize IAO
iaos = lo.vec_lowdin(iaos, self._global_scf.get_ovlp())
c_loc_occ: NDArray
c_loc_occ = lo.ibo.ibo(
self._global_scf.mol, c_std_occ, locmethod="IBO", iaos=iaos
) # type:ignore
return c_loc_occ