Source code for nbed.utils

"""Helper functions for the package."""

import argparse
import json
import logging
import logging.config
import os
from pathlib import Path
from typing import Optional

from openfermion.chem.pubchem import geometry_from_pubchem
from pydantic import ValidationError

from nbed.config import NbedConfig

logger = logging.getLogger(__name__)



[docs]
def setup_logs() -> None:
    """Configure logging for the package.

    Applies a ``logging.config.dictConfig`` configuration that attaches two
    handlers to the root logger: a file handler that records DEBUG-and-above
    messages in ``.nbed.log`` (opened in ``"w"`` mode, UTF-8 encoded) and a
    stream handler that emits INFO-and-above messages. Both handlers share a
    single timestamped format, and loggers that already exist stay enabled.
    """
    formatter_name = "standard"
    formatters = {
        formatter_name: {"format": "%(asctime)s: %(name)s: %(levelname)s: %(message)s"},
    }

    # The file handler accepts everything from DEBUG up; the stream handler
    # only passes INFO and above.
    handlers = {
        "file_handler": {
            "class": "logging.FileHandler",
            "level": "DEBUG",
            "formatter": formatter_name,
            "filename": ".nbed.log",
            "mode": "w",
            "encoding": "utf-8",
        },
        "stream_handler": {
            "class": "logging.StreamHandler",
            "level": "INFO",
            "formatter": formatter_name,
        },
    }

    # The empty logger name addresses the root logger; it gets every handler
    # declared above, in declaration order.
    root_settings = {"handlers": list(handlers), "level": "DEBUG"}

    logging.config.dictConfig(
        {
            "version": 1,
            "disable_existing_loggers": False,
            "formatters": formatters,
            "handlers": handlers,
            "loggers": {"": root_settings},
        }
    )
    logging.getLogger(__name__).debug("Logging initialised.")




[docs]
def parse() -> NbedConfig:
    """Parse the command line and load the config file it points to.

    Reads the required ``--config`` argument, loads the JSON file at that path
    and validates its contents by constructing an ``NbedConfig`` from them.

    Returns:
        NbedConfig: Configuration built from the contents of the JSON file.

    Raises:
        ValidationError: If the file contents cannot be validated against the
            ``NbedConfig`` model. The original pydantic error is propagated.
    """
    logger.debug("Adding CLI arguments.")
    parser = argparse.ArgumentParser(description="Output embedded Qubit Hamiltonian.")
    parser.add_argument(
        "--config",
        required=True,
        type=str,
        help="Path to a config file. Overwrites other arguments.",
    )
    logger.debug("Parsing CLI arguments.")
    args = parser.parse_args()

    logger.debug("Reading config file.")
    filepath = Path(args.config).absolute()
    # Explicit encoding so the result does not depend on the platform default.
    with open(filepath, encoding="utf-8") as f:
        config_data = json.load(f)
    logger.debug("Input data:\nconfig_data=%r", config_data)

    try:
        return NbedConfig(**config_data)
    except ValidationError as e:
        logger.error("Could not validate input data against NbedConfig model.")
        logger.error(e)
        # Re-raise the caught exception as-is. Building a new ValidationError
        # from `e` is not a supported way to construct one and would surface
        # as a TypeError, hiding the actual validation failure.
        raise




[docs]
def pubchem_mol_geometry(molecule_name) -> dict:
    """Look up a molecule's 3d geometry on PubChem and index its atoms.

    Thin wrapper around Openfermion's ``geometry_from_pubchem``: each entry of
    the returned geometry is stored under its position in the sequence.

    Args:
        molecule_name (str): Name of the molecule to search for on PubChem.

    Returns:
        struct_dict (dict): Maps an integer atom index to the corresponding
            geometry entry, e.g. ``('atom_id', (x_loc, y_loc, z_loc))``.

    Raises:
        ValueError: If the PubChem lookup returns ``None``.

    Example:
    output = pubchem_mol_geometry('H2O')
    print(output)

    >> { 0: ('O', (0, 0, 0)),
         1: ('H', (0.2774, 0.8929, 0.2544)),
         2: ('H', (0.6068, -0.2383, -0.7169))
         }

    """
    atoms = geometry_from_pubchem(molecule_name, structure="3d")

    if atoms is not None:
        # Key every geometry entry by its position in the returned sequence.
        return dict(enumerate(atoms))

    raise ValueError(
        f"""Could not find geometry of {molecule_name} on PubChem...
                                 make sure molecule input is a correct path to an xyz file or real molecule
                                """
    )




[docs]
def build_ordered_xyz_string(struct_dict: dict, active_atom_inds: list) -> str:
    """Get raw xyz string of molecular geometry with the active atoms listed first.

    The atoms named in ``active_atom_inds`` are written first, in the order
    given. All remaining atoms of ``struct_dict`` follow in the dictionary's
    own iteration order.

    The string starts with the number of atoms, followed by a comment line
    holding a single space, followed by one line per atom. Each atom line is
    the atom label and its three coordinates separated by tab characters and
    terminated by a newline.

    Args:
        struct_dict (dict): Dictionary of indexed atoms and Cartesian coordinates,
            each value of the form ``('atom_id', (x, y, z))``.
        active_atom_inds (list): Indices (keys of ``struct_dict``) of the atoms to be
            considered active. These atoms are put at the top of the xyz string.

    Returns:
        xyz_string (str): raw xyz string of molecular geometry.

    Raises:
        ValueError: If ``active_atom_inds`` contains an index that is not a key of
            ``struct_dict``, or contains the same index more than once.

    Example:
        input_struct_dict = { 0: ('O', (0, 0, 0)),
                              1: ('H', (0.2774, 0.8929, 0.2544)),
                              2: ('H', (0.6068, -0.2383, -0.7169))
                            }

        xyz_string = build_ordered_xyz_string(input_struct_dict, [1, 0, 2])
        print(xyz_string)

         >> 3

            H   0.2774   0.8929   0.2544
            O   0   0   0
            H   0.6068   -0.2383   -0.7169

    """
    active = list(active_atom_inds)
    active_set = set(active)

    if not active_set.issubset(struct_dict):
        raise ValueError(
            "active atom indices not subset of indices in structural dict "
        )
    if len(active_set) != len(active):
        # A repeated index would emit the same atom twice while the header
        # still reports len(struct_dict) atoms, producing an invalid xyz string.
        raise ValueError("active atom indices contain duplicates")

    # Active atoms first, then the rest; set membership keeps this linear.
    ordering = [*active, *(ind for ind in struct_dict if ind not in active_set)]

    atom_lines = []
    for atom_ind in ordering:
        atom, xyz = struct_dict[atom_ind]
        atom_lines.append(f"{atom}\t{xyz[0]}\t{xyz[1]}\t{xyz[2]}\n")

    # Header: atom count, then a comment line consisting of a single space.
    return f"{len(struct_dict)}\n \n" + "".join(atom_lines)




[docs]
def save_ordered_xyz_file(
    file_name: str,
    struct_dict: dict,
    active_atom_inds: list,
    save_location: Optional[Path] = None,
) -> Path:
    """Saves .xyz file in a molecular_structures directory.

    The xyz contents are produced by ``build_ordered_xyz_string`` from
    ``struct_dict`` and ``active_atom_inds`` and written to
    ``<save_location>/molecular_structures/<file_name>.xyz``. The
    ``molecular_structures`` directory (and any missing parents) is created
    if it does not exist yet. An existing file of the same name is overwritten.

    Args:
        file_name (str): Name of xyz file, without the ``.xyz`` extension.
        struct_dict (dict): Dictionary of indexed atoms and Cartesian coordinates (x,y,z)
        active_atom_inds (list): list of indices to be considered active. This will put these atoms to the top of the xyz file.
                                 Note indices are chosen from the struct_dict.
        save_location (Path): Directory in which the molecular_structures directory is placed.
                              If not defined then the current working dir is used.

    Returns:
        xyz_file_path (Path): Path to xyz file

    Example:
    input_struct_dict = { 0: ('O', (0, 0, 0)),
                            1: ('H', (0.2774, 0.8929, 0.2544)),
                            2: ('H', (0.6068, -0.2383, -0.7169))
                        }

    path = save_ordered_xyz_file('water', input_struct_dict, [1,0,2])
    print(path)
    >> ../molecular_structures/water.xyz

    with open(path,'r') as infile:
        xyz_string = infile.read()
    print(xyz_string)

        >> 3

        H   0.2774   0.8929   0.2544
        O   0   0   0
        H   0.6068   -0.2383   -0.7169

    """
    xyz_string = build_ordered_xyz_string(struct_dict, active_atom_inds)

    base_dir = Path.cwd() if save_location is None else Path(save_location)
    output_dir = base_dir / "molecular_structures"
    # exist_ok avoids the check-then-create race of testing for the directory
    # first and creating it afterwards.
    output_dir.mkdir(parents=True, exist_ok=True)

    xyz_file_path = output_dir / f"{file_name}.xyz"
    # Explicit encoding so the file does not depend on the platform default.
    with open(xyz_file_path, "w", encoding="utf-8") as outfile:
        outfile.write(xyz_string)

    return xyz_file_path