Source code for pysisyphus.io.fchk

import functools
from pathlib import Path
import re
from typing import Union

import numpy as np

from pysisyphus.MOCoeffs import MOCoeffs
from pysisyphus.elem_data import INV_ATOMIC_NUMBERS
from pysisyphus.Geometry import Geometry
from pysisyphus.helpers_pure import file_or_str
from pysisyphus.linalg import sym_mat_from_tril
from pysisyphus.wavefunction.shells import Shell, FCHKShells
from pysisyphus.wavefunction import Wavefunction
from pysisyphus.wavefunction.helpers import BFType


@file_or_str(".fchk")
def parse_fchk(text):
    """Data will be returned in the order, as found in the FCHK file. At least
    for files produced by Gaussian (Fortran code) this means that the data will
    be in column-major order.

    If for example MO coefficients are desired one must reshape the
    'Alpha MO coefficiens' list into the appropriate shape (nmos, nmos) and then
    transpose the reshaped array.
    See pysisyphus.calculats.Gaussian16.get_overlap_data_from_base_name() for an
    example."""

    header_re = r"\s+".join(
        (
            r"(?P<keyword>[\w\-\s/\(\)=\*]+)",
            r"(?P<kind>[HIRCL])",
            r"(?P<length>N=)?",
            r"(?P<num>[\d\-\+\.E]+)",
        )
    )
    header_re = re.compile(header_re)

    cur_keyword = None
    lines = text.strip().split("\n")
    # title1, titl2 are not used
    _, _, *lines = lines

    conv_funcs = {
        "R": lambda line: [float(_) for _ in line.split()],  # real
        "I": lambda line: [int(_) for _ in line.split()],  # integer
        "C": lambda line: [line],  # character
        "H": lambda line: [bool(_) for _ in line.split()],  # logical
        "L": lambda line: [bool(_) for _ in line.split()],  # logical
    }

    data = {}
    for line in lines:
        line = line.strip()
        if mobj := header_re.match(line):
            cur_keyword = mobj.group("keyword").strip()
            conv_func = conv_funcs[mobj.group("kind")]
            if mobj.group("length"):
                data[cur_keyword] = list()
            else:
                data[cur_keyword] = conv_func(mobj.group("num"))[0]
            continue
        data[cur_keyword].extend(conv_func(line))
    return data



[docs]
def mo_coeffs_from_fchk_data(data: dict) -> MOCoeffs:
    occa = data["Number of alpha electrons"]
    occb = data["Number of beta electrons"]
    ensa = np.array(data["Alpha Orbital Energies"])
    nmo = len(ensa)
    occsa = np.zeros(nmo)
    # Does Gaussian store fractional occupation numbers somewhere?
    # For now we all of them at 1.0.
    occsa[:occa] = 1.0
    occsb = np.zeros(nmo)
    occsb[:occb] = 1.0
    Ca = np.array(data["Alpha MO coefficients"])
    Ca = Ca.reshape(nmo, nmo).T
    # Beta quantities may not be present
    try:
        Cb = np.array(data["Beta MO coefficients"])
        Cb = Cb.reshape(nmo, nmo).T
        ensb = np.array(data["Beta Orbital Energies"])
    except KeyError:
        Cb = Ca.copy()
        ensb = ensa.copy()
    mo_coeffs = MOCoeffs(
        Ca=Ca,
        ensa=ensa,
        occsa=occsa,
        Cb=Cb,
        ensb=ensb,
        occsb=occsb,
    )
    return mo_coeffs




[docs]
def all_energies_from_fchk_data(data: dict) -> np.ndarray:
    # GS and excitation energies
    gs_energy = data["SCF Energy"]
    etran = np.reshape(data["ETran state values"], (-1, 16))
    exc_ens = etran[:, 0]
    nstates = len(exc_ens)
    all_energies = np.zeros(nstates + 1)
    all_energies[0] = gs_energy
    all_energies[1:] += exc_ens
    return all_energies



@file_or_str(".fchk")
def shells_from_fchk(text, **kwargs):
    data = parse_fchk(text)

    def to_arr(key, dtype):
        return np.array(data[key], dtype=dtype)

    """
    These coordinates may not coincide with the basis functions?! Nonetheless,
    basis functions are usually atom-centered. But just to be sure we use the
    entries from 'Coordinates of each shell'.
    # coords3d = to_arr("Current cartesian coordinates", float).reshape(-1, 3)
    """
    atomic_nums = to_arr("Atomic numbers", int)

    centers = to_arr("Shell to atom map", int) - 1
    ang_moms = to_arr("Shell types", int)
    prims_per_shell = to_arr("Number of primitives per shell", int)

    prim_exponents = to_arr("Primitive exponents", float)
    prim_coeffs = to_arr("Contraction coefficients", float)
    try:
        p_prim_coeffs = to_arr("P(S=P) Contraction coefficients", float)
    except KeyError:
        p_prim_coeffs = np.zeros_like(prim_coeffs)
    shell_coords3d = to_arr("Coordinates of each shell", float).reshape(-1, 3)

    _shells = list()
    prim_ind = 0
    for i, (center_ind, L, prim_num) in enumerate(
        zip(centers, ang_moms, prims_per_shell)
    ):
        # 0: s, 1: p, -1: sp, 2: 6d, -2: 5d, 3: 10f, -3: 7f, etc...
        # Convert SP-shell with L = -1 to a s-shell with L = 0.
        # The p-shell is also created later.
        L, is_sp = (0, True) if (L == -1) else (L, False)

        coeffs = list()
        p_coeffs = list()
        exps = list()
        for _ in range(prim_num):
            coeffs.append(prim_coeffs[prim_ind])
            p_coeffs.append(p_prim_coeffs[prim_ind])
            exps.append(prim_exponents[prim_ind])
            prim_ind += 1
        atomic_num = atomic_nums[center_ind]
        center = shell_coords3d[i]

        def append_shell(coeffs, L=L):
            shell = Shell(
                L=abs(L),
                atomic_num=atomic_num,
                center=center,
                coeffs=coeffs,
                exps=exps,
                center_ind=center_ind,
            )
            _shells.append(shell)

        # Create shell with actual L.
        # If we deal with a SP shell we also create the P shell below.
        append_shell(coeffs)
        if is_sp:
            # Explicitly create p-shell
            append_shell(p_coeffs, L=1)
    shells = FCHKShells(_shells, **kwargs)
    return shells



[docs]
def atoms_from_data(data):
    atomic_numbers = data["Atomic numbers"]
    atoms = tuple([INV_ATOMIC_NUMBERS[Z] for Z in atomic_numbers])
    return atoms



@file_or_str(".fchk")
def wavefunction_from_fchk(text, **kwargs):
    kwargs = kwargs.copy()
    shell_kwargs = kwargs.pop("shell_kwargs", {})

    data = parse_fchk(text)

    charge = data["Charge"]
    mult = data["Multiplicity"]
    atoms = atoms_from_data(data)
    coords = data["Current cartesian coordinates"]
    unrestricted = "Beta Orbital Energies" in data

    def mos_for_spin(spin):
        mo_ens = data[f"{spin} Orbital Energies"]
        mo_num = len(mo_ens)
        mo_coeffs = np.array(data[f"{spin} MO coefficients"])
        mo_coeffs = mo_coeffs.reshape(mo_num, mo_num)
        return mo_coeffs

    C_a = mos_for_spin("Alpha").T
    occ_a = data["Number of alpha electrons"]
    if unrestricted:
        C_b = mos_for_spin("Beta").T
        occ_b = data["Number of beta electrons"]
    else:
        C_b = C_a.copy()
        occ_b = occ_a
    C = np.stack((C_a, C_b))
    shells = shells_from_fchk(text, **shell_kwargs)
    _, nao, _ = C.shape

    bf_type = {
        shells.cart_size: BFType.CARTESIAN,
        shells.sph_size: BFType.PURE_SPHERICAL,
    }[nao]

    return Wavefunction(
        atoms=atoms,
        coords=coords,
        charge=charge,
        mult=mult,
        unrestricted=unrestricted,
        occ=(occ_a, occ_b),
        C=C,
        bf_type=bf_type,
        shells=shells,
        **kwargs,
    )


@file_or_str(".fchk")
def geom_from_fchk(text, data=None, **geom_kwargs):
    if data is None:
        data = parse_fchk(text)
    atoms = atoms_from_data(data)

    try:
        coords = data["Opt point       1 Geometries"]
    except KeyError:
        coords = data["Current cartesian coordinates"]
    coords = np.array(coords)
    coords = coords.reshape(-1, 3 * len(atoms))

    geoms = [Geometry(atoms, coords_, **geom_kwargs) for coords_ in coords]
    if len(geoms) == 1:
        geoms = geoms[0]
    return geoms


@file_or_str(".fchk")
def geom_with_hessian_from_fchk(text, **geom_kwargs):
    data = parse_fchk(text)
    geom = geom_from_fchk(text, data=data, **geom_kwargs)

    # atoms = atoms_from_data(data)
    # coords = data["Current cartesian coordinates"]
    # coords = np.array(coords)
    # coords = coords.reshape(-1, 3 * len(atoms))
    # geom = Geometry(atoms, coords, **geom_kwargs)

    natoms3 = geom.coords3d.size

    # Energy
    geom.energy = data["Total Energy"]
    # Gradient
    gradient = np.array(data["Cartesian Gradient"])
    geom.cart_gradient = gradient
    # Hessian
    H = np.empty((natoms3, natoms3))
    tril = np.tril_indices(natoms3)
    triu1 = np.triu_indices(natoms3, k=1)
    H[tril] = data["Cartesian Force Constants"]
    H[triu1] = H.T[triu1]
    geom.cart_hessian = H

    return geom



[docs]
@functools.singledispatch
def get_relaxed_density(fchk_data: dict, key="CI"):
    nbas = fchk_data["Number of basis functions"]
    tot_dens = np.empty((nbas, nbas))

    # alpha + beta
    sym_mat_from_tril(tot_dens, fchk_data[f"Total {key} Density"])
    # Unrestricted case; potentially different alpha and beta parts
    try:
        spin_dens = np.empty((nbas, nbas))
        # alpha - beta
        sym_mat_from_tril(spin_dens, fchk_data[f"Spin {key} Density"])
        # (alpha + beta + alpha - beta) / 2.0
        dens_a = (tot_dens + spin_dens) / 2.0
        # alpha + beta - alpha
        dens_b = tot_dens - dens_a
    # Restricted case, just replicate alpha part for beta. The density already
    # seems to include a factor of 2.0 in the restriced case. We remove it, so
    # (identical) alpha and beta parts add up to the total density.
    except KeyError:
        dens_a = tot_dens / 2.0
        dens_b = dens_a

    relaxed_dens = np.stack((dens_a, dens_b))
    return relaxed_dens



@get_relaxed_density.register(str)
@get_relaxed_density.register(Path)
def _(fn: Union[str, Path], **kwargs):
    data = parse_fchk(fn)
    return get_relaxed_density(data, **kwargs)