Source code for pysisyphus.diabatization.coulomb

import itertools as it
from typing import Optional
import warnings

from jinja2 import Template
import numpy as np

from pysisyphus.diabatization.helpers import get_random_U
from pysisyphus.diabatization.results import (
    DiabatizationResult,
    dia_result_from_jac_result,
)
from pysisyphus.helpers_pure import highlight_text
from pysisyphus.diabatization import logger
from pysisyphus.wavefunction.localization import JacobiSweepResult, edmiston_ruedenberg


ERTemplate = Template(
    """{{ name }}

Coulomb-Tensor R
----------------
{% for i in range(R|length) %}
R[{{ i }}, {{ i }}, {{ i }}, {{ i }}] = {{ "%14.6f" % R[i, i, i, i] }}
{%- endfor %}

"""
)



[docs]
def edmiston_ruedenberg_jacobi_sweeps(
    R: np.ndarray,
    random: bool = False,
    U=None,
    max_cycles: Optional[int] = None,
    dP_thresh: float = 1e-8,
) -> JacobiSweepResult:
    assert R.ndim == 4
    nstates = R.shape[0]
    assert all([dim == nstates for dim in R.shape])

    if max_cycles is None:
        max_cycles = nstates * 100

    if U is None:
        if random:
            U = get_random_U(nstates)
        else:
            U = np.eye(nstates)

    msg = ERTemplate.render(
        name=highlight_text("Edmiston-Ruedenberg-diabatization"),
        R=R,
    )
    logger.info(msg)

    D_prev = np.nan
    # Macro-cycles
    for i in range(max_cycles):
        # Calculate rotated Coulomb-tensor
        #
        # The rotated Coulomb tensor is a quartic function of the rotation matrix 'U'.
        # The exact contraction of the Coulomb tensor 'R' with 'U' depends on whether rows
        # or columns of 'U' are rotated/mixed.
        # When rows of 'U' are mixed/rotated we have to use
        #   R_rot = np.einsum("jJ,kK,lL,mM,JKLM->jklm", U, U, U, U, R, optimize="greedy") .
        # When columns of 'U' are mixed/rotated we have to use
        R_rot = np.einsum("Jj,Kk,Ll,Mm,JKLM->jklm", U, U, U, U, R, optimize="greedy")
        #
        # Even though this function is intended to be used for diabatization where 'U' will
        # be quiet small we chose to rotate columns of 'U'. This way, this function can also
        # be used for MO-localization, where the different MOs will be organized in columns.
        # For the diabatization of electronic states it does not matter if we rotate rows or
        # columns, as long as the rotation and the calculation of R_rot are done in a consistent
        # way.
        # The calculation of R_rot scales with nstates**5, but this will be no problem when
        # nstates remains small.

        # Edmiston-Ruedenberg cost-function
        D = np.einsum("IIII->", R_rot)
        # Cost-function change compared to previous macro-cycle
        dD = D - D_prev
        logger.info(f"Macro cycle {i: >3d}: {D=: >14.6f}, {dD=: >+12.6e}")
        if converged := (abs(dD) <= dP_thresh):
            logger.info("Converged!")
            break
        D_prev = D

        # Micro cycles
        #
        # Loop over all possible state pairs and determine the pair that promises the
        # greatest cost-function increase.
        term_max = None
        j_max = None
        k_max = None
        A_max = None
        B_max = None
        for j, k in it.combinations(range(nstates), 2):
            R1212 = R_rot[j, k, j, k]
            R1111 = R_rot[j, j, j, j]
            R1122 = R_rot[j, j, k, k]
            R2222 = R_rot[k, k, k, k]
            R1112 = R_rot[j, j, j, k]
            R1222 = R_rot[j, k, k, k]

            # Eq. (19) in [5
            A = R1212 - 0.25 * (R1111 - 2.0 * R1122 + R2222)
            B = R1112 - R1222
            # RHS of eq. (26) in [5]
            term = A + (A**2 + B**2) ** 0.5
            # logger.info(
            # f"\t{j=: >3d}, {k=: >3d}, {A=: >12.6f}, {B=: >12.6f}, {term=: >12.6f}"
            # )
            # Update pair that promises the greatest cost-function increase
            if (term_max is None) or term > term_max:
                j_max = j
                k_max = k
                term_max = term
                A_max = A
                B_max = B
        # End loop over all micro-cycles

        # When B is very small and A is negative, then A + (A**2 + B**2)**0.5 will be 0.0
        # and term_max will be 0.0 too. In this case no rotation will improve the cost-function
        # to an appreciable degree and we skip the rotation. This will result in convergence
        # in the next macro-cycle, as the cost-function will stay constant.
        if abs(term_max) <= 1e-12:
            continue

        j = j_max
        k = k_max
        A = A_max
        B = B_max
        if term_max is None:
            logger.warning("A**2 + B**2 is very small. Indicating convergence.")
            break
        # Denominator sqrt-term of eq. (19) in [5]
        sqrt_term = (A**2 + B**2) ** 0.5
        # Determine rotation angle according to right column on p. 460 of [5]
        # Calcuate cosine and sinus terms according to eq. (19) in [5]
        cos4a = -A / sqrt_term
        sin4a = B / sqrt_term
        # Two pairs (x1, y1) and (x2, y2)
        #
        # First pair
        x21 = 0.5 * (1.0 + (1.0 - 0.5 * (1 - cos4a)) ** 0.5)
        #                ^--- Note this sign
        x1 = x21**0.5
        y1 = (1.0 - x21) ** 0.5
        # Second pair
        x22 = 0.5 * (1.0 - (1.0 - 0.5 * (1 - cos4a)) ** 0.5)
        #                ^--- Note this sign
        x2 = x22**0.5
        y2 = (1.0 - x22) ** 0.5
        # Check which pair fulfills 4 * x * y * (x**2 - y**2) = sin(4a)
        for x, y in ((x1, y1), (x2, y2)):
            diff = 4 * x * y * (x**2 - y**2) - sin4a
            # Interestingly, diff can become quite 'big' sometimes (> 1e-8)
            if abs(diff) <= 5e-8:
                break
        else:
            raise Exception(
                f"Determination of rotation angle failed ({diff=: >12.6e})!"
            )
        # x of the correct pair corresponds to the cosine of the rotation angle
        rad = np.arccos(x)
        # deg = np.rad2deg(rad)
        # logger.info(f"\tRotation of {j=} and {k=} by {rad=: 10.6e} rad ({deg: >10.6f}°).")
        # Inplace rotation of matrix columns with indices 'j' and 'k' by 'rad' radians.
        # Eq. (15) in [5]
        # NOTE: whether we rotate/mix columns or rows of U determines how we have to rotate
        # the Coulomb-tensor at the beginning of a macro-cycle.
        cos = np.cos(rad)
        sin = np.sin(rad)
        j_rot = cos * U[:, j] + sin * U[:, k]
        k_rot = -sin * U[:, j] + cos * U[:, k]
        U[:, j] = j_rot
        U[:, k] = k_rot
    else:
        raise Exception(f"ER-localization did not converge after {max_cycles} cycles!")
    # End loop over macro-cycles

    result = JacobiSweepResult(
        is_converged=converged,
        cur_cycle=i,
        C=U,
        P=D,
    )
    return result




[docs]
def edmiston_ruedenberg_diabatization_jacobi(
    adia_ens: np.ndarray, R_tensor: np.ndarray, **kwargs
) -> DiabatizationResult:
    """Property-based diabatization using the Coulomb tensor and Jacobi sweeps.

    Similar to Edmiston-Ruedenberg-localization, but w/ electronic states.

    Parameters
    ----------
    adia_ens
        1d array of shape (nstates, ) containing adiabatic excitation energies
        in atomic units/Hartree.
    R_tensor
        4d array of shape (nstates, nstates, nstates, nstates). Coulomb tensor
        in the basis of the adiabatic electronic states.

    Returns
    -------
    dia_result
        Result of diabatization containing various quantities, e.g., the adiabatic-
        diabatic-transformation matrix.
    """
    jac_res = edmiston_ruedenberg_jacobi_sweeps(R_tensor, **kwargs)
    kind = "Edmiston-Ruedenberg"
    return dia_result_from_jac_result(
        kind,
        adia_ens,
        jac_res,
        R_tensor=R_tensor,
    )



# Shortcut
edmiston_ruedenberg_diabatization = edmiston_ruedenberg_diabatization_jacobi



[docs]
def edmiston_ruedenberg_diabatization_df(
    adia_ens: np.ndarray,
    df_tensor: np.ndarray,
    overlap_matrix: Optional[np.ndarray] = None,
    nruns: int = 25,
    max_cycles: int = 10_000,
) -> DiabatizationResult:
    """Property-based diabatization using the Coulomb tensor and density fitting.

    Similar to Edmiston-Ruedenberg-localization, but w/ electronic states.

    TODO: factor out the call to edmiston_ruedenberg() and put it in a separate function.

    Parameters
    ----------
    adia_ens
        1d array of shape (nstates, ) containing adiabatic excitation energies
        in atomic units/Hartree.
    df_tensor
        3d array of shape (naux, nstates, nstates). Coulomb tensor
        in the basis of the adiabatic electronic states.
    overlap_matrix
        Optional 2d array of shape (nstates, nstates). If not given, orthogonal
        electronic states are assumed, which should be the case in typical TDDFT
        calculations.
    nruns
        Number of initial conditions that are tested/number of macro cycles.
    max_cycles
        Positive intger, specifying the maximum number of micro cycles in a macro cycle.

    Returns
    -------
    dia_result
        Result of diabatization containing various quantities, e.g., the adiabatic-
        diabatic-transformation matrix.
    """
    assert nruns > 0
    assert df_tensor.ndim == 3
    nstates = df_tensor.shape[1]

    # Initial rotation matrices
    U0s = np.zeros((nruns, nstates, nstates))
    # Final, hopefully diabatized rotation matrices
    Us = np.zeros((nruns, nstates, nstates))
    jac_results = list()

    # Assume orthgonal states when no overlap matrix is given
    if overlap_matrix is None:
        overlap_matrix = np.eye(nstates)
    cost_funcs = np.zeros(nruns)
    diis_cycles = min(5, nstates)
    for i in range(nruns):
        # Generate random rotation matrix
        U0s[i] = get_random_U(nstates)
        # Use a copy of the rotation matrix as it is updated inplace
        jac_result = edmiston_ruedenberg(
            U0s[i].copy(),
            df_tensor,
            overlap_matrix,
            diis_cycles=diis_cycles,
            max_cycles=max_cycles,
        )
        jac_results.append(jac_result)
        if jac_result.is_converged:
            cost_funcs[i] = jac_result.P
            Us[i] = jac_result.C

    # Report result w/ highest value of cost_function
    max_ind = cost_funcs.argmax()
    best_jac_res = jac_results[max_ind]
    if not best_jac_res.is_converged:
        warnings.warn(
            f"Run {max_ind} has the maximum cost function value, but the optimization "
            f"did not converge after {max_cycles} cycles!"
        )
    kind = "Edmiston-Ruedenberg"
    return dia_result_from_jac_result(
        kind,
        adia_ens,
        best_jac_res,
        L_tensor=df_tensor,
    )