Source code for pysisyphus.optimizers.StabilizedQNMethod

# [1] https://aip.scitation.org/doi/pdf/10.1063/1.4905665?class=pdf

import numpy as np

from pysisyphus.optimizers.Optimizer import Optimizer
from pysisyphus.intcoords.setup import get_bond_mat
from pysisyphus.optimizers.restrict_step import scale_by_max_step



[docs]
class StabilizedQNMethod(Optimizer):
    def __init__(
        self,
        geometry,
        alpha=0.5,
        alpha_max=1,
        alpha_stretch=0.5,
        alpha_stretch_max=1,
        eps=1e-4,
        hist_max=10,
        E_thresh=1e-6,
        bio=True,
        trust_radius=0.1,
        linesearch=True,
        **kwargs,
    ):
        super().__init__(geometry, **kwargs)

        self.alpha = alpha
        self.alpha_max = alpha_max
        self.alpha_stretch = alpha_stretch
        self.alpha_stretch_max = alpha_stretch_max
        self.eps = eps
        self.hist_max = int(hist_max)
        self.E_thresh = E_thresh
        self.bio = bio
        self.trust_radius = trust_radius
        self.linesearch = linesearch

        self.log(
            f"Keeping at most information from {self.hist_max} " "previous cycles."
        )

        self.alpha_start = self.alpha
        self.alpha_stretch_start = self.alpha_stretch

        self.gradients_for_precon = list()
        self.coords_for_precon = list()
        self.stretch_proj_signs = list()


[docs]
    def prepare_opt(self):
        pass


    @property
    def n_hist(self):
        return len(self.steps_normed)


[docs]
    def bio_mode(self, gradient):
        bond_mat = get_bond_mat(self.geometry)
        atom_num = len(self.geometry.atoms)
        bond_vec_empty = np.zeros((atom_num, 3))
        c3d = self.geometry.coords3d
        bond_vectors = list()
        unique_bonds = set(
            [(min(m, k), max(m, k)) for m, k in zip(*np.where(bond_mat == True))]
        )
        for m, k in unique_bonds:
            bm = bond_vec_empty.copy()
            bm[k] = c3d[k] - c3d[m]
            bm[m] = -bm[k]
            bond_vectors.append(bm)
        # The 3d array is reshaped to 2d, so its last dimension matches
        # the gradient's dimension. LHS and RHS are accessible by simple
        # dot products (see [1] (27)).
        bond_vectors = np.array(bond_vectors).reshape(-1, gradient.size)
        lhs = bond_vectors.dot(gradient)
        self.stretch_proj_signs.append(np.sign(lhs).astype(int))
        # Bond-vector overlap matrix.
        rhs = bond_vectors.dot(bond_vectors.T)
        coeffs = np.linalg.solve(rhs, lhs)
        stretch_gradient = np.einsum("i,ij->j", coeffs, bond_vectors)
        remainder_gradient = gradient - stretch_gradient
        return stretch_gradient, remainder_gradient



[docs]
    def adjust_alpha_stretch(self):
        try:
            # -1 when signs changed, 1 when sign is constant
            mult = self.stretch_proj_signs[-1] * self.stretch_proj_signs[-2]
        except IndexError:
            self.log("Can't update alpha_stretch yet!")
            return
        except ValueError:
            self.log(
                "Number of bonds found changed between cycles! Skipping "
                "adjustment of alpha_stretch."
            )
            return
        constant_signs = np.sum(mult == 1)
        fraction = constant_signs / mult.size
        if fraction >= (2 / 3):
            self.alpha_stretch = min(self.alpha_stretch_max, 1.1 * self.alpha_stretch)
            msg = "Increased"
        else:
            self.alpha_stretch *= 1 / 1.1
            msg = "Decreased"
        self.log(f"{msg} alpha_stretch to {self.alpha_stretch:.6f}")



[docs]
    def adjust_alpha(self, gradient, precon_gradient):
        # Overlap between full and preconditioned gradient.
        grad_ovlp = (
            gradient.dot(precon_gradient)
            / np.linalg.norm(gradient)
            / np.linalg.norm(precon_gradient)
        )
        if grad_ovlp > 0.2:
            self.alpha = min(self.alpha_max, 1.1 * self.alpha)
        else:
            self.alpha *= 0.85
        self.log(
            "Overlap between full gradient and preconditioned gradient "
            f"is {grad_ovlp:.4f}. New alpha={self.alpha:.4f}."
        )



[docs]
    def precondition_gradient(self, gradient, steps, grad_diffs, eps):
        # Construct overlap matrix S between normalized steps
        #
        step_norms = np.linalg.norm(steps, axis=1)
        steps_normed = steps / step_norms[:, None]
        S = np.einsum("ij,jk->ik", steps_normed, steps_normed.T)
        # Determine significant subspace by checking the eigenvalues of
        # the overlap matrix.
        w, v = np.linalg.eigh(S)
        # Significant subspace indices
        significant = (w / w.max()) > eps
        ndim = np.sum(significant)
        self.log(f"ndim = {ndim}")
        # Continue only with eigenvalues and -vectors in the significant
        # subspace.
        eigvals = w[significant]
        eigvecs = v[:, significant]

        # Transform steps and gradient differences to significant subspace
        norm_fac = 1 / eigvals ** (1 / 2)
        # Eq. (8) in [1]
        steps_normed_sub = norm_fac * np.einsum("ki,kj->ji", eigvecs, steps_normed)

        # Eq. (11) in [1]
        eigvecs_weighted = eigvecs / np.array(step_norms)[:, None]
        grad_diffs_sub = norm_fac * np.einsum("ki,kj->ji", eigvecs_weighted, grad_diffs)

        # Assemble approximate hessian
        hess_approx = np.einsum("ij,ik->jk", grad_diffs_sub, steps_normed_sub)
        hess_approx = (hess_approx + hess_approx.T) / 2
        hess_w, hess_v = np.linalg.eigh(hess_approx)

        # Calculate step directions and gradient difference in the original
        # space. Eq. (15)
        proj_v = np.einsum("ki,jk->ji", hess_v, steps_normed_sub)
        proj_dg = np.einsum("ki,jk->ji", hess_v, grad_diffs_sub)

        residuals = np.linalg.norm(proj_dg - hess_w * proj_v, axis=0)
        eigvals_mod = np.sqrt(hess_w ** 2 + residuals ** 2)

        # precon_grad = np.einsum("i,j,ij,ij->i", cur_grad, 1/eigvals_mod, proj_v, proj_v)
        precon_grad = np.einsum(
            "i,j,ij,ij->i", gradient, 1 / eigvals_mod, proj_v, proj_v
        )

        projector = proj_v.dot(proj_v.T)
        perp_projector = np.eye(gradient.size) - projector
        perp_grad = perp_projector.dot(gradient)
        # Alternative formulation
        # perp_grad_ = gradient - (proj_v.T.dot(gradient) * proj_v).sum(axis=1)
        # np.testing.assert_allclose(perp_grad_, perp_grad, atol=1e-16)

        self.adjust_alpha(gradient, precon_grad)
        tot_precon_gradient = precon_grad + self.alpha * perp_grad
        return tot_precon_gradient



[docs]
    def optimize(self):
        gradient = self.geometry.gradient
        energy = self.geometry.energy
        self.forces.append(-gradient)
        self.energies.append(energy)
        self.log(f"norm(forces)={np.linalg.norm(gradient):.4e}")

        if self.bio:
            stretch_gradient, remainder_gradient = self.bio_mode(gradient)
            self.adjust_alpha_stretch()
            # Steepest descent against the stretch_gradient
            stretch_step = -self.alpha_stretch * stretch_gradient
            new_coords = self.geometry.coords + stretch_step
            self.geometry.coords = new_coords
            # Use only the remaining gradient for the rest of this method
            gradient = remainder_gradient

        self.gradients_for_precon.append(gradient)
        self.coords_for_precon.append(self.geometry.coords.copy())

        if len(self.coords_for_precon) > 2:
            steps = np.diff(self.coords_for_precon, axis=0)[-self.hist_max :]
            grad_diffs = np.diff(self.gradients_for_precon, axis=0)[-self.hist_max :]

            self.log(
                "Preconditioning gradient with information from "
                f"{len(steps)+1} previous cycles."
            )
            precon_grad = self.precondition_gradient(
                gradient, steps, grad_diffs, self.eps
            )
            step = -precon_grad
        else:
            self.log("Took pure steepest descent step.")
            step = self.alpha * -gradient
            scale_by_max_step(step, self.trust_radius)

        if self.linesearch:
            # Calculate energy at new geometry
            new_coords = self.geometry.coords + step
            _ = self.geometry.get_energy_and_forces_at(new_coords)
            new_energy = _["energy"]
            delta_energy = new_energy - energy
            self.log(
                f"Current energy is {energy:.6f} au. New energy is "
                f"{new_energy:.6f} au, ΔE={delta_energy:.6f} au."
            )

            energy_rise_too_big = new_energy > (energy + self.E_thresh)
            alpha_still_big_enough = self.alpha > (self.alpha_start / 10)
            alpha_stre_still_big_enough = self.alpha_stretch > (
                self.alpha_stretch_start / 10
            )
            if (
                energy_rise_too_big
                and alpha_still_big_enough
                and alpha_stre_still_big_enough
            ):
                self.log(f"Energy increased by {delta_energy:.6f} au")
                self.gradients_for_precon = self.gradients_for_precon[-2:-1]
                self.coords_for_precon = self.coords_for_precon[-2:-1]
                self.log("Resetted history.")
                self.alpha /= 2
                self.alpha_stretch /= 2

                self.log(f"Decreased alpha to {self.alpha}")
                self.log("Reverting bad step")
                return None

        return step