Source code for pysisyphus.tsoptimizers.RSPRFOptimizer

# See [1] https://pubs.acs.org/doi/pdf/10.1021/j100247a015
#         Banerjee, 1985
#     [2] https://aip.scitation.org/doi/abs/10.1063/1.2104507
#         Heyden, 2005
#     [3] https://onlinelibrary.wiley.com/doi/abs/10.1002/jcc.540070402
#         Baker, 1985
#     [4] https://link.springer.com/article/10.1007/s002140050387
#         Besalu, 1998


import numpy as np

from pysisyphus.tsoptimizers.TSHessianOptimizer import TSHessianOptimizer



[docs]
class RSPRFOptimizer(TSHessianOptimizer):

[docs]
    def optimize(self):
        energy, gradient, H, eigvals, eigvecs, resetted = self.housekeeping()
        self.update_ts_mode(eigvals, eigvecs)

        # Transform gradient to eigensystem of hessian
        gradient_trans = eigvecs.T.dot(gradient)
        # Minimize energy along all modes, except the TS-mode
        min_indices = [i for i in range(gradient_trans.size) if i not in self.roots]
        # Maximize energy along all requested modes.
        max_indices = [i for i in range(gradient_trans.size) if i in self.roots]
        # Get line search steps, if requested.
        ip_step_trans, gradient_trans = self.step_and_grad_from_line_search(
            energy,
            gradient_trans,
            eigvecs,
            min_indices,
            max_indices,
        )

        """In the RS-(P)RFO method we have to scale the matrices with alpha.
        Unscaled matrix (Eq. 8) in [1]:
            (H  g) (x)          (S 0) (x)
                       = lambda
            (g+ 0) (1)          (0 1) (1)
        with
            S = alpha * Identity matrix
        and multiplying from the left with the inverse of the scaling matrix
            (1/alpha 0)

            (0       1)
        we get
            (1/alpha 0) (H  g) (x)          (x)
                                   = lambda
            (0       1) (g+ 0) (1)          (1)
        eventually leading to the scaled matrix:
            (H/alpha  g/alpha) (x)          (x)
                                   = lambda     .
            (g+             0) (1)          (1)
        """

        alpha = self.alpha0
        for mu in range(self.max_micro_cycles):
            self.log(f"RS-PRFO micro cycle {mu:02d}, alpha={alpha:.6f}")

            # Maximize energy along the chosen TS mode. The matrix is hardcoded
            # as 2x2, so only first-order saddle point searches are supported.
            H_aug_max = self.get_augmented_hessian(
                eigvals[max_indices], gradient_trans[max_indices], alpha
            )
            step_max, eigval_max, nu_max, self.prev_eigvec_max = self.solve_rfo(
                H_aug_max, "max", prev_eigvec=self.prev_eigvec_max
            )

            # Minimize energy along all modes, but the TS mode.
            H_aug_min = self.get_augmented_hessian(
                eigvals[min_indices], gradient_trans[min_indices], alpha
            )
            step_min, eigval_min, nu_min, self.prev_eigvec_min = self.solve_rfo(
                H_aug_min, "min", prev_eigvec=self.prev_eigvec_min
            )

            # Calculate overlap between directions over the course of the micro cycles
            # if mu == 0:
            # TODO: convert back to original space
            # ref_step_max = step_max.copy()
            # ref_step_min = step_min.copy()
            min_norm = np.linalg.norm(step_min)
            max_norm = np.linalg.norm(step_max)
            self.log(f"norm(step_max)={max_norm:.6f}")
            self.log(f"norm(step_min)={min_norm:.6f}")
            self.log(f"norm(step_max)/norm(step_min)={max_norm/min_norm:.2%}")
            # Calculate overlaps with originally proposed step in mu == 0
            # TODO: convert back to original space
            # max_ovlp = ref_step_max @ step_max
            # min_ovlp = ref_step_min @ step_min

            # As of Eq. (8a) of [4] max_eigval and min_eigval also
            # correspond to:
            # max_eigval = -forces_trans[max_indices].dot(max_step)
            # min_eigval = -forces_trans[min_indices].dot(min_step)

            # Create the full PRFO step
            step = np.zeros_like(gradient_trans)
            step[max_indices] = step_max
            step[min_indices] = step_min
            step_norm = np.linalg.norm(step)
            self.log(f"norm(step)={step_norm:.6f}")

            inside_trust = step_norm <= self.trust_radius
            if inside_trust:
                self.log(
                    "Restricted step satisfies trust radius of "
                    f"{self.trust_radius:.6f}"
                )
                self.log(
                    f"Micro-cycles converged in cycle {mu:02d} with "
                    f"alpha={alpha:.6f}!"
                )
                break

            # Derivative of the squared step w.r.t. alpha
            # max subspace
            dstep2_dalpha_max = (
                2
                * eigval_max
                / (1 + step_max.dot(step_max) ** 2 * alpha)
                * np.sum(
                    gradient_trans[max_indices] ** 2
                    / (eigvals[max_indices] - eigval_max * alpha) ** 3
                )
            )
            # min subspace
            dstep2_dalpha_min = (
                2
                * eigval_min
                / (1 + step_min.dot(step_min) * alpha)
                * np.sum(
                    gradient_trans[min_indices] ** 2
                    / (eigvals[min_indices] - eigval_min * alpha) ** 3
                )
            )
            dstep2_dalpha = dstep2_dalpha_max + dstep2_dalpha_min
            # Update alpha
            alpha_step = (
                2 * (self.trust_radius * step_norm - step_norm**2) / dstep2_dalpha
            )
            alpha += alpha_step

        # Right now the step is still given in the Hessians eigensystem. We
        # transform it back now.
        step += ip_step_trans
        step = eigvecs.dot(step)
        step_norm = np.linalg.norm(step)

        # With max_micro_cycles = 1 the RS part is disabled and the step
        # probably isn't scaled correctly in the one micro cycle.
        # In this case we use a naive scaling if the step is too big.
        if (self.max_micro_cycles == 1) and (step_norm > self.trust_radius):
            step = step / step_norm * self.trust_radius
        self.log(f"norm(step)={np.linalg.norm(step):.6f}")

        # Eq. (6) from [4] seems erronous ... the prediction is usually only ~50%
        # of the actual change ...
        # predicted_energy_change = 1/2 * (eigval_max / nu_max**2 + eigval_min / nu_min**2)
        # self.predicted_energy_changes.append(predicted_energy_change)

        self.predicted_energy_changes.append(self.rfo_model(gradient, self.H, step))

        self.log("")
        return step