# See [1] https://pubs.acs.org/doi/pdf/10.1021/j100247a015
# Banerjee, 1985
# [2] https://aip.scitation.org/doi/abs/10.1063/1.2104507
# Heyden, 2005
# [3] https://onlinelibrary.wiley.com/doi/abs/10.1002/jcc.540070402
# Baker, 1985
# [4] https://link.springer.com/article/10.1007/s002140050387
# Besalu, 1998
import numpy as np
from pysisyphus.tsoptimizers.TSHessianOptimizer import TSHessianOptimizer
[docs]
class RSPRFOptimizer(TSHessianOptimizer):
[docs]
def optimize(self):
energy, gradient, H, eigvals, eigvecs, resetted = self.housekeeping()
self.update_ts_mode(eigvals, eigvecs)
# Transform gradient to eigensystem of hessian
gradient_trans = eigvecs.T.dot(gradient)
# Minimize energy along all modes, except the TS-mode
min_indices = [i for i in range(gradient_trans.size) if i not in self.roots]
# Maximize energy along all requested modes.
max_indices = [i for i in range(gradient_trans.size) if i in self.roots]
# Get line search steps, if requested.
ip_step_trans, gradient_trans = self.step_and_grad_from_line_search(
energy,
gradient_trans,
eigvecs,
min_indices,
max_indices,
)
"""In the RS-(P)RFO method we have to scale the matrices with alpha.
Unscaled matrix (Eq. 8) in [1]:
(H g) (x) (S 0) (x)
= lambda
(g+ 0) (1) (0 1) (1)
with
S = alpha * Identity matrix
and multiplying from the left with the inverse of the scaling matrix
(1/alpha 0)
(0 1)
we get
(1/alpha 0) (H g) (x) (x)
= lambda
(0 1) (g+ 0) (1) (1)
eventually leading to the scaled matrix:
(H/alpha g/alpha) (x) (x)
= lambda .
(g+ 0) (1) (1)
"""
alpha = self.alpha0
for mu in range(self.max_micro_cycles):
self.log(f"RS-PRFO micro cycle {mu:02d}, alpha={alpha:.6f}")
# Maximize energy along the chosen TS mode. The matrix is hardcoded
# as 2x2, so only first-order saddle point searches are supported.
H_aug_max = self.get_augmented_hessian(
eigvals[max_indices], gradient_trans[max_indices], alpha
)
step_max, eigval_max, nu_max, self.prev_eigvec_max = self.solve_rfo(
H_aug_max, "max", prev_eigvec=self.prev_eigvec_max
)
# Minimize energy along all modes, but the TS mode.
H_aug_min = self.get_augmented_hessian(
eigvals[min_indices], gradient_trans[min_indices], alpha
)
step_min, eigval_min, nu_min, self.prev_eigvec_min = self.solve_rfo(
H_aug_min, "min", prev_eigvec=self.prev_eigvec_min
)
# Calculate overlap between directions over the course of the micro cycles
# if mu == 0:
# TODO: convert back to original space
# ref_step_max = step_max.copy()
# ref_step_min = step_min.copy()
min_norm = np.linalg.norm(step_min)
max_norm = np.linalg.norm(step_max)
self.log(f"norm(step_max)={max_norm:.6f}")
self.log(f"norm(step_min)={min_norm:.6f}")
self.log(f"norm(step_max)/norm(step_min)={max_norm/min_norm:.2%}")
# Calculate overlaps with originally proposed step in mu == 0
# TODO: convert back to original space
# max_ovlp = ref_step_max @ step_max
# min_ovlp = ref_step_min @ step_min
# As of Eq. (8a) of [4] max_eigval and min_eigval also
# correspond to:
# max_eigval = -forces_trans[max_indices].dot(max_step)
# min_eigval = -forces_trans[min_indices].dot(min_step)
# Create the full PRFO step
step = np.zeros_like(gradient_trans)
step[max_indices] = step_max
step[min_indices] = step_min
step_norm = np.linalg.norm(step)
self.log(f"norm(step)={step_norm:.6f}")
inside_trust = step_norm <= self.trust_radius
if inside_trust:
self.log(
"Restricted step satisfies trust radius of "
f"{self.trust_radius:.6f}"
)
self.log(
f"Micro-cycles converged in cycle {mu:02d} with "
f"alpha={alpha:.6f}!"
)
break
# Derivative of the squared step w.r.t. alpha
# max subspace
dstep2_dalpha_max = (
2
* eigval_max
/ (1 + step_max.dot(step_max) ** 2 * alpha)
* np.sum(
gradient_trans[max_indices] ** 2
/ (eigvals[max_indices] - eigval_max * alpha) ** 3
)
)
# min subspace
dstep2_dalpha_min = (
2
* eigval_min
/ (1 + step_min.dot(step_min) * alpha)
* np.sum(
gradient_trans[min_indices] ** 2
/ (eigvals[min_indices] - eigval_min * alpha) ** 3
)
)
dstep2_dalpha = dstep2_dalpha_max + dstep2_dalpha_min
# Update alpha
alpha_step = (
2 * (self.trust_radius * step_norm - step_norm**2) / dstep2_dalpha
)
alpha += alpha_step
# Right now the step is still given in the Hessians eigensystem. We
# transform it back now.
step += ip_step_trans
step = eigvecs.dot(step)
step_norm = np.linalg.norm(step)
# With max_micro_cycles = 1 the RS part is disabled and the step
# probably isn't scaled correctly in the one micro cycle.
# In this case we use a naive scaling if the step is too big.
if (self.max_micro_cycles == 1) and (step_norm > self.trust_radius):
step = step / step_norm * self.trust_radius
self.log(f"norm(step)={np.linalg.norm(step):.6f}")
# Eq. (6) from [4] seems erronous ... the prediction is usually only ~50%
# of the actual change ...
# predicted_energy_change = 1/2 * (eigval_max / nu_max**2 + eigval_min / nu_min**2)
# self.predicted_energy_changes.append(predicted_energy_change)
self.predicted_energy_changes.append(self.rfo_model(gradient, self.H, step))
self.log("")
return step