Source code for pysisyphus.optimizers.StabilizedQNMethod

# [1] https://aip.scitation.org/doi/pdf/10.1063/1.4905665?class=pdf

import numpy as np

from pysisyphus.optimizers.Optimizer import Optimizer
from pysisyphus.intcoords.setup import get_bond_mat
from pysisyphus.optimizers.restrict_step import scale_by_max_step


[docs] class StabilizedQNMethod(Optimizer): def __init__( self, geometry, alpha=0.5, alpha_max=1, alpha_stretch=0.5, alpha_stretch_max=1, eps=1e-4, hist_max=10, E_thresh=1e-6, bio=True, trust_radius=0.1, linesearch=True, **kwargs, ): super().__init__(geometry, **kwargs) self.alpha = alpha self.alpha_max = alpha_max self.alpha_stretch = alpha_stretch self.alpha_stretch_max = alpha_stretch_max self.eps = eps self.hist_max = int(hist_max) self.E_thresh = E_thresh self.bio = bio self.trust_radius = trust_radius self.linesearch = linesearch self.log( f"Keeping at most information from {self.hist_max} " "previous cycles." ) self.alpha_start = self.alpha self.alpha_stretch_start = self.alpha_stretch self.gradients_for_precon = list() self.coords_for_precon = list() self.stretch_proj_signs = list()
[docs] def prepare_opt(self): pass
@property def n_hist(self): return len(self.steps_normed)
[docs] def bio_mode(self, gradient): bond_mat = get_bond_mat(self.geometry) atom_num = len(self.geometry.atoms) bond_vec_empty = np.zeros((atom_num, 3)) c3d = self.geometry.coords3d bond_vectors = list() unique_bonds = set( [(min(m, k), max(m, k)) for m, k in zip(*np.where(bond_mat == True))] ) for m, k in unique_bonds: bm = bond_vec_empty.copy() bm[k] = c3d[k] - c3d[m] bm[m] = -bm[k] bond_vectors.append(bm) # The 3d array is reshaped to 2d, so its last dimension matches # the gradient's dimension. LHS and RHS are accessible by simple # dot products (see [1] (27)). bond_vectors = np.array(bond_vectors).reshape(-1, gradient.size) lhs = bond_vectors.dot(gradient) self.stretch_proj_signs.append(np.sign(lhs).astype(int)) # Bond-vector overlap matrix. rhs = bond_vectors.dot(bond_vectors.T) coeffs = np.linalg.solve(rhs, lhs) stretch_gradient = np.einsum("i,ij->j", coeffs, bond_vectors) remainder_gradient = gradient - stretch_gradient return stretch_gradient, remainder_gradient
[docs] def adjust_alpha_stretch(self): try: # -1 when signs changed, 1 when sign is constant mult = self.stretch_proj_signs[-1] * self.stretch_proj_signs[-2] except IndexError: self.log("Can't update alpha_stretch yet!") return except ValueError: self.log( "Number of bonds found changed between cycles! Skipping " "adjustment of alpha_stretch." ) return constant_signs = np.sum(mult == 1) fraction = constant_signs / mult.size if fraction >= (2 / 3): self.alpha_stretch = min(self.alpha_stretch_max, 1.1 * self.alpha_stretch) msg = "Increased" else: self.alpha_stretch *= 1 / 1.1 msg = "Decreased" self.log(f"{msg} alpha_stretch to {self.alpha_stretch:.6f}")
[docs] def adjust_alpha(self, gradient, precon_gradient): # Overlap between full and preconditioned gradient. grad_ovlp = ( gradient.dot(precon_gradient) / np.linalg.norm(gradient) / np.linalg.norm(precon_gradient) ) if grad_ovlp > 0.2: self.alpha = min(self.alpha_max, 1.1 * self.alpha) else: self.alpha *= 0.85 self.log( "Overlap between full gradient and preconditioned gradient " f"is {grad_ovlp:.4f}. New alpha={self.alpha:.4f}." )
[docs] def precondition_gradient(self, gradient, steps, grad_diffs, eps): # Construct overlap matrix S between normalized steps # step_norms = np.linalg.norm(steps, axis=1) steps_normed = steps / step_norms[:, None] S = np.einsum("ij,jk->ik", steps_normed, steps_normed.T) # Determine significant subspace by checking the eigenvalues of # the overlap matrix. w, v = np.linalg.eigh(S) # Significant subspace indices significant = (w / w.max()) > eps ndim = np.sum(significant) self.log(f"ndim = {ndim}") # Continue only with eigenvalues and -vectors in the significant # subspace. eigvals = w[significant] eigvecs = v[:, significant] # Transform steps and gradient differences to significant subspace norm_fac = 1 / eigvals ** (1 / 2) # Eq. (8) in [1] steps_normed_sub = norm_fac * np.einsum("ki,kj->ji", eigvecs, steps_normed) # Eq. (11) in [1] eigvecs_weighted = eigvecs / np.array(step_norms)[:, None] grad_diffs_sub = norm_fac * np.einsum("ki,kj->ji", eigvecs_weighted, grad_diffs) # Assemble approximate hessian hess_approx = np.einsum("ij,ik->jk", grad_diffs_sub, steps_normed_sub) hess_approx = (hess_approx + hess_approx.T) / 2 hess_w, hess_v = np.linalg.eigh(hess_approx) # Calculate step directions and gradient difference in the original # space. Eq. (15) proj_v = np.einsum("ki,jk->ji", hess_v, steps_normed_sub) proj_dg = np.einsum("ki,jk->ji", hess_v, grad_diffs_sub) residuals = np.linalg.norm(proj_dg - hess_w * proj_v, axis=0) eigvals_mod = np.sqrt(hess_w ** 2 + residuals ** 2) # precon_grad = np.einsum("i,j,ij,ij->i", cur_grad, 1/eigvals_mod, proj_v, proj_v) precon_grad = np.einsum( "i,j,ij,ij->i", gradient, 1 / eigvals_mod, proj_v, proj_v ) projector = proj_v.dot(proj_v.T) perp_projector = np.eye(gradient.size) - projector perp_grad = perp_projector.dot(gradient) # Alternative formulation # perp_grad_ = gradient - (proj_v.T.dot(gradient) * proj_v).sum(axis=1) # np.testing.assert_allclose(perp_grad_, perp_grad, atol=1e-16) self.adjust_alpha(gradient, precon_grad) tot_precon_gradient = precon_grad + self.alpha * perp_grad return tot_precon_gradient
[docs] def optimize(self): gradient = self.geometry.gradient energy = self.geometry.energy self.forces.append(-gradient) self.energies.append(energy) self.log(f"norm(forces)={np.linalg.norm(gradient):.4e}") if self.bio: stretch_gradient, remainder_gradient = self.bio_mode(gradient) self.adjust_alpha_stretch() # Steepest descent against the stretch_gradient stretch_step = -self.alpha_stretch * stretch_gradient new_coords = self.geometry.coords + stretch_step self.geometry.coords = new_coords # Use only the remaining gradient for the rest of this method gradient = remainder_gradient self.gradients_for_precon.append(gradient) self.coords_for_precon.append(self.geometry.coords.copy()) if len(self.coords_for_precon) > 2: steps = np.diff(self.coords_for_precon, axis=0)[-self.hist_max :] grad_diffs = np.diff(self.gradients_for_precon, axis=0)[-self.hist_max :] self.log( "Preconditioning gradient with information from " f"{len(steps)+1} previous cycles." ) precon_grad = self.precondition_gradient( gradient, steps, grad_diffs, self.eps ) step = -precon_grad else: self.log("Took pure steepest descent step.") step = self.alpha * -gradient scale_by_max_step(step, self.trust_radius) if self.linesearch: # Calculate energy at new geometry new_coords = self.geometry.coords + step _ = self.geometry.get_energy_and_forces_at(new_coords) new_energy = _["energy"] delta_energy = new_energy - energy self.log( f"Current energy is {energy:.6f} au. New energy is " f"{new_energy:.6f} au, ΔE={delta_energy:.6f} au." ) energy_rise_too_big = new_energy > (energy + self.E_thresh) alpha_still_big_enough = self.alpha > (self.alpha_start / 10) alpha_stre_still_big_enough = self.alpha_stretch > ( self.alpha_stretch_start / 10 ) if ( energy_rise_too_big and alpha_still_big_enough and alpha_stre_still_big_enough ): self.log(f"Energy increased by {delta_energy:.6f} au") self.gradients_for_precon = self.gradients_for_precon[-2:-1] self.coords_for_precon = self.coords_for_precon[-2:-1] self.log("Resetted history.") self.alpha /= 2 self.alpha_stretch /= 2 self.log(f"Decreased alpha to {self.alpha}") self.log("Reverting bad step") return None return step