# Source code for pysisyphus.optimizers.HessianOptimizer

from math import sqrt
from pathlib import Path
from typing import Literal, Optional

import numpy as np
from scipy.optimize import root_scalar

from pysisyphus.cos.ChainOfStates import ChainOfStates
from pysisyphus.Geometry import Geometry
from pysisyphus.helpers_pure import rms
from pysisyphus.io.hessian import save_hessian
from pysisyphus.optimizers.guess_hessians import (
    get_guess_hessian,
    xtb_hessian,
    HessInit,
)
from pysisyphus.optimizers.hessian_updates import (
    bfgs_update,
    flowchart_update,
    damped_bfgs_update,
    bofill_update,
    ts_bfgs_update,
    ts_bfgs_update_org,
    ts_bfgs_update_revised,
)
from pysisyphus.optimizers.Optimizer import Optimizer
from pysisyphus.optimizers.exceptions import OptimizationError


def dummy_hessian_update(H, dx, dg):
    """Placeholder update that leaves the Hessian unchanged.

    Returns a zero matrix shaped like H together with the label "no",
    mirroring the (dH, key) contract of the real update functions.
    """
    zero_correction = np.zeros(H.shape, dtype=H.dtype)
    return zero_correction, "no"
# Mapping from user-facing keys to the actual Hessian update functions.
# "none"/None/False all disable updating via the no-op dummy update.
HESS_UPDATE_FUNCS = {
    "none": dummy_hessian_update,
    None: dummy_hessian_update,
    False: dummy_hessian_update,
    "bfgs": bfgs_update,
    "damped_bfgs": damped_bfgs_update,
    "flowchart": flowchart_update,
    "bofill": bofill_update,
    "ts_bfgs": ts_bfgs_update,
    "ts_bfgs_org": ts_bfgs_update_org,
    "ts_bfgs_rev": ts_bfgs_update_revised,
}
# Allowed values for the 'hessian_update' argument of HessianOptimizer.
HessUpdate = Literal[
    "none",
    None,
    False,
    "bfgs",
    "damped_bfgs",
    "flowchart",
    "bofill",
    "ts_bfgs",
    "ts_bfgs_org",
    "ts_bfgs_rev",
]
class HessianOptimizer(Optimizer):
    """Base class for optimizers that utilize Hessian information."""

    # (index into ascending-sorted eigenvalues, log label):
    # 0 selects the smallest eigenvalue (minimization),
    # -1 selects the largest eigenvalue (maximization). Used in solve_rfo.
    rfo_dict = {
        "min": (0, "min"),
        "max": (-1, "max"),
    }
    def __init__(
        self,
        geometry: Geometry,
        trust_radius: float = 0.5,
        trust_update: bool = True,
        trust_min: float = 0.1,
        trust_max: float = 1,
        max_energy_incr: Optional[float] = None,
        hessian_update: HessUpdate = "bfgs",
        hessian_init: HessInit = "fischer",
        hessian_recalc: Optional[int] = None,
        hessian_recalc_adapt: Optional[float] = None,
        hessian_xtb: bool = False,
        hessian_recalc_reset: bool = False,
        small_eigval_thresh: float = 1e-8,
        line_search: bool = False,
        alpha0: float = 1.0,
        max_micro_cycles: int = 25,
        rfo_overlaps: bool = False,
        **kwargs,
    ) -> None:
        """Baseclass for optimizers utilizing Hessian information.

        Parameters
        ----------
        geometry
            Geometry to be optimized.
        trust_radius
            Initial trust radius in whatever unit the optimization is carried
            out.
        trust_update
            Whether to update the trust radius throughout the optimization.
        trust_min
            Minimum trust radius.
        trust_max
            Maximum trust radius.
        max_energy_incr
            Maximum allowed energy increase after a faulty step. Optimization
            is aborted when the threshold is exceeded.
        hessian_update
            Type of Hessian update. Defaults to BFGS for minimizations and
            Bofill for saddle point searches.
        hessian_init
            Type of initial model Hessian.
        hessian_recalc
            Recalculate exact Hessian every n-th cycle instead of updating it.
        hessian_recalc_adapt
            Use a more flexible scheme to determine Hessian recalculation.
            Undocumented.
        hessian_xtb
            Recalculate the Hessian at the GFN2-XTB level of theory.
        hessian_recalc_reset
            Whether to skip Hessian recalculation after reset. Undocumented.
        small_eigval_thresh
            Threshold for small eigenvalues. Eigenvectors belonging to
            eigenvalues below this threshold are discarded.
        line_search
            Whether to carry out a line search. Not implemented by all
            subclassing optimizers.
        alpha0
            Initial alpha for restricted-step (RS) procedure.
        max_micro_cycles
            Maximum number of RS iterations.
        rfo_overlaps
            Enable mode-following in RS procedure.

        Other Parameters
        ----------------
        **kwargs
            Keyword arguments passed to the Optimizer baseclass.
        """
        super().__init__(geometry, **kwargs)

        # Hessian-based optimization of whole COS objects is not supported.
        assert not issubclass(
            type(geometry), ChainOfStates
        ), "HessianOptimizer can't be used for and ChainOfStates objects!"

        self.trust_update = bool(trust_update)
        assert trust_min <= trust_max, "trust_min must be <= trust_max!"
        self.trust_min = float(trust_min)
        self.trust_max = float(trust_max)
        self.max_energy_incr = max_energy_incr
        # Constrain initial trust radius if trust_max > trust_radius
        self.trust_radius = min(trust_radius, trust_max)
        self.log(f"Initial trust radius: {self.trust_radius:.6f}")
        self.hessian_update = hessian_update
        # Resolve the update key to the actual update function.
        self.hessian_update_func = HESS_UPDATE_FUNCS[hessian_update]
        self.hessian_init = hessian_init
        self.hessian_recalc = hessian_recalc
        self.hessian_recalc_adapt = hessian_recalc_adapt
        self.hessian_xtb = hessian_xtb
        self.hessian_recalc_reset = hessian_recalc_reset
        self.small_eigval_thresh = float(small_eigval_thresh)
        self.line_search = bool(line_search)
        # Restricted-step related
        self.alpha0 = alpha0
        self.max_micro_cycles = int(max_micro_cycles)
        assert max_micro_cycles >= 0
        self.rfo_overlaps = rfo_overlaps

        assert self.small_eigval_thresh > 0.0, "small_eigval_thresh must be > 0.!"
        # On restarts this state is restored via _set_opt_restart_info.
        if not self.restarted:
            self.hessian_recalc_in = None
            self.adapt_norm = None
            self.predicted_energy_changes = list()

        # 'hessian_init' may also be a path to a saved Hessian file.
        hessian_init_exists = Path(self.hessian_init).exists()
        if (
            # Allow actually calculated Hessians for all coordinate systems
            not hessian_init_exists
            and self.hessian_init not in ("calc", "xtb", "xtb1", "xtbff")
            # But disable model Hessian for Cartesian optimizations
            and self.geometry.coord_type in ("cart", "cartesian", "mwcartesian")
        ):
            self.hessian_init = "unit"
            self.log(
                f"Chosen initial (model) Hessian is incompatible with current "
                f"coord_type: {self.geometry.coord_type}!"
            )

        # Previously followed eigenvectors for RS mode-following.
        self._prev_eigvec_min = None
        self._prev_eigvec_max = None
@property def prev_eigvec_min(self): return self._prev_eigvec_min @prev_eigvec_min.setter def prev_eigvec_min(self, prev_eigvec_min): if self.rfo_overlaps: self._prev_eigvec_min = prev_eigvec_min @property def prev_eigvec_max(self): return self._prev_eigvec_max @prev_eigvec_min.setter def prev_eigvec_max(self, prev_eigvec_max): if self.rfo_overlaps: self._prev_eigvec_max = prev_eigvec_max
[docs] def reset(self): # Don't recalculate the hessian if we have to reset the optimizer hessian_init = self.hessian_init if ( (not self.hessian_recalc_reset) and hessian_init == "calc" and self.geometry.coord_type != "cart" ): hessian_init = "fischer" self.prepare_opt(hessian_init)
[docs] def save_hessian(self): # Don't try to save Hessians of analytical potentials if self.geometry.is_analytical_2d: return h5_fn = self.get_path_for_fn(f"hess_calc_cyc_{self.cur_cycle}.h5") # Save the cartesian hessian, as it is independent of the # actual coordinate system that is used. save_hessian( h5_fn, self.geometry, self.geometry.cart_hessian, self.geometry.energy, self.geometry.calculator.mult, ) self.log(f"Wrote calculated cartesian Hessian to '{h5_fn}'")
    def prepare_opt(self, hessian_init=None):
        """Construct the initial (model) Hessian and transform it if needed."""
        if hessian_init is None:
            hessian_init = self.hessian_init

        self.H, hess_str = get_guess_hessian(self.geometry, hessian_init)
        # For analytical 2d potentials only the (x, y) sub-block is relevant.
        if self.hessian_init != "calc" and self.geometry.is_analytical_2d:
            assert self.H.shape == (3, 3)
            self.H[2, 2] = 0.0
        msg = f"Using {hess_str} Hessian"
        if hess_str == "saved":
            msg += f" from '{hessian_init}'"
        self.log(msg)

        # Dump to disk if hessian was calculated
        if self.hessian_init == "calc":
            self.save_hessian()

        if (
            hasattr(self.geometry, "coord_type")
            and self.geometry.coord_type == "dlc"
            # Calculated Hessian is already in DLC
            and hessian_init != "calc"
        ):
            # Project the (internal-coordinate) Hessian into the DLC basis.
            U = self.geometry.internal.U
            self.H = U.T.dot(self.H).dot(U)

        # Reference force norm for the adaptive recalculation scheme.
        if self.hessian_recalc_adapt:
            self.adapt_norm = np.linalg.norm(self.geometry.forces)

        if self.hessian_recalc:
            # Already subtract one, as we don't do a hessian update in
            # the first cycle.
            self.hessian_recalc_in = self.hessian_recalc - 1
def _get_opt_restart_info(self): opt_restart_info = { "adapt_norm": self.adapt_norm, "H": self.H.tolist(), "hessian_recalc_in": self.hessian_recalc_in, "predicted_energy_changes": self.predicted_energy_changes, } return opt_restart_info def _set_opt_restart_info(self, opt_restart_info): self.adapt_norm = opt_restart_info["adapt_norm"] self.H = np.array(opt_restart_info["H"]) self.hessian_recalc_in = opt_restart_info["hessian_recalc_in"] self.predicted_energy_changes = opt_restart_info["predicted_energy_changes"]
    def update_trust_radius(self):
        """Adapt the trust radius from predicted vs. actual energy change.

        Raises
        ------
        OptimizationError
            When the energy increased by more than 'max_energy_incr'.
        """
        # The predicted change should be calculated at the end of optimize
        # of the previous cycle.
        assert (
            len(self.predicted_energy_changes) == len(self.forces) - 1
        ), "Did you forget to append to self.predicted_energy_changes?"
        self.log("Trust radius update")
        self.log(f"\tCurrent trust radius: {self.trust_radius:.6f}")
        predicted_change = self.predicted_energy_changes[-1]
        actual_change = self.energies[-1] - self.energies[-2]
        # Only report an unexpected increase if we actually predicted a
        # decrease.
        unexpected_increase = (actual_change > 0) and (predicted_change < 0)
        old_trust = self.trust_radius
        if unexpected_increase:
            self.log(f"Energy increased by {actual_change:.6f} au!")
            # Abort when the increase exceeds the configured limit.
            if self.max_energy_incr and (actual_change > self.max_energy_incr):
                raise OptimizationError("Actual energy change too high!")
        # Ratio of actual to predicted change; ~1 means a good model.
        coeff = actual_change / predicted_change
        self.log(f"\tPredicted change: {predicted_change:.4e} au")
        self.log(f"\tActual change: {actual_change:.4e} au")
        self.log(f"\tCoefficient: {coeff:.2%}")
        step = self.steps[-1]
        last_step_norm = np.linalg.norm(step)
        self.set_new_trust_radius(coeff, last_step_norm)
        if unexpected_increase:
            self.table.print(
                f"Unexpected energy increase ({actual_change:.6f} au)! "
                f"Trust radius: old={old_trust:.4}, new={self.trust_radius:.4}"
            )
[docs] def set_new_trust_radius(self, coeff, last_step_norm): # Nocedal, Numerical optimization Chapter 4, Algorithm 4.1 # If actual and predicted energy change have different signs # coeff will be negative and lead to a decreased trust radius, # which is fine. if coeff < 0.25: self.trust_radius = max(self.trust_radius / 4, self.trust_min) self.log("\tDecreasing trust radius.") # Only increase trust radius if last step norm was at least 80% of it # See [5], Appendix, step size and direction control # elif coeff > 0.75 and (last_step_norm >= .8*self.trust_radius): # # Only increase trust radius if last step norm corresponded approximately # to the trust radius. elif coeff > 0.75 and abs(self.trust_radius - last_step_norm) <= 1e-3: self.trust_radius = min(self.trust_radius * 2, self.trust_max) self.log("\tIncreasing trust radius.") else: self.log(f"\tKeeping current trust radius at {self.trust_radius:.6f}") return self.log(f"\tUpdated trust radius: {self.trust_radius:.6f}")
    def update_hessian(self):
        """Recalculate the exact Hessian or apply an approximate update."""
        # Compare current forces to reference forces to see if we shall recalc the
        # hessian.
        try:
            cur_norm = np.linalg.norm(self.forces[-1])
            ref_norm = self.adapt_norm / self.hessian_recalc_adapt
            recalc_adapt = cur_norm <= ref_norm
            self.log(
                "Check for adaptive Hessian recalculation: "
                f"{cur_norm:.6f} <= {ref_norm:.6f}, {recalc_adapt}"
            )
        # Raised when 'hessian_recalc_adapt'/'adapt_norm' is None (disabled).
        except TypeError:
            recalc_adapt = False

        try:
            # Count down towards the next scheduled recalculation.
            self.hessian_recalc_in = max(self.hessian_recalc_in - 1, 0)
            self.log(f"Recalculation of Hessian in {self.hessian_recalc_in} cycle(s).")
        # Raised when 'hessian_recalc_in' is None (scheduling disabled).
        except TypeError:
            self.hessian_recalc_in = None

        # Update reference norm if needed
        # TODO: Decide on whether to update the norm when the recalculation is
        # initiated by 'recalc'.
        if recalc_adapt:
            self.adapt_norm = cur_norm

        recalc = self.hessian_recalc_in == 0

        if recalc or recalc_adapt:
            # Use xtb hessian
            self.log("Requested Hessian recalculation.")
            if self.hessian_xtb:
                self.H = xtb_hessian(self.geometry)
                key = "xtb"
            # Calculated hessian at actual level of theory
            else:
                self.H = self.geometry.hessian
                key = "exact"
                self.save_hessian()
            if not (self.cur_cycle == 0):
                self.log(f"Recalculated {key} Hessian in cycle {self.cur_cycle}.")
            # Reset counter. It is also reset when the recalculation was initiated
            # by the adaptive formulation.
            self.hessian_recalc_in = self.hessian_recalc
        # Simple hessian update
        else:
            dx = self.steps[-1]
            # Gradient difference; forces are the negative gradient.
            dg = -(self.forces[-1] - self.forces[-2])
            curv_cond = dx.dot(dg)
            # A violated curvature condition is only logged, not acted upon;
            # the chosen update function has to cope with it.
            if curv_cond < 0.0:
                self.log(
                    f"Curvature condition (s·y = {curv_cond:.4f} < 0) not satisfied!"
                )
            dH, key = self.hessian_update_func(self.H, dx, dg)
            self.H = self.H + dH
            self.log(f"Did {key} Hessian update.")
    def solve_rfo(self, rfo_mat, kind="min", prev_eigvec=None):
        """Diagonalize an augmented RFO matrix and pick a mode to follow.

        Returns the scaled step, the selected eigenvalue, the scaling
        factor nu and the followed (unscaled) eigenvector.
        """
        # When using the restricted step variant of RFO the RFO matrix
        # may not be symmetric. That's why we can't use eigh here.
        eigenvalues, eigenvectors = np.linalg.eig(rfo_mat)
        self.log("\tdiagonalized augmented Hessian")
        # Discard the (expectedly negligible) imaginary parts.
        eigenvalues = eigenvalues.real
        eigenvectors = eigenvectors.real
        sorted_inds = np.argsort(eigenvalues)

        # Depending on whether we want to minimize (maximize) along
        # the mode(s) in the rfo mat we have to select the smallest
        # (biggest) eigenvalue and corresponding eigenvector.
        first_or_last, verbose = self.rfo_dict[kind]

        # Given sorted eigenvalue-indices (sorted_inds) use the first
        # (smallest eigenvalue) or the last (largest eigenvalue) index.
        if prev_eigvec is None:
            ind = sorted_inds[first_or_last]
        else:
            # Mode-following: pick the eigenvector with maximum absolute
            # overlap with the eigenvector followed in the previous cycle.
            ovlps = np.array([prev_eigvec.dot(ev) for ev in eigenvectors.T])
            naive_ind = sorted_inds[first_or_last]
            ind = np.abs(ovlps).argmax()
            self.log(
                f"Overlap: {ind} ({eigenvalues[ind]:.6f}), "
                f"Naive: {naive_ind} ({eigenvalues[naive_ind]:.6f})"
            )
        follow_eigvec = eigenvectors.T[ind]
        step_nu = follow_eigvec.copy()
        nu = step_nu[-1]
        self.log(f"\tnu_{verbose}={nu:.8e}")
        # Scale eigenvector so that its last element equals 1. The
        # final step is the scaled eigenvector without the last element.
        step = step_nu[:-1] / nu
        eigval = eigenvalues[ind]
        self.log(f"\teigenvalue_{verbose}={eigval:.8e}")
        return step, eigval, nu, follow_eigvec
[docs] def filter_small_eigvals(self, eigvals, eigvecs, mask=False): small_inds = np.abs(eigvals) < self.small_eigval_thresh eigvals = eigvals[~small_inds] eigvecs = eigvecs[:, ~small_inds] small_num = sum(small_inds) self.log( f"Found {small_num} small eigenvalues in Hessian. Removed " "corresponding eigenvalues and eigenvectors." ) assert small_num <= 6, ( "Expected at most 6 small eigenvalues in cartesian hessian " f"but found {small_num}!" ) if mask: return eigvals, eigvecs, small_inds else: return eigvals, eigvecs
[docs] def log_negative_eigenvalues(self, eigvals, pre_str=""): neg_inds = eigvals < -self.small_eigval_thresh neg_eigval_str = np.array2string(eigvals[neg_inds], precision=6) self.log(f"{pre_str}Hessian has {neg_inds.sum()} negative eigenvalue(s).") self.log(f"\t{neg_eigval_str}")
[docs] def housekeeping(self): """Calculate gradient and energy. Update trust radius and hessian if needed. Return energy, gradient and hessian for the current cycle.""" gradient = self.geometry.gradient energy = self.geometry.energy self.forces.append(-gradient) self.energies.append(energy) self.log(f" Energy: {energy: >12.6f} au") self.log(f"norm(grad): {np.linalg.norm(gradient): >12.6f} au / bohr (rad)") self.log(f" rms(grad): {np.sqrt(np.mean(gradient**2)): >12.6f} au / bohr (rad)") can_update = ( # Allows gradient differences len(self.forces) > 1 and (self.forces[-2].shape == gradient.shape) and len(self.coords) > 1 # Coordinates may have been rebuilt. Take care of that. and (self.coords[-2].shape == self.coords[1].shape) and len(self.energies) > 1 ) if can_update: if self.trust_update: self.update_trust_radius() self.update_hessian() H = self.H if self.geometry.internal: # Shift eigenvalues of orthogonal part to high values, so they # don't contribute to the actual step. H_proj = self.geometry.internal.project_hessian(self.H) # Symmetrize hessian, as the projection may break it?! H = (H_proj + H_proj.T) / 2 eigvals, eigvecs = np.linalg.eigh(H) # Neglect small eigenvalues eigvals, eigvecs = self.filter_small_eigvals(eigvals, eigvecs) resetted = not can_update return energy, gradient, H, eigvals, eigvecs, resetted
[docs] def get_augmented_hessian(self, eigvals, gradient, alpha=1.0): dim_ = eigvals.size + 1 H_aug = np.zeros((dim_, dim_)) H_aug[: dim_ - 1, : dim_ - 1] = np.diag(eigvals / alpha) H_aug[-1, :-1] = gradient H_aug[:-1, -1] = gradient H_aug[:-1, -1] /= alpha return H_aug
[docs] def get_alpha_step(self, cur_alpha, rfo_eigval, step_norm, eigvals, gradient): # Derivative of the squared step w.r.t. alpha numer = gradient**2 denom = (eigvals - rfo_eigval * cur_alpha) ** 3 quot = np.sum(numer / denom) self.log(f"quot={quot:.6f}") dstep2_dalpha = 2 * rfo_eigval / (1 + step_norm**2 * cur_alpha) * quot self.log(f"analytic deriv.={dstep2_dalpha:.6f}") # Update alpha alpha_step = ( 2 * (self.trust_radius * step_norm - step_norm**2) / dstep2_dalpha ) self.log(f"alpha_step={alpha_step:.4f}") assert (cur_alpha + alpha_step) > 0, "alpha must not be negative!" return alpha_step
    def get_rs_step(self, eigvals, eigvecs, gradient, name="RS"):
        """Determine a restricted-step (RS) RFO step.

        Micro-iterates the scaling parameter alpha until the RFO step fits
        the trust radius; falls back to a trust-region Newton step when the
        micro cycles do not converge.
        """
        # Transform gradient to basis of eigenvectors
        gradient_ = eigvecs.T.dot(gradient)

        alpha = self.alpha0
        for mu in range(self.max_micro_cycles):
            self.log(f"{name} micro cycle {mu:02d}, alpha={alpha:.6f}")
            H_aug = self.get_augmented_hessian(eigvals, gradient_, alpha)
            rfo_step_, eigval_min, nu, self.prev_eigvec_min = self.solve_rfo(
                H_aug, "min", prev_eigvec=self.prev_eigvec_min
            )
            rfo_norm_ = np.linalg.norm(rfo_step_)
            self.log(f"norm(rfo step)={rfo_norm_:.6f}")

            # Accept steps inside, or close enough to, the trust radius.
            if (rfo_norm_ < self.trust_radius) or abs(
                rfo_norm_ - self.trust_radius
            ) <= 1e-3:
                step_ = rfo_step_
                break

            # Otherwise adapt alpha via a Newton step and retry.
            alpha_step = self.get_alpha_step(
                alpha, eigval_min, rfo_norm_, eigvals, gradient_
            )
            alpha += alpha_step
            self.log("")
        # Otherwise, use trust region newton step
        else:
            self.log(
                "RS algorithm did not produce a desired step length in "
                f"{self.max_micro_cycles} micro cycles. Trying RFO with α=1.0."
            )
            H_aug = self.get_augmented_hessian(eigvals, gradient_, alpha=1.0)
            rfo_step_, eigval_min, nu, _ = self.solve_rfo(H_aug, "min")
            rfo_norm_ = np.linalg.norm(rfo_step_)

            # This should always be True if the above algorithm failed but we
            # keep this line nonetheless, to make it more obvious.
            if rfo_norm_ > self.trust_radius:
                self.log(
                    f"Proposed RFO step with norm {rfo_norm_:.4f} is outside trust "
                    f"radius Δ={self.trust_radius:.4f}. "
                )
                step_ = self.get_newton_step_on_trust(
                    eigvals, eigvecs, gradient, transform=False
                )
                # Simple, downscaled RFO step
                # step_ = rfo_step_ / rfo_norm_ * self.trust_radius
            else:
                step_ = rfo_step_

        # Transform step back to original basis
        step = eigvecs.dot(step_)
        return step
[docs] @staticmethod def get_shifted_step_trans(eigvals, gradient_trans, shift): return -gradient_trans / (eigvals + shift)
[docs] @staticmethod def get_newton_step(eigvals, eigvecs, gradient): return eigvecs.dot(eigvecs.T.dot(gradient) / eigvals)
    def get_newton_step_on_trust(self, eigvals, eigvecs, gradient, transform=True):
        """Step on trust-radius.

        See Nocedal 4.3 Iterative solutions of the subproblem
        """
        min_ind = eigvals.argmin()
        min_eigval = eigvals[min_ind]
        pos_definite = (eigvals > 0.0).all()
        # Gradient in the Hessian eigenvector basis.
        gradient_trans = eigvecs.T.dot(gradient)

        # This will be also be True when we come close to a minimizer,
        # but then the Hessian will also be positive definite and a
        # simple Newton step will be used.
        hard_case = abs(gradient_trans[min_ind]) <= 1e-6
        self.log(f"Smallest eigenvalue: {min_eigval:.6f}")
        self.log(f"Positive definite Hessian: {pos_definite}")
        self.log(f"Hard case: {hard_case}")

        def get_step(shift):
            # Level-shifted Newton step in the eigenvector basis.
            return -gradient_trans / (eigvals + shift)

        # Unshifted Newton step
        newton_step_trans = get_step(0.0)
        newton_norm = np.linalg.norm(newton_step_trans)

        def on_trust_radius_lin(step):
            # Zero exactly when |step| equals the trust radius.
            return 1 / self.trust_radius - 1 / np.linalg.norm(step)

        def finalize_step(shift):
            step = get_step(shift)
            if transform:
                step = eigvecs.dot(step)
            return step

        # Simplest case. Positive definite Hessian and predicted step is
        # already in trust radius.
        if pos_definite and newton_norm <= self.trust_radius:
            self.log("Using unshifted Newton step.")
            return eigvecs.dot(newton_step_trans)

        # If the Hessian is not positive definite or if the step is too
        # long we have to determine the shift parameter lambda.
        rs_kwargs = {
            "f": lambda shift: on_trust_radius_lin(get_step(shift)),
            "xtol": 1e-3,
            # Would otherwise be chosen automatically, but we set it
            # here explicitly for verbosity.
            "method": "brentq",
        }

        def root_search(bracket):
            rs_kwargs.update(
                {
                    "bracket": bracket,
                    "x0": bracket[0] + 1e-3,
                }
            )
            res = root_scalar(**rs_kwargs)
            return res

        BRACKET_END = 1e10
        if not hard_case:
            bracket_start = 0.0 if pos_definite else -min_eigval + 1e-2
            bracket = (bracket_start, BRACKET_END)
            try:
                res = root_search(bracket)
                assert res.converged
                return finalize_step(res.root)
            # ValueError may be raised when the function values for the
            # initial bracket have the same sign. If so, we continue with
            # treating it as a hard case.
            except ValueError:
                pass

        # Now we would try the bracket (-b2, -b1). The resulting step should have
        # a suitable length, but the (shifted) Hessian would have an incorrect
        # eigenvalue spectrum (not positive definite). To solve this we use a
        # different formula to calculate the step.
        mask = np.ones_like(gradient_trans)
        mask[min_ind] = 0
        mask = mask.astype(bool)
        without_min = gradient_trans[mask] / (eigvals[mask] - min_eigval)
        try:
            tau = sqrt(self.trust_radius**2 - (without_min**2).sum())
            step_trans = [tau] + (-without_min).tolist()
        # Hard case. Search in open interval (endpoints not included)
        # (-min_eigval, inf).
        except ValueError:
            bracket = (-min_eigval + 1e-6, BRACKET_END)
            res = root_search(bracket)
            # NOTE(review): if this root search does not converge,
            # 'step_trans' below is undefined and a NameError follows —
            # verify whether convergence is guaranteed here.
            if res.converged:
                return finalize_step(res.root)

        if not transform:
            return step_trans
        return eigvecs.dot(step_trans)
[docs] @staticmethod def quadratic_model(gradient, hessian, step): return step.dot(gradient) + 0.5 * step.dot(hessian).dot(step)
[docs] @staticmethod def rfo_model(gradient, hessian, step): return HessianOptimizer.quadratic_model(gradient, hessian, step) / ( 1 + step.dot(step) )
    def get_step_func(self, eigvals, gradient, grad_rms_thresh=1e-2):
        """Select a (step function, model function) pair for the current state.

        Returns the trust-region Newton step with the quadratic model when
        the Hessian is positive definite and the gradient is already small;
        otherwise the restricted-step RFO machinery with the RFO model.

        NOTE(review): relies on 'self.adapt_step_func', which is not set in
        this file's __init__ — presumably assigned elsewhere/by a subclass;
        verify.
        """
        positive_definite = (eigvals < 0).sum() == 0
        gradient_small = rms(gradient) < grad_rms_thresh
        if self.adapt_step_func and gradient_small and positive_definite:
            return self.get_newton_step_on_trust, self.quadratic_model
        # RFO fallback
        else:
            return self.get_rs_step, self.rfo_model