Source code for pysisyphus.optimizers.closures

# [1] https://arxiv.org/pdf/2101.04413.pdf
#     A Regularized Limited Memory BFGS method for Large-Scale Unconstrained
#     Optimization and its efficient Implementations
#     Tankaria, Sugimoto, Yamashita, 2021
# [2] Regularization of Limited Memory Quasi-Newton Methods for Large-Scale
#     Nonconvex Minimization
#     https://arxiv.org/pdf/1911.04584.pdf
#     Kanzow, Steck 2021

from collections import deque

import numpy as np
from scipy.sparse.linalg import spsolve


def get_update_mu_reg(
    mu_min=1e-3, gamma_1=0.1, gamma_2=5.0, eta_1=0.01, eta_2=0.9, logger=None
):
    """See 5.1 in [1]"""
    assert 0.0 < mu_min
    assert 0.0 < gamma_1 <= 1.0 < gamma_2
    assert 0.0 < eta_1 < eta_2 <= 1.0

    def log(msg):
        if logger is not None:
            logger.debug(msg)

    def update_mu_reg(mu, cur_energy, trial_energy, cur_grad, step):
        """Update regularization parameter μ_reg according to ratio r.

        See 2.3 in [1].

            r = actual_reduction / predicted_reduction
            r = (f(x) - f(x + step)) / (f(x) - model_func(step, mu))

            model_func(step, mu) = f(x) + df(x)^T step(mu)
                                   + 1/2 step(mu)^T H(mu)^-1 step(mu)

        With step(mu) = -H(mu) df(x), i.e. H(mu)^-1 step(mu) = -df(x):

            f(x) - model_func(step, mu)
                = f(x) - (f(x) + df(x)^T step(mu) + 1/2 step(mu)^T H(mu)^-1 step(mu))
                = -df(x)^T step(mu) - 1/2 step(mu)^T H(mu)^-1 step(mu)
                = -df(x)^T step(mu) + 1/2 step(mu)^T df(x)
                = -1/2 df(x)^T step(mu)

            r = (f(x) - f(x + step)) / (-1/2 df(x)^T step(mu))
            r = -2 * (f(x) - f(x + step)) / (df(x)^T step(mu))
        """
        log(f"  Cur energy: {cur_energy:.8f}")
        log(f"Trial energy: {trial_energy:.8f}")
        act_change = cur_energy - trial_energy
        log(f"   Actual change: {act_change: .8f}")
        pred_change = -1 / 2 * cur_grad.dot(step)
        log(f"Predicted change: {pred_change: .8f}")
        r = act_change / pred_change
        log(f"Ratio r={r:.8f}")

        # Default case for eta_1 <= r < eta_2. Keep mu and accept step.
        mu_updated = mu
        recompute_step = False
        # Poor actual reduction. Increase shift parameter and request new step.
        if r < eta_1:
            mu_updated *= gamma_2
            recompute_step = True
            log(f"Increased μ_reg to {mu_updated:.6f}.")
        # Significant actual reduction. Reduce shift parameter and accept step.
        elif r >= eta_2:
            mu_updated = max(mu_min, mu_updated * gamma_1)
            log(f"Decreased μ_reg to {mu_updated:.6f}.")
        return mu_updated, recompute_step

    return update_mu_reg
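
A minimal usage sketch of the returned updater (not part of the module source); the energies, gradient and step below are made up purely for illustration:

import numpy as np

update_mu_reg = get_update_mu_reg()

cur_energy = -1.0                       # energy at the current geometry
trial_energy = -1.2                     # energy after taking the trial step
cur_grad = np.array([0.5, -0.3])
step = np.array([-0.4, 0.2])

mu, recompute = update_mu_reg(0.1, cur_energy, trial_energy, cur_grad, step)
# act_change = 0.2 and pred_change = 0.13, so r ≈ 1.54 >= eta_2:
# μ_reg is decreased to 0.01 and the step is accepted (recompute == False).
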
def bfgs_multiply(
    s_list,
    y_list,
    vector,
    beta=1,
    P=None,
    logger=None,
    gamma_mult=True,
    mu_reg=None,
    inds=None,
    cur_size=None,
):
    """Matrix-vector product H·v.

    Multiplies given vector with inverse Hessian, obtained from repeated
    BFGS updates calculated from steps in 's_list' and gradient differences
    in 'y_list'.

    Based on algorithm 7.4 Nocedal, Num. Opt., p. 178."""
    assert len(s_list) == len(
        y_list
    ), "lengths of step list 's_list' and gradient list 'y_list' differ!"
    cycles = len(s_list)

    q = vector.copy()
    alphas = list()
    rhos = list()

    if mu_reg is not None:
        # Regularized L-BFGS with ŷ = y + μs, see [1]
        assert mu_reg > 0.0
        y_list_reg = list()
        for i, si in enumerate(s_list):
            yi = y_list[i]
            y_hat = yi + mu_reg * si
            if y_hat.dot(si) <= 0:
                # See 2 in [1]
                y_hat = yi + (max(0, -si.dot(yi) / si.dot(si)) + mu_reg) * si
            y_list_reg.append(y_hat)
        y_list = y_list_reg

    # Store rhos and alphas as they are also needed in the second loop
    for i in reversed(range(cycles)):
        s = s_list[i]
        y = y_list[i]
        rho = 1 / y.dot(s)
        rhos.append(rho)
        try:
            alpha = rho * s.dot(q)
            q -= alpha * y
        except ValueError:
            inds_i = inds[i]
            q_ = q.reshape(cur_size, -1)
            alpha = rho * s.dot(q_[inds_i].flatten())
            # This also modifies q!
            q_[inds_i] -= alpha * y.reshape(len(inds_i), -1)
        alphas.append(alpha)
    # Restore original order, so that rho[i] = 1/s_list[i].dot(y_list[i]) etc.
    alphas = alphas[::-1]
    rhos = rhos[::-1]

    if P is not None:
        r = spsolve(P, q)
        msg = "preconditioner"
    elif gamma_mult and (cycles > 0):
        s = s_list[-1]
        y = y_list[-1]
        gamma = s.dot(y) / y.dot(y)
        r = gamma * q
        msg = f"gamma={gamma:.4f}"
    else:
        r = beta * q
        msg = f"beta={beta:.4f}"

    if mu_reg is not None:
        msg += f" and μ_reg={mu_reg:.6f}"

    if logger is not None:
        msg = f"BFGS multiply using {cycles} previous cycles with {msg}."
        if len(s_list) == 0:
            msg += "\nProduced simple SD step."
        logger.debug(msg)

    for i in range(cycles):
        s = s_list[i]
        y = y_list[i]
        try:
            beta = rhos[i] * y.dot(r)
            r += s * (alphas[i] - beta)
        except ValueError:
            inds_i = inds[i]
            r_ = r.reshape(cur_size, -1)
            beta = rhos[i] * y.dot(r_[inds_i].flatten())
            # This also modifies r!
            r_[inds_i] += s.reshape(len(inds_i), -1) * (alphas[i] - beta)
    return r
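
A small self-contained check of the two-loop recursion (not part of the module source), assuming a 2D quadratic f(x) = 1/2 xᵀ A x with diagonal A; with a single (s, y) pair the product already reproduces the exact Newton direction A⁻¹·g here:

import numpy as np

A = np.diag([2.0, 4.0])                 # Hessian of the model quadratic
grad = lambda x: A @ x                  # gradient of f(x) = 1/2 x^T A x

x0, x1 = np.array([1.0, 1.0]), np.array([0.5, 0.5])
s_list = [x1 - x0]                      # step
y_list = [grad(x1) - grad(x0)]          # gradient difference

r = bfgs_multiply(s_list, y_list, grad(x1))
# r == [0.5, 0.5], identical to A^-1 @ grad(x1) for this quadratic.
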
def lbfgs_closure(force_getter, M=10, beta=1, restrict_step=None):
    x_list = list()
    s_list = list()
    y_list = list()
    force_list = list()
    cur_cycle = 0

    if restrict_step is None:
        restrict_step = lambda x, dx: dx

    def lbfgs(x, *getter_args):
        nonlocal x_list
        nonlocal cur_cycle
        nonlocal s_list
        nonlocal y_list

        force = force_getter(x, *getter_args)
        if cur_cycle > 0:
            prev_x = x_list[-1]
            s = x - prev_x
            s_list.append(s)
            prev_force = force_list[-1]
            y = prev_force - force
            y_list.append(y)
        x_list.append(x)
        force_list.append(force)
        step = bfgs_multiply(s_list, y_list, force, beta=beta)
        step = restrict_step(x, step)
        # Only keep the last M cycles
        s_list = s_list[-M:]
        y_list = y_list[-M:]
        cur_cycle += 1
        return step, force

    return lbfgs
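
A usage sketch (not part of the module source): minimizing a made-up 2D quadratic with the closure. force_getter returns the forces, i.e. the negative gradient, and the iterates should approach the minimum at the origin:

import numpy as np

A = np.diag([1.0, 4.0])
force_getter = lambda x: -A @ x         # forces of f(x) = 1/2 x^T A x

lbfgs = lbfgs_closure(force_getter, M=10)
x = np.array([2.0, 1.0])
for _ in range(25):
    step, force = lbfgs(x)
    x = x + step
print(x, np.linalg.norm(force_getter(x)))
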
def modified_broyden_closure(force_getter, M=5, beta=1, restrict_step=None):
    """https://doi.org/10.1006/jcph.1996.0059

    F corresponds to the residual gradient, so after calling force_getter
    we multiply the force by -1 to get the gradient."""
    dxs = list()
    dFs = list()
    F = None
    # The next line is used in SciPy, but then beta strongly depends
    # on the magnitude of x, which is quite weird. Disregarding the
    # analytical potentials, the electronic energy is usually invariant
    # under translation and rotation and doesn't depend on the
    # magnitude of x.
    # beta = 0.5 * max(np.linalg.norm(x), 1) / np.linalg.norm(F)
    a = None

    if restrict_step is None:
        restrict_step = lambda x, dx: dx

    def modified_broyden(x, *getter_args):
        nonlocal dxs
        nonlocal dFs
        nonlocal F
        nonlocal beta
        nonlocal a

        F_new = -force_getter(x, *getter_args)
        if F is not None:
            dF = F_new - F
            dFs.append(dF)
            dFs = dFs[-M:]
            # Overlap matrix
            a = np.zeros((len(dFs), len(dFs)))
            for k, dF_k in enumerate(dFs):
                for m, dF_m in enumerate(dFs):
                    a[k, m] = dF_k.dot(dF_m)
        F = F_new
        dx = -beta * F
        if len(dxs) > 0:
            # Calculate gamma
            dF_F = [dF_k.dot(F) for dF_k in dFs]
            # Use least squares to avoid crashes with singular matrices
            gammas, *_ = np.linalg.lstsq(a, dF_F, rcond=None)
            gammas = gammas[:, None]
            _ = np.array(dxs) - beta * np.array(dFs)
            # Subtract step correction
            dx = dx - np.sum(gammas * _, axis=0)
        dx = restrict_step(x, dx)
        dxs.append(dx)
        # Keep only information from the last M cycles
        dxs = dxs[-M:]
        return dx, -F

    return modified_broyden
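
A usage sketch (not part of the module source), assuming a well-conditioned quadratic whose minimum sits at a made-up point c; the iterates should converge towards c:

import numpy as np

A = np.diag([0.9, 1.2])
c = np.array([1.0, -2.0])
force_getter = lambda x: -A @ (x - c)   # forces of f(x) = 1/2 (x-c)^T A (x-c)

broyden = modified_broyden_closure(force_getter, M=5)
x = np.zeros(2)
for _ in range(15):
    dx, force = broyden(x)
    x = x + dx
print(x)                                # should be close to c
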
def small_lbfgs_closure(history=5, gamma_mult=True):
    """Compact LBFGS closure.

    The returned function takes two arguments: forces and prev_step.
    forces are the forces at the current iterate and prev_step is the
    previous step that led us to the current iterate. In this way step
    restriction/line search can be done outside of the lbfgs function.
    """
    prev_forces = None  # lgtm [py/unused-local-variable]
    grad_diffs = deque(maxlen=history)
    steps = deque(maxlen=history)
    cur_cycle = 0

    def lbfgs(forces, prev_step=None):
        nonlocal cur_cycle
        nonlocal prev_forces

        if prev_step is not None:
            steps.append(prev_step)

        # Steepest descent in the first cycle
        step = forces
        # LBFGS in the following cycles
        if cur_cycle > 0:
            grad_diffs.append(-forces - -prev_forces)
            step = bfgs_multiply(steps, grad_diffs, forces, gamma_mult=gamma_mult)

        prev_forces = forces
        cur_cycle += 1
        return step

    return lbfgs
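
A usage sketch (not part of the module source) showing the two-argument interface, again on a made-up quadratic; the step restriction is applied outside of the closure and the restricted step is passed back in on the next call:

import numpy as np

A = np.diag([1.0, 3.0])
forces_at = lambda x: -A @ x            # forces of f(x) = 1/2 x^T A x

lbfgs = small_lbfgs_closure(history=5)
x = np.array([1.5, -1.0])
prev_step = None
for _ in range(20):
    step = lbfgs(forces_at(x), prev_step)
    # Restrict the step norm to 0.5 outside of the closure
    step = step * min(1.0, 0.5 / np.linalg.norm(step))
    x = x + step
    prev_step = step
print(x)                                # should approach the minimum at the origin
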