# [1] https://doi.org/10.26434/chemrxiv-2022-253hc-v2
# Identifying the Marcus dimension of electron transfer from
# ab initio calculations
# Šrut, Lear, Krewald, 2022 on chemrxiv
# [2] https://doi.org/10.1039/D3SC01402A
# Identifying the Marcus dimension of electron transfer from
# ab initio calculations
# Šrut, Lear, Krewald, 2023, actual published version
from collections.abc import Callable
from pathlib import Path
import os
from typing import Optional, Union
import warnings
import distributed
import matplotlib.pyplot as plt
import numpy as np
from pysisyphus.executors import MaybeScheduler
from pysisyphus.exceptions import (
CalculationFailedException,
RunAfterCalculationFailedException,
)
from pysisyphus.Geometry import Geometry
from pysisyphus.helpers_pure import get_state_label, highlight_text
from pysisyphus.marcus_dim.config import DUMMY_2EV, FIT_RESULTS_FN, SCAN_RESULTS_FN
from pysisyphus.marcus_dim.fit import epos_from_wf, fit_marcus_dim
from pysisyphus.marcus_dim.param import param_marcus
from pysisyphus.marcus_dim.scan import plot_scan, scan, scan_parallel
import pysisyphus.marcus_dim.types as mdtypes
[docs]
def dump_scan_result(geom, scan_result, dummy_scan, out_dir="."):
out_dir = Path(out_dir)
# Dump scan geometries into trj file
xyzs = list()
for sc, (se_gs, *se_es) in zip(scan_result.coords, scan_result.energies):
xyz = geom.as_xyz(cart_coords=sc, comment=f"{se_gs:.6f}")
xyzs.append(xyz)
scan_trj_path = out_dir / "marcus_dim_scan.trj"
with open(scan_trj_path, "w") as handle:
handle.write("\n".join(xyzs))
scan_fig, scan_axs = plot_scan(
scan_result.factors,
scan_result.energies,
scan_result.properties,
dummy_scan=dummy_scan,
)
scan_fig.savefig(out_dir / "scan.svg")
plt.close(scan_fig)
[docs]
def run_scan(
geom: Geometry,
marcus_dim: np.ndarray,
calc_getter: Callable,
fragments: list[list[int]],
dummy_scan: bool = False,
out_dir: Union[os.PathLike, str] = ".",
**kwargs,
) -> mdtypes.MarcusDimScanResult:
"""Scan along Marcus dimension using serial calculations in two directions.
Parameters
----------
geom
Equilibrium geometry with N atoms and 3N Cartesian coordinates.
marucs_dim
1d array of shape (3N, ) containing the Marcus dimension in
unweighted Cartesian coordinates.
calc_getter
Callable that returns a new calculator. Takes optional kwargs,
that are passed to the underlying Calculator class.
fragments
List of lists of integers, containing the different fragment indices.
Order is important, as the prefactors for weighting the alpha-electron
spin density depend on the order.
dummy_scan
Optional; when True the scan can be carried out using a calculator that
supports only GS calculations, e.g. xTB. In such a case, a dummy value
for the first ES is used (GS + 2 eV).
out_dir
String or Path object that controls the directory, where various files
are written.
Returns
-------
scan_result
MarcusDimScanResult, carrying the factors, Cartesian coordinates,
calculated energies and properties along the scan.
"""
out_dir = Path(out_dir)
pos_calc = calc_getter(base_name="scan_pos")
neg_calc = calc_getter(base_name="scan_neg")
has_all_energies = hasattr(pos_calc, "get_all_energies")
if dummy_scan and not has_all_energies:
warnings.warn(
f"Calculator '{pos_calc}' does not implement 'get_all_energies()'! "
f"Using dummy values for first excited state!'",
)
def get_properties(factor: int, coords_i: np.ndarray):
# Use one of two different calculators, depending on the scan direction.
calc = neg_calc if factor < 0 else pos_calc
try:
results = calc.get_all_energies(geom.atoms, coords_i)
all_energies = results["all_energies"]
except AttributeError as err:
if not dummy_scan:
raise err
results = calc.get_energy(geom.atoms, coords_i)
energy = results["energy"]
all_energies = np.array((energy, energy + DUMMY_2EV))
energies = all_energies[:2]
assert len(energies) == 2
wf = calc.get_stored_wavefunction()
tot_epos, alpha_epos = epos_from_wf(wf, fragments)
return energies, alpha_epos
scan_result = scan(
coords_init=geom.cart_coords.copy(),
direction=marcus_dim,
get_properties=get_properties,
**kwargs,
)
dump_scan_result(geom, scan_result, dummy_scan, out_dir=out_dir)
return scan_result
[docs]
def run_scan_parallel(
geom: Geometry,
marcus_dim_data: dict,
calc_getter: Callable,
fragments: list[list[int]],
rd_class: mdtypes.RobinDay,
dummy_scan: bool = False,
out_dir: Union[os.PathLike, str] = ".",
scheduler: distributed.LocalCluster = None,
**scan_kwargs,
) -> mdtypes.MarcusDimScanResult:
"""Scan along Marcus dimension using parallel calculations.
Parameters
----------
geom
Equilibrium geometry with N atoms and 3N Cartesian coordinates.
marucs_dim_data
Dictionary, as returned by fit_marcus_dim.
calc_getter
Callable that returns a new calculator. Takes optional kwargs,
that are passed to the underlying Calculator class.
fragments
List of lists of integers, containing the different fragment indices.
Order is important, as the prefactors for weighting the alpha-electron
spin density depend on the order.
rd_class
RobinDay enum, denoting the Robin-Day class.
dummy_scan
Optional; when True the scan can be carried out using a calculator that
supports only GS calculations, e.g. xTB. In such a case, a dummy value
for the first ES is used (GS + 2 eV).
out_dir
String or Path object that controls the directory, where various files
are written.
scheduler
Dask.distributed LocalCluster object.
Returns
-------
scan_result
MarcusDimScanResult, carrying the factors, Cartesian coordinates,
calculated energies and properties along the scan.
"""
out_dir = Path(out_dir)
test_calc = calc_getter()
has_all_energies = hasattr(calc_getter(), "get_all_energies")
if dummy_scan and not has_all_energies:
warnings.warn(
f"Calculator '{test_calc}' does not implement 'get_all_energies()'! "
f"Using dummy values for first excited state!'",
)
atoms = geom.atoms
def get_properties(calc_number, coords):
print(f"Started scan calculation {calc_number}")
calc = calc_getter(
with_td=True, pal=1, calc_number=calc_number, base_name="scan"
)
all_energies = np.nan
alpha_epos = np.nan
try:
# TODO: move actual calculation into property functions?!
results = calc.get_all_energies(geom.atoms, coords)
all_energies = results["all_energies"]
assert (
all_energies.size >= 2
), "Energies of at least two states must be calculated!"
except AttributeError as err:
if not dummy_scan:
raise err
result = calc.get_energy(atoms, coords)
energy = result["energy"]
all_energies = np.array((energy, energy + DUMMY_2EV))
except CalculationFailedException as err:
print(err)
print(f"Calculation {calc_number:03d} failed!")
except RunAfterCalculationFailedException as err:
print(err)
print(f"Postprocessing of calculation {calc_number:03d} failed!")
try:
wf = calc.get_stored_wavefunction()
_, alpha_epos = epos_from_wf(wf, fragments)
except AttributeError:
pass
return all_energies, alpha_epos
marcus_dim = marcus_dim_data["marcus_dim"]
normal_coords = marcus_dim_data["normal_coordinates"]
properties = marcus_dim_data["properties"]
eigvecs = marcus_dim_data["eigvecs"]
masses = marcus_dim_data["masses"]
int_geom = geom.copy(
coord_type="redund",
coord_kwargs={
"bonds_only": True,
},
)
B = int_geom.internal.B
scan_result = scan_parallel(
coords_init=geom.cart_coords.copy(),
get_properties=get_properties,
marcus_dim=marcus_dim,
normal_coords=normal_coords,
properties=properties,
eigvecs=eigvecs,
masses=masses,
B=B,
rd_class=rd_class,
scheduler=scheduler,
out_dir=out_dir,
**scan_kwargs,
)
dump_scan_result(geom, scan_result, dummy_scan, out_dir=out_dir)
return scan_result
[docs]
def run_param(
scan_factors: np.ndarray,
scan_energies: np.ndarray,
mult: int,
out_dir: Union[os.PathLike, str] = ".",
) -> dict[str, mdtypes.MarcusModel]:
"""Try to parametrize quadratic Marcus model from scan data (factors & energies).
Parameters
----------
scan_factors
1d-array of shape (n, ) containing the multiplicative factors, describing the displacment
along the Marcus dimension.
scan_energies
2d-array of shape(n, 2) containing the energies of the ground and first excited state.
mult
Integer representing the multiplicity of the system under study. Used to create
meaningful labels in the plots.
out_dir
String or Path object that controls the directory, where various files
are written.
Returns
-------
models
Dictionary with two str keys ("a", "b") containing the (possibly) parametrized
Marcus models. Parametrization is only possible for class II systems.
"""
out_dir = Path(out_dir)
# 3.) Parametrization of Marcus model
print(highlight_text("Parametrization of Marcus Model"))
models = param_marcus(scan_factors, scan_energies)
shifted_scan_energies = scan_energies - scan_energies.min()
for para, model in models.items():
print(f"Parametrization {para}: {model.pretty()}")
fig, ax = model.plot(scan_factors)
for i, state in enumerate(shifted_scan_energies.T):
label = get_state_label(mult, i)
ax.plot(scan_factors, state, label=f"${label}$ scan")
ax.legend()
fig_fn = out_dir / f"marcus_model_{para}.svg"
fig.savefig(fig_fn)
plt.close(fig)
print(f"Saved plot of Marcus model to '{fig_fn}'")
return models
[docs]
def run_marcus_dim(
# geom: Geometry,
# calc_getter: Callable,
# fragments: list[list[int]],
# rd_class: mdtypes.RobinDay,
# fit_kwargs: Optional[dict] = None,
# scan_kwargs: Optional[dict] = None,
# T: float = 300.0,
# cluster: bool = True,
# force: bool = False,
# dummy_scan: bool = False,
# out_dir: Union[os.PathLike, str] = ".",
geom: Geometry,
calc_getter,
calc_kwargs: dict,
rd_class: mdtypes.RobinDay,
batch_kwargs: Optional[dict] = None,
scan_kwargs: Optional[dict] = None,
# T: float = 300.0,
cluster: bool = True,
force: bool = False,
dummy_scan: bool = False,
out_dir: Union[os.PathLike, str] = ".",
) -> dict[str, mdtypes.MarcusModel]:
"""Fit Marucs dimension, scan along it and try to parametrize the Marcus model
Parameters
----------
geom
Equilibrium geometry with N atoms and 3N Cartesian coordinates.
calc_getter
Callable that returns a new calculator. Takes optional kwargs,
that are passed to the underlying Calculator class.
fragments
List of lists of integers, containing the different fragment indices.
Order is important, as the prefactors for weighting the alpha-electron
spin density depend on the order.
rd_class
RobinDay enum, denoting the Robin-Day class.
fit_kwargs
Optional dict, containing additional arguments for fitting the Marcus dimension.
scan_kwargs
Optional dict, containing additional arguments for scanning along the Marcus dimension.
T
Temperature in Kelvin used for Wigner sampling.
cluster
Boolean; controls wheter all calculations are done in serial or in parallel
using dask.distributed.
force
Boolean; when True fit & scan are done, even when the results are already present.
dummy_scan
Optional; see docstring of run_scan()/run_scan_parallel().
out_dir
String or Path object that controls the directory, where various files
are written.
"""
rd_class = mdtypes.get_rd_class(rd_class)
if batch_kwargs is None:
batch_kwargs = {}
else:
batch_kwargs = batch_kwargs.copy()
if scan_kwargs is None:
scan_kwargs = {}
else:
scan_kwargs = scan_kwargs.copy()
out_dir = Path(out_dir)
# When no explicit property function choice is made, we derive the correct
# property function from the given Robin-Day class.
try:
calc_kwargs["property"]
except KeyError:
property = {
mdtypes.RobinDay.CLASS2: mdtypes.Property.EEXC,
mdtypes.RobinDay.CLASS3: mdtypes.Property.EPOS_IAO,
}[rd_class]
calc_kwargs["property"] = property
print(
"No property was selected for the fit! Using the default property "
f"for {rd_class}: {property}"
)
#############################
# 1.) Fit Marcus dimension #
#############################
print(highlight_text("Fitting of Marcus Dimension"))
fit_results_path = out_dir / FIT_RESULTS_FN
rms_converged = False
one_batch = False
if fit_results_path.exists():
marcus_dim_data = np.load(fit_results_path)
try:
rms_converged = bool(marcus_dim_data["rms_converged"])
one_batch = int(marcus_dim_data["max_batches"]) == 1
except KeyError:
print("Could not determine if Marcus dimension already converged!")
# When rms_converged is True md_results will always be present
if not force and (rms_converged or one_batch):
marcus_dim = marcus_dim_data["marcus_dim"]
print(f"Loaded Marcus dimension from '{fit_results_path}'. Skipping fit.")
print(
f"Final Marcus dimension:\n{geom.as_xyz(cart_coords=marcus_dim, comment=None)}"
)
else:
with MaybeScheduler(cluster) as scheduler:
calc_kwargs["scheduler"] = scheduler
calc_kwargs["calc_getter"] = calc_getter
marcus_dim_data = fit_marcus_dim(
# geom, calc_getter, fragments, T=T, scheduler=scheduler, **fit_kwargs
geom,
calc_kwargs=calc_kwargs,
batch_kwargs=batch_kwargs,
out_dir=out_dir,
)
del calc_kwargs["scheduler"]
del calc_kwargs["calc_getter"]
print()
###################################
# 2.) Scan along Marcus dimension #
###################################
print(highlight_text("Scan along Marcus Dimension"))
scan_results_path = out_dir / SCAN_RESULTS_FN
scan_done = False
if scan_results_path.exists():
scan_results = np.load(scan_results_path)
scan_factors = scan_results["factors"]
scan_coords = scan_results["coords"]
scan_energies = scan_results["energies"]
scan_properties = scan_results["properties"]
scan_result = mdtypes.MarcusDimScanResult(
factors=scan_factors,
coords=scan_coords,
energies=scan_energies,
properties=scan_properties,
)
try:
scan_done = bool(scan_results["scan_done"])
except KeyError:
pass
if not scan_done:
with MaybeScheduler(cluster) as scheduler:
scan_result = run_scan_parallel(
geom,
marcus_dim_data,
calc_getter=calc_getter,
fragments=calc_kwargs["fragments"],
rd_class=rd_class,
dummy_scan=dummy_scan,
scheduler=scheduler,
out_dir=out_dir,
**scan_kwargs,
)
"""
scan_result = run_scan(
geom,
marcus_dim_data["marcus_dim"],
calc_getter,
fragments,
dummy_scan=dummy_scan,
out_dir=".",
)
"""
else:
print(f"Loaded scan data from '{scan_results_path}'. Skipping scan.")
print()
#######################################
# 3.) Parametrization of Marcus model #
#######################################
# Try to determine the multiplicity, for the creation of state labels as D_0, D_1, etc.
# for the following plots.
mult_calc = calc_getter()
try:
mult = mult_calc.mult
except AttributeError:
mult = -1
# Parametrize using only the energies of ground and 1st excited state.
models = run_param(
scan_result.factors, scan_result.energies[:, :2], mult, out_dir=out_dir
)
print()
return marcus_dim_data, scan_result, models