Source code for pysisyphus.stocastic.align

import itertools as it

import numpy as np
import rmsd as rmsd
from scipy.optimize import linear_sum_assignment
from scipy.spatial.distance import cdist


[docs] def match_geom_atoms(ref_geom, geom_to_match, hydrogen=True): """Apply the hungarian method to geom_to_match. Uses the scipy implementation of the Hungarian method/ Kuhn-Munkres algorithm in scipy.optimize.linear_sum_assignment. See [1] 10.1021/ci400534h [2] 10.1021/acs.jcim.6b00516 """ assert len(ref_geom.atoms) == len(geom_to_match.atoms), \ "Atom numbers don't match!" ref_coords, _ = ref_geom.coords_by_type coords_to_match, inds_to_match = geom_to_match.coords_by_type atoms = ref_coords.keys() # The new geometry that will contain the perhaps reordered # coordinates. geom_matched = geom_to_match.copy() for atom in atoms: # Don't match hydrogens if not requested if atom.lower() == "h" and not hydrogen: continue ref_coords_for_atom = ref_coords[atom] coords_to_match_for_atom = coords_to_match[atom] # Pairwise distances between two collections # Atoms of ref_geom are along the rows, atoms of geom_to_match # along the columns. cd = cdist(ref_coords_for_atom, coords_to_match_for_atom) # Hungarian method, row_inds are returned already sorted, so # we only have to consider col_inds. row_inds, col_inds = linear_sum_assignment(cd) # Resort the coordinates new_coords_for_atom = coords_to_match_for_atom[col_inds] # Original indices of the coordinates in the full coords array # in the Geometry object. old_inds = inds_to_match[atom] # Update coordinates and modify the array directly c3d = geom_matched.coords3d c3d[old_inds] = new_coords_for_atom return geom_matched
[docs] def apply_transform(coords3d, swap, reflection): """Apply axis swaps and reflections to a coordinate array.""" # Swap axes coords3d = coords3d[:, swap] # Reflect coords3d *= reflection return coords3d
[docs] def matched_rmsd(geom1, geom2, thresh=5e-2): """RMSD for optimally aligned and matched geometries. Returns ------- matched_rmsd : float RMSD of optimally aligned and matched geometries. matched_geoms : tuple(Geometry, Geometry) Tuple of the optimally aligned and matched geometries. """ # Work on copies of the Geometries, as calling standard_orientation # moves their coordinates. geom1_copy = geom1.copy() geom1_copy.standard_orientation() coords3d_1 = geom1_copy.coords3d geom2_copy = geom2.copy() geom2_copy.standard_orientation() coords3d_2 = geom2_copy.coords3d.copy() # After bringing the Geometries into standard orientation we may # still have to consider additional axis swaps and reflections to # allow optimal atom matching using the Hungarian method. # Six possible axis swaps, (3*2, (x, y, z)*((x), (y), (z))). axes = (0, 1, 2) swaps = list(it.permutations(axes)) # Eight possible reflections, (±x, ±y, ±z). reflections = ( ( 1, 1, 1), # no reflection (-1, 1, 1), # reflect on yz plane ( 1, -1, 1), # reflect on xz plane ( 1, 1, -1), # reflect on xy plane (-1, -1, 1), # reflect on yz and xz planes (-1, 1, -1), # reflect on yz and xy planes ( 1, -1, -1), # reflect on xz and xy planes (-1, -1, -1) # reflect on yz, xz, xy planes ) # 48 combinations of six axis swaps and eight reflections. transforms = list(it.product(swaps, reflections)) matched_rmsds = list() matched_coords = list() for i, transform in enumerate(transforms): # Apply swap and reflection c3d_trans = apply_transform(coords3d_2.copy(), *transform) geom2_to_match = geom2.copy() geom2_to_match.coords3d = c3d_trans # Apply Hungarian method to the transformed Geometry geom2_matched = match_geom_atoms(geom1_copy, geom2_to_match) mrmsd = rmsd.kabsch_rmsd(coords3d_1, geom2_matched.coords3d) matched_rmsds.append(mrmsd) matched_coords.append(geom2_matched.coords) # Break when the two geometries are similar. Then we don't have to # apply the remaining transformations. if mrmsd <= thresh: break matched_rmsds = np.array(matched_rmsds) min_rmsd_ind = matched_rmsds.argmin() min_rmsd = matched_rmsds.min() best_matching_coords = matched_coords[min_rmsd_ind] geom2_copy.coords = best_matching_coords return min_rmsd, (geom1_copy, geom2_copy)