Source code for pysisyphus.xyzloader

import re

import numpy as np

from pysisyphus.constants import BOHR2ANG


[docs] def make_xyz_str(atoms, coords, comment=""): assert len(atoms) == len(coords) atoms = [a.capitalize() for a in atoms] coord_fmt = "{: 03.8f}" line_fmt = "{:>3s} " + " ".join( [ coord_fmt, ] * 3 ) body = [line_fmt.format(a, *xyz) for a, xyz in zip(atoms, coords)] body = "\n".join(body) return f"{len(atoms)}\n{comment}\n{body}"
[docs] def make_trj_str(atoms, coords_list, comments=None): if comments is None: comments = ["" for _ in coords_list] xyz_strings = [ make_xyz_str(atoms, coords, comment) for coords, comment in zip(coords_list, comments) ] return "\n".join(xyz_strings)
[docs] def coords_to_trj(trj_fn, atoms, coords_list, comments=None): coords = np.array(coords_list) coords = coords.reshape(-1, len(atoms), 3) * BOHR2ANG trj_str = make_trj_str(atoms, coords, comments) with open(trj_fn, "w") as handle: handle.write(trj_str) return trj_fn
[docs] def make_trj_str_from_geoms(geoms, comments=None, energy_comments=False): atoms = geoms[0].atoms coords_list = [geom.coords3d * BOHR2ANG for geom in geoms] if energy_comments and comments is None: comments = [str(geom._energy) for geom in geoms] elif comments is not None: assert len(comments) == len(geoms) return make_trj_str(atoms, coords_list, comments)
[docs] def write_geoms_to_trj(geoms, fn, comments=None): trj_str = make_trj_str_from_geoms(geoms, comments) with open(fn, "w") as handle: handle.write(trj_str)
[docs] def split_xyz_str(xyz_str): """Example: xyz: 1 X -1.2 1.4 0.0 1 X 2.0 4.0 0.0 """ float_ = r"([\+\d\-\.]+)" header_re = re.compile(r"(\d+)") coord_re = re.compile(fr"[a-zA-Z]+\s+{float_}\s+{float_}\s+{float_}") lines = [l.strip() for l in xyz_str.strip().split("\n")] lines_remaining = len(lines) cur_line = 0 valid_xyz_strs = list() while lines_remaining: header_mobj = header_re.match(lines[cur_line]) expect_lines = int(header_mobj[1]) slice_ = slice( cur_line + 2, cur_line + 2 + expect_lines ) # lgtm [py/hash-unhashable-value] check_lines = lines[slice_] assert len(check_lines) == expect_lines assert all([coord_re.match(line.strip()) for line in check_lines]) valid_xyz_strs.append(str(expect_lines) + "\n\n" + "\n".join(check_lines)) lines_read = expect_lines + 2 lines_remaining -= lines_read cur_line += lines_read atoms_coords = [parse_xyz_str(_, with_comment=False) for _ in valid_xyz_strs] return atoms_coords
[docs] def parse_xyz_str(xyz_str, with_comment): """Parse a xyz string. Paramters --------- xyz_str : str The contents of a .xyz file. with_comment : bool Return comment line if True. Returns ------- atoms : list List of length N (N = number of atoms) holding the element symbols. coords: np.array An array of shape (N, 3) holding the xyz coordinates. comment_line : str, optional Comment line if with_comment argument was True. """ xyz_lines = xyz_str.strip().split("\n") comment_line = xyz_lines[1] # Only consider the first four items on a line atom_num = int(xyz_lines[0]) atoms_present = len(xyz_lines) - 2 assert len(xyz_lines) == atom_num + 2, \ f"Expected {atom_num} atoms, but found only {atoms_present}!" atoms_coords = [ line.strip().split()[:4] for line in xyz_str.strip().split("\n")[2:] ] atoms, coords = zip(*[(a, c) for a, *c in atoms_coords]) coords = np.array(coords, dtype=float) if with_comment: return atoms, coords, comment_line else: return atoms, coords
[docs] def parse_xyz_file(xyz_fn, with_comment=False): with open(xyz_fn) as handle: xyz_str = handle.read() return parse_xyz_str(xyz_str, with_comment)
[docs] def parse_trj_file(trj_fn, with_comments=False): with open(trj_fn) as handle: trj_str = handle.read() return parse_trj_str(trj_str, with_comments)
[docs] def parse_trj_str(trj_str, with_comments=False): trj_lines = trj_str.strip().split("\n") number_of_atoms = int(trj_lines[0].strip()) xyz_lines = number_of_atoms + 2 # Split the trj file in evenly sized strings xyz_strs = [ "\n".join(trj_lines[i : i + xyz_lines]) for i in range(0, len(trj_lines), xyz_lines) ] xyzs = [parse_xyz_str(xyz_str, with_comments) for xyz_str in xyz_strs] assert len(xyzs) == (len(trj_lines) / xyz_lines) return xyzs