Source code for pysisyphus.io.mol2

import jinja2
import pyparsing as pp
import numpy as np

from pysisyphus.constants import ANG2BOHR
from pysisyphus.Geometry import Geometry
from pysisyphus.helpers_pure import file_or_str


@file_or_str(".mol2")
def parse_mol2(text):
    def get_line_word(*args):
        return pp.Word(*args).setWhitespaceChars(" \t")

    new_record = pp.CaselessLiteral("@<TRIPOS>")

    # <TRIPOS>MOLECULE
    molecule = (
        pp.CaselessLiteral("@<TRIPOS>MOLECULE")
        + pp.LineEnd()
        # comment line/molecule name
        + pp.OneOrMore(get_line_word(pp.printables)).set_results_name("mol_name")
        + pp.common.integer.set_results_name("num_atoms")
        + pp.Optional(pp.common.integer.set_results_name("num_bonds"))
        + pp.Optional(pp.common.integer.set_results_name("num_subst"))
        + pp.Optional(pp.common.integer.set_results_name("num_feat"))
        + pp.Optional(pp.common.integer.set_results_name("num_sets"))
        + pp.oneOf(
            ("SMALL", "BIOPOLYMER", "PROTEIN", "NUCLEIC_ACID", "SACCHARIDE")
        ).set_results_name("mol_type")
        + pp.oneOf(
            ("NO_CHARGES", "GASTEIGER", "MMFF94_CHARGES", "USER_CHARGES", "HUCKEL")
        ).set_results_name("charge_type")
        + pp.Optional(
            ~new_record + pp.Word(pp.printables).set_results_name("status_bits")
        )
        # Only spaces may not be enough for mol comment
        + pp.Optional(~new_record + pp.Word(pp.printables + " ")).set_results_name(
            "mol_comment"
        )
    )

    # <TRIPOS>ATOM
    atom_data_line = pp.Group(
        ~new_record
        + pp.common.integer.set_results_name("atom_id")
        + pp.Word(pp.alphanums).set_results_name("atom_name")
        + pp.Group(pp.common.real + pp.common.real + pp.common.real).set_results_name(
            "xyz"
        )
        + pp.Word(pp.printables).set_results_name("atom_type")
        + pp.Optional(pp.common.integer.set_results_name("subst_id"))
        + pp.Optional(pp.Word(pp.printables).set_results_name("subst_name"))
        + pp.Optional(pp.common.real.set_results_name("charge"))
        + pp.Optional(get_line_word(pp.printables).set_results_name("status_bit"))
    )
    atom = pp.CaselessLiteral("@<TRIPOS>ATOM") + pp.OneOrMore(
        atom_data_line
    ).set_results_name("atoms_xyzs")

    # <TRIPOS>BOND
    bond_data_line = pp.Group(
        ~new_record
        + pp.common.integer.set_results_name("bond_id")
        + pp.common.integer.set_results_name("origin_atom_id")
        + pp.common.integer.set_results_name("target_atom_id")
        + pp.Word(pp.printables).set_results_name("bond_type")
    )
    bond = pp.CaselessLiteral("@<TRIPOS>BOND") + pp.OneOrMore(
        bond_data_line
    ).set_results_name("bond")

    parser = molecule + atom + pp.Optional(bond)
    parser.ignore(pp.helpers.python_style_comment)

    result = parser.parseString(text)
    return result


@file_or_str(".mol2")
def atoms_coords_from_mol2(text):
    result = parse_mol2(text)
    as_dict = result.asDict()

    atoms = list()
    coords = np.zeros((as_dict["num_atoms"], 3), dtype=float)
    atoms_xyzs = as_dict["atoms_xyzs"]
    for i, line in enumerate(atoms_xyzs):
        assert i + 1 == line["atom_id"]
        atom = line["atom_type"].split(".")[0]
        atoms.append(atom)
        coords[i] = line["xyz"]
    assert len(atoms) == len(coords)

    coords *= ANG2BOHR

    return atoms, coords


[docs] def geom_from_mol2(text, **kwargs): atoms, coords = atoms_coords_from_mol2(text) geom = Geometry(atoms, coords, **kwargs) return geom
MOL2_DICT_TPL = jinja2.Template( """@<TRIPOS>MOLECULE {{ mol_name }} {{ num_atoms }} {{ num_bonds}} {{ num_subst }} {{ num_feat }} {{ num_sets }} {{ mol_type }} {{ charge_type }} @<TRIPOS>ATOM {%- for ax in atoms_xyzs %} {{ ax["atom_id"] }} {{ ax["atom_name"] }} {{ render_xyz(ax["xyz"])}} {{ ax["atom_type"] }} {{ ax["subst_id"] }} {{ ax["subst_name"] }} {{ ax["charge"]}} {%- endfor %} @<TRIPOS>BOND {%- for bond in bonds %} {{ bond["bond_id"] }} {{ bond["origin_atom_id"]}} {{ bond["target_atom_id"] }} {{ bond["bond_type"]}} {%- endfor %} """ )
[docs] def render_xyz(xyz): fmt = " >12.6f" return " ".join(map(lambda _: f"{_:{fmt}}", xyz))
[docs] def dict_to_mol2_string(as_dict): d = as_dict rendered = MOL2_DICT_TPL.render( mol_name=d["mol_name"], num_bonds=d["num_bonds"], num_subst=d["num_subst"], num_feat=d["num_feat"], num_sets=d["num_sets"], num_atoms=d["num_atoms"], mol_type=d["mol_type"], charge_type=d["charge_type"], atoms_xyzs=d["atoms_xyzs"], bonds=d["bond"], render_xyz=render_xyz, ) return rendered