Module molgri.parsers

Expand source code
import re
import numbers

import numpy as np
from mendeleev.fetch import fetch_table
from ast import literal_eval

from .bodies import Molecule
from .constants import MOLECULE_NAMES, SIX_METHOD_NAMES, FULL_RUN_NAME


class NameParser:

    def __init__(self, name: "str | dict"):
        """
        Correct ordering: '[H2O_HF_]ico[_NO]_500_full_openMM[_extra]'

        Args:
            name: string with a label used eg. for naming files, usually stating two molecules, grid type and
                  size; alternatively a dictionary with (some of) the attribute names defined below as keys.
                  An input of any other type leaves all attributes at their default values.
        """
        # define all properties
        self.central_molecule = None
        self.rotating_molecule = None
        self.grid_type = None
        self.ordering = True
        self.num_grid_points = None
        self.traj_type = None
        self.open_MM = False
        self.is_real_run = False
        self.additional_data = None
        self.ending = None
        # parse name
        if isinstance(name, str):
            self._read_str(name)
        elif isinstance(name, dict):
            self._read_dict(name)

    @staticmethod
    def _discard(tokens: list, token: str) -> None:
        """Remove the first occurrence of token from tokens; do nothing if it is not present."""
        if token in tokens:
            tokens.remove(token)

    def _read_str(self, name: str) -> None:
        """
        Fill the attributes from a name consisting of underscore-separated tokens and an optional file ending.

        Tokens that are not recognised as one of the standard parts are joined with underscores (in their
        original order) and saved in self.additional_data.
        """
        # everything after the LAST dot is the ending; splitting only once means that a name with several dots
        # no longer fails silently and keeps its ending glued to the last token
        if "." in name:
            name, self.ending = name.rsplit(".", 1)
        split_str = name.split("_")
        # first molecule name found is the central molecule, the last further one is the rotating molecule
        for split_item in split_str:
            if split_item in MOLECULE_NAMES:
                if self.central_molecule is None:
                    self.central_molecule = split_item
                else:
                    self.rotating_molecule = split_item
        for method_name in SIX_METHOD_NAMES:
            if method_name in split_str:
                self.grid_type = method_name
        # compare whole tokens so that eg. '_NOtes' neither switches ordering off nor breaks the clean-up below
        if "NO" in split_str:
            self.ordering = False
        # the first purely decimal token is the number of grid points; isdecimal() only accepts what int() can
        # convert, and the raw token is remembered so that eg. '0500' can be found again later
        num_token = next((item for item in split_str if item.isdecimal()), None)
        if num_token is not None:
            self.num_grid_points = int(num_token)
        for traj_type in ["circular", "full"]:
            if traj_type in split_str:
                self.traj_type = traj_type
        if "openMM" in split_str:
            self.open_MM = True
        # substring test kept from the original implementation (FULL_RUN_NAME is defined in .constants)
        if FULL_RUN_NAME in name:
            self.is_real_run = True
        # get the remainder of the string: discard every recognised token once, tolerating tokens that were
        # detected as substrings but do not exist as stand-alone tokens
        recognised = [self.central_molecule, self.rotating_molecule, self.grid_type, num_token, self.traj_type]
        if not self.ordering:
            recognised.append("NO")
        if self.is_real_run:
            recognised.append(FULL_RUN_NAME)
        if self.open_MM:
            recognised.append("openMM")
        for token in recognised:
            if token:
                self._discard(split_str, token)
        self.additional_data = "_".join(split_str)

    def _read_dict(self, dict_name: dict) -> None:
        """
        Fill the attributes from a dictionary; missing keys fall back to the defaults set in __init__.

        The dictionary is only read (get instead of pop), so the caller's object is not emptied as a side effect.
        """
        self.central_molecule = dict_name.get("central_molecule", None)
        self.rotating_molecule = dict_name.get("rotating_molecule", None)
        self.grid_type = dict_name.get("grid_type", None)
        self.ordering = dict_name.get("ordering", True)
        self.num_grid_points = dict_name.get("num_grid_points", None)
        self.traj_type = dict_name.get("traj_type", None)
        self.open_MM = dict_name.get("open_MM", False)
        self.is_real_run = dict_name.get("is_real_run", False)
        self.additional_data = dict_name.get("additional_data", None)
        self.ending = dict_name.get("ending", None)

    def get_dict_properties(self):
        """Return the attribute dictionary of this instance as given by vars(self)."""
        return vars(self)

    def get_standard_name(self):
        """
        Assemble the name from the set attributes in the order: central molecule, rotating molecule,
        FULL_RUN_NAME, grid type, 'NO', number of grid points, trajectory type, 'openMM'.

        Attributes that are not set are skipped; additional_data and ending are not part of the standard name.
        """
        standard_name = ""
        if self.central_molecule:
            standard_name += self.central_molecule + "_"
        if self.rotating_molecule:
            standard_name += self.rotating_molecule + "_"
        if self.is_real_run:
            standard_name += FULL_RUN_NAME + "_"
        if self.grid_type:
            standard_name += self.grid_type + "_"
        if not self.ordering:
            standard_name += "NO_"
        if self.num_grid_points:
            standard_name += str(self.num_grid_points) + "_"
        if self.traj_type:
            standard_name += self.traj_type + "_"
        if self.open_MM:
            standard_name += "openMM_"
        # drop the separator left behind by the last appended part
        if standard_name.endswith("_"):
            standard_name = standard_name[:-1]
        return standard_name

    def get_human_readable_name(self):
        """Not implemented yet; currently returns None."""
        # TODO: name eg H2O-H2O system, icosahedron grid, 22 rotations
        pass

    def get_grid_type(self):
        """Return the grid type or raise a ValueError if none is set."""
        if not self.grid_type:
            raise ValueError("No grid type given!")
        return self.grid_type

    def get_traj_type(self):
        """Return the trajectory type or raise a ValueError if none is set."""
        if not self.traj_type:
            raise ValueError("No traj type given!")
        return self.traj_type

    def get_num(self):
        """Return the number of grid points or raise a ValueError if it is not set (or zero)."""
        if not self.num_grid_points:
            raise ValueError("No number given!")
        return self.num_grid_points


# Cache of all element symbols. It is filled on the first call of particle_type2element so that the periodic
# table is fetched only once instead of once per atom.
_ELEMENT_SYMBOLS = None


def particle_type2element(particle_type: str) -> str:
    """
    A helper function to convert gromacs particle type to element name readable by mendeleev.

    Args:
        particle_type: text written at characters 10:15 in a standard GROMACS line.

    Returns:
        element symbol, one of the symbols in the periodic system

    Raises:
        ValueError: if particle_type is empty or no element can be determined from it
    """
    global _ELEMENT_SYMBOLS
    # an empty atom type would otherwise end in an IndexError at particle_type[0] below
    if not particle_type:
        raise ValueError("I do not know how to extract element name from an empty GROMACS atom type.")
    if _ELEMENT_SYMBOLS is None:
        _ELEMENT_SYMBOLS = frozenset(fetch_table('elements')["symbol"].values)
    all_symbols = _ELEMENT_SYMBOLS
    # option 1: atom_name written in gro file is equal to element name (N, Na, Cl ...) -> directly use as element
    if particle_type in all_symbols:
        element_name = particle_type
    # option 2 (special case): CA is a symbol of alpha-carbon
    elif particle_type.startswith("CA"):
        element_name = "C"
    # option 3: first two letters are the name of a typical ion in upper case
    elif particle_type[:2] in ["CL", "MG", "RB", "CS", "LI", "ZN", "NA"]:
        element_name = particle_type.capitalize()[:2]
    # option 4: special case for calcium = C0
    elif particle_type[:2] == "C0":
        element_name = "Ca"
    # option 5: first letter is the name of the element in upper case
    elif particle_type[0] in all_symbols:
        element_name = particle_type[0]
    # error if still unable to determine the element
    else:
        message = f"I do not know how to extract element name from GROMACS atom type {particle_type}."
        raise ValueError(message)
    return element_name


class BaseGroParser:

    def __init__(self, gro_read: str, parse_atoms: bool = True):
        """
        This parser reads the data from a .gro file. If multiple time steps are written, it only reads the first one.

        If you want to access or copy parts of the .gro file (comment, number of atoms, atom position lines, box)
        to another file, select parse_atoms=False (this is faster) and use the data saved in self.comment,
        self.num_atoms, self.atom_lines_nm and self.box.

        If you want to read the .gro file in order to translate/rotate atoms in it, select parse_atoms=True and
        access the Molecule object under self.molecule_set.

        Args:
            gro_read: the path to the .gro file to be parsed
            parse_atoms: select True if you want to manipulate (rotate/translate) any atoms from this .gro file;
                         select False if you only want to copy the atom position lines as-provided

        Raises:
            ValueError: if the file ends before the announced number of atom lines has been read
        """
        # the context manager closes the file even if the content turns out to be malformed
        with open(gro_read, "r") as gro_file:
            # attribute kept for backward compatibility; the handle is closed when __init__ returns
            self.gro_file = gro_file
            self.comment = gro_file.readline().strip()
            self.num_atoms = int(gro_file.readline().strip())
            self.atom_lines_nm = []
            for _ in range(self.num_atoms):
                # Append exact copy of the current line in the file to self.atom_lines_nm (including \n at the end).
                line = gro_file.readline()
                # readline() returns an empty string only at the end of the file
                if not line:
                    raise ValueError(f"File {gro_read} ends before all {self.num_atoms} atom lines were read.")
                self.atom_lines_nm.append(line)
            # the line directly after the atom lines holds the box
            box_line = gro_file.readline()
        if parse_atoms:
            a_labels, a_names, a_pos = self._parse_atoms()
            a_pos = np.array(a_pos)
            self.molecule_set = Molecule(atom_names=a_names, centers=a_pos, center_at_origin=False,
                                         gro_labels=a_labels)
        else:
            self.molecule_set = None
        self.box = tuple([literal_eval(x) for x in box_line.strip().split()])

    def _parse_atoms(self) -> tuple:
        """
        Read the atom labels, element names and positions from the fixed-width lines in self.atom_lines_nm.

        Returns:
            (list of atom labels as written in the file, list of element names, list of [x, y, z] positions)
        """
        list_gro_labels = []
        list_atom_names = []
        list_atom_pos = []
        for line in self.atom_lines_nm:
            # read out each individual part of the atom position line
            # residue_num = int(line[0:5])
            # residue_name = line[5:10].strip()
            atom_name = line[10:15].strip()
            element_name = particle_type2element(atom_name)
            # atom_num = int(line[15:20])
            x_pos_nm = float(line[20:28])
            y_pos_nm = float(line[28:36])
            z_pos_nm = float(line[36:44])
            # optionally velocities in nm/ps are written at characters 44:52, 52:60, 60:68 of the line
            list_gro_labels.append(atom_name)
            list_atom_names.append(element_name)
            list_atom_pos.append([x_pos_nm, y_pos_nm, z_pos_nm])
        return list_gro_labels, list_atom_names, list_atom_pos


class TranslationParser(object):

    def __init__(self, user_input: str):
        """
        Build a translation grid from a text description.

        Args:
            user_input: a text containing 'linspace(...)' or 'range(...)' (the arguments in brackets are
                        forwarded to np.linspace or np.arange), or else a literal number/list/tuple, whose
                        values are flattened and sorted
        """
        self.user_input = user_input
        if "linspace" in user_input:
            self.trans_grid = np.linspace(*self._read_within_brackets())
        elif "range" in user_input:
            self.trans_grid = np.arange(*self._read_within_brackets())
        else:
            explicit_values = np.array(literal_eval(user_input))
            # axis=None flattens the array before sorting
            self.trans_grid = np.sort(explicit_values, axis=None)

    def get_trans_grid(self) -> np.ndarray:
        """Return the translation grid created in __init__."""
        return self.trans_grid

    def get_increments(self):
        """
        Return an array with the first grid value followed by the differences between consecutive grid values.

        All elements must be strictly positive (checked with an assert).
        """
        grid = self.trans_grid
        steps = [later - earlier for earlier, later in zip(grid[:-1], grid[1:])]
        increment_grid = np.array([grid[0]] + steps)
        assert np.all(increment_grid > 0), "Negative or zero increments in translation grid make no sense!"
        return increment_grid

    def _read_within_brackets(self) -> tuple:
        """
        Evaluate the text between the first '(' and the first ')' after it as a python literal.

        A single number is wrapped in a one-element tuple so that the result can always be unpacked with *.
        """
        after_opening = self.user_input.split('(', 1)[1]
        inside = after_opening.split(')')[0]
        parsed = literal_eval(inside)
        if isinstance(parsed, numbers.Number):
            return (parsed,)
        return parsed

Functions

def particle_type2element(particle_type: str) ‑> str

A helper function to convert gromacs particle type to element name readable by mendeleev.

Args

particle_type
text written at characters 10:15 in a standard GROMACS line.

Returns

The element symbol, one of the symbols in the periodic table. A ValueError is raised if no element can be determined.

Expand source code
# Cache of all element symbols. It is filled on the first call of particle_type2element so that the periodic
# table is fetched only once instead of once per atom.
_ELEMENT_SYMBOLS = None


def particle_type2element(particle_type: str) -> str:
    """
    A helper function to convert gromacs particle type to element name readable by mendeleev.

    Args:
        particle_type: text written at characters 10:15 in a standard GROMACS line.

    Returns:
        element symbol, one of the symbols in the periodic system

    Raises:
        ValueError: if particle_type is empty or no element can be determined from it
    """
    global _ELEMENT_SYMBOLS
    # an empty atom type would otherwise end in an IndexError at particle_type[0] below
    if not particle_type:
        raise ValueError("I do not know how to extract element name from an empty GROMACS atom type.")
    if _ELEMENT_SYMBOLS is None:
        _ELEMENT_SYMBOLS = frozenset(fetch_table('elements')["symbol"].values)
    all_symbols = _ELEMENT_SYMBOLS
    # option 1: atom_name written in gro file is equal to element name (N, Na, Cl ...) -> directly use as element
    if particle_type in all_symbols:
        element_name = particle_type
    # option 2 (special case): CA is a symbol of alpha-carbon
    elif particle_type.startswith("CA"):
        element_name = "C"
    # option 3: first two letters are the name of a typical ion in upper case
    elif particle_type[:2] in ["CL", "MG", "RB", "CS", "LI", "ZN", "NA"]:
        element_name = particle_type.capitalize()[:2]
    # option 4: special case for calcium = C0
    elif particle_type[:2] == "C0":
        element_name = "Ca"
    # option 5: first letter is the name of the element in upper case
    elif particle_type[0] in all_symbols:
        element_name = particle_type[0]
    # error if still unable to determine the element
    else:
        message = f"I do not know how to extract element name from GROMACS atom type {particle_type}."
        raise ValueError(message)
    return element_name

Classes

class BaseGroParser (gro_read: str, parse_atoms: bool = True)

This parser reads the data from a .gro file. If multiple time steps are written, it only reads the first one.

If you want to access or copy parts of the .gro file (comment, number of atoms, atom position lines, box) to another file, select parse_atoms=False (this is faster) and use the data saved in self.comment, self.num_atoms, self.atom_lines_nm and self.box.

If you want to read the .gro file in order to translate/rotate atoms in it, select parse_atoms=True and access the Molecule object under self.molecule_set.

Args

gro_read
the path to the .gro file to be parsed
parse_atoms
select True if you want to manipulate (rotate/translate) any atoms from this .gro file; select False if you only want to copy the atom position lines as-provided
Expand source code
class BaseGroParser:

    def __init__(self, gro_read: str, parse_atoms: bool = True):
        """
        This parser reads the data from a .gro file. If multiple time steps are written, it only reads the first one.

        If you want to access or copy parts of the .gro file (comment, number of atoms, atom position lines, box)
        to another file, select parse_atoms=False (this is faster) and use the data saved in self.comment,
        self.num_atoms, self.atom_lines_nm and self.box.

        If you want to read the .gro file in order to translate/rotate atoms in it, select parse_atoms=True and
        access the Molecule object under self.molecule_set.

        Args:
            gro_read: the path to the .gro file to be parsed
            parse_atoms: select True if you want to manipulate (rotate/translate) any atoms from this .gro file;
                         select False if you only want to copy the atom position lines as-provided

        Raises:
            ValueError: if the file ends before the announced number of atom lines has been read
        """
        # the context manager closes the file even if the content turns out to be malformed
        with open(gro_read, "r") as gro_file:
            # attribute kept for backward compatibility; the handle is closed when __init__ returns
            self.gro_file = gro_file
            self.comment = gro_file.readline().strip()
            self.num_atoms = int(gro_file.readline().strip())
            self.atom_lines_nm = []
            for _ in range(self.num_atoms):
                # Append exact copy of the current line in the file to self.atom_lines_nm (including \n at the end).
                line = gro_file.readline()
                # readline() returns an empty string only at the end of the file
                if not line:
                    raise ValueError(f"File {gro_read} ends before all {self.num_atoms} atom lines were read.")
                self.atom_lines_nm.append(line)
            # the line directly after the atom lines holds the box
            box_line = gro_file.readline()
        if parse_atoms:
            a_labels, a_names, a_pos = self._parse_atoms()
            a_pos = np.array(a_pos)
            self.molecule_set = Molecule(atom_names=a_names, centers=a_pos, center_at_origin=False,
                                         gro_labels=a_labels)
        else:
            self.molecule_set = None
        self.box = tuple([literal_eval(x) for x in box_line.strip().split()])

    def _parse_atoms(self) -> tuple:
        """
        Read the atom labels, element names and positions from the fixed-width lines in self.atom_lines_nm.

        Returns:
            (list of atom labels as written in the file, list of element names, list of [x, y, z] positions)
        """
        list_gro_labels = []
        list_atom_names = []
        list_atom_pos = []
        for line in self.atom_lines_nm:
            # read out each individual part of the atom position line
            # residue_num = int(line[0:5])
            # residue_name = line[5:10].strip()
            atom_name = line[10:15].strip()
            element_name = particle_type2element(atom_name)
            # atom_num = int(line[15:20])
            x_pos_nm = float(line[20:28])
            y_pos_nm = float(line[28:36])
            z_pos_nm = float(line[36:44])
            # optionally velocities in nm/ps are written at characters 44:52, 52:60, 60:68 of the line
            list_gro_labels.append(atom_name)
            list_atom_names.append(element_name)
            list_atom_pos.append([x_pos_nm, y_pos_nm, z_pos_nm])
        return list_gro_labels, list_atom_names, list_atom_pos
class NameParser (name: str)

Correct ordering: '[H2O_HF_]ico[_NO]_500_full_openMM[_extra]'

Args

string with a label used eg. for naming files, usually stating two molecules, grid type and size

Expand source code
class NameParser:

    def __init__(self, name: "str | dict"):
        """
        Correct ordering: '[H2O_HF_]ico[_NO]_500_full_openMM[_extra]'

        Args:
            name: string with a label used eg. for naming files, usually stating two molecules, grid type and
                  size; alternatively a dictionary with (some of) the attribute names defined below as keys.
                  An input of any other type leaves all attributes at their default values.
        """
        # define all properties
        self.central_molecule = None
        self.rotating_molecule = None
        self.grid_type = None
        self.ordering = True
        self.num_grid_points = None
        self.traj_type = None
        self.open_MM = False
        self.is_real_run = False
        self.additional_data = None
        self.ending = None
        # parse name
        if isinstance(name, str):
            self._read_str(name)
        elif isinstance(name, dict):
            self._read_dict(name)

    @staticmethod
    def _discard(tokens: list, token: str) -> None:
        """Remove the first occurrence of token from tokens; do nothing if it is not present."""
        if token in tokens:
            tokens.remove(token)

    def _read_str(self, name: str) -> None:
        """
        Fill the attributes from a name consisting of underscore-separated tokens and an optional file ending.

        Tokens that are not recognised as one of the standard parts are joined with underscores (in their
        original order) and saved in self.additional_data.
        """
        # everything after the LAST dot is the ending; splitting only once means that a name with several dots
        # no longer fails silently and keeps its ending glued to the last token
        if "." in name:
            name, self.ending = name.rsplit(".", 1)
        split_str = name.split("_")
        # first molecule name found is the central molecule, the last further one is the rotating molecule
        for split_item in split_str:
            if split_item in MOLECULE_NAMES:
                if self.central_molecule is None:
                    self.central_molecule = split_item
                else:
                    self.rotating_molecule = split_item
        for method_name in SIX_METHOD_NAMES:
            if method_name in split_str:
                self.grid_type = method_name
        # compare whole tokens so that eg. '_NOtes' neither switches ordering off nor breaks the clean-up below
        if "NO" in split_str:
            self.ordering = False
        # the first purely decimal token is the number of grid points; isdecimal() only accepts what int() can
        # convert, and the raw token is remembered so that eg. '0500' can be found again later
        num_token = next((item for item in split_str if item.isdecimal()), None)
        if num_token is not None:
            self.num_grid_points = int(num_token)
        for traj_type in ["circular", "full"]:
            if traj_type in split_str:
                self.traj_type = traj_type
        if "openMM" in split_str:
            self.open_MM = True
        # substring test kept from the original implementation (FULL_RUN_NAME is defined in .constants)
        if FULL_RUN_NAME in name:
            self.is_real_run = True
        # get the remainder of the string: discard every recognised token once, tolerating tokens that were
        # detected as substrings but do not exist as stand-alone tokens
        recognised = [self.central_molecule, self.rotating_molecule, self.grid_type, num_token, self.traj_type]
        if not self.ordering:
            recognised.append("NO")
        if self.is_real_run:
            recognised.append(FULL_RUN_NAME)
        if self.open_MM:
            recognised.append("openMM")
        for token in recognised:
            if token:
                self._discard(split_str, token)
        self.additional_data = "_".join(split_str)

    def _read_dict(self, dict_name: dict) -> None:
        """
        Fill the attributes from a dictionary; missing keys fall back to the defaults set in __init__.

        The dictionary is only read (get instead of pop), so the caller's object is not emptied as a side effect.
        """
        self.central_molecule = dict_name.get("central_molecule", None)
        self.rotating_molecule = dict_name.get("rotating_molecule", None)
        self.grid_type = dict_name.get("grid_type", None)
        self.ordering = dict_name.get("ordering", True)
        self.num_grid_points = dict_name.get("num_grid_points", None)
        self.traj_type = dict_name.get("traj_type", None)
        self.open_MM = dict_name.get("open_MM", False)
        self.is_real_run = dict_name.get("is_real_run", False)
        self.additional_data = dict_name.get("additional_data", None)
        self.ending = dict_name.get("ending", None)

    def get_dict_properties(self):
        """Return the attribute dictionary of this instance as given by vars(self)."""
        return vars(self)

    def get_standard_name(self):
        """
        Assemble the name from the set attributes in the order: central molecule, rotating molecule,
        FULL_RUN_NAME, grid type, 'NO', number of grid points, trajectory type, 'openMM'.

        Attributes that are not set are skipped; additional_data and ending are not part of the standard name.
        """
        standard_name = ""
        if self.central_molecule:
            standard_name += self.central_molecule + "_"
        if self.rotating_molecule:
            standard_name += self.rotating_molecule + "_"
        if self.is_real_run:
            standard_name += FULL_RUN_NAME + "_"
        if self.grid_type:
            standard_name += self.grid_type + "_"
        if not self.ordering:
            standard_name += "NO_"
        if self.num_grid_points:
            standard_name += str(self.num_grid_points) + "_"
        if self.traj_type:
            standard_name += self.traj_type + "_"
        if self.open_MM:
            standard_name += "openMM_"
        # drop the separator left behind by the last appended part
        if standard_name.endswith("_"):
            standard_name = standard_name[:-1]
        return standard_name

    def get_human_readable_name(self):
        """Not implemented yet; currently returns None."""
        # TODO: name eg H2O-H2O system, icosahedron grid, 22 rotations
        pass

    def get_grid_type(self):
        """Return the grid type or raise a ValueError if none is set."""
        if not self.grid_type:
            raise ValueError("No grid type given!")
        return self.grid_type

    def get_traj_type(self):
        """Return the trajectory type or raise a ValueError if none is set."""
        if not self.traj_type:
            raise ValueError("No traj type given!")
        return self.traj_type

    def get_num(self):
        """Return the number of grid points or raise a ValueError if it is not set (or zero)."""
        if not self.num_grid_points:
            raise ValueError("No number given!")
        return self.num_grid_points

Methods

def get_dict_properties(self)
Expand source code
def get_dict_properties(self):
    """Return the attribute dictionary of this instance as given by vars(self)."""
    return vars(self)
def get_grid_type(self)
Expand source code
def get_grid_type(self):
    """Return self.grid_type; raise a ValueError if it is not set."""
    if self.grid_type:
        return self.grid_type
    raise ValueError("No grid type given!")
def get_human_readable_name(self)
Expand source code
def get_human_readable_name(self):
    """Not implemented yet; currently returns None."""
    # TODO: name eg H2O-H2O system, icosahedron grid, 22 rotations
    pass
def get_num(self)
Expand source code
def get_num(self):
    """Return self.num_grid_points; raise a ValueError if it is not set (or zero)."""
    if self.num_grid_points:
        return self.num_grid_points
    raise ValueError("No number given!")
def get_standard_name(self)
Expand source code
def get_standard_name(self):
    """
    Assemble the name from the set attributes in the order: central molecule, rotating molecule,
    FULL_RUN_NAME, grid type, 'NO', number of grid points, trajectory type, 'openMM'.

    Attributes that are not set are skipped.
    """
    pieces = []
    if self.central_molecule:
        pieces.append(self.central_molecule + "_")
    if self.rotating_molecule:
        pieces.append(self.rotating_molecule + "_")
    if self.is_real_run:
        pieces.append(FULL_RUN_NAME + "_")
    if self.grid_type:
        pieces.append(self.grid_type + "_")
    if not self.ordering:
        pieces.append("NO_")
    if self.num_grid_points:
        pieces.append(str(self.num_grid_points) + "_")
    if self.traj_type:
        pieces.append(self.traj_type + "_")
    if self.open_MM:
        pieces.append("openMM_")
    assembled = "".join(pieces)
    # cut off the single separator that follows the last piece
    return assembled[:-1] if assembled.endswith("_") else assembled
def get_traj_type(self)
Expand source code
def get_traj_type(self):
    """Return self.traj_type; raise a ValueError if it is not set."""
    if self.traj_type:
        return self.traj_type
    raise ValueError("No traj type given!")
class TranslationParser (user_input: str)
Expand source code
class TranslationParser(object):

    def __init__(self, user_input: str):
        """
        Build a translation grid from a text description.

        Args:
            user_input: a text containing 'linspace(...)' or 'range(...)' (the arguments in brackets are
                        forwarded to np.linspace or np.arange), or else a literal number/list/tuple, whose
                        values are flattened and sorted
        """
        self.user_input = user_input
        if "linspace" in user_input:
            self.trans_grid = np.linspace(*self._read_within_brackets())
        elif "range" in user_input:
            self.trans_grid = np.arange(*self._read_within_brackets())
        else:
            explicit_values = np.array(literal_eval(user_input))
            # axis=None flattens the array before sorting
            self.trans_grid = np.sort(explicit_values, axis=None)

    def get_trans_grid(self) -> np.ndarray:
        """Return the translation grid created in __init__."""
        return self.trans_grid

    def get_increments(self):
        """
        Return an array with the first grid value followed by the differences between consecutive grid values.

        All elements must be strictly positive (checked with an assert).
        """
        grid = self.trans_grid
        steps = [later - earlier for earlier, later in zip(grid[:-1], grid[1:])]
        increment_grid = np.array([grid[0]] + steps)
        assert np.all(increment_grid > 0), "Negative or zero increments in translation grid make no sense!"
        return increment_grid

    def _read_within_brackets(self) -> tuple:
        """
        Evaluate the text between the first '(' and the first ')' after it as a python literal.

        A single number is wrapped in a one-element tuple so that the result can always be unpacked with *.
        """
        after_opening = self.user_input.split('(', 1)[1]
        inside = after_opening.split(')')[0]
        parsed = literal_eval(inside)
        if isinstance(parsed, numbers.Number):
            return (parsed,)
        return parsed

Methods

def get_increments(self)
Expand source code
def get_increments(self):
    """
    Return an array with the first grid value followed by the differences between consecutive grid values.

    All elements must be strictly positive (checked with an assert).
    """
    grid = self.trans_grid
    steps = [later - earlier for earlier, later in zip(grid[:-1], grid[1:])]
    increment_grid = np.array([grid[0]] + steps)
    assert np.all(increment_grid > 0), "Negative or zero increments in translation grid make no sense!"
    return increment_grid
def get_trans_grid(self) ‑> numpy.ndarray
Expand source code
def get_trans_grid(self) -> np.ndarray:
    """Return the translation grid stored in self.trans_grid."""
    return self.trans_grid