Source code for MDAnalysis.coordinates.XYZ

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4
#
# MDAnalysis --- http://mdanalysis.googlecode.com
# Copyright (c) 2006-2011 Naveen Michaud-Agrawal,
#               Elizabeth J. Denning, Oliver Beckstein,
#               and contributors (see website for details)
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
#     N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and
#     O. Beckstein. MDAnalysis: A Toolkit for the Analysis of
#     Molecular Dynamics Simulations. J. Comput. Chem. 32 (2011), 2319--2327,
#     doi:10.1002/jcc.21787
#

"""XYZ trajectory reader --- :mod:`MDAnalysis.coordinates.XYZ`
==============================================================

Resources: the XYZ format was taken from
http://www.ks.uiuc.edu/Research/vmd/plugins/molfile/xyzplugin.html and
is therefore compatible with VMD (you need a PDB or PSF file to define
the topology, just as here)

* comments are not allowed in the XYZ file
* the atom name (first column) is ignored
* the coordinates are assumed to be space-delimited rather than fixed width (this may cause issues - see below)
* all fields to the right of the z-coordinate are ignored
* it is assumed that the coordinates are in Angstroms
* the unitcell information is all zeros since this is not recorded in the XYZ format
* the length of a timestep can be set by passing the `delta` argument, it's assumed to be in ps (default: 1 ps)

There appears to be no rigid format definition so it is likely users
will need to tweak this Class.
"""

import os, errno
import numpy
import bz2
import gzip

import base
from base import Timestep


[docs]class XYZReader(base.Reader): """Reads from an XYZ file :Data: ts Timestep object containing coordinates of current frame :Methods: ``len(xyz)`` return number of frames in xyz ``for ts in xyz:`` iterate through trajectory .. Note: this can read both compressed (foo.xyz) and compressed (foo.xyz.bz2 or foo.xyz.gz) files; uncompression is handled on the fly File format: http://www.ks.uiuc.edu/Research/vmd/plugins/molfile/xyzplugin.html Validation: the geometric centre of 1284 atoms was calculated over 500 frames using both MDAnalysis and a VMD Tcl script. There was exact agreement (measured to 3DP). bzipped and gzipped versions of the XYZ file were also tested """ # this will be overidden when an instance is created and the file extension checked format = "XYZ" # these are assumed! units = {'time': 'ps', 'length': 'Angstrom'} def __init__(self, xyzfilename, **kwargs): self.filename = xyzfilename # the filename has been parsed to be either be foo.xyz or foo.xyz.bz2 by coordinates::core.py # so the last file extension will tell us if it is bzipped or not root, ext = os.path.splitext(self.filename) if ext[1:] == "bz2": self.compression = "bz2" self.xyzfile = bz2.BZ2File(self.filename, 'rb') elif ext[1:] == "gz": self.compression = "gz" self.xyzfile = gzip.open(self.filename, 'rb') elif ext[1:] == "xyz": self.compression = None self.xyzfile = open(self.filename, 'r') # note that, like for xtc and trr files, __numatoms and __numframes are used quasi-private variables # to prevent the properties being recalculated # this is because there is no indexing so the way it measures the number of frames is to read the whole file! self.__numatoms = None self.__numframes = None self.fixed = 0 self.skip = 1 self.periodic = False self.delta = kwargs.pop("delta", 1.0) # can set delta manually, default is 1ps (taken from TRJReader) self.skip_timestep = 1 self.ts = Timestep(self.numatoms) # Read in the first timestep self._read_next_timestep() @property
[docs] def numatoms(self): """number of atoms in a frame""" if not self.__numatoms is None: # return cached value return self.__numatoms try: self.__numatoms = self._read_xyz_natoms(self.filename) except IOError: return 0 else: return self.__numatoms
def _read_xyz_natoms(self,filename): # this assumes that this is only called once at startup and that the filestream is already open # read the first line n = self.xyzfile.readline() self.close() # need to check type of n return int(n) @property def numframes(self): if not self.__numframes is None: # return cached value return self.__numframes try: self.__numframes = self._read_xyz_numframes(self.filename) except IOError: return 0 else: return self.__numframes def _read_xyz_numframes(self, filename): self._reopen() # the number of lines in the XYZ file will be 2 greater than the number of atoms linesPerFrame = self.numatoms+2 counter = 0 # step through the file (assuming xyzfile has an iterator) for i in self.xyzfile: counter = counter + 1 self.close() # need to check this is an integer! numframes = int(counter/linesPerFrame) return numframes def __iter__(self): self.ts.frame = 0 # start at 0 so that the first frame becomes 1 self._reopen() while True: try: yield self._read_next_timestep() except EOFError: self.close() raise StopIteration def _read_next_timestep(self, ts=None): # check that the timestep object exists if ts is None: ts = self.ts # check that the xyzfile object exists; if not reopen the trajectory if self.xyzfile is None: self.open_trajectory() x = [] y = [] z = [] # we assume that there are only two header lines per frame counter = -2 for line in self.xyzfile: counter += 1 if counter > 0: # assume the XYZ file is space delimited rather than being fixed format # (this could lead to problems where there is no gap e.g 9.768-23.4567) words = line.split() x.append(float(words[1])) y.append(float(words[2])) z.append(float(words[3])) # stop when the cursor has reached the end of that block if counter == self.numatoms: ts._unitcell = numpy.zeros((6), numpy.float32) ts._x[:] = x # more efficient to do it this way to avoid re-creating the numpy arrays ts._y[:] = y ts._z[:] = z ts.frame += 1 return ts raise EOFError
[docs] def rewind(self): """reposition on first frame""" self._reopen() # the next method is inherited from the Reader Class and calls _read_next_timestep self.next()
def _reopen(self): self.close() self.open_trajectory() def open_trajectory(self): if not self.xyzfile is None: raise IOError(errno.EALREADY, 'XYZ file already opened', self.filename) if not os.path.exists(self.filename): # must check; otherwise might segmentation fault raise IOError(errno.ENOENT, 'XYZ file not found', self.filename) if self.compression == "bz2": self.xyzfile = bz2.BZ2File(self.filename, 'rb') elif self.compression == "gz": self.xyzfile = gzip.open(self.filename, 'rb') elif self.compression == None: self.xyzfile = open(self.filename, 'r') # reset ts ts = self.ts ts.status = 1 ts.frame = 0 ts.step = 0 ts.time = 0 return self.xyzfile
[docs] def close(self): """Close xyz trajectory file if it was open.""" if self.xyzfile is None: return self.xyzfile.close() self.xyzfile = None
def __del__(self): if not self.xyzfile is None: self.close()