Source code for MDAnalysis.analysis.density

# -*- Mode: python; tab-width: 4; indent-tabs-mode:nil; coding:utf-8 -*-
# vim: tabstop=4 expandtab shiftwidth=4 softtabstop=4 fileencoding=utf-8
#
# MDAnalysis --- http://mdanalysis.googlecode.com
# Copyright (c) 2006-2015 Naveen Michaud-Agrawal, Elizabeth J. Denning, Oliver Beckstein
# and contributors (see AUTHORS for the full list)
#
# Released under the GNU Public Licence, v2 or any higher version
#
# Please cite your use of MDAnalysis in published work:
#
# N. Michaud-Agrawal, E. J. Denning, T. B. Woolf, and O. Beckstein.
# MDAnalysis: A Toolkit for the Analysis of Molecular Dynamics Simulations.
# J. Comput. Chem. 32 (2011), 2319--2327, doi:10.1002/jcc.21787
#

# MDAnalysis -- density analysis
# Copyright (c) 2007-2011 Oliver Beckstein <orbeckst@gmail.com>
# (based on code from Hop --- a framework to analyze solvation dynamics from MD simulations)

"""
Generating densities from trajectories --- :mod:`MDAnalysis.analysis.density`
=============================================================================

:Author: Oliver Beckstein
:Year: 2011
:Copyright: GNU Public License v3

The module provides classes and functions to generate and represent
volumetric data, in particular densities.

Generating a density from a MD trajectory
-----------------------------------------

An input trajectory is required that

1. Has been centered on the protein of interest.
2. Has all molecules made whole that have been broken across periodic
   boundaries.
3. Has the solvent molecules remapped so that they are closest to the
   solute (this is important when using funky unit cells such as
   a dodecahedron or a truncated octahedron).

To generate the density of water molecules around a protein::

  from MDAnalysis.analysis.density import density_from_Universe
  u = Universe(PSF,DCD)
  D = density_from_Universe(u, delta=1.0, atomselection="name OH2")
  D.convert_density('TIP3P')
  D.export("water.dx")

The positions of all water oxygens are histogrammed on a grid with spacing
*delta* = 1 A. Initially the density is measured in 1/A**3. With the
:meth:`Density.convert_density` method, the units of measurement are
changed. In the example we are now measuring the density relative to the
literature value of the TIP3P water model at ambient conditions (see the values
in :data:`MDAnalysis.core.units.water` for details). Finally, the density is
writte as an OpenDX_ compatible file that can be read in VMD_ or PyMOL_.

See :class:`Density` for details. In particular, the density is stored
as a NumPy array in :attr:`Density.grid`, which can be processed in
any manner.

.. _OpenDX: http://www.opendx.org/


Classes and Functions
---------------------

.. autoclass:: Density
   :members:
   :inherited-members:
   :show-inheritance:
.. autofunction:: density_from_Universe
.. autofunction:: density_from_trajectory
.. autofunction:: density_from_PDB
.. autofunction:: Bfactor2RMSF
.. autoclass:: BfactorDensityCreator
   :members:
.. autoclass:: Grid
   :members:
   :inherited-members:

"""
import numpy  # need v >= 1.0.3
import sys
import os
import os.path
import errno
import cPickle
import warnings

try:
    from gridData import Grid  # http://github.com/orbeckst/GridDataFormats
except ImportError:
    print """ERROR --- The GridDataFormats package can not be found!

The 'gridData' module from GridDataFormats could not be
imported. Please install it first.  You can try installing with
setuptools directly from the internet:

    easy_install GridDataFormats

Alternatively, download the package from

    http://pypi.python.org/pypi/GridDataFormats/

and install in the usual manner.
"""
    raise

import MDAnalysis
from MDAnalysis.core.util import fixedwidth_bins, iterable, asiterable
from MDAnalysis import NoDataError, MissingDataWarning

import logging

logger = logging.getLogger("MDAnalysis.analysis.density")


[docs]class Density(Grid): """Class representing a density on a regular cartesian grid. The data (:attr:`Density.grid`) can be manipulated as a standard numpy array. Changes can be saved to a file using the :meth:`Density.save` method. The grid can be restored using the :meth:`Density.load` method or by supplying the filename to the constructor. The attribute :attr:`Density.metadata` holds a user-defined dictionary that can be used to annotate the data. It is also saved with :meth:`Density.save`. The :meth:`Density.export` method always exports a 3D object (written in such a way to be readable in VMD_ and PyMOL_), the rest should work for an array of any dimension. If the input histogram consists of counts per cell then the :meth:`Density.make_density` method converts the grid to a physical density. For a probability density, divide it by :meth:`Density.grid.sum` or use ``normed=True`` right away in :func:`~numpy.histogramdd`. The user *should* set the *parameters* keyword (see docs for the constructor); in particular, if the data are already a density, one must set *isDensity* == ``True`` because there is no reliable way to detect if data represent counts or a density. As a special convenience, if data are read from a file and the user has not set *isDensity* then it is assumed that the data are in fact a density. Typical use: 1. From a histogram (i.e. counts on a grid):: h,edges = numpy.histogramdd(...) D = Density(h,edges) D.make_density() 2. From a saved density file (e.g. in OpenDX format), where the lengths are in Angstrom and the density in 1/A**3:: D = Density("density.dx") 3. From a saved density file (e.g. in OpenDX format), where the lengths are in Angstrom and the density is measured relative to the density of water at ambient conditions:: D = Density("density.dx", units={'density': 'water'}) 4. From a saved *histogram* (less common, but in order to demonstrate the *parameters* keyword) where the lengths are in nm:: D = Density("counts.dx", parameters={'isDensity': False}, units={'length': 'nm'}) D.make_density() D.convert_length('Angstrom^{-3}') D.convert_density('water') After the final step, ``D`` will contain a density on a grid measured in Angstrom, with the density values itself measured relative to the density of water. :class:`Density` objects can be algebraically manipulated (added, subtracted, multiplied, ...) but there are *no sanity checks* in place to make sure that units, metadata, etc are compatible! .. Note:: It is suggested to construct the Grid object from a histogram, to supply the appropriate length unit, and to use :meth:`Density.make_density` to obtain a density. This ensures that the length- and the density unit correspond to each other. .. SeeAlso:: :class:`Grid` which is the base class of :class:`Density`. (:class:`Grid` has been imported from :class:`gridData.Grid` which is part of GridDataFormats_). .. _VMD: http://www.ks.uiuc.edu/Research/vmd/ .. _PyMOL: http://www.pymol.org/ .. _GridDataFormats: https://github.com/orbeckst/GridDataFormats """ def __init__(self, *args, **kwargs): """Create a :class:`Density` from data. :Arguments: *grid* histogram or density, typically a :class:`numpy.ndarray` *edges* list of arrays, the lower and upper bin edges along the axes *parameters* dictionary of class parameters; saved with :meth:`Density.save`. The following keys are meaningful to the class. Meaning of the values are listed: *isDensity* - ``False``: grid is a histogram with counts [default] - ``True``: a density Applying :meth:`Density.make_density`` sets it to ``True``. *units* A dict with the keys - *length*: physical unit of grid edges (Angstrom or nm) [Angstrom] - *density*: unit of the density if ``isDensity == True`` or ``None`` otherwise; the default is "Angstrom^{-3}" for densities (meaning A^-3). (Actually, the default unit is the value of :attr:`MDAnalysis.core.flags['length_unit']`; in most cases this is "Angstrom".) *metadata* a user defined dictionary of arbitrary values associated with the density; the class does not touch :attr:`Density.metadata` but stores it with :meth:`Density.save` """ length_unit = MDAnalysis.core.flags['length_unit'] parameters = kwargs.pop('parameters', {}) if (len(args) > 0 and type(args[0]) is str) or (type(kwargs.get('grid', None) is str)): # try to be smart: when reading from a file then it is likely that this # is a density parameters.setdefault('isDensity', True) else: parameters.setdefault('isDensity', False) units = kwargs.pop('units', {}) units.setdefault('length', length_unit) if parameters['isDensity']: units.setdefault('density', length_unit) else: units.setdefault('density', None) super(Density, self).__init__(*args, **kwargs) self.parameters = parameters # isDensity: set by make_density() self.units = units def _check_set_unit(self, u): """Check that all bindings ``{unit_type : value, ...}`` in the dict `u` are valid and set the object's units attribute. """ # all this unit crap should be a class... try: for unit_type, value in u.items(): if value is None: # check here, too iffy to use dictionary[None]=None self.units[unit_type] = None continue try: MDAnalysis.core.units.conversion_factor[unit_type][value] self.units[unit_type] = value except KeyError: raise ValueError('Unit ' + str(value) + ' of type ' + str(unit_type) + ' is not recognized.') except AttributeError: errmsg = '"unit" must be a dictionary with keys "length" and "density.' logger.fatal(errmsg) raise ValueError(errmsg) # need at least length and density (can be None) if 'length' not in self.units: raise ValueError('"unit" must contain a unit for "length".') if 'density' not in self.units: self.units['density'] = None
[docs] def make_density(self): """Convert the grid (a histogram, counts in a cell) to a density (counts/volume). make_density() (1) This changes the grid irrevocably. (2) For a probability density, manually divide by grid.sum(). If this is already a density, then a warning is issued and nothing is done. """ # Make it a density by dividing by the volume of each grid cell # (from numpy.histogramdd, which is for general n-D grids) if self.parameters['isDensity']: msg = "Running make_density() makes no sense: Grid is already a density. Nothing done." logger.warn(msg) warnings.warn(msg) return dedges = map(numpy.diff, self.edges) D = len(self.edges) for i in xrange(D): shape = numpy.ones(D, int) shape[i] = len(dedges[i]) self.grid /= dedges[i].reshape(shape) self.parameters['isDensity'] = True self.units['density'] = self.units['length'] + "^{-3}" # see core.units.densityUnit_factor
[docs] def convert_length(self, unit='Angstrom'): """Convert Grid object to the new *unit*. Grid.convert_length(<unit>) :Keywords: *unit* Angstrom, nm This changes the edges but will not change the density; it is the user's responsibility to supply the appropriate unit if the Grid object is constructed from a density. It is suggested to start from a histogram and a length unit and use :meth:`make_density`. """ if unit == self.units['length']: return cvnfact = MDAnalysis.core.units.get_conversion_factor('length', self.units['length'], unit) self.edges = [x * cvnfact for x in self.edges] self.units['length'] = unit self._update() # needed to recalculate midpoints and origin
[docs] def convert_density(self, unit='Angstrom'): """Convert the density to the physical units given by *unit*. Grid.convert_to(unit) *unit* can be one of the following: ============= =============================================================== name description of the unit ============= =============================================================== Angstrom^{-3} particles/A**3 nm^{-3} particles/nm**3 SPC density of SPC water at standard conditions TIP3P ... see :data:`MDAnalysis.core.units.water` TIP4P ... see :data:`MDAnalysis.core.units.water` water density of real water at standard conditions (0.997 g/cm**3) Molar mol/l ============= =============================================================== Note: (1) This only works if the initial length unit is provided. (2) Conversions always go back to unity so there can be rounding and floating point artifacts for multiple conversions. There may be some undesirable cross-interactions with :meth:`convert_length`... """ if not self.parameters['isDensity']: errmsg = 'The grid is not a density so converty_density() makes no sense.' logger.fatal(errmsg) raise RuntimeError(errmsg) if unit == self.units['density']: return try: self.grid *= MDAnalysis.core.units.get_conversion_factor('density', self.units['density'], unit) except KeyError: raise ValueError("The name of the unit (%r supplied) must be one of:\n%r" % (unit, MDAnalysis.core.units.conversion_factor['density'].keys())) self.units['density'] = unit
def __repr__(self): if self.parameters['isDensity']: grid_type = 'density' else: grid_type = 'histogram' return '<Density ' + grid_type + ' with ' + str(self.grid.shape) + ' bins>'
[docs]def density_from_trajectory(*args, **kwargs): """Create a density grid from a trajectory. density_from_trajectory(PSF, DCD, delta=1.0, atomselection='name OH2', ...) --> density or density_from_trajectory(PDB, XTC, delta=1.0, atomselection='name OH2', ...) --> density :Arguments: topology file any topology file understood by MDAnalysis trajectory any trajectory (coordinate) file understood by MDAnalysis; if reading a single PDB file it is sufficient to just provide it once as a single argument :Keywords: atomselection selection string (MDAnalysis syntax) for the species to be analyzed ["name OH2"] delta bin size for the density grid in Angstroem (same in x,y,z) [1.0] metadata dictionary of additional data to be saved with the object padding increase histogram dimensions by padding (on top of initial box size) in Angstroem [2.0] soluteselection MDAnalysis selection for the solute, e.g. "protein" [``None``] cutoff With *cutoff*, select '<atomsel> NOT WITHIN <cutoff> OF <soluteselection>' (Special routines that are faster than the standard AROUND selection) [0] :Returns: :class:`Density` .. SeeAlso:: docs for :func:`density_from_Universe` (defaults for kwargs are defined there). """ return density_from_Universe(MDAnalysis.Universe(*args), **kwargs)
[docs]def density_from_Universe(universe, delta=1.0, atomselection='name OH2', metadata=None, padding=2.0, cutoff=0, soluteselection=None, use_kdtree=True, **kwargs): """Create a density grid from a MDAnalysis.Universe object. density_from_Universe(universe, delta=1.0, atomselection='name OH2', ...) --> density :Arguments: universe :class:`MDAnalysis.Universe` object with a trajectory :Keywords: atomselection selection string (MDAnalysis syntax) for the species to be analyzed ["name OH2"] delta bin size for the density grid in Angstroem (same in x,y,z) [1.0] metadata dictionary of additional data to be saved with the object padding increase histogram dimensions by padding (on top of initial box size) in Angstroem [2.0] soluteselection MDAnalysis selection for the solute, e.g. "protein" [``None``] cutoff With *cutoff*, select '<atomsel> NOT WITHIN <cutoff> OF <soluteselection>' (Special routines that are faster than the standard AROUND selection) [0] parameters dict with some special parameters for :class:`Density` (see doc) kwargs metadata, parameters are modified and passed on to :class:`Density` :Returns: :class:`Density` """ try: universe.selectAtoms('all') universe.trajectory.ts except AttributeError: raise TypeError("The universe must be a proper MDAnalysis.Universe instance.") u = universe if cutoff > 0 and soluteselection is not None: # special fast selection for '<atomsel> not within <cutoff> of <solutesel>' notwithin_coordinates = notwithin_coordinates_factory(u, atomselection, soluteselection, cutoff, use_kdtree=use_kdtree) def current_coordinates(): return notwithin_coordinates() else: group = u.selectAtoms(atomselection) def current_coordinates(): return group.coordinates() coord = current_coordinates() logger.info("Selected %d atoms out of %d atoms (%s) from %d total." % (coord.shape[0], len(u.selectAtoms(atomselection)), atomselection, len(u.atoms))) # mild warning; typically this is run on RMS-fitted trajectories and # so the box information is rather meaningless box, angles = u.trajectory.ts.dimensions[:3], u.trajectory.ts.dimensions[3:] if tuple(angles) != (90., 90., 90.): msg = "Non-orthorhombic unit-cell --- make sure that it has been remapped properly!" warnings.warn(msg) logger.warn(msg) # Make the box bigger to avoid as much as possible 'outlier'. This # is important if the sites are defined at a high density: in this # case the bulk regions don't have to be close to 1 * n0 but can # be less. It's much more difficult to deal with outliers. The # ideal solution would use images: implement 'looking across the # periodic boundaries' but that gets complicate when the box # rotates due to RMS fitting. smin = numpy.min(coord, axis=0) - padding smax = numpy.max(coord, axis=0) + padding BINS = fixedwidth_bins(delta, smin, smax) arange = zip(BINS['min'], BINS['max']) bins = BINS['Nbins'] # create empty grid with the right dimensions (and get the edges) grid, edges = numpy.histogramdd(numpy.zeros((1, 3)), bins=bins, range=arange, normed=False) grid *= 0.0 h = grid.copy() for ts in u.trajectory: print "Histograming %6d atoms in frame %5d/%d [%5.1f%%]\r" % \ (len(coord), ts.frame, u.trajectory.numframes, 100.0 * ts.frame / u.trajectory.numframes), coord = current_coordinates() if len(coord) == 0: continue h[:], edges[:] = numpy.histogramdd(coord, bins=bins, range=arange, normed=False) grid += h # accumulate average histogram print numframes = u.trajectory.numframes / u.trajectory.skip grid /= float(numframes) # pick from kwargs metadata = kwargs.pop('metadata', {}) metadata['psf'] = u.filename metadata['dcd'] = u.trajectory.filename metadata['atomselection'] = atomselection metadata['numframes'] = numframes metadata['totaltime'] = round(u.trajectory.numframes * u.trajectory.dt, 3) metadata['dt'] = u.trajectory.dt metadata['time_unit'] = MDAnalysis.core.flags['time_unit'] metadata['trajectory_skip'] = u.trajectory.skip_timestep # frames metadata['trajectory_delta'] = u.trajectory.delta # in native units if cutoff > 0 and soluteselection is not None: metadata['soluteselection'] = soluteselection metadata['cutoff'] = cutoff # in Angstrom parameters = kwargs.pop('parameters', {}) parameters['isDensity'] = False # must override # all other kwargs are discarded g = Density(grid=grid, edges=edges, units={'length': MDAnalysis.core.flags['length_unit']}, parameters=parameters, metadata=metadata) g.make_density() logger.info("Density completed (initial density in Angstrom**-3)") return g
def notwithin_coordinates_factory(universe, sel1, sel2, cutoff, not_within=True, use_kdtree=True): """Generate optimized selection for '*sel1* not within *cutoff* of *sel2*' Example usage:: notwithin_coordinates = notwithin_coordinates_factory(universe, 'name OH2','protein and not name H*',3.5) ... coord = notwithin_coordinates() # changes with time step coord = notwithin_coordinates(cutoff2) # can use different cut off :Keywords: *not_within* True: selection behaves as 'not within' (As described above) False: selection is a <sel1> WITHIN <cutoff> OF <sel2>' *use_kdtree* True: use fast kd-tree based selections (requires new MDAnalysis >= 0.6) False: use distance matrix approach .. Note:: * Periodic boundary conditions are *not* taken into account: the naive minimum image convention employed in the distance check is currently not being applied to remap the coordinates themselves, and hence it would lead to counts in the wrong region. * The selections are static and do not change with time steps. """ # Benchmark of FABP system (solvent 3400 OH2, protein 2100 atoms) on G4 powerbook, 500 frames # cpu/s relative speedup use_kdtree # distance matrix 633 1 1 False # AROUND + kdtree 420 0.66 1.5 n/a ('name OH2 around 4 protein') # manual + kdtree 182 0.29 3.5 True solvent = universe.selectAtoms(sel1) protein = universe.selectAtoms(sel2) if use_kdtree: # using faster hand-coded 'not within' selection with kd-tree import MDAnalysis.KDTree.NeighborSearch as NS import MDAnalysis.core.AtomGroup set_solvent = set(solvent) # need sets to do bulk = allsolvent - selection if not_within is True: # default def notwithin_coordinates(cutoff=cutoff): # must update every time step ns_w = NS.AtomNeighborSearch(solvent) # build kd-tree on solvent (N_w > N_protein) solvation_shell = ns_w.search_list(protein, cutoff) # solvent within CUTOFF of protein group = MDAnalysis.core.AtomGroup.AtomGroup(set_solvent - set(solvation_shell)) # bulk return group.coordinates() else: def notwithin_coordinates(cutoff=cutoff): # acts as '<solvent> WITHIN <cutoff> OF <protein>' # must update every time step ns_w = NS.AtomNeighborSearch(solvent) # build kd-tree on solvent (N_w > N_protein) group = ns_w.search_list(protein, cutoff) # solvent within CUTOFF of protein return group.coordinates() else: # slower distance matrix based (calculate all with all distances first) import MDAnalysis.analysis.distances dist = numpy.zeros((len(solvent), len(protein)), dtype=numpy.float64) box = None # as long as s_coor is not minimum-image remapped if not_within is True: # default compare = numpy.greater aggregatefunc = numpy.all else: compare = numpy.less_equal aggregatefunc = numpy.any def notwithin_coordinates(cutoff=cutoff): s_coor = solvent.coordinates() p_coor = protein.coordinates() # Does water i satisfy d[i,j] > r for ALL j? d = MDAnalysis.analysis.distances.distance_array(s_coor, p_coor, box=box, result=dist) return s_coor[aggregatefunc(compare(d, cutoff), axis=1)] return notwithin_coordinates
[docs]def Bfactor2RMSF(B): r"""Atomic root mean square fluctuation (in Angstrom) from the crystallographic B-factor RMSF and B-factor are related by [Willis1975]_ .. math:: B = \frac{8\pi^2}{3} \rm{RMSF}^2 and this function returns .. math:: \rm{RMSF} = \sqrt{\frac{3 B}{8\pi^2}} .. rubric:: References .. [Willis1975] BTM Willis and AW Pryor. *Thermal vibrations in crystallography*. Cambridge Univ. Press, 1975 """ return numpy.sqrt(3. * B / 8.) / numpy.pi
[docs]def density_from_PDB(pdb, **kwargs): """Create a density from a single frame PDB. Typical use is to make a density from the crystal water molecules. The density is created from isotropic gaussians centered at each selected atoms. If B-factors are present in the file then they are used to calculate the width of the gaussian. Using the *sigma* keyword, one can override this choice and prescribe a gaussian width for all atoms (in Angstrom), which is calculated as described for :func:`Bfactor2RMSF`. .. Note:: The current implementation is *painfully* slow. .. SeeAlso:: :func:`Bfactor2RMSF` and :class:`BfactorDensityCreator`. :Arguments: *pdb* PDB file (should have the temperatureFactor set); ANISO records are currently *not* processed :Keywords: *atomselection* selection string (MDAnalysis syntax) for the species to be analyzed ['resname HOH and name O'] *delta* bin size for the density grid in Angstroem (same in x,y,z) [1.0] *metadata* dictionary of additional data to be saved with the object [``None``] *padding* increase histogram dimensions by padding (on top of initial box size) [1.0] *sigma* width (in Angstrom) of the gaussians that are used to build up the density; if ``None`` then uses B-factors from *pdb* [``None``] :Returns: a :class:`Density` object with a density measured relative to the water density at standard conditions """ return BfactorDensityCreator(pdb, **kwargs).Density()
[docs]class BfactorDensityCreator(object): """Create a density grid from a pdb file using MDAnalysis. dens = BfactorDensityCreator(pdb,...).Density() The main purpose of this function is to convert crystal waters in an X-ray structure into a density so that one can compare the experimental density with the one from molecular dynamics trajectories. Because a pdb is a single snapshot, the density is estimated by placing Gaussians of width sigma at the position of all selected atoms. Sigma can be fixed or taken from the B-factor field, in which case sigma is taken as sqrt(3.*B/8.)/pi (see :func:`BFactor2RMSF`). .. TODO .. * Make Gaussian convolution more efficient (at least for same .. sigma) because right now it is *very* slow (which may be .. acceptable if one only runs this once) .. * Using a temporary Creator class with the .. :meth:`BfactorDensityCreator.Density` helper method is clumsy. """ def __init__(self, pdb, delta=1.0, atomselection='resname HOH and name O', metadata=None, padding=1.0, sigma=None): """Construct the density from psf and pdb and the atomselection. DC = BfactorDensityCreator(pdb, delta=<delta>, atomselection=<MDAnalysis selection>, metadata=<dict>, padding=2, sigma=None) density = DC.Density() :Arguments: pdb PDB file or :class:`MDAnalysis.Universe`; a PDB is read with the simpl PDB reader. If the Bio.PDB reader is required, either set the *permissive_pdb_reader* flag to ``False`` in :data:`MDAnalysis.core.flags` or supply a Universe that was created with the `permissive` = ``False`` keyword. atomselection selection string (MDAnalysis syntax) for the species to be analyzed delta bin size for the density grid in Angstroem (same in x,y,z) [1.0] metadata dictionary of additional data to be saved with the object padding increase histogram dimensions by padding (on top of initial box size) sigma width (in Angstrom) of the gaussians that are used to build up the density; if None then uses B-factors from pdb For assigning X-ray waters to MD densities one might have to use a sigma of about 0.5 A to obtain a well-defined and resolved x-ray water density that can be easily matched to a broader density distribution. """ from MDAnalysis import asUniverse u = asUniverse(pdb) group = u.selectAtoms(atomselection) coord = group.coordinates() logger.info("Selected %d atoms (%s) out of %d total." % (coord.shape[0], atomselection, len(u.atoms))) smin = numpy.min(coord, axis=0) - padding smax = numpy.max(coord, axis=0) + padding BINS = fixedwidth_bins(delta, smin, smax) arange = zip(BINS['min'], BINS['max']) bins = BINS['Nbins'] # get edges by doing a fake run grid, self.edges = numpy.histogramdd(numpy.zeros((1, 3)), bins=bins, range=arange, normed=False) self.delta = numpy.diag(map(lambda e: (e[-1] - e[0]) / (len(e) - 1), self.edges)) self.midpoints = map(lambda e: 0.5 * (e[:-1] + e[1:]), self.edges) self.origin = map(lambda m: m[0], self.midpoints) numframes = 1 if sigma is None: # histogram individually, and smear out at the same time # with the appropriate B-factor if numpy.any(group.bfactors == 0.0): wmsg = "Some B-factors are Zero (will be skipped)." logger.warn(wmsg) warnings.warn(wmsg, category=MissingDataWarning) rmsf = Bfactor2RMSF(group.bfactors) grid *= 0.0 # reset grid self.g = self._smear_rmsf(coord, grid, self.edges, rmsf) else: # histogram 'delta functions' grid, self.edges = numpy.histogramdd(coord, bins=bins, range=arange, normed=False) logger.info("Histogrammed %6d atoms from pdb." % len(group.atoms)) # just a convolution of the density with a Gaussian self.g = self._smear_sigma(grid, sigma) try: metadata['pdb'] = pdb except TypeError: metadata = {'pdb': pdb} metadata['atomselection'] = atomselection metadata['numframes'] = numframes metadata['sigma'] = sigma self.metadata = metadata logger.info("Histogram completed (initial density in Angstrom**-3)") # Density automatically converts histogram to density for isDensity=False -- ??[OB]
[docs] def Density(self, threshold=None): """Returns a Density object. """ d = Density(grid=self.g, edges=self.edges, units=dict(length='Angstrom'), parameters=dict(isDensity=False), metadata=self.metadata) d.make_density() d.convert_density('water') return d
def _smear_sigma(self, grid, sigma): # smear out points # (not optimized -- just to test the principle; faster approach could use # convolution of the whole density with a single Gaussian via FFTs: # rho_smeared = F^-1[ F[g]*F[rho] ] g = numpy.zeros(grid.shape) # holds the smeared out density pos = numpy.where(grid != 0) # position in histogram (as bin numbers) for iwat in xrange(len(pos[0])): # super-ugly loop p = tuple([wp[iwat] for wp in pos]) g += grid[p] * numpy.fromfunction(self._gaussian, grid.shape, dtype=numpy.int, p=p, sigma=sigma) print "Smearing out atom position %4d/%5d with RMSF %4.2f A\r" % \ (iwat + 1, len(pos[0]), sigma), return g def _smear_rmsf(self, coordinates, grid, edges, rmsf): # smear out each water with its individual Gaussian # (slower than smear_sigma) g = numpy.zeros(grid.shape) # holds the smeared out density N, D = coordinates.shape for iwat, coord in enumerate(coordinates): if rmsf[iwat] == 0: continue g += numpy.fromfunction(self._gaussian_cartesian, grid.shape, dtype=numpy.int, c=coord, sigma=rmsf[iwat]) print "Smearing out atom position %4d/%5d with RMSF %4.2f A\r" % \ (iwat + 1, N, rmsf[iwat]), return g def _gaussian(self, i, j, k, p, sigma): # i,j,k can be numpy arrays # p is center of gaussian as grid index, sigma its width (in A) x = self.delta[0, 0] * (i - p[0]) # in Angstrom y = self.delta[1, 1] * (j - p[1]) z = self.delta[2, 2] * (k - p[2]) return (2 * numpy.pi * sigma) ** (-1.5) * numpy.exp(-(x * x + y * y + z * z) / (2 * sigma * sigma)) def _gaussian_cartesian(self, i, j, k, c, sigma): # i,j,k can be numpy arrays # c is center of gaussian in cartesian coord (A), sigma its width (in A) x = self.origin[0] + self.delta[0, 0] * i - c[0] # in Angstrom y = self.origin[1] + self.delta[1, 1] * j - c[1] z = self.origin[2] + self.delta[2, 2] * k - c[2] return (2 * numpy.pi * sigma) ** (-1.5) * numpy.exp(-(x * x + y * y + z * z) / (2 * sigma * sigma))