diff --git a/Extras/LPRMSD/lprmsd.py b/Extras/LPRMSD/lprmsd.py index 47f361e9..09b71b06 100644 --- a/Extras/LPRMSD/lprmsd.py +++ b/Extras/LPRMSD/lprmsd.py @@ -13,6 +13,8 @@ import _lprmsd import mdtraj as md +from collections import namedtuple +import copy import numpy as np import itertools from scipy import optimize @@ -60,6 +62,96 @@ def ReadPermFile(fnm): continue return (LL, K) + +class TheoData(object): + """Stores temporary data required during Theobald RMSD calculation. + + Notes: + Storing temporary data allows us to avoid re-calculating the G-Values + repeatedly. Also avoids re-centering the coordinates.""" + + Theoslice = namedtuple('TheoSlice', ('xyz', 'G')) + + def __init__(self, XYZData, NumAtoms=None, G=None): + """Create a container for intermediate values during RMSD Calculation. + + Notes: + 1. We remove center of mass. + 2. We pre-calculate matrix magnitudes (ConfG)""" + + if NumAtoms is None or G is None: + NumConfs = len(XYZData) + NumAtoms = XYZData.shape[1] + + XYZData = copy.copy(XYZData) + self.centerConformations(XYZData) + + NumAtomsWithPadding = 4 + NumAtoms - NumAtoms % 4 + + # Load data and generators into aligned arrays + XYZData2 = np.zeros((NumConfs, 3, NumAtomsWithPadding), dtype=np.float32) + for i in range(NumConfs): + XYZData2[i, 0:3, 0:NumAtoms] = XYZData[i].transpose() + + #Precalculate matrix magnitudes + ConfG = np.zeros((NumConfs,),dtype=np.float32) + for i in xrange(NumConfs): + ConfG[i] = self.calcGvalue(XYZData[i, :, :]) + + self.XYZData = XYZData2 + self.G = ConfG + self.NumAtoms = NumAtoms + self.NumAtomsWithPadding = NumAtomsWithPadding + self.CheckCentered() + else: + self.XYZData = XYZData + self.G = G + self.NumAtoms = NumAtoms + self.NumAtomsWithPadding = XYZData.shape[2] + + def __getitem__(self, key): + # to keep the dimensions right, we make everything a slice + if isinstance(key, int): + key = slice(key, key+1) + return TheoData(self.XYZData[key], NumAtoms=self.NumAtoms, G=self.G[key]) + + def __setitem__(self, key, value): + self.XYZData[key] = value.XYZData + self.G[key] = value.G + + def CheckCentered(self, Epsilon=1E-5): + """Raise an exception if XYZAtomMajor has nonnzero center of mass(CM).""" + + XYZ = self.XYZData.transpose(0, 2, 1) + x = np.array([max(abs(XYZ[i].mean(0))) for i in xrange(len(XYZ))]).max() + if x > Epsilon: + raise Exception("The coordinate data does not appear to have been centered correctly.") + + @staticmethod + def centerConformations(XYZList): + """Remove the center of mass from conformations. Inplace to minimize mem. use.""" + + for ci in xrange(XYZList.shape[0]): + X = XYZList[ci].astype('float64') # To improve the accuracy of RMSD, it can help to do certain calculations in double precision. + X -= X.mean(0) + XYZList[ci] = X.astype('float32') + return + + @staticmethod + def calcGvalue(XYZ): + """Calculate the sum of squares of the key matrix G. A necessary component of Theobold RMSD algorithm.""" + + conf=XYZ.astype('float64') # Doing this operation in double significantly improves numerical precision of RMSD + G = 0 + G += np.dot(conf[:, 0], conf[:, 0]) + G += np.dot(conf[:, 1], conf[:, 1]) + G += np.dot(conf[:, 2], conf[:, 2]) + return G + + def __len__(self): + return len(self.XYZData) + + class LPTraj(dict): def __init__(self, S, atomindices=None, permuteindices=None): super(LPTraj, self).__init__() @@ -69,9 +161,9 @@ def __init__(self, S, atomindices=None, permuteindices=None): pidx = list(itertools.chain(*permuteindices)) if permuteindices != None else [] if atomindices == None: - self.TD = RMSD.TheoData(S.xyz) + self.TD = TheoData(S.xyz) else: - self.TD = RMSD.TheoData(S.xyz[:, np.array(aidx)]) + self.TD = TheoData(S.xyz[:, np.array(aidx)]) def __getitem__(self, key): if isinstance(key, int) or isinstance(key, slice) or isinstance(key, np.ndarray): diff --git a/Extras/LPRMSD/setup.py b/Extras/LPRMSD/setup.py index 9c313859..7461b1c4 100644 --- a/Extras/LPRMSD/setup.py +++ b/Extras/LPRMSD/setup.py @@ -1,14 +1,80 @@ +from __future__ import print_function import os, sys import numpy from glob import glob +import tempfile +import shutil from setuptools import setup, Extension +from distutils.ccompiler import new_compiler + +def hasfunction(cc, funcname, include=None, extra_postargs=None): + # From http://stackoverflow.com/questions/ + # 7018879/disabling-output-when-compiling-with-distutils + tmpdir = tempfile.mkdtemp(prefix='hasfunction-') + devnull = oldstderr = None + try: + try: + fname = os.path.join(tmpdir, 'funcname.c') + f = open(fname, 'w') + if include is not None: + f.write('#include %s\n' % include) + f.write('int main(void) {\n') + f.write(' %s;\n' % funcname) + f.write('}\n') + f.close() + devnull = open(os.devnull, 'w') + oldstderr = os.dup(sys.stderr.fileno()) + os.dup2(devnull.fileno(), sys.stderr.fileno()) + objects = cc.compile([fname], output_dir=tmpdir, + extra_postargs=extra_postargs) + cc.link_executable(objects, os.path.join(tmpdir, 'a.out')) + except Exception as e: + return False + return True + finally: + if oldstderr is not None: + os.dup2(oldstderr, sys.stderr.fileno()) + if devnull is not None: + devnull.close() + shutil.rmtree(tmpdir) + + +def detect_openmp(): + "Does this compiler support OpenMP parallelization?" + compiler = new_compiler() + print('Attempting to autodetect OpenMP support...', end=' ') + hasopenmp = hasfunction(compiler, 'omp_get_num_threads()') + needs_gomp = hasopenmp + if not hasopenmp: + compiler.add_library('gomp') + hasopenmp = hasfunction(compiler, 'omp_get_num_threads()') + needs_gomp = hasopenmp + print + if hasopenmp: + print('Compiler supports OpenMP') + else: + print('Did not detect OpenMP support; parallel RMSD disabled') + return hasopenmp, needs_gomp + + +hasopenmp, needs_gomp = detect_openmp() + +# If you are 32-bit you should remove the -m64 flag +compile_args = ["-std=c99", "-O2", "-msse2", "-msse3", + "-Wno-unused", "-m64"] + +extra_link_args = ["-lblas", "-lpthread", "-lm"] + +if hasopenmp: + compile_args.append("-fopenmp") + +if needs_gomp: + extra_link_args.append("-gomp") _lprmsd = Extension('_lprmsd', sources = glob('src/*.c'), - extra_compile_args = ["-std=c99","-O2", - "-msse2","-msse3","-Wno-unused","-fopenmp","-m64"], - # If you are 32-bit you should remove the -m64 flag - extra_link_args = ['-lblas', '-lpthread', '-lm', '-lgomp'], + extra_compile_args = compile_args, + extra_link_args = extra_link_args, include_dirs = [numpy.get_include(), os.path.join(numpy.get_include(), 'numpy')]) setup(name='msmbuilder.metrics.lprmsd', diff --git a/Extras/LPRMSD/src/lprmsd.c b/Extras/LPRMSD/src/lprmsd.c index c8fdd22f..f9816902 100644 --- a/Extras/LPRMSD/src/lprmsd.c +++ b/Extras/LPRMSD/src/lprmsd.c @@ -25,7 +25,11 @@ #include #include "theobald_rmsd.h" #include "apc.h" + +#ifdef _OPENMP #include +#endif + #include #define CHECKARRAYFLOAT(ary,name) if (PyArray_TYPE(ary) != NPY_FLOAT32) { \ diff --git a/MSMBuilder/metrics/__init__.py b/MSMBuilder/metrics/__init__.py index dc097d5f..d0fdeb99 100644 --- a/MSMBuilder/metrics/__init__.py +++ b/MSMBuilder/metrics/__init__.py @@ -13,9 +13,9 @@ from .baseclasses import AbstractDistanceMetric from .baseclasses import Vectorized from .rmsd import RMSD +from .lprmsd import LPRMSD from .dihedral import Dihedral from .contact import BooleanContact, AtomPairs, ContinuousContact from .projection import RedDimPNorm from .hybrid import Hybrid, HybridPNorm -from .projection import RedDimPNorm from .positions import Positions diff --git a/MSMBuilder/metrics/lprmsd.py b/MSMBuilder/metrics/lprmsd.py new file mode 100644 index 00000000..8f4a51de --- /dev/null +++ b/MSMBuilder/metrics/lprmsd.py @@ -0,0 +1,183 @@ +from __future__ import print_function, absolute_import, division +from mdtraj.utils.six.moves import xrange +import warnings +import numpy as np +from collections import namedtuple +import mdtraj as md + +from .baseclasses import AbstractDistanceMetric + + +class LPRMSD(AbstractDistanceMetric): + + """ + Compute distance between frames using the Room Mean Square Deviation + over a specifiable set of atoms using the Theobald QCP algorithm + + References + ---------- + .. [1] Theobald, D. L. Acta. Crystallogr., Sect. A 2005, 61, 478-480. + + """ + def __init__(self, atom_indices=None, alt_indices=None, + permute_groups=None, omp_parallel=True): + """Initalize an RMSD calculator + + Parameters + ---------- + atom_indices : array_like, optional + List of the indices of the atoms that you want to use for the RMSD + calculation. For example, if your trajectory contains the coordinates + of all the atoms, but you only want to compute the RMSD on the C-alpha + atoms, then you can supply a reduced set of atom_indices. If unsupplied, + all of the atoms will be used. + alt_indices : array_like, optional + List of indices to compute the actual distance. Here, the alignment + will be done using atom_indices, but the distance will be calculated + between the alt_indices + permute_groups : array_like, optional + list of lists, where each element is a list of atom indices that are + indistinguishable and can be permuted. The distance returned will + be based on finding the permutation that minimizes the total distance. + omp_parallel : bool, optional + Use OpenMP parallelized C code under the hood to take advantage of + multicore architectures. If you're using another parallelization scheme + (e.g. MPI), you might consider turning off this flag. + + Notes + ----- + You can also control the degree of parallelism with the OMP_NUM_THREADS + envirnoment variable + + + """ + self.atom_indices = atom_indices + self.alt_indices = alt_indices + self.permute_groups = permute_groups + + self.omp_parallel = omp_parallel + + def __repr__(self): + try: + val = 'metrics.LPRMSD(atom_indices=%s, alt_indices=%s, ' % ( + repr(list(self.atom_indices)), repr(list(self.alt_indices))) + \ + 'permute_groups=%s, omp_parallel=%s)' % ( + repr(list(self.permute_groups)), self.omp_parallel) + except: + val = 'metrics.LPRMSD(atom_indices=%s, alt_indices=%s, ' % ( + self.atom_indices, self.alt_indices) + \ + 'permute_groups=%s, omp_parallel=%s)' % ( + self.permute_groups, self.omp_parallel) + + return val + + def prepare_trajectory(self, trajectory): + """Prepare the trajectory for RMSD calculation. + + Preprocessing includes extracting the relevant atoms, centering the + frames, and computing the G matrix. + + + Parameters + ---------- + trajectory : mdtraj.Trajectory + Molecular dynamics trajectory + + Returns + ------- + theodata : array_like + A msmbuilder.metrics.TheoData object, which contains some preprocessed + calculations for the RMSD calculation + """ + + t = trajectory + t.center_coordinates() + + return t + + + def one_to_many(self, prepared_traj1, prepared_traj2, index1, indices2): + """Calculate a vector of distances from one frame of the first trajectory + to many frames of the second trajectory + + The distances calculated are from the `index1`th frame of `prepared_traj1` + to the frames in `prepared_traj2` with indices `indices2` + + Parameters + ---------- + prepared_traj1 : rmsd.TheoData + First prepared trajectory + prepared_traj2 : rmsd.TheoData + Second prepared trajectory + index1 : int + index in `prepared_trajectory` + indices2 : ndarray + list of indices in `prepared_traj2` to calculate the distances to + + Returns + ------- + Vector of distances of length len(indices2) + + Notes + ----- + If the omp_parallel optional argument is True, we use shared-memory + parallelization in C to do this faster. Using omp_parallel = False is + advised if indices2 is a short list and you are paralellizing your + algorithm (say via mpi) at a different + level. + """ + return self.one_to_all(prepared_traj1, prepared_traj2[indices2], index1) + + + def one_to_all(self, prepared_traj1, prepared_traj2, index1): + """Calculate a vector of distances from one frame of the first trajectory + to all of the frames in the second trajectory + + The distances calculated are from the `index1`th frame of `prepared_traj1` + to the frames in `prepared_traj2` + + Parameters + ---------- + prepared_traj1 : rmsd.TheoData + First prepared trajectory + prepared_traj2 : rmsd.TheoData + Second prepared trajectory + index1 : int + index in `prepared_trajectory` + + Returns + ------- + Vector of distances of length len(prepared_traj2) + + Notes + ----- + If the omp_parallel optional argument is True, we use shared-memory + parallelization in C to do this faster. + """ + + if not self.alt_indices is None: + distances = md.lprmsd(prepared_traj2, prepared_traj1, index1, + parallel=self.omp_parallel, atom_indices=self.atom_indices, + permute_groups=self.permute_groups, superpose=True) + + alt_distances = prepared_traj2.xyz[:, self.alt_indices] - prepared_traj1.xyz[index1, self.alt_indices] + alt_distances = np.sqrt(np.square(alt_distances).sum(axis=2).mean(axis=1)) + + return alt_distances + + else: + distances = md.lprmsd(prepared_traj2, prepared_traj1, index1, + parallel=self.omp_parallel, atom_indices=self.atom_indices, + permute_groups=self.permute_groups) + + return distances + + + def _square_all_pairwise(self, prepared_traj): + """Reference implementation of all_pairwise""" + warnings.warn( + 'This is HORRIBLY inefficient. This operation really needs to be done directly in C') + output = np.empty((prepared_traj.n_frames, prepared_traj.n_frames)) + for i in xrange(prepared_traj.n_frames): + output[i] = self.one_to_all(prepared_traj, prepared_traj, i) + return output diff --git a/MSMBuilder/metrics/positions.py b/MSMBuilder/metrics/positions.py index 872d3488..f0a16730 100644 --- a/MSMBuilder/metrics/positions.py +++ b/MSMBuilder/metrics/positions.py @@ -5,15 +5,9 @@ logger = logging.getLogger(__name__) import mdtraj as md import numpy as np -try: - import lprmsd -except: - RuntimeError( - "Unable to import lprmsd. See msmbuilder/Extras/lprmsd for directions on installing") from .baseclasses import Vectorized, AbstractDistanceMetric - class Positions(Vectorized, AbstractDistanceMetric): """ @@ -64,11 +58,14 @@ def __init__(self, target, align_indices=None, atom_indices=None, else: self.atom_indices = None - self.lprmsd = lprmsd.LPRMSD(atomindices=self.atom_indices, - altindices=self.align_indices) - self.target = target - self.prep_target = self.lprmsd.prepare_trajectory(self.target) + self.target.center_coordinates() + if self.align_indices is None: + self._permute_groups = [[i] for i in xrange(self.target.n_atoms)] + + else: + slf._permute_groups = [[i] for i in self.align_indices] + def prepare_trajectory(self, trajectory, return_dist=False): """ @@ -93,16 +90,11 @@ def prepare_trajectory(self, trajectory, return_dist=False): after alignment for each frame in trajectory """ - # TODO: This method hasn't been updated yet (2013-11-22 mph) - lp_prep_trajectory = self.lprmsd.prepare_trajectory(trajectory) - aligned_distances, prep_trajectory = self.lprmsd._compute_one_to_all( - self.prep_target, lp_prep_trajectory, 0, b_xyzout=True) + t = md.Trajectory(trajectory.xyz.copy(), topology=trajectory.topology.copy()) - if not self.atom_indices is None: - prep_trajectory = prep_trajectory[:, self.atom_indices] - prep_trajectory = np.reshape(prep_trajectory, (prep_trajectory.shape[0], -1)) + distances = md.lprmsd(t, self.target, superpose=True, atom_indices=self.align_indices, + permute_groups=self._permute_groups) - if return_dist: - return prep_trajectory, aligned_distances - else: - return prep_trajectory + prep_trajectory = t.xyz[:, self.atom_indices].reshape((len(t), -1)) + + return prep_trajectory diff --git a/tests/test_positions.py b/tests/test_positions.py new file mode 100644 index 00000000..c3da1db3 --- /dev/null +++ b/tests/test_positions.py @@ -0,0 +1,26 @@ +from __future__ import print_function +from __future__ import division +from __future__ import absolute_import + +from msmbuilder.metrics import Positions +from msmbuilder.testing import get, eq, expected_failure +import numpy as np +import unittest + +class TestPositions(): + + def setup(self): + self.ala = get('native.pdb') + self.rotZ = np.array([[0, -1, 0], [1, 0, 0], [0, 0, 1]]) + self.rotated_ala = get('native.pdb') + self.rotated_ala.xyz[0] = np.array([self.rotZ.dot(atom) for atom in self.ala.xyz[0]]) + + @expected_failure + def test_negative(self): + eq(self.ala.xyz, self.rotated_ala.xyz) + + def test(self): + pos_metric = Positions(self.ala) + protated_ala = pos_metric.prepare_trajectory(self.rotated_ala) + + eq(self.ala.xyz.reshape((1, -1)), protated_ala)