diff --git a/Extras/LigandPMF3D/PMF3D.py b/Extras/LigandPMF3D/PMF3D.py new file mode 100755 index 00000000..6dbe3187 --- /dev/null +++ b/Extras/LigandPMF3D/PMF3D.py @@ -0,0 +1,237 @@ +import mdtraj as md +import sys +import pickle +import multiprocessing +import optparse +import pylab +from numpy import * +import glob +import os + +# Helper Functions +def eval_distance(mapped_state_distances, cutoff): + # assumes mapped states and state_distances + mapped_cutoff_states=[] + for state in xrange(len(mapped_state_distances)): + # if mindist to protein > cutoff, add to unbound frames + if mapped_state_distances[state] > cutoff: + mapped_cutoff_states.append(state) + mapped_cutoff_states=array([int(i) for i in mapped_cutoff_states]) + return mapped_cutoff_states + +def get_ligand_minmax(ligcoors, map): + # build grid of min/max ligand coords from Gens.vmd_ligcoords.dat + mapped_ligcoors=dict() + n=0 + for i in xrange(ligcoors.shape[0]): + if map[i]!=-1: + mapped_ligcoors[n]=[k*10 for k in ligcoors[i]] + n+=1 + xmin=100000 + xmax=0 + ymin=100000 + ymax=0 + zmin=100000 + zmax=0 + dx=1 + dy=1 + dz=1 + coordinates=['x', 'y', 'z'] + mins=[xmin, ymin, zmin] + maxes=[xmax, ymax, zmax] + for i in sorted(mapped_ligcoors.keys()): + for j in range(0, len(mapped_ligcoors[i])): + for (n, k) in enumerate(coordinates): + if mapped_ligcoors[i][j][n]<=mins[n]: + mins[n]=mapped_ligcoors[i][j][n] + elif mapped_ligcoors[i][j][n]>=maxes[n]: + maxes[n]=mapped_ligcoors[i][j][n] + lengths=dict() + for (n, i) in enumerate(coordinates): + lengths[n]=int(((round(maxes[n])+1)-round(mins[n]))/1.0) + box_volume=lengths[0]*dx*lengths[1]*lengths[2]*dz + print "box volume sampled by ligand is %s angstroms^3" % box_volume + total=max(lengths.values()) + ranges=dict() + for (n, i) in enumerate(coordinates): + pad=-(lengths[n]-total) + ranges[n]=range(int(round(mins[n])), int(round(maxes[n]+1+(pad*1.0+1))), 1) + return mapped_ligcoors, ranges[0], ranges[1], ranges[2], box_volume + + +def estimate_com_free_energy(modeldir, space, spacetrack, mapped_com_distances, mapped_ligcoors, correction): + frees=[] + volumes=[] + corrs=[] + axis=[] + #cutoffs for Protein-Ligand COM distances + mapped_states=range(0, len(mapped_com_distances)) + cutoffs=arange(0, 20, 1) + if os.path.exists('%s/frees.dat' % (modeldir)): + print "free energies already computed for %s" % modeldir + sys.exit() + for cutoff in cutoffs: + bound_frames=where(mapped_com_distances < cutoff)[0] + if len(bound_frames)==0: + print "no bound states less than reference com distance %s" % cutoff + continue + print "on COM cutoff %s" % cutoff + new_points={ key: mapped_ligcoors[key] for key in bound_frames} + new_pops={ key: space.pops[key] for key in bound_frames} + GD=space.new_manual_allcoor_grid(spacetrack, new_points, new_pops, type='pops') + boundspace=GD.sum() + + new_pops={ key: 1.0 for key in bound_frames} + GD=space.new_manual_allcoor_grid(spacetrack, new_points, new_pops, type='pops') + boundvolume=GD.sum() + volumes.append(boundspace*boundvolume) + print "bound volume ", boundspace*boundvolume + + # unbound frames are above COM cutoff + unbound_frames=array([int(x) for x in mapped_states if x not in bound_frames]) + new_points={ key: mapped_ligcoors[key] for key in unbound_frames} + new_pops={ key: space.pops[key] for key in unbound_frames} + GD=space.new_manual_allcoor_grid(spacetrack, new_points, new_pops, type='pops') + unboundspace=GD.sum() + + # for counting states + new_pops={ key: 1.0 for key in unbound_frames} + GD=space.new_manual_allcoor_grid(spacetrack, new_points, new_pops, type='pops') + unboundvolume=GD.sum() + + # free energy from ratio + depth=-0.6*log((boundspace*boundvolume)/(unboundspace*boundvolume)) + frees.append(depth) + axis.append(cutoff) + print "corrected free energy at COM cutoff %s is %s" % (cutoff, depth+correction) + k=len(space.pops) + savetxt('%s/frees.dat' % (modeldir), [x+correction for x in frees]) + savetxt('%s/boundvolumes.dat' % (modeldir), volumes) + savetxt('%s/axis.dat' % (modeldir), axis) + +def get_displacement(coor1, coor2): + delta = subtract(coor1, coor2) + return (delta ** 2.).sum(-1) ** 0.5 + +def get_com_distances(proteinfile, ligandfile, genfile, topology): + lig_indices=loadtxt(ligandfile, ndmin=1, dtype=int) + prot_indices=loadtxt(proteinfile, ndmin=1, dtype=int) + prot_gens=md.load(genfile, top=topology, atom_indices=prot_indices) + lig_gens=md.load(genfile, top=topology, atom_indices=lig_indices) + prot_coms=md.compute_center_of_mass(prot_gens) + lig_coms=md.compute_center_of_mass(lig_gens) + com_distances=[10*get_displacement(i,j) for (i,j) in zip(prot_coms, lig_coms)] + return array(com_distances), lig_gens + + +def convert(GDfree, max): + frames=where(GDfree==inf) + for (i,j,k) in zip(frames[0], frames[1], frames[2]): + GDfree[i,j,k]=max + return GDfree + +# Class for 3D PMF +class PMF3D: + def __init__(self, pops=None, xaxis=None, yaxis=None, zaxis=None, grid=None): + self.pops=pops + self.dx =abs(xaxis[0]-xaxis[1]) + self.dy =abs(yaxis[0]-yaxis[1]) + if zaxis!=None: + self.dz =abs(zaxis[0]-zaxis[1]) + self.xaxis = array(xaxis) + self.yaxis = array(yaxis) + if zaxis!=None: + self.zaxis = array(zaxis) + if self.dx==3: + self.tol=1.5 + elif self.dx==2: + self.tol=1.0 + elif self.dx==1: + self.tol=0.5 + + def microstate_count_grid(self, allcoor): + spacetrack=dict() + for i in sorted(allcoor.keys()): + if i not in spacetrack.keys(): + spacetrack[i]=[] + for j in range(0, len(allcoor[i])): + x_location=where((self.xaxis>=round(allcoor[i][j][0]))&(self.xaxis<(round(allcoor[i][j][0])+self.dx)))[0] + y_location=where((self.yaxis>=round(allcoor[i][j][1]))&(self.yaxis<(round(allcoor[i][j][1])+self.dy)))[0] + z_location=where((self.zaxis>=round(allcoor[i][j][2]))&(self.zaxis<(round(allcoor[i][j][2])+self.dz)))[0] + if len(x_location)==0 or len(y_location)==0 or len(z_location)==0: + print "no bin available for allcoor[i][j]" + import pdb + pdb.set_trace() + if [x_location, y_location, z_location] not in spacetrack[i]: + spacetrack[i].append([x_location[0], y_location[0], z_location[0]]) + return spacetrack + + def new_manual_allcoor_grid(self, spacetrack, allcoor, values, type='pops'): + if type=='pops': + newgrid=zeros((len(self.xaxis), len(self.yaxis), len(self.zaxis))) + elif type=='free': + # pad with max free energy value (pop=0) + newgrid=max(values)*ones((len(self.xaxis), len(self.yaxis), len(self.zaxis))) + for (n, i) in enumerate(sorted(allcoor.keys())): + cn=len(spacetrack[i]) + for j in range(0, len(allcoor[i])): + x_location=where((self.xaxis>=round(allcoor[i][j][0]))&(self.xaxis<(round(allcoor[i][j][0])+self.dx)))[0] + y_location=where((self.yaxis>=round(allcoor[i][j][1]))&(self.yaxis<(round(allcoor[i][j][1])+self.dy)))[0] + z_location=where((self.zaxis>=round(allcoor[i][j][2]))&(self.zaxis<(round(allcoor[i][j][2])+self.dz)))[0] + if len(x_location)==0 or len(y_location)==0 or len(z_location)==0: + print "no bin available for allcoor[i][j]" + import pdb + pdb.set_trace() + newgrid[x_location, y_location, z_location]+=(1.0/cn)*values[i] + return newgrid + + + + def write_dx(self, GD, dir): + gridspace=1 + newfile=open('%s/pmf3d_d%s.dx' % (dir, gridspace), 'w') + newfile.write('# Data calculated MSM State Probabilities\n') + newfile.write('object 1 class gridpositions counts %s %s %s\n' % (GD.shape[0], GD.shape[1], GD.shape[2])) + newfile.write('origin %s %s %s\n' % (self.xaxis[0], self.yaxis[0], self.zaxis[0])) + newfile.write('delta %s 0 0\n' % self.dx) + newfile.write('delta 0 %s 0\n' % self.dy) + newfile.write('delta 0 0 %s\n' % self.dz) + newfile.write('object 2 class gridconnections counts %s %s %s\n' % (GD.shape[0], GD.shape[1], GD.shape[2])) + newfile.write('object 3 class array type double rank 0 items %s data follows\n' % (GD.shape[0]*GD.shape[1]*GD.shape[2])) + intergrid=zeros((GD.shape[0], GD.shape[1], GD.shape[2])) + count=0 + for i in range(0, GD.shape[0]): + for j in range(0, GD.shape[1]): + for k in range(0, GD.shape[2]): + if count==2: + if GD[i][j][k]==0: + newfile.write('%s\n' % int(GD[i][j][k])) + else: + newfile.write('%s\n' % GD[i][j][k]) + count=0 + else: + if GD[i][j][k]==0: + newfile.write('%s\t' % int(GD[i][j][k])) + else: + newfile.write('%s\t' % GD[i][j][k]) + count+=1 + newfile.write('\nobject "ligand free energy" class field') + newfile.close() + + def pmfvolume(self, GD): + sum=0 + oldx=self.xaxis[0] + oldval=0 + for i in range(0, len(self.xaxis)): + oldy=self.yaxis[0] + for j in range(0, len(self.yaxis)): + oldz=self.zaxis[0] + for k in range(0, len(self.zaxis)): + if abs(self.zaxis[k]-oldz) !=0: + if GD[i,j, k] != 0: + sum+=GD[i,j, k]*abs(self.xaxis[i]-oldx)*abs(self.yaxis[j]-oldy)*abs(self.zaxis[k]-oldz) + oldz=self.zaxis[k] + oldy=self.yaxis[j] + oldx=self.xaxis[i] + return float(sum) + diff --git a/Extras/LigandPMF3D/README.md b/Extras/LigandPMF3D/README.md new file mode 100644 index 00000000..7e5d34e9 --- /dev/null +++ b/Extras/LigandPMF3D/README.md @@ -0,0 +1,26 @@ +LigandPMF3D +=========== + +** This branch to be used with MSMBuilder 2.8 ** + +Python program for creating 3D PMFs and estimating binding free energies from protein-ligand binding simulations. + +Program requires as input (see options with -h flag): +* the path to the directory with MSM output (Populations.dat, Mapping.dat). This directory will also be used for the PMF and free energy output. +* the generators trajectory file, in any format readble by MDTraj. The computed ligand-protein center of mass (COM) distances will be saved here with the same prefix (i.e. for Gens.lh5, output it Gens.com.dat) +* the index file for ligand atoms. +* the index file for protein atoms, which are used for calculating ligand-protein center of mass (COM) distances and binding free energies (we suggest active site atoms only). +* topology, a reference PDB file. + +Program output is a OpenDX file pmf3d_d1.dx (3D PMF at gridspace 1 Angstrom) centered on the protein as aligned in the generators trajectory. +This file can be loaded into VMD: +* you can run the command: vmd -pdb reference.pdb -dx pmf3d_d1.dx +* make a new representation in Isosurface, and scale the isovalues ranging from the PMF minimum at 0, up to the max. +If your 3D PMF is not centered on your protein, check that your reference structure is aligned to your generators file. + +An additional -f option will output estimated binding free energies as a function of ligand-protein COM distance. This prints: +* the standard state correction computed from an estimated box size using ligand coordinates: standard_correction.dat +* standard corrected, binding free energyes: frees.dat +* population-weighted bound volumes (which should plateau as you increase your cutoff): boundvolumes.dat +* the COM distances corresponding to the free energies and volumes: axis.dat + diff --git a/Extras/LigandPMF3D/RunPMF.py b/Extras/LigandPMF3D/RunPMF.py new file mode 100755 index 00000000..5f2c20d8 --- /dev/null +++ b/Extras/LigandPMF3D/RunPMF.py @@ -0,0 +1,104 @@ +import mdtraj as md +import PMF3D +import optparse +import pylab +from numpy import * +import glob +import os +import sys +import pickle +from scipy import interpolate, integrate + +""" +LigandPMF3D +=========== + +** This branch to be used with MSMBuilder 2.8 ** + +Python program for creating 3D PMFs and estimating binding free energies from protein-ligand binding simulations. + +Program requires as input (see options with -h flag): +* the path to the directory with MSM output (Populations.dat, Mapping.dat). This directory will also be used for the PMF and free energy output. +* the generators trajectory file, in any format readble by MDTraj. The computed ligand-protein center of mass (COM) distances will be saved here with the same prefix (i.e. for Gens.lh5, output it Gens.com.dat) +* the index file for ligand atoms. +* the index file for protein atoms, which are used for calculating ligand-protein center of mass (COM) distances and binding free energies (we suggest active site atoms only). +* topology, a reference PDB file. + +Program output is a OpenDX file pmf3d_d1.dx (3D PMF at gridspace 1 Angstrom) centered on the protein as aligned in the generators trajectory. +This file can be loaded into VMD: +* you can run the command: vmd -pdb reference.pdb -dx pmf3d_d1.dx +* make a new representation in Isosurface, and scale the isovalues ranging from the PMF minimum at 0, up to the max. +If your 3D PMF is not centered on your protein, check that your reference structure is aligned to your generators file. + +An additional -f option will output estimated binding free energies as a function of ligand-protein COM distance. This prints: +* the standard state correction computed from an estimated box size using ligand coordinates: standard_correction.dat +* standard corrected, binding free energyes: frees.dat +* population-weighted bound volumes (which should plateau as you increase your cutoff): boundvolumes.dat +* the COM distances corresponding to the free energies and volumes: axis.dat +""" + +def main(modeldir, genfile, ligandfile, proteinfile, topology, writefree=False): + dir=os.path.dirname(genfile) + filename=genfile.split('%s/' % dir)[1].split('.')[0] + gens=md.load(genfile, top=topology) + + print "computing 3D PMF from model in %s" % modeldir + map=loadtxt('%s/Mapping.dat' % modeldir) + pops=loadtxt('%s/Populations.dat' % modeldir) + ligandind=loadtxt(ligandfile, dtype=int, ndmin=1) + + print "Tracking ligand sampling" + # also computes COM distances for free energy estimation, if desired + com_distances, lig_gens=PMF3D.get_com_distances(proteinfile, ligandfile, genfile, topology) + mapped_ligcoors, x_range, y_range, z_range, box_volume=PMF3D.get_ligand_minmax(lig_gens.xyz, map) + + # Generate Mapped 3-D PMF + frames=where(map!=-1)[0] + mapped_states=map[frames] + space=PMF3D.PMF3D(pops, x_range, y_range, z_range) + spacetrack=space.microstate_count_grid(mapped_ligcoors) + new_pops={ key: pops[n] for (n, key) in enumerate(mapped_ligcoors.keys())} + GD=space.new_manual_allcoor_grid(spacetrack, mapped_ligcoors, new_pops, type='pops') + free=array([-0.6*log(i) for i in pops]) + subtract=min(free) + free=array([k-subtract for k in free]) + GD=space.new_manual_allcoor_grid(spacetrack, mapped_ligcoors, new_pops, type='pops') + GDfree=-0.6*log(GD) + GDfree=PMF3D.convert(GDfree, max(free)) + GDfree=GDfree-min(GDfree.flatten()) + space.write_dx(GDfree, modeldir) + if writefree==True: + print "writing COM output for %s to %s.com.dat" % (dir, filename) + savetxt('%s/%s.com.dat' % (dir, filename), com_distances) + mapped_com_distances=com_distances[frames] + correction=-0.6*log(box_volume/1600.0) + print "standard correction %s" % correction + savetxt('%s/standard_correction.dat' % modeldir, array([correction,])) + PMF3D.estimate_com_free_energy(modeldir, space, spacetrack,mapped_com_distances, mapped_ligcoors, correction) + + + +def parse_commandline(): + parser = optparse.OptionParser() + parser.add_option('-m', '--modeldir', dest='modeldir', + help='directory with MSM') + parser.add_option('-g', '--genfile', dest='genfile', + help='input gens file lh5') + parser.add_option('-l', '--ligandfile', dest='ligandfile', + help='ligand index file') + parser.add_option('-p', '--proteinfile', dest='proteinfile', + help='protein binding site residue index file') + parser.add_option('-t', '--topology', dest='topology', + help='reference topology') + parser.add_option('-f', action="store_true", dest="writefree", help='perform free energy calc with P-L COM distances') + (options, args) = parser.parse_args() + return (options, args) + +#run the function main if namespace is main +if __name__ == "__main__": + (options, args) = parse_commandline() + if options.writefree==True: + main(modeldir=options.modeldir, genfile=options.genfile, ligandfile=options.ligandfile, proteinfile=options.proteinfile, topology=options.topology, writefree=True) + else: + main(modeldir=options.modeldir, genfile=options.genfile, ligandfile=options.ligandfile, proteinfile=options.proteinfile, topology=options.topology) + diff --git a/Extras/LigandPMF3D/setup.py b/Extras/LigandPMF3D/setup.py new file mode 100644 index 00000000..ad3dd7f6 --- /dev/null +++ b/Extras/LigandPMF3D/setup.py @@ -0,0 +1,53 @@ +import sys +from distutils.version import StrictVersion +import glob +from setuptools import setup + +def warn_on_version(module_name, minimum=None, package_name=None, recommend_conda=True): + if package_name is None: + package_name = module_name + + class VersionError(Exception): + pass + + msg = None + try: + package = __import__(module_name) + if minimum is not None: + try: + v = package.version.short_version + except AttributeError: + v = package.__version__ + if StrictVersion(v) < StrictVersion(minimum): + raise VersionError + except ImportError: + if minimum is None: + msg = 'LigandPMF3D requires the python package "%s", which is not installed.' % package_name + else: + msg = 'LigandPMF3D requires the python package "%s", version %s or later.' % (package_name, minimum) + except VersionError: + msg = ('LigandPMF3D requires the python package "%s", version %s or ' + ' later. You have version %s installed. You will need to upgrade.') % (package_name, minimum, v) + + if recommend_conda: + install = ('\nTo install %s, we recommend the conda package manger. See http://conda.pydata.org for info on conda.\n' + 'Using conda, you can install it with::\n\n $ conda install %s') % (package_name, package_name) + install += '\n\nAlternatively, with pip you can install the package with:\n\n $ pip install %s' % package_name + else: + install = '\nWith pip you can install the package with:\n\n $ pip install %s' % package_name + + if msg: + banner = ('==' * 40) + print('\n'.join([banner, banner, "", msg, install, "", banner, banner])) + + + +setup(name='PMF3D', + author='Morgan Lawrenz', + author_email='morganlawrenz@gmail.com', + version = '1.0', + description = 'Protein-ligand binding 3-D free energy maps', + py_modules = ['PMF3D'], + scripts=glob.glob('RunPMF.py'), ) +warn_on_version('mdtraj', '0.8.0') + diff --git a/Extras/LigandPMF3D/warn b/Extras/LigandPMF3D/warn new file mode 100644 index 00000000..89ea881e --- /dev/null +++ b/Extras/LigandPMF3D/warn @@ -0,0 +1,38 @@ +def warn_on_version(module_name, minimum=None, package_name=None, recommend_conda=True): + if package_name is None: + package_name = module_name + + class VersionError(Exception): + pass + + msg = None + try: + package = __import__(module_name) + if minimum is not None: + try: + v = package.version.short_version + except AttributeError: + v = package.__version__ + if StrictVersion(v) < StrictVersion(minimum): + raise VersionError + except ImportError: + if minimum is None: + msg = 'MSMBuilder requires the python package "%s", which is not installed.' % package_name + else: + msg = 'MSMBuilder requires the python package "%s", version %s or later.' % (package_name, minimum) + except VersionError: + msg = ('MSMBuilder requires the python package "%s", version %s or ' + ' later. You have version %s installed. You will need to upgrade.') % (package_name, minimum, v) + + if recommend_conda: + install = ('\nTo install %s, we recommend the conda package manger. See http://conda.pydata.org for info on conda.\n' + 'Using conda, you can install it with::\n\n $ conda install %s') % (package_name, package_name) + install += '\n\nAlternatively, with pip you can install the package with:\n\n $ pip install %s' % package_name + else: + install = '\nWith pip you can install the package with:\n\n $ pip install %s' % package_name + + if msg: + banner = ('==' * 40) + print('\n'.join([banner, banner, "", msg, install, "", banner, banner])) + +