"""Class to run an anomalous phased fourier on MR results"""
__author__ = "Adam Simpkin"
__date__ = "17 Mar 2017"
__version__ = "0.1"
import logging
import os
from pyjob import cexec
from scipy.spatial import distance
import iotbx.pdb
import simbad.util.mtz_util
logger = logging.getLogger(__name__)
class _AnomScore(object):
"""An anomalous phased fourier scoring class"""
__slots__ = ("peaks_over_6_rms", "peaks_over_6_rms_within_4a_of_model",
"peaks_over_9_rms", "peaks_over_9_rms_within_4a_of_model")
def __init__(self, peaks_over_6_rms, peaks_over_6_rms_within_4a_of_model, peaks_over_9_rms,
peaks_over_9_rms_within_4a_of_model):
self.peaks_over_6_rms = peaks_over_6_rms
self.peaks_over_6_rms_within_4a_of_model = peaks_over_6_rms_within_4a_of_model
self.peaks_over_9_rms = peaks_over_9_rms
self.peaks_over_9_rms_within_4a_of_model = peaks_over_9_rms_within_4a_of_model
def __repr__(self):
return "{0}(peaks_over_6_rms={1} " \
"peaks_over_6_rms_within_4a_of_model={2} " \
"peaks_over_9_rms={3} " \
"peaks_over_9_rms_within_4a_of_model={4}".format(self.__class__.__name__,
self.peaks_over_6_rms,
self.peaks_over_6_rms_within_4a_of_model,
self.peaks_over_9_rms,
self.peaks_over_9_rms_within_4a_of_model)
def _as_dict(self):
"""Convert the :obj:`_MrScore <simbad.mr._MrScore>`
object to a dictionary"""
return {k: getattr(self, k) for k in self.__slots__}
[docs]class AnomSearch(object):
"""An anomalous phased fourier running class
Attributes
----------
mtz : str
The path to the input MTZ
output_dir : str
The path to the output directory
model : class
Class object containing the PDB code for the input model
Example
-------
>>> from simbad.mr.anomalous_util import AnomSearch
>>> anomalous_search = AnomSearch("<mtz>", "<output_dir>")
>>> anomalous_search.run("<model>")
"""
def __init__(self, mtz, output_dir, mr_program):
self._mtz = None
self._mr_program = None
self._f = None
self._sigf = None
self._dano = None
self._sigdano = None
self._free = None
self._space_group = None
self._resolution = None
self._cell_parameters = None
self._output_dir = None
self.name = None
self.mr_program = mr_program
self.mtz = mtz
self.output_dir = output_dir
self.work_dir = None
@property
def mr_program(self):
"""The molecular replacement program to use"""
return self._mr_program
@mr_program.setter
def mr_program(self, mr_program):
"""Define the molecular replacement program to use"""
self._mr_program = mr_program
@property
def mtz(self):
"""The input MTZ file"""
return self._mtz
@mtz.setter
def mtz(self, mtz):
"""Define the input MTZ file"""
self._mtz = mtz
@property
def output_dir(self):
"""The path to the working directory"""
return self._output_dir
@output_dir.setter
def output_dir(self, output_dir):
"""Define the working directory"""
self._output_dir = output_dir
[docs] def run(self, model):
"""Function to run SFALL/CAD/FFT to create phased anomalous fourier map"""
# Make output directory
self.work_dir = os.path.join(self.output_dir, model.pdb_code, "anomalous")
os.mkdir(self.work_dir)
self._f, self._sigf, _, _, self._dano, self._sigdano, self._free = simbad.util.mtz_util.get_labels(self.mtz)
self._space_group, self._resolution, cell_parameters = simbad.util.mtz_util.crystal_data(self.mtz)
self._cell_parameters = " ".join(map(str, cell_parameters))
# Create path to the placed mr solution
input_model = os.path.join(self.output_dir, model.pdb_code, "mr",
self.mr_program, "{0}_mr_output.pdb".format(model.pdb_code))
self.name = model.pdb_code
# Run programs
self.sfall(input_model)
self.cad()
self.fft()
self.peakmax()
self.csymmatch()
[docs] def search_results(self, min_dist=4.0):
"""Function to extract search results"""
heavy_atom_model = os.path.join(self.work_dir, "csymmatch_{0}.pdb".format(self.name))
input_model = os.path.join(self.output_dir, self.name, "mr",
self.mr_program, "{0}_mr_output.pdb".format(self.name))
peaks_over_6_rms_coordinates = []
peaks_over_9_rms_coordinates = []
# Get the coordinates of peaks larger than 8 rms / 12 rms
pdb_input = iotbx.pdb.pdb_input(file_name=heavy_atom_model)
hierarchy = pdb_input.construct_hierarchy()
for residue_group in hierarchy.models()[0].chains()[0].residue_groups():
for atom_group in residue_group.atom_groups():
for atom in atom_group.atoms():
if atom.b >= 6:
peaks_over_6_rms_coordinates.append((atom.xyz[0], atom.xyz[1], atom.xyz[2]))
if atom.b >= 9:
peaks_over_9_rms_coordinates.append((atom.xyz[0], atom.xyz[1], atom.xyz[2]))
# Get the atom coordinates of the placed MTZ
input_model_coordinates = []
pdb_input = iotbx.pdb.pdb_input(file_name=input_model)
hierarchy = pdb_input.construct_hierarchy()
for residue_group in hierarchy.models()[0].chains()[0].residue_groups():
for atom_group in residue_group.atom_groups():
for atom in atom_group.atoms():
input_model_coordinates.append((atom.xyz[0], atom.xyz[1], atom.xyz[2]))
# Find the number of peaks within min dist (default 2A) of protein
peaks_over_6_rms_within_4 = 0
peaks_over_9_rms_within_4 = 0
# Find the number of peaks over 8 rms that have an euclidean distance from the protein of less than min_dist
for peak_coordinate in peaks_over_6_rms_coordinates:
for atom_coordinate in input_model_coordinates:
dist = distance.euclidean(peak_coordinate, atom_coordinate)
if dist <= min_dist:
peaks_over_6_rms_within_4 += 1
break
# Find the number of peaks over 12 rms that have an euclidean distance from the protein of less than min_dist
for peak_coordinate in peaks_over_9_rms_coordinates:
for atom_coordinate in input_model_coordinates:
dist = distance.euclidean(peak_coordinate, atom_coordinate)
if dist <= min_dist:
peaks_over_9_rms_within_4 += 1
break
score = _AnomScore(peaks_over_6_rms=len(peaks_over_6_rms_coordinates),
peaks_over_9_rms=len(peaks_over_9_rms_coordinates),
peaks_over_6_rms_within_4a_of_model=peaks_over_6_rms_within_4,
peaks_over_9_rms_within_4a_of_model=peaks_over_9_rms_within_4)
return score
[docs] def sfall(self, model):
"""Function to run SFALL to calculated structure factors for the placed MR model
Parameters
----------
model : str
path to placed model from MR
self.name : str
unique identifier for the input model set by :obj:`AnomSearch.run`
self.mtz : str
mtz file input to :obj:`AnomSearch`
self.work_dir : str
working directory set by :obj:`AnomSearch.run`
self._f : str
f column label set by :obj: `AnomSearch`
self._sigf : str
sigf column label set by :obj: `AnomSearch`
self._free : str
free column label set by :obj: `AnomSearch`
Returns
-------
file
mtz file containing FCalc and PHICalc columns
"""
cmd = ["sfall", "HKLOUT", os.path.join(self.work_dir, "sfall_{0}.mtz".format(self.name)),
"XYZIN", model, "HKLIN", self.mtz]
stdin = os.linesep.join([
"LABIN FP={0} SIGFP={1} FREE={2}",
"labout -",
"FC=FCalc PHIC=PHICalc",
"MODE SFCALC -",
" XYZIN -",
" HKLIN",
"symmetry '{3}'",
"badd 0.0",
"vdwr 2.5",
"end",
])
stdin = stdin.format(self._f, self._sigf, self._free, self._space_group)
cexec(cmd, stdin=stdin)
[docs] def cad(self):
"""Function to run CAD to combine the calculated structure factors and the anomalous signal
Parameters
----------
self.name : str
unique identifier for the input model set by :obj:`AnomSearch.run`
self.mtz : str
mtz file input to :obj: `AnomSearch`
self.work_dir : str
working directory set by :obj:`AnomSearch.run`
self._f : str
f column label set by :obj: `AnomSearch`
self._sigf : str
sigf column label set by :obj: `AnomSearch`
self._free : str
free column label set by :obj: `AnomSearch`
self._dano : str
dano column label set by :obj: `AnomSearch`
self._sigdano : str
sigdano column label set by :obj: `AnomSearch`
self._resolution : float
mtz resolution set by :obj: `AnomSearch`
Returns
-------
file
mtz file containing FCalc, PHICalc, DANO and SIGDANO columns
"""
cmd = ["cad", "HKLIN1", self.mtz,
"HKLIN2", os.path.join(self.work_dir, "sfall_{0}.mtz".format(self.name)),
"HKLOUT", os.path.join(self.work_dir, "cad_{0}.mtz".format(self.name))]
stdin = os.linesep.join([
"monitor BRIEF",
"labin file 1 -",
" E1 = {0} -",
" E2 = {1} -",
" E3 = {2} -",
" E4 = {3} -",
" E5 = {4}",
"labout file 1 -",
" E1 = {0} -",
" E2 = {1} -",
" E3 = {2} -",
" E4 = {3} -",
" E5 = {4}",
"ctypin file 1 -",
" E1 = F -",
" E2 = Q -",
" E3 = I -",
" E4 = D -",
" E5 = Q",
"resolution file 1 50 {5}",
"labin file 2 -",
" E1 = FCalc -",
" E2 = PHICalc",
"labout file 2 -",
" E1 = FCalc -",
" E2 = PHICalc",
"ctypin file 2 -",
" E1 = F -",
" E2 = P",
])
stdin = stdin.format(self._f, self._sigf, self._free, self._dano, self._sigdano, self._resolution)
cexec(cmd, stdin=stdin)
[docs] def fft(self):
"""Function to run FFT to create phased anomalous fourier map
Parameters
----------
self.name : str
unique identifier for the input model set by :obj:`AnomSearch.run`
self.work_dir : str
working directory set by :obj:`AnomSearch.run`
Returns
-------
file
anomalous phased fourier map file
file
log file containing the results from the anomalous phased fourier
"""
cmd = ["fft", "HKLIN", os.path.join(self.work_dir, "cad_{0}.mtz".format(self.name)),
"MAPOUT", os.path.join(self.work_dir, "fft_{0}.map".format(self.name))]
stdin = os.linesep.join(["xyzlim asu", "scale F1 1.0", "labin -",
" DANO={0} SIG1={1} PHI=PHICalc", "end"])
stdin = stdin.format(self._dano, self._sigdano)
cexec(cmd, stdin=stdin)
[docs] def peakmax(self):
"""Function to run peakmax to return the peaks from FFT
Parameters
----------
self.name : str
unique identifier for the input model set by :obj:`AnomSearch.run`
self.work_dir : str
working directory set by :obj:`AnomSearch.run`
Returns
-------
file
PDB file containing peaks
file
HA file containing peaks
file
log file containing the peaks identified by the anomalous phased fourier
"""
cmd = ["peakmax", "MAPIN", os.path.join(self.work_dir, "fft_{0}.map".format(self.name)),
"XYZOUT", os.path.join(self.work_dir, "peakmax_{0}.pdb".format(self.name)),
"XYZFRC", os.path.join(self.work_dir, "peakmax_{0}.ha".format(self.name))]
stdin = os.linesep.join(["threshhold -", " rms -", " 3.0", "numpeaks 50",
"output brookhaven frac", "residue WAT", "atname OW", "chain X"])
cexec(cmd, stdin=stdin)
[docs] def csymmatch(self):
"""Function to run csymmatch to correct for symmetry shifted coordinates
Parameters
----------
self.name : str
unique identifier for the input model set by :obj:`AnomSearch.run`
self.work_dir : str
working directory set by :obj:`AnomSearch.run`
self.output_dir : str
output directory input to :obj: `AnomSearch`
Returns
-------
file
PDB file containing the symmetry corrected atom coordinates
"""
cmd = ["csymmatch", "-stdin"]
stdin = os.linesep.join(["pdbin {0}", "pdbin-ref {1}", "pdbout {2}", "connectivity-radius 2.0"])
stdin = stdin.format(
os.path.join(self.work_dir, "peakmax_{0}.pdb".format(self.name)),
os.path.join(self.output_dir, self.name, "mr", self.mr_program,
"{0}_mr_output.pdb".format(self.name)),
os.path.join(self.work_dir, "csymmatch_{0}.pdb".format(self.name))
)
cexec(cmd, stdin=stdin)