Source code for simbad.mr.molrep_mr

#!/usr/bin/env ccp4-python
"""Module to run molrep on a model"""

__author__ = "Adam Simpkin"
__date__ = "02 May 2017"
__version__ = "1.0"

import os
import operator
import shutil

from simbad.util import mtz_util

from pyjob import cexec


[docs]def check_contrast(logfile): """Check the logfile of the job for the contrast value Parameters ---------- logfile : str Path to the logfile Returns ------- float Contrast score in log file """ contrasts = [] with open(logfile, 'r') as f: for line in f: if "Contrast" in line: fields = line.split() if len(fields) > 3: pass else: contrasts.append(float(fields[-1])) if len(contrasts) < 1: return 0.0 else: return max(contrasts)
[docs]class Molrep(object): """Class to run Molrep Attributes ---------- hklin : str Path to input hkl file logfile : str Path to output log file pdbin : str Path to the input pdb file pdbout : str Path to the output pdb file sgalternative : str Specify whether to try alternative space groups (all | enant) space_group : str The space group of the input hkl file work_dir : str Path to the working directory were you want MOLREP to run Example ------- >>> from simbad.mr.molrep_mr import Molrep >>> molrep = Molrep('<hklin>', '<hklout>', '<logfile>', '<nmol>', '<pdbin>', '<pdbout>', '<sgalternative>', >>> '<space_group>', '<work_dir>') >>> molrep.run() Files relating to the MOLREP run will be contained within the work_dir however the location of the output pdb and logfile can be specified. """ def __init__(self, hklin, logfile, nmol, pdbin, pdbout, sgalternative, space_group, work_dir): self._hklin = None self._logfile = None self._nmol = None self._pdbin = None self._pdbout = None self._work_dir = None self._sgalternative = None self._space_group = None self.hklin = hklin self.logfile = logfile self.nmol = nmol self.pdbin = pdbin self.pdbout = pdbout self.sgalternative = sgalternative self.space_group = space_group self.work_dir = work_dir self.all_sg_codes = {"P2": "3", "P21": "4", "C2": "5", "P222": "16", "P2221": "17", "P21212": "18", "P212121": "19", "C2221": "20", "C222": "21", "F222": "22", "I222": "23", "I212121": "24", "P4": "75", "P41": "76", "P42": "77", "P43": "78", "I4": "79", "I41": "80", "P422": "89", "P4212": "90", "P4122": "91", "P41212": "92", "P4222": "93", "P42212": "94", "P4322": "95", "P43212": "96", "I422": "97", "I4122": "98", "P3": "143", "P31": "144", "P32": "145", "R3": "146", "P312": "149", "P321": "150", "P3112": "151", "P3121": "152", "P3212": "153", "P3221": "154", "R32": "155", "P6": "168", "P61": "169", "P65": "170", "P62": "171", "P64": "172", "P63": "173", "P622": "177", "P6122": "178", "P6522": "179", "P6222": "180", "P6422": "181", "P6322": "182", "P23": "195", "F23": "196", "I23": "197", "P213": "198", "I213": "199", "P432": "207", "P4232": "208", "F432": "209", "F4132": "210", "I432": "211", "P4332": "212", "P4132": "213", "I4132": "214"} self.enant_sg_codes = {"P31": "144", "P32": "145", "P3112": "151", "P3212": "153", "P3121": "152", "P3221": "154", "P41": "76", "P43": "78", "P4122": "91", "P4322": "95", "P41212": "92", "P43212": "96", "P61": "169", "P65": "170", "P62": "171", "P64": "172", "P6122": "178", "P6522": "179", "P6222": "180", "P6422": "181", "P4332": "212", "P4132": "213"} self.enant_sg = {"144": "145", "145": "144", "151": "153", "153": "151", "152": "154", "154": "152", "76": "78", "78": "76", "91": "95", "95": "91", "92": "96", "96": "92", "169": "170", "170": "169", "171": "172", "172": "171", "178": "179", "179": "178", "180": "181", "181": "180", "212": "213", "213": "212"} self.all_alt_sg = {"3": ["4", "5"], "4": ["3", "5"], "5": ["3", "4"], "16": ["17", "18", "19", "20", "21", "22", "23", "24"], "17": ["16", "18", "19", "20", "21", "22", "23", "24"], "18": ["16", "17", "19", "20", "21", "22", "23", "24"], "19": ["16", "17", "18", "20", "21", "22", "23", "24"], "20": ["16", "17", "18", "19", "21", "22", "23", "24"], "21": ["16", "17", "18", "19", "20", "22", "23", "24"], "22": ["16", "17", "18", "19", "20", "21", "23", "24"], "23": ["16", "17", "18", "19", "20", "21", "22", "24"], "24": ["16", "17", "18", "19", "20", "21", "22", "23"], "75": ["76", "77", "78", "79", "80"], "76": ["75", "77", "78", "79", "80"], "77": ["75", "76", "78", "79", "80"], "78": ["75", "76", "77", "79", "80"], "79": ["75", "76", "77", "78", "80"], "80": ["75", "76", "77", "78", "79"], "89": ["90", "91", "92", "93", "94", "95", "96", "97", "98"], "90": ["89", "91", "92", "93", "94", "95", "96", "97", "98"], "91": ["89", "90", "92", "93", "94", "95", "96", "97", "98"], "92": ["89", "90", "91", "93", "94", "95", "96", "97", "98"], "93": ["89", "90", "91", "92", "94", "95", "96", "97", "98"], "94": ["89", "90", "91", "92", "93", "95", "96", "97", "98"], "95": ["89", "90", "91", "92", "93", "94", "96", "97", "98"], "96": ["89", "90", "91", "92", "93", "94", "95", "97", "98"], "97": ["89", "90", "91", "92", "93", "94", "95", "96", "98"], "98": ["89", "90", "91", "92", "93", "94", "95", "96", "97"], "143": ["144", "145", "146"], "144": ["143", "145", "146"], "145": ["143", "144", "146"], "146": ["143", "144", "145"], "149": ["150", "151", "152", "153", "154", "155"], "150": ["149", "151", "152", "153", "154", "155"], "151": ["149", "150", "152", "153", "154", "155"], "152": ["149", "150", "151", "153", "154", "155"], "153": ["149", "150", "151", "152", "154", "155"], "154": ["149", "150", "151", "152", "153", "155"], "155": ["149", "150", "151", "152", "153", "154"], "168": ["169", "170", "171", "172", "173"], "169": ["168", "170", "171", "172", "173"], "170": ["168", "169", "171", "172", "173"], "171": ["168", "169", "170", "172", "173"], "172": ["168", "169", "170", "171", "173"], "173": ["168", "169", "170", "171", "172"], "177": ["178", "179", "180", "181", "182"], "178": ["177", "179", "180", "181", "182"], "179": ["177", "178", "180", "181", "182"], "180": ["177", "178", "179", "181", "182"], "181": ["177", "178", "179", "180", "182"], "182": ["177", "178", "179", "180", "181"], "195": ["196", "197", "198", "199"], "196": ["195", "197", "198", "199"], "197": ["195", "196", "198", "199"], "198": ["195", "196", "197", "199"], "199": ["195", "196", "197", "198"], "207": ["208", "209", "210", "211", "212", "213", "214"], "208": ["207", "209", "210", "211", "212", "213", "214"], "209": ["207", "208", "210", "211", "212", "213", "214"], "210": ["207", "208", "209", "211", "212", "213", "214"], "211": ["207", "208", "209", "210", "212", "213", "214"], "212": ["207", "208", "209", "210", "211", "213", "214"], "213": ["207", "208", "209", "210", "211", "212", "214"], "214": ["207", "208", "209", "210", "211", "212", "213"]} @property def hklin(self): """The input hkl file""" return self._hklin @hklin.setter def hklin(self, hklin): """Define the input hkl file""" self._hklin = hklin @property def logfile(self): """The logfile output""" return self._logfile @logfile.setter def logfile(self, logfile): """Define the output logfile""" self._logfile = logfile @property def nmol(self): """The number of molecules to look for""" return self._nmol @nmol.setter def nmol(self, nmol): """Define the number of molecules to look for""" self._nmol = nmol @property def pdbin(self): """The input pdb file""" return self._pdbin @pdbin.setter def pdbin(self, pdbin): """Define the input pdb file""" self._pdbin = pdbin @property def pdbout(self): """The output pdb file""" return self._pdbout @pdbout.setter def pdbout(self, pdbout): """Define the output pdb file""" self._pdbout = pdbout @property def sgalternative(self): """Whether to check for alternative space groups""" return self._sgalternative @sgalternative.setter def sgalternative(self, sgalternative): """Define whether to check for alternative space groups""" if sgalternative: self._sgalternative = sgalternative.lower() else: self._sgalternative = sgalternative @property def space_group(self): """The input space group""" return self._space_group @space_group.setter def space_group(self, space_group): """Define the input space group""" self._space_group = space_group @property def work_dir(self): """The path to the working directory""" return self._work_dir @work_dir.setter def work_dir(self, work_dir): """Define the working directory""" self._work_dir = work_dir
[docs] def run(self): """Function to run molecular replacement using MOLREP Returns ------- file The output pdb from MOLREP """ # Make a note of the current working directory current_work_dir = os.getcwd() # Change to the MOLREP working directory if os.path.exists(self.work_dir): os.chdir(self.work_dir) else: os.makedirs(self.work_dir) os.chdir(self.work_dir) # Copy hklin and pdbin to working dire for efficient running of MOLREP hklin = os.path.join(self.work_dir, os.path.basename(self.hklin)) shutil.copyfile(self.hklin, hklin) pdbin = os.path.join(self.work_dir, os.path.basename(self.pdbin)) shutil.copyfile(self.pdbin, pdbin) logfile = os.path.join(self.work_dir, 'molrep_out_{0}.log'.format(self.space_group)) template_key = """ FILE_F {0} FILE_M {1} NMON {2} {3} END""" key = template_key.format(os.path.relpath(hklin), os.path.relpath(pdbin), self.nmol, "") self.molrep(key, logfile) # Move output pdb to specified name if os.path.isfile(os.path.join(self.work_dir, "molrep.pdb")): shutil.move(os.path.join(self.work_dir, "molrep.pdb"), os.path.join(self.work_dir, 'molrep_out_{0}.pdb'.format(self.space_group))) if self.sgalternative == "enant" and self.space_group in self.enant_sg_codes: hklin_sg_code = self.enant_sg_codes[self.space_group] enant_sg_code = self.enant_sg[hklin_sg_code] contrast = check_contrast(os.path.join(self.work_dir, 'molrep_out_{0}.log'.format(self.space_group))) contrasts = {self.space_group: contrast} key = template_key.format(os.path.relpath(hklin), os.path.relpath(pdbin), self.nmol, "NOSG {0}".format(enant_sg_code)) logfile = os.path.join(self.work_dir, 'molrep_out_{0}.log'.format(enant_sg_code)) self.molrep(key, logfile) contrasts[enant_sg_code] = (check_contrast(logfile)) if os.path.isfile(os.path.join(self.work_dir, "molrep.pdb")): shutil.move(os.path.join(self.work_dir, "molrep.pdb"), os.path.join(self.work_dir, 'molrep_out_{0}.pdb'.format(enant_sg_code))) self.evaluate_results(contrasts) elif self.sgalternative == "all" and self.space_group in self.all_sg_codes: hklin_sg_code = self.all_sg_codes[self.space_group] all_alt_sg_codes = self.all_alt_sg[hklin_sg_code] contrast = check_contrast(os.path.join(self.work_dir, 'molrep_out_{0}.log'.format(self.space_group))) contrasts = {self.space_group: contrast} for alt_sg_code in all_alt_sg_codes: key = template_key.format(os.path.relpath(hklin), os.path.relpath(pdbin), self.nmol, "NOSG {0}".format(alt_sg_code)) logfile = os.path.join(self.work_dir, 'molrep_out_{0}.log'.format(alt_sg_code)) self.molrep(key, logfile) contrasts[alt_sg_code] = (check_contrast(logfile)) if os.path.isfile(os.path.join(self.work_dir, "molrep.pdb")): shutil.move(os.path.join(self.work_dir, "molrep.pdb"), os.path.join(self.work_dir, 'molrep_out_{0}.pdb'.format(alt_sg_code))) self.evaluate_results(contrasts) else: # Move output pdb to specified name if os.path.isfile(os.path.join(self.work_dir, 'molrep_out_{0}.pdb'.format(self.space_group))): shutil.move(os.path.join(self.work_dir, 'molrep_out_{0}.pdb'.format(self.space_group)), self.pdbout) # Move log file to specified name if os.path.isfile(os.path.join(self.work_dir, 'molrep_out_{0}.log'.format(self.space_group))): shutil.move(os.path.join(self.work_dir, 'molrep_out_{0}.log'.format(self.space_group)), self.logfile) # Return to original working directory os.chdir(current_work_dir) # Delete any files copied across if os.path.isfile(os.path.join(self.work_dir, os.path.basename(self.hklin))): os.remove(os.path.join(self.work_dir, os.path.basename(self.hklin))) if os.path.isfile(os.path.join(self.work_dir, os.path.basename(self.pdbin))): os.remove(os.path.join(self.work_dir, os.path.basename(self.pdbin))) return
[docs] def evaluate_results(self, results): """Function to evaluate molrep results and move the result with the best contrast score to the output pdb Parameters ---------- results : dict Dictionary containing space group code with the corresponding contrast score Returns ------- file The output pdb for the best result file The output log for the best result """ top_sg_code = max(results.iteritems(), key=operator.itemgetter(1))[0] if os.path.isfile(os.path.join(self.work_dir, 'molrep_out_{0}.pdb'.format(top_sg_code))): shutil.move(os.path.join(self.work_dir, 'molrep_out_{0}.pdb'.format(top_sg_code)), self.pdbout) if os.path.isfile(os.path.join(self.work_dir, 'molrep_out_{0}.log'.format(top_sg_code))): shutil.move(os.path.join(self.work_dir, 'molrep_out_{0}.log'.format(top_sg_code)), self.logfile) ed = mtz_util.ExperimentalData(self.hklin) ed.change_space_group(top_sg_code) ed.output_mtz(self.hklout)
[docs] @staticmethod def molrep(key, logfile): """Function to run molecular replacement using MOLREP Parameters ---------- key : str MOLREP keywords logfile : Path to output log file Returns ------- file The output pdb from MOLREP file The output log file """ cmd = ["molrep"] stdout = cexec(cmd, stdin=key) with open(logfile, 'w') as f_out: f_out.write(stdout)
if __name__ == '__main__': import argparse parser = argparse.ArgumentParser(description='Runs MR using MOLREP', prefix_chars="-") group = parser.add_argument_group() group.add_argument('-hklin', type=str, help="Path the input hkl file") group.add_argument('-logfile', type=str, help="Path to the ouput log file") group.add_argument('-nmol', type=int, help="The predicted number of molecules to build") group.add_argument('-pdbin', type=str, help="Path to the input pdb file") group.add_argument('-pdbout', type=str, help="Path to the output pdb file") group.add_argument('-sgalternative', default=None, help="Try alternative space groups <all/enant>") group.add_argument('-space_group', type=str, help="The input space group") group.add_argument('-work_dir', type=str, help="Path to the working directory") args = parser.parse_args() molrep = Molrep(args.hklin, args.logfile, args.nmol, args.pdbin, args.pdbout, args.sgalternative, args.space_group, args.work_dir) molrep.run()