Source code for simbad.util.simbad_results

#!/usr/bin/env ccp4-python
# encoding: utf-8

import logging
import os
import pandas as pd

LATTICE_ID = 'latt'
CONTAMINANT_ID = 'cont'
MORDA_ID = 'morda'

ID2STR = { LATTICE_ID : 'lattice',
           CONTAMINANT_ID : 'contaminant',
           MORDA_ID : 'MORDA',
          }

logger = logging.getLogger(__name__)

[docs]class FileCollection(object): """Class for holding results files and annotations""" def __init__(self): self.mr_log = None self.ref_pdb = None self.ref_pdb_annotation = None self.ref_mtz_annotation = None self.ref_log = None self.ref_map = None self.diff_map = None
[docs]class SimbadResults(object): """Class to handle the results from a SIMBAD search Attributes ---------- work_dir : str The SIMBAD work directory where SIMBAD was run lattice_results : :Pandas dataframe: Dataframe containing the results of the lattice search lattice_mr_results : :Pandas dataframe: Dataframe containing the results of the MR """ def __init__(self, work_dir): assert os.path.isdir(work_dir),"Cannot find work_dir: %s" % work_dir self.work_dir = os.path.abspath(work_dir) # Create dataframes from results lattice_results = os.path.join(self.work_dir, LATTICE_ID, 'lattice_search.csv') self.lattice_results = pd.read_csv(lattice_results) if os.path.isfile(lattice_results) else None lattice_mr_results = os.path.join(self.work_dir, LATTICE_ID, 'lattice_mr.csv') self.lattice_mr_results = pd.read_csv(lattice_mr_results) if os.path.isfile(lattice_mr_results) else None contaminant_results = os.path.join(self.work_dir, CONTAMINANT_ID, 'rot_search.csv') self.contaminant_results = pd.read_csv(contaminant_results) if os.path.isfile(contaminant_results) else None contaminant_mr_results = os.path.join(self.work_dir, CONTAMINANT_ID, 'cont_mr.csv') self.contaminant_mr_results = pd.read_csv(contaminant_mr_results) if os.path.isfile(contaminant_mr_results) else None morda_db_results = os.path.join(self.work_dir, MORDA_ID, 'rot_search.csv') self.morda_db_results = pd.read_csv(morda_db_results) if os.path.isfile(morda_db_results) else None morda_db_mr_results = os.path.join(self.work_dir, MORDA_ID, 'morda_mr.csv') self.morda_db_mr_results = pd.read_csv(morda_db_mr_results) if os.path.isfile(morda_db_mr_results) else None
[docs] def top_files(self, num_results=3): """Return a list of num_results FileCollection objects with the results of the best MR steps""" if self.morda_db_mr_results is not None: return self.get_files(MORDA_ID, num_results=num_results) elif self.contaminant_mr_results is not None: return self.get_files(CONTAMINANT_ID, num_results=num_results) elif self.lattice_mr_results is not None: return self.get_files(LATTICE_ID, num_results=num_results) else: None
[docs] def get_files(self, search_type, num_results=10): """Return the best num_results results files from search_type Parameters ---------- search_type : str The type of search undertaken Returns ------- """ if search_type == LATTICE_ID: df = self.lattice_mr_results elif search_type == CONTAMINANT_ID: df = self.contaminant_mr_results elif search_type == MORDA_ID: df = self.morda_db_mr_results else: raise RuntimeError("Unrecognised search type: %s" % search_type) results = [] for i in range(0, num_results): try: fc = FileCollection() pdb_code = df.loc[i][0] mr_program = list(df)[1][0:6] mr_workdir = os.path.join( self.work_dir, search_type, 'mr_search', pdb_code, 'mr', mr_program) fc.mr_log = os.path.join( mr_workdir, '{0}_mr.log'.format(pdb_code)) fc.ref_pdb = os.path.join( mr_workdir, 'refine', '{0}_refinement_output.pdb'.format(pdb_code)) fc.ref_pdb_annotation = 'PDB #{0} from REFMAC-refined result of the {1} search'.format(i+1, ID2STR[search_type]) fc.ref_mtz = os.path.join( mr_workdir, 'refine', '{0}_refinement_output.mtz'.format(pdb_code)) fc.ref_mtz_annotation = 'MTZ #{0} from REFMAC-refined result of the {1} search'.format(i+1, ID2STR[search_type]) fc.ref_log = os.path.join( mr_workdir, 'refine', '{0}_ref.log'.format(pdb_code)) fc.ref_map = os.path.join( mr_workdir, 'refine', '{0}_refmac_2fofcwt.map'.format(pdb_code)) fc.diff_map = os.path.join( mr_workdir, 'refine', '{0}_refmac_fofcwt.map'.format(pdb_code)) results.append(fc) except KeyError: logger.debug("No result found at position %s", (i + 1)) return results
if __name__ == "__main__": # Setup argument parser from argparse import ArgumentParser parser = ArgumentParser() parser.add_argument(dest="work_dir") # Process arguments args = parser.parse_args() SR = SimbadResults(args.work_dir) for fc in SR.top_files(): print "FC ",fc, fc.ref_log, fc.ref_pdb_annotation