Source code for simbad.util.mtz_util

"""Module for MTZ file I/O and manipulation"""

__author__ = "Adam Simpkin & Jens Thomas"
__date__ = "17 May 2017"
__version__ = "0.2"

from cctbx import crystal
from cctbx import miller
from cctbx import sgtbx
from cctbx.xray import observation_types
from iotbx import reflection_file_reader
from iotbx.reflection_file_utils import looks_like_r_free_flags_info

import logging

logger = logging.getLogger(__name__)


[docs]class ExperimentalData(object): """Class to create a temporary mtz containing all the columns needed for SIMBAD from input reflection file Attributes ---------- input_reflection_file : str Path to the input reflection file in ccp4 mtz format output_mtz_file : str Path to the output mtz file Example 1 --------- >>> from simbad.util import mtz_util >>> ED = mtz_util.ExperimentalData("<input_reflection_file>") >>> ED.process_miller_arrays() >>> ED.output_mtz("<output_mtz_file>") Example 2 --------- >>> from simbad.util import mtz_util >>> ED = mtz_util.ExperimentalData("<input_reflection_file>") >>> ED.change_space_group('<new space group>') >>> ED.output_mtz("<output_mtz_file>") """ def __init__(self, input_reflection_file): self.amplitude_array = None self.anomalous_amplitude_array = None self.reconstructed_amplitude_array = None self.intensity_array = None self.anomalous_intensity_array = None self.free_array = None self.mtz_dataset = None reflection_file = reflection_file_reader.any_reflection_file(file_name=input_reflection_file) if not reflection_file.file_type() == "ccp4_mtz": msg = "File is not of type ccp4_mtz: {0}".format(input_reflection_file) logging.critical(msg) raise RuntimeError(msg) self.all_miller_arrays = reflection_file.as_miller_arrays() self.get_array_types()
[docs] def add_array_to_mtz_dataset(self, miller_array, column_root_label=None): """Function to add cctbx miller array obj to cctbx mtz dataset obj Parameters ---------- miller_array : cctbx :obj: Input cctbx obj containing a miller array column_root_label : str The root for the label of the output column Returns ------- self.mtz_dataset : cctbx :obj: cctbx mtz obj containing the input miller array """ if self.mtz_dataset: if column_root_label: self.mtz_dataset.add_miller_array(miller_array, column_root_label=column_root_label) else: column_root_label = miller_array.info().labels[0] self.mtz_dataset.add_miller_array(miller_array, column_root_label=column_root_label) else: if column_root_label: self.mtz_dataset = miller_array.as_mtz_dataset(column_root_label=column_root_label) else: column_root_label = miller_array.info().labels[0] self.mtz_dataset = miller_array.as_mtz_dataset(column_root_label=column_root_label) return
[docs] @staticmethod def check_anomalous_arrays(miller_array): """Function to check intensity/amplitude arrays from cctbx to ensure that they are not anomalous arrays Parameters ---------- miller_array : cctbx :obj: A cctbx :obj: containing a miller array of either intensities or amplitudes Returns ------- bool True/False """ if miller_array.anomalous_flag(): return True elif miller_array.info().type_hints_from_file == "anomalous_difference": return True elif len(miller_array.info().labels) > 2 and any("+" in label for label in miller_array.info().labels): return True return False
[docs] def change_space_group(self, new_space_group): """Change space group of input mtz Parameters ---------- new_space_group : str The new space group """ for miller_array in self.all_miller_arrays: array_info = miller_array.info() if not looks_like_r_free_flags_info(miller_array.info()): new_space_group_info = sgtbx.space_group_info(symbol=new_space_group) new_crystal_symmetry = crystal.symmetry(unit_cell=miller_array.unit_cell(), space_group_info=new_space_group_info, assert_is_compatible_unit_cell=False) miller_array = miller_array.customized_copy(crystal_symmetry=new_crystal_symmetry, info=array_info) self.add_array_to_mtz_dataset(miller_array)
[docs] def create_amplitude_array(self, intensity_array): """Function to create a cctbx amplitude array from an cctbx intensity array Parameters ---------- intensity_array : cctbx :obj: A cctbx :obj: containing a miller array of intensities Returns ------- self.amplitude_array : cctbx :obj: A cctbx :obj: containing a miller array of amplitudes """ array_info = miller.array_info() array_info.labels = ['F', 'SIGF'] self.amplitude_array = intensity_array.customized_copy( observation_type=observation_types.amplitude(), info=array_info ) return
[docs] def create_anomalous_amplitude_array(self, anomalous_intensity_array): """Function to create a cctbx anomalous amplitude array from a cctbx anomalous intensity array Parameters ---------- anomalous_intensity_array : cctbx :obj: A cctbx :obj: containing a miller array of anomalous intensities Returns ------- self.anomalous_amplitude_array : cctbx :obj: A cctbx :obj: containing a miller array of anomalous amplitudes """ array_info = miller.array_info() array_info.labels = ['F(+)', 'F(-)', 'SIGF(+)', 'SIGF(-)'] self.anomalous_amplitude_array = anomalous_intensity_array.customized_copy( observation_type=observation_types.amplitude(), info=array_info ) return
[docs] def create_anomalous_intensity_array(self, anomalous_amplitude_array): """Function to create a cctbx anomalous intensity array from a cctbx anomalous amplitude array Parameters ---------- anomalous_amplitude_array : cctbx :obj: A cctbx :obj: containing a miller array of anomalous amplitudes Returns ------- self.anomalous_intensity_array : cctbx :obj: A cctbx :obj: containing a miller array of anomalous intensities """ array_info = miller.array_info() array_info.labels = ['I(+)', 'I(-)', 'SIGI(+)', 'SIGI(-)'] self.anomalous_intensity_array = anomalous_amplitude_array.customized_copy( observation_type=observation_types.intensity(), info=array_info ) return
[docs] def create_merged_intensity_array(self, anomalous_intensity_array): """Function to create a cctbs intensity array from a cctbx anomalous intensity array Parameters ---------- anomalous_intensity_array : cctbx :obj: A cctbx :obj: containing a miller array of anomalous intensities Returns ------- self.intensity_array : cctbx :obj: A cctbx :obj: containing a miller array of intensities """ array_info = miller.array_info() array_info.labels = ['I', 'SIGI'] merged_intensity_array = anomalous_intensity_array.copy().as_non_anomalous_array().merge_equivalents() self.intensity_array = merged_intensity_array.array().customized_copy( observation_type=observation_types.intensity(), info=array_info ) return
[docs] def create_reconstructed_amplitude_array(self, anomalous_amplitude_array): """Function to create a cctbx reconstructed amplitude array from a cctbx anomalous amplitude array Parameters ---------- anomalous_amplitude_array : cctbx :obj: A cctbx :obj: containing a miller array of anomalous amplitudes Returns ------- self.reconstructed_amplitude_array : cctbx :obj: A cctbx :obj: containing a miller array of reconstructed amplitudes """ array_info = miller.array_info() array_info.labels = ['F', 'SIGF', 'DANO', 'SIGDANO', 'ISYM'] self.reconstructed_amplitude_array = anomalous_amplitude_array.customized_copy( observation_type=observation_types.reconstructed_amplitude(), info=array_info ) return
[docs] def get_array_types(self): """Function to assign array types contained within cctbx obj, in cases where there are multiple instances of a type of array, only the first will be considered. Due to limitations in cctbx Returns ------- self.free_array : cctbx :obj: A cctbx :obj: containing a miller array containing the Free R data self.amplitude_array : cctbx :obj: A cctbx :obj: containing a miller array of amplitudes self.anomalous_amplitude_array : cctbx :obj: A cctbx :obj: containing a miller array of anomalous amplitudes self.reconstructed_amplitude_array : cctbx :obj: A cctbx :obj: containing a miller array of reconstructed amplitudes self.intensity_array : cctbx :obj: A cctbx :obj: containing a miller array of intensities self.anomalous_intensity_array : cctbx :obj: A cctbx :obj: containing a miller array of anomalous intensities """ for miller_array in self.all_miller_arrays: if miller_array.observation_type() is None: if looks_like_r_free_flags_info(miller_array.info()): if not self.free_array: self.free_array = miller_array if miller_array.is_xray_amplitude_array() and not self.check_anomalous_arrays(miller_array): if not self.amplitude_array: self.amplitude_array = miller_array elif miller_array.info().type_hints_from_file == 'amplitude' and \ not self.check_anomalous_arrays(miller_array): if not self.amplitude_array: self.amplitude_array = miller_array elif miller_array.is_xray_reconstructed_amplitude_array(): if not self.reconstructed_amplitude_array: self.reconstructed_amplitude_array = miller_array elif miller_array.is_xray_amplitude_array() and self.check_anomalous_arrays(miller_array): if not self.anomalous_amplitude_array: self.anomalous_amplitude_array = miller_array elif miller_array.is_xray_intensity_array() and not self.check_anomalous_arrays(miller_array): if not self.intensity_array: self.intensity_array = miller_array elif miller_array.info().type_hints_from_file == 'intensity' and \ not self.check_anomalous_arrays(miller_array): if not self.intensity_array: self.intensity_array = miller_array elif miller_array.is_xray_intensity_array() and self.check_anomalous_arrays(miller_array): if not self.anomalous_intensity_array: self.anomalous_intensity_array = miller_array return
[docs] def output_mtz(self, output_mtz_file): """Function to output an mtz file from processed miller arrays Parameters ---------- output_mtz_file : str Path to output mtz file Returns ------- file mtz file containing all the columns needed to run SIMBAD """ mtz_object = self.mtz_dataset.mtz_object() mtz_object.write(file_name=output_mtz_file) return
[docs] def process_miller_arrays(self): """Function to process the miller arrays needed for SIMBAD Parameters ---------- self.free_array : cctbx :obj: A cctbx :obj: containing a miller array containing the Free R data self.amplitude_array : cctbx :obj: A cctbx :obj: containing a miller array of amplitudes self.anomalous_amplitude_array : cctbx :obj: A cctbx :obj: containing a miller array of anomalous amplitudes self.reconstructed_amplitude_array : cctbx :obj: A cctbx :obj: containing a miller array of reconstructed amplitudes self.intensity_array : cctbx :obj: A cctbx :obj: containing a miller array of intensities self.anomalous_intensity_array : cctbx :obj: A cctbx :obj: containing a miller array of anomalous intensities Returns ------- self.mtz_dataset : cctbx :obj: cctbx mtz obj containing all the miller arrays needed to run SIMBAD """ # Add amplitudes if self.reconstructed_amplitude_array: self.add_array_to_mtz_dataset(self.reconstructed_amplitude_array) elif self.anomalous_amplitude_array: self.create_reconstructed_amplitude_array(self.anomalous_amplitude_array) self.add_array_to_mtz_dataset(self.reconstructed_amplitude_array) elif self.amplitude_array: self.add_array_to_mtz_dataset(self.amplitude_array) elif self.intensity_array: self.create_amplitude_array(self.intensity_array) self.add_array_to_mtz_dataset(self.amplitude_array) elif self.anomalous_intensity_array: self.create_anomalous_amplitude_array(self.anomalous_intensity_array) self.create_reconstructed_amplitude_array(self.anomalous_amplitude_array) self.add_array_to_mtz_dataset(self.reconstructed_amplitude_array) # Add intensities if self.intensity_array: self.add_array_to_mtz_dataset(self.intensity_array) elif self.anomalous_intensity_array: self.create_merged_intensity_array(self.anomalous_intensity_array) self.add_array_to_mtz_dataset(self.intensity_array) # if not self.intensity_array or not self.amplitude_array: # msg = "No amplitudes or intensities found in input reflection file" # logging.critical(msg) # raise RuntimeError(msg) # Add free flag if self.free_array: try: self.add_array_to_mtz_dataset(self.free_array) except RuntimeError: pass else: self.free_array = self.amplitude_array.generate_r_free_flags(format='ccp4') self.add_array_to_mtz_dataset(self.free_array, "FreeR_flag") return
[docs]def crystal_data(mtz_file): """Set crystallographic parameters from mtz file Parameters ---------- mtz_file : str The path to the mtz file Returns ------- space_group : str The space group resolution : str The resolution cell_parameters : tuple The cell parameters """ reflection_file = reflection_file_reader.any_reflection_file(file_name=mtz_file) content = reflection_file.file_content() space_group = content.space_group_name().replace(" ", "") resolution = content.max_min_resolution()[1] cell_parameters = content.crystals()[0].unit_cell_parameters() return space_group, resolution, cell_parameters
[docs]def get_labels(mtz_file): """Function to get the column labels for input mtz file Parameters ---------- mtz_file : str The path to the mtz file Returns ------- f : str f column label fp : str fp column label i : str i column label sigi : str sigi column label dano : str dano column label sigdano : str sigdano column label free : str free column label """ reflection_file = reflection_file_reader.any_reflection_file(file_name=mtz_file) if not reflection_file.file_type() == "ccp4_mtz": msg="File is not of type ccp4_mtz: {0}".format(mtz_file) logging.critical(msg) raise RuntimeError(msg) content = reflection_file.file_content() ctypes = content.column_types() clabels = content.column_labels() ftype = 'F' jtype = 'J' dtype = 'D' if ftype not in ctypes: msg = "Cannot find any structure amplitudes in: {0}".format(mtz_file) raise RuntimeError(msg) f = clabels[ctypes.index(ftype)] # FP derived from F fp = 'SIG' + f fp_alt = 'PHI' + f if fp in clabels: pass elif fp_alt in clabels: fp = fp_alt pass else: msg = "Cannot find label {0} or {1} in file: {2}".format(fp, fp_alt, mtz_file) logging.warning(msg) i, sigi = None, None if jtype in ctypes: i = clabels[ctypes.index(jtype)] # SIGI derired from I sigi = 'SIG' + i if sigi not in clabels: msg = "Cannot find label {0} in file: {1}".format(sigi, mtz_file) raise RuntimeError(msg) try: if dtype not in ctypes: msg = "Cannot find any structure amplitudes in: {0}".format(mtz_file) raise RuntimeError(msg) dano = clabels[ctypes.index(dtype)] # SIGDANO derived from DANO sigdano = 'SIG' + dano if sigdano not in clabels: msg = "Cannot find label {0} in file: {1}".format(sigdano, mtz_file) raise RuntimeError(msg) except RuntimeError: dano, sigdano = None, None free = None for label in clabels: for word in ["free", "test", "cross", "status", "flag"]: if label.lower().find(word) >= 0: if free: logger.warning("FOUND >1 R FREE label in file!") free = label break return f, fp, i, sigi, dano, sigdano, free