Source code for biobuild.structural.patch

"""
Functions to patch molecules together
"""

import numpy as np

import biobuild.utils.abstract as abstract
import biobuild.structural.connector as base
import biobuild.structural.infer as infer
import biobuild.core.Molecule as Molecule
import biobuild.core.Linkage as Linkage


[docs] class PatchError(Exception): pass
[docs] class Patcher(base.Connector): """ This class is responsible for patching molecules together based on a `Linkage` object and two `Molecule` objects, of which one is the target and one is the source. Residues from the source are integrated into the target molecule. The Linkage object must define a "patch", meaning it must contain data on internal coodrinates (ICs) of the resulting molecule in the vicinity of the newly formed bond. Parameters ---------- copy_target : bool Whether to copy the target molecule before patching copy_source : bool Whether to copy the source molecule before patching """ def __init__(self, copy_target: bool = False, copy_source: bool = False): super().__init__(copy_target=copy_target, copy_source=copy_source) self.patch = None
[docs] def set_patch(self, patch: "Linkage.Linkage"): """ Set the patch Parameters ---------- patch : Linkage The patch """ self.patch = patch
[docs] def apply( self, patch: "Linkage.Linkage" = None, target: "Molecule.Molecule" = None, source: "Molecule.Molecule" = None, target_residue=None, source_residue=None, ): """ Patch two molecules together. This will merge the source molecule's residues into the target molecule. If no patch, target, and source have been set already, they can be provided as arguments to this method. Parameters ---------- patch : Linkage The patch to apply (this needs to have ICs) target : Molecule The target molecule source : Molecule The source molecule target_residue : int or str or Residue The residue in the target molecule to which the source molecule will be patched. By default, the last residue in the target molecule will be used if it contains appropriate anchor atoms. source_residue : int or str or Residue The residue in the source molecule that will be patched into the target molecule. By default, the first residue in the source molecule will be used if it contains appropriate anchor atoms. Returns ------- tuple The patched target and source molecules (not yet merged into one molecule) """ if patch: self.patch = patch if target: self.target = target if source: self.source = source if self.copy_target: self.target = self.target.copy() if self.copy_source: self.source = self.source.copy() # a dictionary to store the anchor atoms in the source # molecule and their original coordinates self._source_computed_anchors = {} if not target_residue: target_residue = self.target.attach_residue if not source_residue: source_residue = self.source.attach_residue self.get_anchors(target_residue, source_residue) # compute internal coordinates to rebuild the source molecule # _ics = infer.compute_internal_coordinates(self.source.bonds) # self._source_ICs = abstract.AbstractEntity() # self._source_ICs.internal_coordinates = _ics # self._v.draw_edges(self.source.bonds, color="red", opacity=0.5) # align the source molecule to the target molecule self._align_anchors() # now start imputing the source molecule into the target molecule # starting with the atoms that are described in the patch's internal coordinates self._infer_patch_IC_neighbors() # and now trans-rotate the source molecule such that # the inferred neighboring atoms overlay their imputed counterparts self._transpose_source() self._delete_atoms() return self.target, self.source
[docs] def merge(self): """ Merge the source molecule into the target molecule Returns ------- Molecule The target molecule with the source molecule merged into it """ # self.target.adjust_indexing(self.source) self.target.add_residues(*self.source.residues) self.target._bonds.extend(self.source.bonds) self.target._AtomGraph.migrate_bonds(self.source._AtomGraph) self.target.add_bond(*self._anchors) return self.target
[docs] def get_anchors(self, target_residue=None, source_residue=None): """ Returns the two atoms that will be used for anchoring structure alignment. These atoms form a bond between the two molecules. Parameters ---------- target_residue : int or str or Residue The residue in the target molecule to which the source molecule will be patched. By default, the last residue in the target molecule will be used if it contains appropriate anchor atoms. source_residue : int or str or Residue The residue in the source molecule that will be patched into the target molecule. By default, the first residue in the source molecule will be used if it contains appropriate anchor atoms. Returns ------- tuple The two atoms that form the bond between the two molecules the first atom is from molecule1 and the second is from molecule2. """ if not self.patch: raise AttributeError("No patch set") _ref_atoms = {int(i[0]) - 1: i[1:] for i in self.patch._ref_atoms} ref_atom_1, ref_atom_2 = super().get_anchors( _ref_atoms, target_residue, source_residue ) return ref_atom_1, ref_atom_2
def _align_anchors(self): """ Align the source to the target molecule such that the anchors are in the correct distance """ ic_34 = self._match_IC_34() if len(ic_34) == 0: raise PatchError("No matching IC found") ic = ic_34[0] atom1 = self._get_ref_atom(ic.atom1) atom2 = self._get_ref_atom(ic.atom2) # atom1 = self.target.get_atoms(ic.atom1[1:]) # atom2 = self.target.get_atoms(ic.atom2[1:]) # if self._target_residue: # atom1 = [i for i in atom1 if i.get_parent() == self._target_residue] # atom2 = [i for i in atom2 if i.get_parent() == self._target_residue] # if len(atom1) == 0: # raise PatchError("No anchor atom found in target molecule") # if len(atom2) == 0: # raise PatchError("No anchor atom found in target molecule") # atom1 = atom1[-1] # atom2 = atom2[-1] new_anchor_source = infer.compute_atom4_from_others( atom1.coord, atom2.coord, self._anchors[0].coord, ic, ) # self._v.draw_point("target ref1", atom1.coord, color="teal") # self._v.draw_point("target ref2", atom2.coord, color="teal") # store the old coordinates and set the new coordinates self._source_computed_anchors[self._anchors[1]] = self._anchors[1].coord self._anchors[1].set_coord(new_anchor_source) # self._v.draw_point("anchor_source (new)", self._anchors[1].coord, color="green") # self._v.draw_point("anchor_target", self._anchors[0].coord, color="green") def _infer_patch_IC_neighbors(self): """ Infer the neighbors of the internal coordinates in the patch. This is required to rebuild the source molecule. """ for ic in self.patch.internal_coordinates: if ic.atom4[0] == "1": continue atom4 = self._get_ref_atom(ic.atom4) if atom4 in self._source_computed_anchors.keys(): continue atom3 = self._get_ref_atom(ic.atom3) atom2 = self._get_ref_atom(ic.atom2) atom1 = self._get_ref_atom(ic.atom1) _new_coords = infer.compute_atom4_from_others( atom1.coord, atom2.coord, atom3.coord, ic ) # self._v.draw_point( # atom4.id + " (old)", # atom4.coord, # color="brown", # opacity=0.6, # ) if np.isnan(_new_coords).any(): continue self._source_computed_anchors[atom4] = atom4.coord atom4.set_coord(_new_coords) # self._v.draw_point( # atom4.id # + " (new should-be)" # + f"tb-deleted={atom4.id in self.patch.deletes[1]}", # atom4.coord, # color="blue", # opacity=0.6, # ) def _transpose_source(self): """ Transpose the source molecule to overlay the """ if len(self._source_computed_anchors) < 3: raise PatchError("Not enough anchors to transpose") _old_coords = np.stack(list(self._source_computed_anchors.values())) _new_coords = np.array([i.coord for i in self._source_computed_anchors.keys()]) # for n in _old_coords: # self._v.draw_point("old", n, color="red") # for n in _new_coords: # self._v.draw_point("new", n, color="limegreen") # compute translation vector old_centroid = _old_coords.mean(axis=0) new_centroid = _new_coords.mean(axis=0) # translation_vector = new_centroid - old_centroid # self._v.draw_vector( # "translation vector", # old_centroid, # translation_vector, # color="teal", # ) _relative_old_coords = _old_coords - old_centroid _relative_new_coords = _new_coords - new_centroid H = (_relative_old_coords).T.dot(_relative_new_coords) U, S, VT = np.linalg.svd(H) R = VT.T @ U.T atoms = [ i for i in self.source.get_atoms() if i not in self._anchors and i not in self._source_computed_anchors.keys() ] atom_coords = np.array([i.coord for i in atoms]) atom_coords -= old_centroid atom_coords = atom_coords @ R.T atom_coords += new_centroid # old_centroid + translation_vector for atom, coord in zip(atoms, atom_coords): atom.set_coord(coord) # THE OLD STUFF HERE WORKED JUST FINE BUT IS LESS EFFICIENT SINCE WE # KEEP PERFORMING THE MATRIX MULTIPLICATIONS MANY TIMES IN A PYTHON # LOOP. THE NEW STUFF ABOVE IS MORE EFFICIENT SINCE WE ONLY PERFORM # THE MATRIX MULTIPLICATIONS ONCE AND THEN APPLY THE TRANSFORMATION # TO ALL ATOMS AT ONCE. # for atom in self.source.get_atoms(): # self._v.draw_point( # atom.id + " (old)", # atom.coord, # color="brown", # opacity=0.3, # showlegend=False, # ) # if atom in self._source_computed_anchors.keys(): # continue # vec = self._source_computed_anchors[atom] - old_centroid # new_coord = (R @ vec) + old_centroid + translation_vector # self._v.draw_point( # atom.id + " (new computed from old)", # new_coord, # color="purple", # opacity=0.6, # ) # atom.set_coord(current[idx]) # idx += 1 # vec = atom.coord - old_centroid # new_coord = (R @ vec) + old_centroid + translation_vector # atom.set_coord(new_coord) # self._v.draw_point( # atom.id + " (new)", # atom.coord, # color="lightblue", # opacity=0.6, # ) # self._v.draw_edges(self.source.bonds, color="blue", opacity=0.6) def _match_IC(self, n_target: int, n_source: int): """ Get appropriate internal coordinates from the patch that include the anchor atoms of the target and source molecule at specific positions (starting at 1). Parameters ---------- n_target : int The position of the target anchor atom in the internal coordinate n_source : int The position of the source anchor atom in the internal coordinate Returns ------- list All available internal coordinates that include the anchor atoms at the specified positions """ ids = [None, None, None, None] ids[n_target - 1] = "1" + self._anchors[0].id ids[n_source - 1] = "2" + self._anchors[1].id ics = self.patch.get_internal_coordinates( *ids, mode="partial", ) if len(ics) == 0: raise PatchError( "No internal coordinates found for anchor atoms at positions {} and {}".format( n_target, n_source ) ) return ics def _match_IC_34(self, ic=None): """ Get appropriate internal coordinates from the patch that include the anchor atoms of the target and source molecule at positions 3 and 4 and have three atoms from the target molecule. Or check if a given internal coordinate matches the criteria. Returns ------- list or bool All available internal coordinates that include the anchor atoms at the specified positions or True if the given internal coordinate matches the criteria. """ if ic: return ic.atom1[0] == "1" and ic.atom2[0] == "1" and ic.atom3[0] == "1" ids = ["1" + i.id for i in self.target.get_atoms()] ics = self._match_IC(3, 4) ics = [i for i in ics if i.atom1 in ids and i.atom2 in ids and i.atom3 in ids] if len(ics) == 0: raise PatchError( "No internal coordinates found for anchor atoms at positions 3 and 4" ) return ics def _delete_atoms(self): """ Delete all atoms that need to be deleted according to the patch """ delete_from_target, delete_from_source = self.patch.deletes if self._target_residue: delete_from_target = [ i for i in self._target_residue.child_list if i.id in delete_from_target ] if self._source_residue: delete_from_source = [ i for i in self._source_residue.child_list if i.id in delete_from_source ] self.target._remove_atoms(*delete_from_target) self.source._remove_atoms(*delete_from_source) @property def _objs(self): return { # target, object, specific residue, index policy if no residue "1": (self.target, self._target_residue, -1), "2": (self.source, self._source_residue, 0), } def _get_ref_atom(self, atom: str): """ Get the atom that is used as a reference for the given atom when computing the relative coordinates. Parameters ---------- atom : str The atom for which to get the reference atom Returns ------- Atom The reference atom """ res, id = atom[0], atom[1:] _obj, _res, _idx = self._objs[res] atoms = _obj.get_atoms(id, by="id") if _res: atoms = [i for i in atoms if i.get_parent() is _res] if len(atoms) == 0: raise PatchError("No atom found with id {}".format(atom)) atom = atoms[_idx] return atom
__default_keep_keep_patcher__ = Patcher(copy_target=False, copy_source=False) """ Default instance of the Patcher that will modify the target molecule in place """ # __default_keep_copy_patcher__ = Patcher(copy_target=False, copy_source=True) # """ # Default instance of the Patcher that will modify the target molecule in place # but use a copy of the source molecule to leave it intact. # """ # __default_copy_copy_patcher__ = Patcher(copy_target=True, copy_source=True) # """ # Default instance of the Patcher that will copy both the target and source molecules # to leave the originals intact. # """
[docs] def patch( target: "Molecule.Molecule", source: "Molecule.Molecule", patch: "Linkage.Linkage", target_residue=None, source_residue=None, copy_target: bool = False, copy_source: bool = False, ) -> "Molecule.Molecule": """ Patch two molecules together. This will merge the source molecule's residues into the target molecule. Parameters ---------- patch : Linkage The patch to apply target : Molecule The target molecule source : Molecule The source molecule target_residue : int or str or Residue The residue in the target molecule to which the source molecule will be patched. By default, the last residue in the target molecule will be used if it contains appropriate anchor atoms. source_residue : int or str or Residue The residue in the source molecule that will be patched into the target molecule. By default, the first residue in the source molecule will be used if it contains appropriate anchor atoms. copy_target : bool Whether to copy the target molecule before patching it copy_source : bool Whether to copy the source molecule before patching it Returns ------- Molecule The patched molecule """ if copy_target: target = target.copy() if copy_source: source = source.copy() __default_keep_keep_patcher__.apply( patch=patch, target=target, source=source, target_residue=target_residue, source_residue=source_residue, ) return __default_keep_keep_patcher__.merge()
if __name__ == "__main__": import biobuild as bb bb.load_sugars() glc = bb.get_compound("GLC") patcher = Patcher(copy_target=False, copy_source=False) v = glc.draw() patcher._v = v link = bb.get_linkage("14bb") a, b = patcher.apply(link, glc, glc.copy()) merged = patcher.merge() merged.show() # ser = bb.molecule("ser.json") # his = bb.molecule("his.json") # link = bb.Linkage.from_json("peptide_linkage.json") # patcher = Patcher(copy_target=False, copy_source=False) # v = ser.draw() # patcher._v = v # patcher.apply(link, ser, ser.copy()) # merged = patcher.merge() # patcher.apply(link, merged, ser.copy()) # merged = patcher.merge() # merged.show() # man = "support/examples/MAN.pdb" # man1 = bb.Molecule.from_pdb(man) # man1.infer_bonds() # man2 = man1.copy() # # now make sure that man2 has some different coordinates # man2.rotate_around_bond(1, 2, 35) # r = np.random.rand(3) * 0.1 # for i in man2.atoms: # i.coord += r # man1.lock_all() # man2.lock_all() # top = bb.get_default_topology() # colors = ["red", "green", "blue", "orange", "purple", "pink", "black"] # patcher = Patcher(False, True) # for i in ("14bb", "14bb", "12ab"): # # v2 = bb.utils.visual.MoleculeViewer3D(man1) # # patcher._v = v2 # patch_or_recipe = top.get_patch(i) # patcher.apply(patch_or_recipe, man1, man2) # man1 = patcher.merge() # new = man1 # seen_atoms = set() # for atom in new.atoms: # assert atom.serial_number not in seen_atoms # seen_atoms.add(atom.serial_number) # res_con = bb.structural.infer_residue_connections(new.chain, triplet=True) # v2 = bb.utils.visual.MoleculeViewer3D(man1) # for idx, residue in enumerate(man1.residues): # bonds = [ # i # for i in man1.bonds # if i[0] in residue.child_list and i[1] in residue.child_list # ] # v2.draw_edges(edges=bonds, color=colors[idx], linewidth=2) # # v2.draw_edges(edges=list(new.bonds), color="blue", opacity=1) # # v2.draw_edges(edges=list(new._locked_bonds), color="red", linewidth=3) # # v2.draw_edges(edges=res_con, color="limegreen", linewidth=4) # v2.show()