Source code for biobuild.core.base_classes

"""
The base_classes are derivatives of the original Biopython classes, but with
the change that they use a UUID4 as their identifier (full_id) instead of a hierarchical
tuple. This makes each object unique and allows for easy comparison where `a == b` is akin to `a is b`.
Consequently, the `__hash__` method is overwritten to use the UUID4 as the hash.

.. warning::
    
    Each class has its own `copy` method that returns a deep copy of the object with a new UUID4. So `a.copy() == a` is `False`, while a standard `deepcopy(a) == a` is `True` since the UUID4 will not have been updated automatically.

Converting to and from `biopython`
----------------------------------

Each biobuild class can be generated from a biopython class using the `from_biopython` class method. And each biobuild class has a `to_biopython` method that returns the pure-biopython equivalent.
It is important to note that, for most purposes, the biobuild classes should work fine as drop-in replacements for the original biopython classes.

.. code-block:: python

    import Bio.PDB as bio
    from biobuild.core.base_classes import Atom

    bio_atom = bio.Atom("CA", (0, 0, 0))
    atom = Atom.from_biopython(bio_atom)

    assert atom != bio_atom # the biobuild atom is compared by its own unique identifier, so the two are never equal
    assert atom.to_biopython() == bio_atom # True 

The conversion from and to biopython works hierarchically, so if an entire biopython structure is converted to biobuild
then all atoms, residues, chains and models will be converted to their biobuild equivalents.

.. code-block:: python

    import Bio.PDB as bio
    from biobuild.core.base_classes import Structure

    bio_structure = bio.PDBParser().get_structure("test", "test.pdb")
    structure = Structure.from_biopython(bio_structure)

    atoms = list(structure.get_atoms())
    bio_atoms = list(bio_structure.get_atoms())
    assert len(atoms) == len(bio_atoms) # True
    
"""
from copy import deepcopy

# from uuid import uuid4
import Bio.PDB as bio
import periodictable as pt


__all__ = ["Atom", "Residue", "Chain", "Model", "Structure", "Bond"]


class ID:
    """
    The base class for Biobuild's internal object identification.

    Every instance receives the next value of a class-wide integer counter
    as its identifier. Hashing, equality and inequality are based solely on
    that identifier, so two distinct objects only compare equal if one of
    them explicitly adopted the identifier of the other.
    """

    # Class-wide counter; the most recently issued identifier.
    __global_idx__ = 0

    def __init__(self):
        ID.__global_idx__ += 1
        self.__id = ID.__global_idx__

    def copy(self):
        """
        Return a deep copy of this object that carries a fresh identifier.
        """
        duplicate = deepcopy(self)
        duplicate._new_id()
        return duplicate

    def get_id(self):
        """
        Return the identifier of this object.
        """
        return self.__id

    def has_id(self, id):
        """
        Check whether `id` is a key of this object's `child_dict`.

        `child_dict` is not set by this class; it has to be provided by the
        class this one is combined with.
        """
        return id in self.child_dict

    def _new_id(self):
        # Draw the next identifier from the class-wide counter.
        ID.__global_idx__ += 1
        self.__id = ID.__global_idx__

    def _adopt_id(self, id):
        # Take over an externally supplied identifier without touching the counter.
        self.__id = id

    def __hash__(self):
        return self.__id

    def __eq__(self, other):
        # Objects that are not ID instances are never equal to an ID instance.
        return isinstance(other, ID) and self.__id == other.__id

    def __ne__(self, other):
        return not isinstance(other, ID) or self.__id != other.__id


class Atom(ID, bio.Atom.Atom):
    """
    An Atom object that inherits from Biopython's Atom class.

    Parameters
    ----------
    id : str
        The atom identifier
    coord : ndarray
        The atom coordinates
    serial_number : int, optional
        The atom serial number. The default is 1.
    bfactor : float, optional
        The atom bfactor. The default is 0.0.
    occupancy : float, optional
        The atom occupancy. The default is 1.0.
    fullname : str, optional
        The atom fullname. The default is None, in which case the id is used again.
    element : str, optional
        The atom element. It is upper-cased before being handed to Biopython.
        The default is None, which is forwarded to Biopython unchanged.
    altloc : str, optional
        The atom altloc. The default is " ".
    pqr_charge : float, optional
        The atom pqr_charge. The default is None.
    radius : float, optional
        The atom radius. The default is None.
    """

    __slots__ = (
        "id",
        "parent",
        "name",
        "fullname",
        "coord",
        "mass",
        "serial_number",
        "bfactor",
        "occupancy",
        "altloc",
        "element",
        "pqr_charge",
        "radius",
        "level",
        "disordered_flag",
        "anisou_array",
        "siguij_array",
        "sigatm_array",
        "xtra",
        "_sorting_keys",
    )

    def __init__(
        self,
        id: str,
        coord: "ndarray",
        serial_number: int = 1,
        bfactor: float = 0.0,
        occupancy: float = 1.0,
        fullname: str = None,
        element: str = None,
        altloc=" ",
        pqr_charge=None,
        radius=None,
    ):
        ID.__init__(self)
        bio.Atom.Atom.__init__(
            self,
            id,
            coord,
            bfactor,
            occupancy,
            altloc,
            fullname or id,  # fall back to the id when no fullname is given
            serial_number,
            element.upper() if element else element,
            pqr_charge,
            radius,
        )
        self.level = "A"

    @property
    def full_id(self):
        """
        The hierarchical identifier, computed on every access.

        It is the parent's full id extended by ``(name, altloc)``; without a
        parent the four leading positions are filled with None.
        """
        parent = self.get_parent()
        if not parent:
            return (None, None, None, None, (self.id, self.altloc))
        return (*parent.get_full_id(), (self.name, self.altloc))

    @full_id.setter
    def full_id(self, value):
        # full_id is always derived, so assignments are silently ignored.
        pass

    @classmethod
    def from_biopython(cls, atom) -> "Atom":
        """
        Convert a Biopython atom to an Atom object

        Parameters
        ----------
        atom
            The Biopython atom

        Returns
        -------
        Atom
            The Atom object
        """
        return cls(
            id=atom.id,
            coord=atom.coord,
            serial_number=atom.serial_number,
            bfactor=atom.bfactor,
            occupancy=atom.occupancy,
            fullname=atom.fullname,
            element=atom.element,
            altloc=atom.altloc,
            pqr_charge=atom.pqr_charge,
            radius=atom.radius,
        )

    def to_biopython(self):
        """
        Convert the Atom object to a Biopython atom

        Returns
        -------
        Atom
            The Biopython atom
        """
        return bio.Atom.Atom(
            self.id,
            self.coord,
            self.bfactor,
            self.occupancy,
            self.altloc,
            self.fullname,
            self.serial_number,
            self.element.upper(),
            self.pqr_charge,
            self.radius,
        )

    @staticmethod
    def _atomic_number(atom):
        # Look up the atomic number for the atom's element symbol.
        return pt.elements.symbol(atom.element.title()).number

    def __repr__(self):
        return f"Atom({self.id}, {self.serial_number})"

    # The ordering operators hold when EITHER the serial numbers OR the
    # atomic numbers satisfy the relation; the element lookup is only
    # performed when the serial number test fails.

    def __lt__(self, other):
        if self.serial_number < other.serial_number:
            return True
        return self._atomic_number(self) < self._atomic_number(other)

    def __gt__(self, other):
        if self.serial_number > other.serial_number:
            return True
        return self._atomic_number(self) > self._atomic_number(other)

    def __le__(self, other):
        if self.serial_number <= other.serial_number:
            return True
        return self._atomic_number(self) <= self._atomic_number(other)

    def __ge__(self, other):
        if self.serial_number >= other.serial_number:
            return True
        return self._atomic_number(self) >= self._atomic_number(other)

    def __hash__(self):
        return ID.__hash__(self)

    # NOTE: __eq__ and __ne__ are intentionally not defined here;
    # the identifier-based versions of ID are used.
class Residue(ID, bio.Residue.Residue):
    """
    A Residue object that inherits from Biopython's Residue class.

    Parameters
    ----------
    resname : str
        The residue name
    segid : str
        The residue segid.
    icode : int
        The residue icode. This is the residue serial number.
    """

    __slots__ = (
        "level",
        "disordered",
        "resname",
        "segid",
        "internal_coord",
        "_id",
        "parent",
        "child_list",
        "child_dict",
        "xtra",
        "_coord",
    )

    def __init__(self, resname, segid, icode):
        ID.__init__(self)
        bio.Residue.Residue.__init__(
            self, ("H_" + resname, icode, segid), resname, segid
        )
        self.level = "R"
        self.serial_number = icode
        # Explicitly assigned coordinate; None means "use the center of mass".
        self._coord = None

    @property
    def id(self):
        """
        The Biopython-style id tuple, rebuilt from the current
        resname, serial number and segid on every access.
        """
        return ("H_" + self.resname, self.serial_number, self.segid)

    @id.setter
    def id(self, value):
        # The id is always derived, so assignments are silently ignored.
        pass

    @property
    def full_id(self):
        """
        The parent's full id extended by this residue's id; without a
        parent the three leading positions are filled with None.
        """
        p = self.get_parent()
        if p:
            return (*p.get_full_id(), self.id)
        else:
            return (
                None,
                None,
                None,
                self.id,
            )

    @full_id.setter
    def full_id(self, value):
        # full_id is always derived, so assignments are silently ignored.
        pass

    @property
    def coord(self):
        """
        The explicitly assigned coordinate, or the center of mass
        if none has been assigned.
        """
        if self._coord is None:
            return self.center_of_mass()
        else:
            return self._coord

    @coord.setter
    def coord(self, value):
        self._coord = value

    @classmethod
    def from_biopython(cls, residue) -> "Residue":
        """
        Convert a BioPython Residue object to a Residue object.

        Parameters
        ----------
        residue : BioPython Residue object
            The residue to convert.

        Returns
        -------
        Residue
            The converted residue
        """
        # A Biopython residue id is (hetero flag, sequence number, insertion
        # code); only the sequence number maps onto our constructor. The name
        # and segid must come from their own attributes, otherwise the hetero
        # flag would end up as resname and the sequence number as segid.
        new = cls(residue.resname, residue.segid, residue.id[1])
        for atom in residue.get_atoms():
            new.add(Atom.from_biopython(atom))
        return new

    def to_biopython(self) -> bio.Residue.Residue:
        """
        Convert a Residue object to a pure BioPython Residue object.

        Returns
        -------
        bio.Residue.Residue
            The converted residue.
        """
        new = bio.Residue.Residue(self.id, self.resname, self.segid)
        for atom in self.get_atoms():
            new.add(atom.to_biopython())
        return new

    def add(self, atom):
        """
        Add an atom to the residue. Anything that is not already a
        biobuild Atom is converted with `Atom.from_biopython` first.
        """
        if not isinstance(atom, Atom):
            atom = Atom.from_biopython(atom)
        bio.Residue.Residue.add(self, atom)

    def __repr__(self):
        return f"Residue({self.resname}, {self.serial_number})"

    # Residues are ordered by their serial number.

    def __lt__(self, other):
        return self.serial_number < other.serial_number

    def __gt__(self, other):
        return self.serial_number > other.serial_number

    def __le__(self, other):
        return self.serial_number <= other.serial_number

    def __ge__(self, other):
        return self.serial_number >= other.serial_number

    # NOTE: __eq__ and __ne__ are intentionally not defined here;
    # the identifier-based versions of ID are used.
class Chain(ID, bio.Chain.Chain):
    """
    A Chain object that inherits from Biopython's Chain class.

    Parameters
    ----------
    id : str
        The chain identifier
    """

    __slots__ = (
        "level",
        "internal_coord",
        "_id",
        "parent",
        "child_list",
        "child_dict",
        "xtra",
    )

    def __init__(self, id):
        ID.__init__(self)
        # Start the initializer lookup *after* bio.Chain.Chain in the MRO.
        super(bio.Chain.Chain, self).__init__(id)
        self.level = "C"

    @property
    def full_id(self):
        """
        The parent's full id extended by the chain id; without a parent
        the two leading positions are filled with None.
        """
        parent = self.get_parent()
        if not parent:
            return (None, None, self.id)
        return (*parent.get_full_id(), self.id)

    @full_id.setter
    def full_id(self, value):
        # full_id is always derived, so assignments are silently ignored.
        pass

    def add(self, residue):
        """
        Add a residue to the chain. Anything that is not already a
        biobuild Residue is converted with `Residue.from_biopython` first.
        """
        converted = (
            residue
            if isinstance(residue, Residue)
            else Residue.from_biopython(residue)
        )
        bio.Chain.Chain.add(self, converted)

    @classmethod
    def from_biopython(cls, chain) -> "Chain":
        """
        Convert a BioPython Chain object to a Chain object.

        Parameters
        ----------
        chain : BioPython Chain object
            The chain to convert.

        Returns
        -------
        Chain
            The converted chain, including all converted residues.
        """
        result = cls(chain.id)
        for res in chain.get_residues():
            result.add(Residue.from_biopython(res))
        return result

    def to_biopython(self) -> bio.Chain.Chain:
        """
        Convert a Chain object to a pure BioPython Chain object.
        The residues of the chain are always converted as well.

        Returns
        -------
        bio.Chain.Chain
            The converted chain.
        """
        result = bio.Chain.Chain(self.id)
        for res in self.get_residues():
            result.add(res.to_biopython())
        return result

    def __repr__(self):
        return f"Chain({self._id})"

    # Chains are ordered by the code point of their (single character) id.

    def __lt__(self, other):
        mine, theirs = ord(self.id), ord(other.id)
        return mine < theirs

    def __gt__(self, other):
        mine, theirs = ord(self.id), ord(other.id)
        return mine > theirs

    def __le__(self, other):
        mine, theirs = ord(self.id), ord(other.id)
        return mine <= theirs

    def __ge__(self, other):
        mine, theirs = ord(self.id), ord(other.id)
        return mine >= theirs

    # NOTE: __eq__ and __ne__ are intentionally not defined here;
    # the identifier-based versions of ID are used.
# NOTE(review): unlike the other classes in this module, Model lists the
# Biopython base *before* ID, so Biopython's methods take precedence over
# ID's wherever both define the same name. Confirm that this is intended.
class Model(bio.Model.Model, ID):
    """
    A Model object that inherits from Biopython's Model class.

    Parameters
    ----------
    id : int or str
        The model identifier
    """

    __slots__ = (
        "level",
        # "serial_num",
        "_id",
        "parent",
        "child_list",
        "child_dict",
        "xtra",
    )

    def __init__(self, id):
        ID.__init__(self)
        # Start the initializer lookup *after* bio.Model.Model in the MRO.
        super(bio.Model.Model, self).__init__(id)
        self.level = "M"

    @property
    def serial_number(self):
        """
        The id itself if it is an integer, otherwise the code point of the id.
        """
        if isinstance(self.id, int):
            return self.id
        else:
            return ord(self.id)

    @serial_number.setter
    def serial_number(self, value):
        # The serial number is always derived, so assignments are ignored.
        pass

    @property
    def serial_num(self):
        """
        Alias of `serial_number`.
        """
        return self.serial_number

    @serial_num.setter
    def serial_num(self, value):
        # The serial number is always derived, so assignments are ignored.
        pass

    @property
    def full_id(self):
        """
        A two-element tuple of the parent's full id and the model id;
        without a parent the first element is None.
        """
        p = self.get_parent()
        if p:
            # The parent's full id is placed as ONE element (not unpacked).
            return (p.get_full_id(), self.id)
        else:
            return (None, self.id)

    @full_id.setter
    def full_id(self, value):
        # full_id is always derived, so assignments are silently ignored.
        pass

    def add(self, chain):
        """
        Add a chain to the model. Anything that is not already a
        biobuild Chain is converted with `Chain.from_biopython` first.
        """
        if not isinstance(chain, Chain):
            chain = Chain.from_biopython(chain)
        bio.Model.Model.add(self, chain)

    @classmethod
    def from_biopython(cls, model) -> "Model":
        """
        Convert a BioPython Model object to a Model object.

        Parameters
        ----------
        model : BioPython Model object
            The model to convert.

        Returns
        -------
        Model
            The converted model.
        """
        new = cls(model.id)
        for chain in model.get_chains():
            # Chain.from_biopython takes no `with_children` argument;
            # it always converts the residues of the chain as well.
            new.add(Chain.from_biopython(chain))
        return new

    def to_biopython(self):
        """
        Convert a Model object to a pure BioPython Model object.

        Returns
        -------
        bio.Model.Model
            The converted model.
        """
        new = bio.Model.Model(self.id)
        for chain in self.get_chains():
            # Chain.to_biopython takes no `with_children` argument;
            # it always converts the residues of the chain as well.
            new.add(chain.to_biopython())
        return new

    def __repr__(self):
        return f"Model({self._id})"

    # Models are ordered by their id.

    def __lt__(self, other):
        return self.id < other.id

    def __gt__(self, other):
        return self.id > other.id

    def __le__(self, other):
        return self.id <= other.id

    def __ge__(self, other):
        return self.id >= other.id
class Structure(ID, bio.Structure.Structure):
    """
    A Structure object that inherits from Biopython's Structure class.

    Parameters
    ----------
    id : str
        The structure identifier
    """

    __slots__ = (
        "level",
        "_id",
        "parent",
        "child_list",
        "child_dict",
        "xtra",
    )

    def __init__(self, id):
        ID.__init__(self)
        # Start the initializer lookup *after* bio.Structure.Structure in the MRO.
        super(bio.Structure.Structure, self).__init__(id)
        self.level = "S"

    @property
    def full_id(self):
        """
        The full id of a structure is simply its id.
        """
        return self.id

    @full_id.setter
    def full_id(self, value):
        # full_id is always derived, so assignments are silently ignored.
        pass

    @classmethod
    def from_biopython(cls, structure: "bio.Structure.Structure") -> "Structure":
        """
        Convert a BioPython Structure object to a Structure object.

        Residues are renumbered consecutively, starting at 1, across all
        chains and models of the structure.

        Parameters
        ----------
        structure : BioPython Structure object
            The structure to convert.

        Returns
        -------
        Structure
            The converted structure.
        """
        s = cls(structure.id)
        rdx = 1  # running residue serial number
        for model in structure.get_models():
            m = Model(model.id)
            for chain in model.get_chains():
                c = Chain(chain.id)
                for residue in chain.get_residues():
                    # ------------------------ NOTE -------------------------
                    # Bugfix: the segid sometimes arrives with more than one
                    # character instead of a single one. Only its first
                    # character is kept in that case. This is a little
                    # hacky, but it avoids having to modify the pdb module.
                    # -------------------------------------------------------
                    segid = residue.segid
                    if len(segid) > 1:
                        segid = segid[0]

                    r = Residue(residue.resname, segid, rdx)
                    rdx += 1
                    for atom in residue.get_atoms():
                        r.add(Atom.from_biopython(atom))
                    c.add(r)
                m.add(c)
            s.add(m)
        return s

    def to_biopython(self) -> "bio.Structure.Structure":
        """
        Convert a Structure object to a pure BioPython Structure object.

        Returns
        -------
        bio.Structure.Structure
            The converted structure.
        """
        s = bio.Structure.Structure(self.id)
        for model in self.get_models():
            m = bio.Model.Model(model.id)
            for chain in model.get_chains():
                c = bio.Chain.Chain(chain.id)
                for residue in chain.get_residues():
                    r = bio.Residue.Residue(
                        (residue.resname, residue.serial_number, residue.segid),
                        residue.resname,
                        residue.segid,
                    )
                    for atom in residue.get_atoms():
                        a = bio.Atom.Atom(
                            atom.id,
                            atom.coord,
                            atom.bfactor,
                            atom.occupancy,
                            atom.altloc,
                            atom.fullname,
                            atom.serial_number,
                            atom.element,
                            atom.pqr_charge,
                            atom.radius,
                        )
                        r.add(a)
                    c.add(r)
                m.add(c)
            s.add(m)
        return s

    def __repr__(self):
        return f"Structure({self._id})"

    # Structures are ordered by their id.

    def __lt__(self, other):
        return self.id < other.id

    def __gt__(self, other):
        return self.id > other.id

    def __le__(self, other):
        return self.id <= other.id

    def __ge__(self, other):
        return self.id >= other.id
class Bond:
    """
    A class representing a bond between two atoms.

    A bond can be created from two atoms (``Bond(a, b)``), from two atoms
    and a bond order (``Bond(a, b, 2)``), or from a single tuple, list or
    Bond holding the same items (``Bond((a, b))``).

    Attributes
    ----------
    atom1 : Atom
        The first atom in the bond.
    atom2 : Atom
        The second atom in the bond.
    order : int
        The bond order. The default is 1 (single bond).
    """

    __linkers = {0: "<none>", 1: "--", 2: "==", 3: "#"}

    __slots__ = ("atom1", "atom2", "order")

    def __init__(self, *atoms) -> None:
        if len(atoms) == 1:
            # A single container argument: unpack it and continue as if its
            # items had been passed directly. (Rebinding `self` to a new Bond
            # would leave this instance without any attributes.)
            container = atoms[0]
            if isinstance(container, Bond):
                atoms = container.to_tuple()  # keeps the bond order
            else:
                atoms = tuple(container)

        if len(atoms) == 2:
            self.atom1, self.atom2 = atoms
            self.order = 1
        elif len(atoms) == 3:
            self.atom1, self.atom2, self.order = atoms
        else:
            raise ValueError("Bond must be initialized with one tuple or two atoms")

    def invert(self):
        """
        Invert the bond, i.e. swap the two atoms.
        """
        self.atom1, self.atom2 = self.atom2, self.atom1

    def single(self):
        """
        Make the bond a single bond.
        """
        self.order = 1

    def double(self):
        """
        Make the bond a double bond.
        """
        self.order = 2

    def triple(self):
        """
        Make the bond a triple bond.
        """
        self.order = 3

    def is_single(self) -> bool:
        """
        Check if the bond is a single bond.

        Returns
        -------
        bool
            True if the bond is a single bond, False otherwise.
        """
        return self.order == 1

    def is_double(self) -> bool:
        """
        Check if the bond is a double bond.

        Returns
        -------
        bool
            True if the bond is a double bond, False otherwise.
        """
        return self.order == 2

    def is_triple(self) -> bool:
        """
        Check if the bond is a triple bond.

        Returns
        -------
        bool
            True if the bond is a triple bond, False otherwise.
        """
        return self.order == 3

    def compute_length(self) -> float:
        """
        Compute the bond length.

        Returns
        -------
        float
            The bond length, computed as ``atom1 - atom2``.
        """
        return self.atom1 - self.atom2

    def to_tuple(self) -> tuple:
        """
        Convert the bond to a tuple.

        Returns
        -------
        tuple
            The bond as a tuple of (atom1, atom2, order).
        """
        return (self.atom1, self.atom2, self.order)

    def __iter__(self):
        yield self.atom1
        yield self.atom2

    def __getitem__(self, idx):
        if idx == 0:
            return self.atom1
        elif idx == 1:
            return self.atom2
        else:
            raise IndexError("Bond only has two atoms")

    def __repr__(self) -> str:
        return f"Bond({self.atom1}, {self.atom2})"

    def __str__(self) -> str:
        return f"({self.atom1} {self.__linkers.get(self.order, '?') } {self.atom2})"

    def __eq__(self, other):
        # Bonds are undirected: (a, b) equals (b, a). The bond order is not
        # compared. `other` may be anything indexable at 0 and 1.
        try:
            first, second = other[0], other[1]
        except (TypeError, IndexError, KeyError):
            # Not a pair-like object: let Python fall back to its default.
            return NotImplemented
        forward = self.atom1 == first and self.atom2 == second
        backward = self.atom1 == second and self.atom2 == first
        return forward or backward

    def __ne__(self, other):
        outcome = self.__eq__(other)
        if outcome is NotImplemented:
            return outcome
        return not outcome

    def __hash__(self):
        # Symmetric in both atoms, consistent with the undirected equality.
        return hash(self.atom1) + hash(self.atom2)

    def __len__(self):
        return 2

    def __contains__(self, item):
        return item == self.atom1 or item == self.atom2
if __name__ == "__main__":
    # Manual smoke test: parse an example PDB file with Biopython, convert it
    # to the biobuild classes and back again, then print the biobuild structure.
    parser = bio.PDBParser()
    biopython_structure = parser.get_structure(
        "test", "/Users/noahhk/GIT/biobuild/support/examples/GLC.pdb"
    )
    converted = Structure.from_biopython(biopython_structure)
    round_tripped = converted.to_biopython()
    print(converted)