"""
The base_classes are deriviatives of the original Biopython classes, but with
the change that they use a UUID4 as their identifier (full_id) instead of a hierarchical
tuple. This makes each object unique and allows for easy comparison where `a == b` is akin to `a is b`.
Consequently, the `__hash__` method is overwritten to use the UUID4 as the hash.
.. warning::
Each class has its own `copy` method that returns a deep copy of the object with a new UUID4. So `a.copy() == a` is `False`, while a standard `deepcopy(a) == a` is `True` since the UUID4 will not have been updated automatically.
Converting to and from `biopython`
----------------------------------
Each BuildAMol class can be generated from a biopython class using the `from_biopython` class method. And each BuildAMol class has a `to_biopython` method that returns the pure-biopython equivalent.
It is important to note, that for most purposes, however, the BuildAMol classes should work fine as trop-in replacements for the original biopython classes.
.. code-block:: python
import Bio.PDB as bio
from buildamol.base_classes import Atom
bio_atom = bio.Atom("CA", (0, 0, 0))
atom = Atom.from_biopython(bio_atom)
assert atom == bio_atom # False since atom uses a UUID4 as its identifier
assert atom.to_biopython() == bio_atom # True
The conversion from and to biopython works hierarchically, so if an entire biopython structure is converted to BuildAMol
then all atoms, residues, chains and models will be converted to their BuildAMol equivalents.
.. code-block:: python
import Bio.PDB as bio
from buildamol.base_classes import Structure
bio_structure = bio.PDBParser().get_structure("test", "test.pdb")
structure = Structure.from_biopython(bio_structure)
atoms = list(structure.get_atoms())
bio_atoms = list(bio_structure.get_atoms())
assert len(atoms) == len(bio_atoms) # True
"""
from copy import deepcopy
from typing import Union, List
# from uuid import uuid4
import Bio.PDB as bio
import numpy as np
import periodictable as pt
__all__ = ["Atom", "Residue", "Chain", "Model", "Structure", "Bond"]
__global_counters__ = {
"A": 1,
"R": 1,
"C": 1,
"M": 1,
"S": 1,
}
__global_element_counters__ = {}
class ID:
"""
The base class for BuildAMol's internal object identification.
All classes that inheret from this class will be recorded as unique objects.
"""
__global_idx__ = 0
def __init__(self):
self.__id = ID.__global_idx__ + 1
ID.__global_idx__ += 1
def copy(self):
new = deepcopy(self)
new._new_id()
return new
def get_id(self):
return self.__id
def has_id(self, id):
return id in self.child_dict
def _new_id(self):
self.__id = ID.__global_idx__ + 1
ID.__global_idx__ += 1
def _adopt_id(self, id):
self.__id = id
def __hash__(self):
return self.__id
def __eq__(self, other):
if not isinstance(other, ID):
return False
return self.__id == other.__id
def __ne__(self, other):
if not isinstance(other, ID):
return True
return self.__id != other.__id
[docs]
class Atom(ID, bio.Atom.Atom):
"""
An Atom object that inherits from Biopython's Atom class.
Parameters
----------
id : str
The atom identifier
coord : ndarray
The atom coordinates
serial_number : int, optional
The atom serial number. The default is 1.
bfactor : float, optional
The atom bfactor. The default is 0.0.
occupancy : float, optional
The atom occupancy. The default is 1.0.
fullname : str, optional
The atom fullname. The default is None, in which case the id is used again.
element : str, optional
The atom element. The default is None, in which case it is inferred based on the id.
altloc : str, optional
The atom altloc. The default is " ".
pqr_charge : float, optional
The atom pqr_charge. The default is None.
radius : float, optional
The atom radius. The default is None.
"""
__slots__ = (
"id",
"parent",
# "name",
"fullname",
"coord",
"mass",
"serial_number",
"bfactor",
"occupancy",
"altloc",
"element",
"pqr_charge",
"radius",
"level",
"disordered_flag",
"anisou_array",
"siguij_array",
"sigatm_array",
"xtra",
"_sorting_keys",
)
def __init__(
self,
id: str,
coord: "np.ndarray",
serial_number: int = 1,
bfactor: float = 0.0,
occupancy: float = 1.0,
fullname: str = None,
element: str = None,
altloc=" ",
pqr_charge=None,
radius=None,
):
if not fullname:
fullname = id
ID.__init__(self)
if element:
element = element.upper()
bio.Atom.Atom.__init__(
self,
id,
np.asarray(coord, dtype=np.float64),
bfactor,
occupancy,
altloc,
fullname,
serial_number,
element,
pqr_charge,
radius,
)
self.level = "A"
[docs]
@classmethod
def new(
cls,
element_or_id: str,
coord: "np.ndarray" = None,
generate_id: bool = True,
**kwargs,
) -> "Atom":
"""
Create a blank atom with a given element and coordinates.
Parameters
----------
element_or_id : str
The atom element. If the element is not found in the periodic table, it will be used as the atom id.
To ensure the correct element is assigned to the atom either pass it as it as keyword argument directly or
to let BuildAMol correctly infer the element use one of the following patterns with your atom id:
- <element+><number> -> C1, O2, CA2 (calcium id=CA2)
- <number><element+> -> 1C, 2O, 1CA (calcium id=1CA)
- <space><element><string|number+> -> " CA" (Carbon id=CA), " ND2 " (Nitrongen id=ND2), " OXT" (Oxygen id=OXT)
- <element+><space+> -> "CA " (Calcium id=CA), "FE " (Iron id=FE)
- <element+>_<string|number+> -> "CA_" (Calcium id=CA), "C_A" (Carbon id=CA)
(the + indicates multiple characters)
coord : ndarray, optional
The atom coordinates. The default is None.
generate_id : bool, optional
Whether to automatically generate a new id for the atom to avoid identically named atoms. The default is True.
**kwargs
Additional keyword arguments to pass to the Atom initializer
Returns
-------
Atom
The blank atom.
"""
if not generate_id:
_id = element_or_id
if coord is None:
coord = np.zeros(3)
if pt.elements.__dict__.get(element_or_id) or pt.elements.__dict__.get(
element_or_id.title()
):
element = element_or_id.upper()
__global_element_counters__[element] = (
__global_element_counters__.get(element, 0) + 1
)
id = f"{element}{__global_element_counters__[element]}"
else:
id, element = Atom._infer_element_from_id(element_or_id)
element = kwargs.pop("element", element)
if not generate_id:
id = _id
return cls(id=id, coord=coord, element=element, **kwargs)
[docs]
@classmethod
def from_element(self, element: str, **kwargs):
"""
Create a blank atom with a given element and coordinates.
Parameters
----------
element : str
The atom element.
**kwargs
Additional keyword arguments to pass to the new Atom initializer.
"""
return Atom.new(element, **kwargs)
@property
def name(self):
"""
Synonym for id.
"""
return self.id
@name.setter
def name(self, value):
self.id = value
@property
def weight(self):
"""
The atom mass (synonym for mass).
"""
return self.mass
@property
def charge(self):
"""
The atom charge.
"""
return self.pqr_charge
@charge.setter
def charge(self, value):
self.pqr_charge = value
@property
def full_id(self):
p = self.get_parent()
if p:
return (*p.get_full_id(), (self.id, self.altloc))
else:
return (None, None, None, None, (self.id, self.altloc))
@full_id.setter
def full_id(self, value):
pass
[docs]
def matches(
self, other, include_id: bool = True, include_coord: bool = False
) -> bool:
"""
Check if the atom matches another atom.
This will return True if the two atoms have the same element, id, parent-residue name, and altloc.
"""
verdict = (
self.element == other.element
and self.parent.resname == other.parent.resname
and self.altloc == other.altloc
)
if include_id:
verdict = verdict and self.id == other.id
if include_coord:
verdict = verdict and np.allclose(self.coord, other.coord)
return verdict
[docs]
def equals(self, other, include_coord: bool = False) -> bool:
"""
Check if the atom is equal to another atom.
This will return True if the two atoms match and have same the parent-serial number.
"""
return self.matches(other, include_id=True, include_coord=include_coord) and (
self.parent.id[1] == other.parent.id[1]
)
[docs]
@classmethod
def from_biopython(cls, atom) -> "Atom":
"""
Convert a Biopython atom to an Atom object
Parameters
----------
atom
The Biopython atom
Returns
-------
Atom
The Atom object
"""
return cls(
atom.id,
atom.coord,
atom.serial_number,
atom.bfactor,
atom.occupancy,
atom.fullname,
atom.element,
atom.altloc,
atom.pqr_charge,
atom.radius,
)
[docs]
def to_biopython(self):
"""
Convert the Atom object to a Biopython atom
Returns
-------
Atom
The Biopython atom
"""
return bio.Atom.Atom(
self.id,
self.coord,
self.bfactor,
self.occupancy,
self.altloc,
self.fullname,
self.serial_number,
self.element.upper(),
self.pqr_charge,
self.radius,
)
@property
def atomic_number(self):
"""
The atomic number of the atom's element.
"""
return pt.elements.symbol(self.element).number
[docs]
def set_id(self, id):
"""
Set the atom identifier.
Parameters
----------
id : str
The identifier to set.
"""
self.id = id
return self
[docs]
def set_element(self, element, adjust_id: bool = True):
"""
Set the atom element.
Parameters
----------
element : str
The element to set.
adjust_id : bool, optional
Whether to adjust the atom id to the new element. The default is True.
"""
current = self.element
self.element = element.upper()
self.mass = pt.elements.symbol(element).mass
if adjust_id:
count = __global_element_counters__.get(self.element, None)
if not count:
__global_element_counters__[self.element] = 1
count = 0
count += 1
self.id = self.element + str(count)
else:
self.id = self.id.replace(current, self.element)
return self
[docs]
def move(self, vector):
"""
Move the atom by a vector.
Parameters
----------
vector : ndarray
The vector to move the atom by.
"""
self.coord += vector
return self
[docs]
def get_neighbors(
self, n: int = 1, mode: str = "upto", filter: callable = None
) -> set:
"""
Get the neighboring atoms of this atom within n bonds.
Parameters
----------
n : int
The number of bonds to search for neighbors.
mode : str, optional
The mode to use for searching for neighbors. The default is "upto", which will return all neighbors within n bonds.
Other options are "at" which will return only neighbors that are exactly n bonds away.
filter : callable, optional
A function that takes an atom as input and returns True if the atom should be included in the output. The default is None.
Returns
-------
set
A set of neighboring atoms.
"""
if mode not in ("upto", "at"):
raise ValueError("mode must be either 'upto' or 'at'")
molecule = self.molecule
if molecule is None:
raise ValueError(
"Only atoms that are part of a Molecule have information about connectivity."
)
out = molecule._AtomGraph.get_neighbors(self, n, mode)
if filter:
return {a for a in out if filter(a)}
return out
[docs]
def get_hydrogens(self) -> set:
"""
Get all hydrogen neighbors of an atom.
Parameters
----------
atom
The atom
Returns
-------
set
A set of hydrogen neighbors, if they exist, an empty set otherwise
"""
return self.get_neighbors(n=1, filter=lambda a: a.element == "H")
[docs]
def get_equatorial_neighbor(self):
"""
Get the equatorial neighbor of an atom, if the atom is in a ring structure.
Parameters
----------
atom
The atom
Returns
-------
Atom
The equatorial neighbor, if it exists, None otherwise
"""
molecule = self.molecule
if molecule is None:
raise ValueError(
"Only atoms that are part of a Molecule have information about connectivity."
)
return molecule.get_equatorial_neighbor(self)
[docs]
def get_axial_neighbor(self):
"""
Get the axial neighbor of an atom, if the atom is in a ring structure.
Parameters
----------
atom
The atom
Returns
-------
Atom
The axial neighbor, if it exists, None otherwise
"""
molecule = self.molecule
if molecule is None:
raise ValueError(
"Only atoms that are part of a Molecule have information about connectivity."
)
return molecule.get_axial_neighbor(self)
[docs]
def get_left_hydrogen(self):
"""
Get the "left-protruding" hydrogen neighbor of an atom with two hydrogens and two non-hydrogen neighbors.
Parameters
----------
atom
The atom
Returns
-------
Atom
The left hydrogen, if it exists, None otherwise
Example
-------
In a molecule:
```
H_B
|
CH3 -- C -- CH2 -- OH
|
H_A
```
We want to get the left and right hydrogens of the central C atom (labeled only C).
Using part of the logic behind R/S nomenclature for chiral centers, we prioritize the non-H neighbors
and then rotate the molecule such that the highest order non-H neighbor points toward the user and the other
non-H neighbor points away. The left and right hydrogens are then determined based on their orientation in this view.
In this case, the left hydrogen is H_A and the right hydrogen is H_B.
"""
molecule = self.molecule
if molecule is None:
raise ValueError(
"Only atoms that are part of a Molecule have information about connectivity."
)
return molecule.get_left_hydrogen(self)
[docs]
def get_right_hydrogen(self):
"""
Get the "right-protruding" hydrogen neighbor of an atom with two hydrogens and two non-hydrogen neighbors.
Parameters
----------
atom
The atom
Returns
-------
Atom
The right hydrogen, if it exists, None otherwise
Example
-------
In a molecule:
```
H_B
|
CH3 -- C -- CH2 -- OH
|
H_A
```
We want to get the left and right hydrogens of the central C atom (labeled only C).
Using part of the logic behind R/S nomenclature for chiral centers, we prioritize the non-H neighbors
and then rotate the molecule such that the highest order non-H neighbor points toward the user and the other
non-H neighbor points away. The left and right hydrogens are then determined based on their orientation in this view.
In this case, the left hydrogen is H_A and the right hydrogen is H_B.
"""
molecule = self.molecule
if molecule is None:
raise ValueError(
"Only atoms that are part of a Molecule have information about connectivity."
)
return molecule.get_right_hydrogen(self)
[docs]
def get_axial_hydrogen(self):
"""
Get the axial hydrogen neighbor of an atom, if the atom is in a ring structure.
Parameters
----------
atom
The atom
Returns
-------
Atom
The axial hydrogen, if it exists, None otherwise
"""
molecule = self.molecule
if molecule is None:
raise ValueError(
"Only atoms that are part of a Molecule have information about connectivity."
)
return molecule.get_axial_hydrogen(self)
[docs]
def get_equatorial_hydrogen(self):
"""
Get the equatorial hydrogen neighbor of an atom, if the atom is in a ring structure.
Parameters
----------
atom
The atom
Returns
-------
Atom
The equatorial hydrogen, if it exists, None otherwise
"""
molecule = self.molecule
if molecule is None:
raise ValueError(
"Only atoms that are part of a Molecule have information about connectivity."
)
return molecule.get_equatorial_hydrogen(self)
[docs]
def get_bonds(self) -> list:
"""
Get a list of all bonds that this atom is part of.
"""
molecule = self.molecule
if molecule is None:
raise ValueError(
"Only atoms that are part of a Molecule have information about connectivity."
)
return molecule._get_bonds(self, None, either_way=True)
@staticmethod
def _infer_element_from_id(id: str) -> str:
"""
A method to infer the element from the atom id.
Rules that are considered (+ indicates multiple characters):
- <element+><number>, e.g. C1, O2, CA2 (calcium id=CA2)
- <number><element+>, e.g. 1C, 2O, 1CA (calcium id=1CA)
- <space><element><string|number+> -> " CA" (Carbon id=CA), " ND2 " (Nitrongen id=ND2), " OXT" (Oxygen id=OXT)
- <element+><space+> -> "CA " (Calcium), "FE " (Iron)
- <element+>_<string|number+> -> "CA_" (Calcium id=CA), "C_A" (Carbon id=CA)
"""
id = id.upper()
element = None
# <number><element+>, e.g. 1C, 2O, 3N, 1H
# or
# <element+><number>, e.g. C1, O2, N3, H1
if id[0].isdigit() or (id[0] != " " and id[-1].isdigit()):
element = id.translate(str.maketrans("", "", "0123456789"))
# <space><element><string+> -> " CA" (Carbon id=CA), " ND2" (Nitrongen id=ND2), " OXT" (Oxygen id=OXT)
elif id[0] == " ":
id = id.strip()
element = id[0]
# <element+>_<string+> -> "CA_" (Calcium id=CA), "C_A" (Carbon id=CA)
elif "_" in id:
element = id.split("_")[0]
id = id.replace("_", "")
# <element+><space+> -> "CA " (Calcium), "FE " (Iron)
elif id[-1] == " ":
element = id = id.strip()
return id, element
@property
def molecule(self):
p = self.parent
while p is not None and getattr(p, "parent", None):
p = p.parent
return getattr(p, "_molecule", None)
def __repr__(self):
return f"Atom({self.id}, {self.serial_number})"
def __lt__(self, other):
return (self.serial_number < other.serial_number) or (
pt.elements.symbol(self.element.title()).number
< pt.elements.symbol(other.element.title()).number
)
def __gt__(self, other):
return self.serial_number > other.serial_number or (
pt.elements.symbol(self.element.title()).number
> pt.elements.symbol(other.element.title()).number
)
def __le__(self, other):
return self.serial_number <= other.serial_number or (
pt.elements.symbol(self.element.title()).number
<= pt.elements.symbol(other.element.title()).number
)
def __ge__(self, other):
return self.serial_number >= other.serial_number or (
pt.elements.symbol(self.element.title()).number
>= pt.elements.symbol(other.element.title()).number
)
def __hash__(self):
return ID.__hash__(self)
# def __eq__(self, other):
# return self.serial_number == other.serial_number and (
# pt.elements.symbol(self.element.title()).number
# == pt.elements.symbol(other.element.title()).number
# )
# def __ne__(self, other):
# return self.serial_number != other.serial_number or (
# pt.elements.symbol(self.element.title()).number
# != pt.elements.symbol(other.element.title()).number
# )
[docs]
class Residue(ID, bio.Residue.Residue):
"""
A Residue object that inherits from Biopython's Residue class.
Parameters
----------
resname : str
The residue name
segid : str
The residue segid.
icode : int
The residue icode.
This is the residue serial number.
"""
__slots__ = (
"level",
"disordered",
"resname",
"segid",
"internal_coord",
"_id",
"parent",
"child_list",
"child_dict",
"xtra",
"_coord",
)
def __init__(self, resname, segid=" ", icode=1):
ID.__init__(self)
bio.Residue.Residue.__init__(
self, ("H_" + resname, icode, segid), resname, segid
)
self.level = "R"
self.serial_number = icode
self._coord = None
[docs]
@classmethod
def new(cls, resname: str, segid: str = " ", icode: int = None) -> "Residue":
"""
Create a blank residue with a given name and segid.
Parameters
----------
resname : str
The residue name.
segid : str, optional
The residue segid. The default is " ".
icode : int, optional
The residue icode. The default is None.
Returns
-------
Residue
The blank residue.
"""
if icode is None:
icode = __global_counters__["R"]
__global_counters__["R"] += 1
return cls(resname, segid, icode)
@property
def id(self):
return ("H_" + self.resname, self.serial_number, self.segid)
@id.setter
def id(self, value):
pass
@property
def name(self):
"""Synonym for resname."""
return self.resname
@name.setter
def name(self, value):
self.resname = value
@property
def full_id(self):
p = self.get_parent()
if p:
return (*p.get_full_id(), self.id)
else:
return (
None,
None,
None,
self.id,
)
@full_id.setter
def full_id(self, value):
pass
@property
def coord(self):
if self._coord is None:
return self.center_of_mass()
else:
return self._coord
@coord.setter
def coord(self, value):
self._coord = value
[docs]
def get_coord(self) -> "np.ndarray":
"""
Get the center of mass of the residue.
"""
return self.coord
[docs]
def set_coord(self, value):
"""
Set the center of mass of the residue.
"""
self.coord = value
[docs]
def get_coords(self) -> "np.ndarray":
"""
Get the coordinates of all atoms in the residue.
"""
return np.array([atom.coord for atom in self.get_atoms()])
@property
def atoms(self):
return sorted(self.get_atoms(), key=lambda x: x.serial_number)
[docs]
def count_atoms(self) -> int:
"""
Count the number of atoms in the residue.
"""
return len(self.child_list)
[docs]
def get_atom(self, atom: Union[str, int]) -> Atom:
"""
Get an atom by its name or serial number.
Parameters
----------
atom : str or int
The atom name or serial number.
Returns
-------
Atom
The atom.
"""
if isinstance(atom, int):
return next((i for i in self.child_list if i.serial_number == atom), None)
elif isinstance(atom, str):
return next((i for i in self.child_list if i.id == atom), None)
else:
raise TypeError(f"atom must be either a string or an integer, not {atom=}")
[docs]
def get_atoms(self, *atoms: Union[str, int]) -> "List[Atom]":
"""
Get all atoms in the residue.
Parameters
----------
atoms : str or int, optional
The atom name or serial number to filter by.
Returns
-------
List[Atom]
The list of atoms. If no atoms argument is specified the default generator is returned.
"""
if len(atoms) == 0:
return super().get_atoms()
elif len(atoms) == 1 and isinstance(atoms[0], (tuple, list, set)):
atoms = atoms[0]
if isinstance(atoms[0], str):
return [i for i in self.child_list if i.id in atoms]
# makes no sense but for consistency
elif isinstance(atoms[0], int):
return [i for i in self.child_list if i.serial_number in atoms]
else:
raise TypeError(
f"atoms must be either a list, tuple, or set of string or integer, not {atoms=}"
)
[docs]
def get_bonds(self, residue_internal: bool = True) -> list:
"""
Get a list of all bonds with participating atoms that belong to this residue.
Parameters
----------
residue_internal : bool, optional
Whether to only return bonds that are internal to this residue. Or also include bonds to atoms outside of this residue. The default is True.
"""
molecule = self.molecule
if molecule is None:
raise ValueError(
"Only residues that are part of a Molecule have information about connectivity."
)
return molecule.get_bonds(self, residue_internal=residue_internal)
[docs]
def get_neighbors(
self, n: int = 1, mode: str = "upto", filter: callable = None
) -> set:
"""
Get the neighboring residues of this residue as they appear in the topology ResidueGraph.
Parameters
----------
n : int
The number of bonds to search for neighbors.
mode : str, optional
The mode to use for searching for neighbors. The default is "upto", which will return all neighbors within n bonds.
Other options are "at" which will return only neighbors that are exactly n bonds away.
filter : callable, optional
A function that takes a residue as input and returns True if the residue should be included in the output. The default is None.
"""
if mode not in ("upto", "at"):
raise ValueError("mode must be either 'upto' or 'at'")
molecule = self.molecule
if molecule is None:
raise ValueError(
"Only residues that are part of a Molecule have information about connectivity."
)
out = molecule.get_residue_graph(detailed=False, locked=False).get_neighbors(
self, n, mode
)
if filter:
return {r for r in out if filter(r)}
return out
[docs]
def matches(self, other) -> bool:
"""
Check if the residue matches another residue.
This will return True if the two residues have the same resname, segid, and parent-chain id.
"""
return (
self.resname == other.resname
and self.segid == other.segid
and self.parent.id == other.parent.id
)
[docs]
def equals(self, other, include_serial: bool = False) -> bool:
"""
Check if the residue is equal to another residue.
This will check if the two residues are in the same parent and if all atoms are matching.
"""
verdict = self.matches(other) and all(
i.matches(j) for i, j in zip(self.get_atoms(), other.get_atoms())
)
if include_serial:
verdict = verdict and self.serial_number == other.serial_number
return verdict
[docs]
def link(self, atom):
"""
Softlink an atom into this residue's child_list without touching the atom's own parent references.
"""
self.child_dict[atom.get_id()] = atom
self.child_list.append(atom)
[docs]
def unlink(self, atom):
"""
Unlink an atom from this residue's child_list without touching the atom's own parent references.
"""
del self.child_dict[atom.get_id()]
self.child_list.remove(atom)
# def add(self, atom):
# if atom.get_id() not in self.child_dict:
# self.child_list.append(atom)
# self.child_dict[atom.id] = atom
# atom.set_parent(self)
# def remove(self, atom):
# if atom.get_id() in self.child_dict:
# self.child_list.remove(atom)
# del self.child_dict[atom.id]
# atom.set_parent(None)
[docs]
@classmethod
def from_biopython(cls, residue) -> "Residue":
"""
Convert a BioPython Residue object to a Residue object.
Parameters
----------
residue : BioPython Residue object
The residue to convert.
Returns
-------
Residue
The converted residue
"""
new = cls(residue.id[0], residue.id[1], residue.id[-1])
for atom in residue.get_atoms():
new.add(Atom.from_biopython(atom))
return new
[docs]
def to_biopython(self) -> bio.Residue.Residue:
"""
Convert a Residue object to a pure BioPython Residue object.
Returns
-------
bio.Residue.Residue
The converted residue.
"""
new = bio.Residue.Residue(self.id, self.resname, self.segid)
for atom in self.get_atoms():
new.add(atom.to_biopython())
return new
[docs]
def add(self, atom):
if not isinstance(atom, Atom):
atom = Atom.from_biopython(atom)
bio.Residue.Residue.add(self, atom)
[docs]
def move(self, vector):
"""
Move the residue by a vector.
Parameters
----------
vector : ndarray
The vector to move the residue by.
"""
for atom in self.get_atoms():
atom.move(vector)
[docs]
def copy(self) -> "Residue":
"""
Return a deep copy of the residue with a new UUID4.
Returns
-------
Residue
The copied residue.
"""
new = ID.copy(self)
for atom in new.get_atoms():
ID._new_id(atom)
return new
@property
def molecule(self):
p = self.parent
while p is not None and getattr(p, "parent", None):
p = p.parent
return getattr(p, "_molecule", None)
def __hash__(self):
return ID.__hash__(self)
def __eq__(self, other):
if not isinstance(other, Residue):
return False
return ID.__eq__(self, other)
def __repr__(self):
return f"Residue({self.resname}, {self.serial_number})"
def __lt__(self, other):
return self.serial_number < other.serial_number
def __gt__(self, other):
return self.serial_number > other.serial_number
def __le__(self, other):
return self.serial_number <= other.serial_number
def __ge__(self, other):
return self.serial_number >= other.serial_number
# def __eq__(self, other):
# return (
# self.serial_number == other.serial_number
# and self.resname == other.resname
# and self.parent == other.parent
# )
# def __ne__(self, other):
# return (
# self.serial_number != other.serial_number
# or self.resname != other.resname
# or self.parent != other.parent
# )
[docs]
class Chain(ID, bio.Chain.Chain):
"""
A Chain object that inherits from Biopython's Chain class.
Parameters
----------
id : str
The chain identifier
"""
__slots__ = (
"level",
"internal_coord",
"_id",
"parent",
"child_list",
"child_dict",
"xtra",
)
def __init__(self, id):
ID.__init__(self)
super(bio.Chain.Chain, self).__init__(id)
self.level = "C"
[docs]
@classmethod
def new(cls, id: str) -> "Chain":
"""
Create a blank chain with a given id.
Parameters
----------
id : str, optional
The chain identifier. The default is None.
Returns
-------
Chain
The blank chain.
"""
return cls(id)
@property
def name(self):
"""Synonym for id."""
return self.id
@name.setter
def name(self, value):
self._id = value
@property
def full_id(self):
p = self.get_parent()
if p:
return (*p.get_full_id(), self.id)
else:
return (None, None, self.id)
@full_id.setter
def full_id(self, value):
pass
[docs]
def add(self, residue):
if not isinstance(residue, Residue):
residue = Residue.from_biopython(residue)
bio.Chain.Chain.add(self, residue)
@property
def residues(self):
return sorted(self.get_residues(), key=lambda x: x.serial_number)
@property
def atoms(self):
return sorted(self.get_atoms(), key=lambda x: x.serial_number)
[docs]
def count_residues(self) -> int:
"""
Count the number of residues in the chain.
"""
return len(self.child_list)
[docs]
def get_residue(self, residue: Union[str, int]) -> Residue:
"""
Get a residue by its name or serial number.
Note
----
If there are multiple residues with the same name, the first one will be returned.
Parameters
----------
residue : str or int
The residue name or serial number.
Returns
-------
Residue
The residue.
"""
if isinstance(residue, int):
return next(
(i for i in self.child_list if i.serial_number == residue), None
)
elif isinstance(residue, str):
return next((i for i in self.child_list if i.resname == residue), None)
else:
raise TypeError(
f"residue must be either a string or an integer, not {residue=}"
)
[docs]
def get_residues(self, *residues: Union[str, int]) -> "List[Residue]":
"""
Get all residues in the chain.
Parameters
----------
residues : str or int, optional
The residue name or serial number to filter by.
Returns
-------
List[Residue]
The list of residues. If no residues argument is specified the default generator is returned.
"""
if len(residues) == 0:
return super().get_residues()
elif len(residues) == 1 and isinstance(residues[0], (tuple, list, set)):
residues = residues[0]
if isinstance(residues[0], str):
return [i for i in self.child_list if i.resname in residues]
# makes no sense but for consistency
elif isinstance(residues[0], int):
return [i for i in self.child_list if i.serial_number in residues]
else:
raise TypeError(
f"residues must be either a list, tuple, or set of string or integer, not {residues=}"
)
[docs]
def get_coords(self) -> "np.ndarray":
"""
Get the coordinates of all atoms in the chain.
"""
return np.array([atom.coord for atom in self.get_atoms()])
[docs]
def matches(self, other) -> bool:
"""
Check if the chain matches another chain.
This will return True if the two chains have matching residues.
"""
return all(
i.matches(j) for i, j in zip(self.get_residues(), other.get_residues())
)
[docs]
def equals(self, other) -> bool:
"""
Check if the chain is equal to another chain.
This will check if the two chains have the same id, the same parent-model id, and have equal residues.
"""
return (
self.id == other.id
and self.parent.id == other.parent.id
and all(
i.equals(j) for i, j in zip(self.get_residues(), other.get_residues())
)
)
[docs]
@classmethod
def from_biopython(cls, chain) -> "Chain":
"""
Convert a BioPython Chain object to a Chain object.
Parameters
----------
chain : BioPython Chain object
The chain to convert.
Returns
-------
Chain
The converted chain.
"""
new = cls(chain.id)
for residue in chain.get_residues():
new.add(Residue.from_biopython(residue))
return new
[docs]
def to_biopython(self) -> bio.Chain.Chain:
"""
Convert a Chain object to a pure BioPython Chain object.
Parameters
----------
with_children : bool, optional
Whether to convert the residues of the chain as well. The default is True.
Returns
-------
bio.Chain.Chain
The converted chain.
"""
new = bio.Chain.Chain(self.id)
for residue in self.get_residues():
new.add(residue.to_biopython())
return new
[docs]
def move(self, vector):
"""
Move the chain by a vector.
Parameters
----------
vector : ndarray
The vector to move the chain by.
"""
for residue in self.get_residues():
residue.move(vector)
[docs]
def copy(self):
"""
Return a deep copy of the chain with a new UUID4.
Returns
-------
Chain
The copied chain.
"""
new = ID.copy(self)
for residue in new.get_residues():
ID._new_id(residue)
for atom in residue.get_atoms():
ID._new_id(atom)
return new
[docs]
def link(self, residue):
"""
Softlink a residue into this chain's child_list without touching the residue's own parent references.
"""
self.child_dict[residue.get_id()] = residue
self.child_list.append(residue)
[docs]
def unlink(self, residue):
"""
Unlink a residue from this chain's child_list without touching the residue's own parent references.
"""
del self.child_dict[residue.get_id()]
self.child_list.remove(residue)
@property
def molecule(self):
p = self.parent
while p is not None and getattr(p, "parent", None):
p = p.parent
return getattr(p, "_molecule", None)
def __repr__(self):
return f"Chain({self._id})"
def __lt__(self, other):
return ord(self.id) < ord(other.id)
def __gt__(self, other):
return ord(self.id) > ord(other.id)
def __le__(self, other):
return ord(self.id) <= ord(other.id)
def __ge__(self, other):
return ord(self.id) >= ord(other.id)
# def __eq__(self, other):
# return ord(self.id) == ord(other.id)
# def __ne__(self, other):
# return ord(self.id) != ord(other.id)
[docs]
class Model(bio.Model.Model, ID):
"""
A Model object that inherits from Biopython's Model class.
Parameters
----------
id : int or str
The model identifier
"""
__slots__ = (
"level",
# "serial_num",
"_id",
"parent",
"child_list",
"child_dict",
"xtra",
)
def __init__(self, id):
ID.__init__(self)
super(bio.Model.Model, self).__init__(id)
self.level = "M"
[docs]
@classmethod
def new(cls, id: int = None) -> "Model":
"""
Create a blank model with a given id.
Parameters
----------
id : int
The model identifier.
Returns
-------
Model
The blank model.
"""
if id is None:
id = __global_counters__["M"]
__global_counters__["M"] += 1
return cls(id)
@property
def serial_number(self):
if isinstance(self.id, int):
return self.id
else:
return ord(self.id)
@serial_number.setter
def serial_number(self, value):
pass
@property
def serial_num(self):
return self.serial_number
@serial_num.setter
def serial_num(self, value):
pass
@property
def full_id(self):
p = self.get_parent()
if p:
return (p.get_full_id(), self.id)
else:
return (None, self.id)
@full_id.setter
def full_id(self, value):
pass
[docs]
def add(self, chain):
if not isinstance(chain, Chain):
chain = Chain.from_biopython(chain)
bio.Model.Model.add(self, chain)
@property
def chains(self):
"""
Get the chains in the model.
"""
return sorted(self.get_chains(), key=lambda x: x.id)
[docs]
def count_chains(self) -> int:
"""
Count the number of chains in the model.
"""
return len(self.child_list)
[docs]
def get_chain(self, chain: Union[str, int]) -> Chain:
"""
Get a chain by its id.
Parameters
----------
chain : str or int
The chain id.
Returns
-------
Chain
The chain.
"""
if isinstance(chain, int):
return next((i for j, i in enumerate(self.child_list) if j == chain), None)
elif isinstance(chain, str):
return next((i for i in self.child_list if i.id == chain), None)
else:
raise TypeError(
f"chain must be either a string or an integer, not {chain=}"
)
[docs]
def get_chains(self, *chains: Union[str, int]) -> "List[Chain]":
"""
Get all chains in the model.
Parameters
----------
chains : str or int, optional
The chain id to filter by.
Returns
-------
List[Chain]
The list of chains. If no chains argument is specified the default generator is returned.
"""
if len(chains) == 0:
return super().get_chains()
elif len(chains) == 1 and isinstance(chains[0], (tuple, list, set)):
chains = chains[0]
if isinstance(chains[0], str):
return [i for i in self.child_list if i.id in chains]
# makes no sense but for consistency
elif isinstance(chains[0], int):
return [i for j, i in enumerate(self.child_list) if j in chains]
else:
raise TypeError(
f"chains must be either a list, tuple, or set of string or integer, not {chains=}"
)
[docs]
def get_coords(self) -> "np.ndarray":
"""
Get the coordinates of all atoms in the model.
"""
return np.array([atom.coord for atom in self.get_atoms()])
[docs]
def move(self, vector):
"""
Move the model by a vector.
Parameters
----------
vector : ndarray
The vector to move the model by.
"""
for chain in self.get_chains():
chain.move(vector)
def link(self, chain):
"""
Softlink a chain into this model's child_list without touching the chain's own parent references.
"""
self.child_dict[chain.get_id()] = chain
self.child_list.append(chain)
def unlink(self, chain):
"""
Unlink a chain from this model's child_list without touching the chain's own parent references.
"""
del self.child_dict[chain.get_id()]
self.child_list.remove(chain)
[docs]
def matches(self, other) -> bool:
"""
Check if the model matches another model.
This will return True if the two models have matching chains.
"""
return all(i.matches(j) for i, j in zip(self.get_chains(), other.get_chains()))
[docs]
def equals(self, other) -> bool:
"""
Check if the model is equal to another model.
This will return True if the two models have the same id, same parent-structure id, and have matching chains.
"""
return (
self.matches(other)
and self.id == other.id
and self.parent.id == other.parent.id
)
[docs]
def copy(self):
"""
Return a deep copy of the model with a new UUID4.
Returns
-------
Model
The copied model.
"""
new = ID.copy(self)
for chain in new.get_chains():
ID._new_id(chain)
for residue in chain.get_residues():
ID._new_id(residue)
for atom in residue.get_atoms():
ID._new_id(atom)
return new
[docs]
@classmethod
def from_biopython(cls, model):
"""
Convert a BioPython Model object to a Model object.
Parameters
----------
model : BioPython Model object
The model to convert.
Returns
-------
Model
The converted model.
"""
new = cls(model.id)
for chain in model.get_chains():
new.add(Chain.from_biopython(chain, with_children=True))
return new
[docs]
def to_biopython(self):
"""
Convert a Model object to a pure BioPython Model object.
Returns
-------
bio.Model.Model
The converted model.
"""
new = bio.Model.Model(self.id)
for chain in self.get_chains():
new.add(chain.to_biopython(with_children=True))
return new
[docs]
def link(self, chain):
"""
Softlink a chain into this model's child_list without touching the chain's own parent references.
"""
self.child_dict[chain.get_id()] = chain
self.child_list.append(chain)
[docs]
def unlink(self, chain):
"""
Unlink a chain from this model's child_list without touching the chain's own parent references.
"""
del self.child_dict[chain.get_id()]
self.child_list.remove(chain)
@property
def molecule(self):
p = self.parent
while p is not None and getattr(p, "parent", None):
p = p.parent
return getattr(p, "_molecule", None)
def __repr__(self):
return f"Model({self._id})"
# somehow the __eq__ was not inherited from ID...
def __eq__(self, other):
if not isinstance(other, Model):
return False
return self._ID__id == other._ID__id
def __lt__(self, other):
return self.id < other.id
def __gt__(self, other):
return self.id > other.id
def __le__(self, other):
return self.id <= other.id
def __ge__(self, other):
return self.id >= other.id
[docs]
class Structure(ID, bio.Structure.Structure):
"""
A Structure object that inherits from Biopython's Structure class.
Parameters
----------
id : str
The structure identifier
"""
__slots__ = (
"level",
"_id",
"parent",
"child_list",
"child_dict",
"xtra",
"_molecule",
)
def __init__(self, id):
ID.__init__(self)
super(bio.Structure.Structure, self).__init__(id)
self.level = "S"
self._molecule = None
[docs]
@classmethod
def new(cls, id: str) -> "Structure":
"""
Create a blank structure with a given id.
Parameters
----------
id : str
The structure identifier.
Returns
-------
Structure
The blank structure.
"""
return cls(id)
@property
def full_id(self):
return self.id
@full_id.setter
def full_id(self, value):
pass
[docs]
def add(self, model):
if not isinstance(model, Model):
model = Model.from_biopython(model)
bio.Structure.Structure.add(self, model)
[docs]
def get_coords(self) -> "np.ndarray":
"""
Get the coordinates of all atoms in the structure.
"""
return np.array([atom.coord for atom in self.get_atoms()])
[docs]
def matches(self, other) -> bool:
"""
Check if the structure matches another structure.
This will return True if the two structures have the same id.
"""
return self.id == other.id
[docs]
def equals(self, other) -> bool:
"""
Check if the structure is equal to another structure.
This will return True if the two structures have the same id and have equal models.
"""
return self.matches(other) and all(
i.equals(j) for i, j in zip(self.get_models(), other.get_models())
)
[docs]
def copy(self):
"""
Return a deep copy of the structure with a new UUID4.
Returns
-------
Structure
The copied structure.
"""
new = ID.copy(self)
for model in new.get_models():
ID._new_id(model)
for chain in model.get_chains():
ID._new_id(chain)
for residue in chain.get_residues():
ID._new_id(residue)
for atom in residue.get_atoms():
ID._new_id(atom)
return new
[docs]
@classmethod
def from_biopython(cls, structure: "bio.Structure.Structure") -> "Structure":
"""
Convert a BioPython Structure object to a Structure object.
Parameters
----------
structure : BioPython Structure object
The structure to convert.
Returns
-------
Structure
The converted structure.
"""
s = cls(structure.id)
for model in structure.get_models():
m = Model(model.id)
for chain in model.get_chains():
c = Chain(chain.id)
for residue in chain.get_residues():
# ------------------------ NOTE -------------------------
# This is a little weird bugfix where I found
# that sometimes the segid was " " instead of " ".
# This is a little hacky, but it works.
# It could be that there is a problem with the pdb module
# but that one has already seen enough modification so
# I don't want to tinker with it again...
# -------------------------------------------------------
segid = residue.segid
if len(segid) > 1:
segid = segid[0]
# -------------------------------------------------------
r = Residue(residue.resname, segid, residue.id[1])
for atom in residue.get_atoms():
a = Atom(
atom.id,
atom.coord,
atom.serial_number,
atom.bfactor,
atom.occupancy,
atom.fullname,
atom.element,
atom.altloc,
atom.pqr_charge,
atom.radius,
)
r.add(a)
c.add(r)
m.add(c)
s.add(m)
return s
[docs]
def to_biopython(self) -> "bio.Structure.Structure":
"""
Convert a Structure object to a pure BioPython Structure object.
Returns
-------
bio.Structure.Structure
The converted structure.
"""
s = bio.Structure.Structure(self.id)
for model in self.get_models():
m = bio.Model.Model(model.id)
for chain in model.get_chains():
c = bio.Chain.Chain(chain.id)
for residue in chain.get_residues():
r = bio.Residue.Residue(
(residue.resname, residue.serial_number, residue.segid),
residue.resname,
residue.segid,
)
for atom in residue.get_atoms():
a = bio.Atom.Atom(
atom.id,
atom.coord,
atom.bfactor,
atom.occupancy,
atom.altloc,
atom.fullname,
atom.serial_number,
atom.element,
atom.pqr_charge,
atom.radius,
)
r.add(a)
c.add(r)
m.add(c)
s.add(m)
return s
[docs]
def move(self, vector):
"""
Move the structure by a vector.
Parameters
----------
vector : ndarray
The vector to move the structure by.
"""
for model in self.get_models():
model.move(vector)
[docs]
def link(self, model):
"""
Softlink a model into this structure's child_list without touching the model's own parent references.
"""
self.child_dict[model.get_id()] = model
self.child_list.append(model)
[docs]
def unlink(self, model):
"""
Unlink a model from this structure's child_list without touching the model's own parent references.
"""
del self.child_dict[model.get_id()]
self.child_list.remove(model)
@property
def molecule(self):
return self._molecule
def __eq__(self, other):
if not isinstance(other, Structure):
return False
return self._ID__id == other._ID__id
def __repr__(self):
return f"Structure({self._id})"
def __lt__(self, other):
return self.id < other.id
def __gt__(self, other):
return self.id > other.id
def __le__(self, other):
return self.id <= other.id
def __ge__(self, other):
return self.id >= other.id
[docs]
class Bond:
"""
A class representing a bond between two atoms.
Attributes
----------
atom1 : Atom
The first atom in the bond.
atom2 : Atom
The second atom in the bond.
"""
__linkers = {0: "<none>", 1: "--", 2: "==", 3: "#"}
__slots__ = ("atom1", "atom2", "order")
def __init__(self, *atoms) -> None:
if len(atoms) == 1:
self = Bond(*atoms[0])
elif len(atoms) == 2:
self.atom1 = atoms[0]
self.atom2 = atoms[1]
self.order = 1
elif len(atoms) == 3:
self.atom1 = atoms[0]
self.atom2 = atoms[1]
self.order = atoms[2]
else:
raise ValueError("Bond must be initialized with one tuple or two atoms")
@property
def length(self) -> float:
return self.compute_length()
[docs]
def invert(self):
"""
Invert the bond, i.e. swap the two atoms.
"""
self.atom1, self.atom2 = self.atom2, self.atom1
return self
[docs]
def single(self):
"""
Make the bond a single bond.
"""
molecule = self.atom1.molecule
if molecule is None:
self.order = 1
return self
else:
molecule.single(self.atom1, self.atom2, adjust_hydrogens=True)
self.order = 1
return self
[docs]
def double(self):
"""
Make the bond a double bond.
"""
molecule = self.atom1.molecule
if molecule is None:
self.order = 2
else:
molecule.double(self.atom1, self.atom2, adjust_hydrogens=True)
self.order = 2
return self
[docs]
def triple(self):
"""
Make the bond a triple bond.
"""
molecule = self.atom1.molecule
if molecule is None:
self.order = 3
return self
else:
molecule.triple(self.atom1, self.atom2, adjust_hydrogens=True)
self.order = 3
return self
[docs]
def is_single(self) -> bool:
"""
Check if the bond is a single bond.
Returns
-------
bool
True if the bond is a single bond, False otherwise.
"""
return self.order == 1
[docs]
def is_double(self) -> bool:
"""
Check if the bond is a double bond.
Returns
-------
bool
True if the bond is a double bond, False otherwise.
"""
return self.order == 2
[docs]
def is_triple(self) -> bool:
"""
Check if the bond is a triple bond.
Returns
-------
bool
True if the bond is a triple bond, False otherwise.
"""
return self.order == 3
[docs]
def is_cis(self) -> bool:
"""
Check if the bond is a cis bond.
Returns
-------
bool
True if the bond is a cis bond, False otherwise.
"""
mol = self.atom1.molecule
if mol is None:
raise ValueError(
"Only atoms that are part of a Molecule have information about connectivity."
)
return mol.is_cis(self)
[docs]
def is_trans(self) -> bool:
"""
Check if the bond is a trans bond.
Returns
-------
bool
True if the bond is a trans bond, False otherwise.
"""
mol = self.atom1.molecule
if mol is None:
raise ValueError(
"Only atoms that are part of a Molecule have information about connectivity."
)
return mol.is_trans(self)
[docs]
def cis(self):
"""
Make the bond a cis bond.
Returns
-------
Bond
The bond itself.
"""
mol = self.atom1.molecule
if mol is None:
raise ValueError(
"Only atoms that are part of a Molecule have information about connectivity."
)
mol.cis(self)
return self
[docs]
def trans(self):
"""
Make the bond a trans bond.
Returns
-------
Bond
The bond itself.
"""
mol = self.atom1.molecule
if mol is None:
raise ValueError(
"Only atoms that are part of a Molecule have information about connectivity."
)
mol.trans(self)
return self
[docs]
def compute_length(self) -> float:
"""
Compute the bond length.
Returns
-------
float
The bond length.
"""
return self.atom1 - self.atom2
[docs]
def get_other_atom(self, atom: Atom) -> Atom:
"""
Get the other atom in the bond.
Parameters
----------
atom : Atom
The atom to get the other atom for.
Returns
-------
Atom
The other atom in the bond.
Raises
------
ValueError
If the atom is not in the bond.
"""
if atom == self.atom1:
return self.atom2
elif atom == self.atom2:
return self.atom1
else:
raise ValueError("Atom not in bond.")
[docs]
def to_vector(self) -> "np.ndarray":
"""
Convert the bond to a vector.
Returns
-------
ndarray
The bond as a vector from atom1 to atom2.
"""
vec = np.zeros(3)
vec[0] = self.atom2.coord[0] - self.atom1.coord[0]
vec[1] = self.atom2.coord[1] - self.atom1.coord[1]
vec[2] = self.atom2.coord[2] - self.atom1.coord[2]
return vec
[docs]
def to_list(self) -> list:
"""
Convert the bond to a list of
atom1, atom2, bond_order.
Returns
-------
list
The bond as a list.
"""
return [self.atom1, self.atom2, self.order]
[docs]
def to_tuple(self) -> tuple:
"""
Convert the bond to a tuple of
atom1, atom2, bond_order.
Returns
-------
tuple
The bond as a tuple.
"""
return (self.atom1, self.atom2, self.order)
def __iter__(self):
yield self.atom1
yield self.atom2
def __getitem__(self, idx):
if idx == 0:
return self.atom1
elif idx == 1:
return self.atom2
else:
raise IndexError("Bond only has two atoms")
def __repr__(self) -> str:
return f"Bond({self.atom1}, {self.atom2})"
def __str__(self) -> str:
return f"({self.atom1} {self.__linkers.get(self.order, '?') } {self.atom2})"
def __eq__(self, other):
a = self.atom1 == other[0] and self.atom2 == other[1]
b = self.atom1 == other[1] and self.atom2 == other[0]
return a or b
def __ne__(self, other):
return not self.__eq__(other)
def __hash__(self):
return hash(self.atom1) + hash(self.atom2)
def __len__(self):
return 2
def __contains__(self, item):
return item == self.atom1 or item == self.atom2
def __list__(self):
return [self.atom1, self.atom2]
def __tuple__(self):
return (self.atom1, self.atom2)
if __name__ == "__main__":
s = bio.PDBParser().get_structure(
"test", "/Users/noahhk/GIT/biobuild/support/examples/GLC.pdb"
)
_s = Structure.from_biopython(s)
__s = _s.to_biopython()
print(_s)