"""
Basic structure related functions
"""
import numpy as np
import Bio.PDB as bio
import buildamol.utils.auxiliary as aux
origin = np.array([0, 0, 0], dtype=np.float64)
"""
The origin of the 3D coordinate system
"""
x_axis = np.array([1, 0, 0], dtype=np.float64)
"""
Unit vector along the x-axis
"""
y_axis = np.array([0, 1, 0], dtype=np.float64)
"""
Unit vector along the y-axis
"""
z_axis = np.array([0, 0, 1], dtype=np.float64)
"""
Unit vector along the z-axis
"""
xy_plane = np.array([0, 0, 1], dtype=np.float64)
"""
Unit vector normal to the xy-plane
"""
xz_plane = np.array([0, 1, 0], dtype=np.float64)
"""
Unit vector normal to the xz-plane
"""
yz_plane = np.array([1, 0, 0], dtype=np.float64)
"""
Unit vector normal to the yz-plane
"""
[docs]
def atom_make_full_id(self):
"""
A self-adjusting full_id for an Biopython Atom
"""
p = self.get_parent()
if p:
return (*p.full_id, (self.id, self.altloc))
else:
return (self.id, self.altloc)
[docs]
def residue_make_full_id(self):
"""
A self-adjusting full_id for an Biopython Residue
"""
p = self.get_parent()
if p:
return (*p.full_id, self._id)
else:
return self._id
[docs]
def chain_make_full_id(self):
"""
A self-adjusting full_id for an Biopython Chain
"""
p = self.get_parent()
if p:
return (*p.full_id, self.id)
return (self.id,)
[docs]
def model_make_full_id(self):
"""
A self-adjusting full_id for an Biopython Model
"""
p = self.get_parent()
if p:
return (p.id, self.id)
return (self.id,)
# --------------------------- POSSIBLE DELETE ---------------------------
# the whole set_full_id and whatever can probably be deleted since
# we are using our own wrapper around the biopython structure anyway
# --------------------------- POSSIBLE DELETE ---------------------------
[docs]
def set_full_id(self, value):
pass
bio.Atom.Atom.full_id = property(atom_make_full_id, set_full_id)
bio.Residue.Residue.full_id = property(residue_make_full_id, set_full_id)
bio.Chain.Chain.full_id = property(chain_make_full_id, set_full_id)
bio.Model.Model.full_id = property(model_make_full_id, set_full_id)
# --------------------------- POSSIBLE DELETE ---------------------------
[docs]
def make_empty_structure(id: str = "empty"):
"""
Make an empty PDB structure with a single model and chain.
Returns
-------
structure : Bio.PDB.Structure
The empty structure
"""
s = bio.Structure.Structure(id)
m = bio.Model.Model(0)
s.add(m)
c = bio.Chain.Chain("A")
m.add(c)
return s
[docs]
def rename_residue(residue: "bio.Residue.Residue", new_name: str):
"""
Rename a biopython residue object.
This happens in-place.
Parameters
----------
residue : Bio.PDB.Residue.Residue
The residue to rename.
new_name : str
The new name of the residue.
Returns
-------
residue : Bio.PDB.Residue.Residue
The renamed residue.
"""
residue.resname = new_name
residue.id = ("H_" + new_name, *residue.id[1:])
for atom in residue.get_atoms():
atom.full_id = (*residue.full_id, atom.full_id[-1])
return residue
[docs]
def vector_between(atom1, atom2):
"""
Compute the vector between two atoms.
Parameters
----------
atom1 : Bio.PDB.Atom
The first atom
atom2 : Bio.PDB.Atom
The second atom
Returns
-------
vector : numpy.ndarray
The vector between the two atoms
"""
return atom2.coord - atom1.coord
[docs]
def norm_vector(atom1, atom2):
"""
Compute the normalized vector between two atoms.
Parameters
----------
atom1 : Bio.PDB.Atom
The first atom
atom2 : Bio.PDB.Atom
The second atom
Returns
-------
vector : numpy.ndarray
The normalized vector between the two atoms
"""
v = vector_between(atom1, atom2)
return v / np.linalg.norm(v)
[docs]
def plane_vector(vec1, vec2):
"""
Compute the vector of the plane of two vectors.
Parameters
----------
vec1 : numpy.ndarray
The first vector
vec2 : numpy.ndarray
The second vector
Returns
-------
vector : numpy.ndarray
The norm-vector of the plane of the two vectors
"""
v = np.cross(vec1, vec2)
return v / np.linalg.norm(v)
[docs]
def bond_vector(bond):
"""
Compute the vector between two atoms in a bond.
Parameters
----------
bond : Bio.PDB.Bond
The bond
Returns
-------
vector : numpy.ndarray
The vector between the two atoms in the bond
"""
return vector_between(*bond)
[docs]
def compute_angle(atom1, atom2, atom3):
"""
Compute the angle between three atoms.
Parameters
----------
atom1 : Bio.PDB.Atom
The first atom
atom2 : Bio.PDB.Atom
The second atom
atom3 : Bio.PDB.Atom
The third atom
Returns
-------
angle : float
The angle between the three atoms in degrees
"""
return angle_between(atom1.coord, atom2.coord, atom3.coord)
[docs]
def angle_between(coords1, coords2, coords3):
"""
Compute the angle between three atoms.
Parameters
----------
coords1 : numpy.ndarray
The coordinates of the first atom
coords2 : numpy.ndarray
The coordinates of the second atom
coords3 : numpy.ndarray
The coordinates of the third atom
Returns
-------
angle : float
The angle between the three atoms in degrees
"""
a = coords1 - coords2
b = coords3 - coords2
return np.degrees(np.arccos(np.dot(a, b) / (np.linalg.norm(a) * np.linalg.norm(b))))
[docs]
def bond_angle(bond1, bond2):
"""
Compute the angle between two bonds.
Parameters
----------
bond1 : Bio.PDB.Bond
The first bond
bond2 : Bio.PDB.Bond
The second bond
Returns
-------
angle : float
The angle between the two bonds in degrees
"""
if isinstance(bond1[0], np.ndarray):
return angle_between(bond1[0], bond1[1], bond2[1])
else:
return angle_between(bond1[0].coord, bond1[1].coord, bond2[1].coord)
[docs]
def compute_dihedral(atom1, atom2, atom3, atom4):
"""
Compute the dihedral angle between four atoms.
Parameters
----------
atom1 : Bio.PDB.Atom
The first atom
atom2 : Bio.PDB.Atom
The second atom
atom3 : Bio.PDB.Atom
The third atom
atom4 : Bio.PDB.Atom
The fourth atom
Returns
-------
dihedral : float
The dihedral angle between the four atoms in degrees
"""
return dihedral_between(atom1.coord, atom2.coord, atom3.coord, atom4.coord)
[docs]
def dihedral_between(coords1, coords2, coords3, coords4):
"""
Compute the dihedral angle between four points
Parameters
----------
coords1 : numpy.ndarray
The coordinates of the first atom
coords2 : numpy.ndarray
The coordinates of the second atom
coords3 : numpy.ndarray
The coordinates of the third atom
coords4 : numpy.ndarray
The coordinates of the fourth atom
Returns
-------
dihedral : float
The dihedral angle between the four atoms in degrees
"""
ab = coords1 - coords2
bc = coords3 - coords2
cd = coords4 - coords3
# normalize bc so that it does not influence magnitude of vector
# rejections that come next
bc /= np.linalg.norm(bc)
# vector rejections
v = ab - np.dot(ab, bc) * bc
w = cd - np.dot(cd, bc) * bc
# angle between v and w in radians
x = np.dot(v, w)
y = np.dot(np.cross(bc, v), w)
return np.degrees(np.arctan2(y, x))
compute_torsional = compute_dihedral
[docs]
def distance_between(coord1, coord2):
"""
Compute the distance between two 3d coordinates.
Parameters
----------
coord1 : numpy.ndarray
The first coordinate
coord2 : numpy.ndarray
The second coordinate
Returns
-------
distance : float
The euclidean distance between the two coordinates
"""
return np.linalg.norm(coord1 - coord2)
[docs]
def compute_distance(atom1, atom2):
"""
Compute the distance between two atoms.
Parameters
----------
atom1 : Bio.PDB.Atom
The first atom
atom2 : Bio.PDB.Atom
The second atom
Returns
-------
distance : float
The distance between the two atoms
"""
return np.linalg.norm(atom1.coord - atom2.coord)
[docs]
def center_of_gravity(masses, coords):
"""
Compute the center of gravity of a molecule.
Parameters
----------
masses : array-like
The masses of the atoms as an nx1 vector
coords : array-like
The coordinates of the atoms as an nx3 array
Returns
-------
cog : array-like
The center of gravity
"""
return np.average(coords, axis=0, weights=masses)
center_of_mass = center_of_gravity
[docs]
def center_of_geometry(coords):
"""
Compute the center of geometry of a molecule.
Parameters
----------
coords : array-like
The coordinates of the atoms as an nx3 array
Returns
-------
cog : array-like
The center of geometry
"""
return np.mean(coords, axis=0)
[docs]
def adjust_bond_length(bond, new_length: float):
"""
Adjust the bond length of a bond.
Parameters
----------
bond : buildamol.Bond
The bond to adjust
new_length : float
The new bond length
Returns
-------
adjusted_bond : Bio.PDB.Bond
The adjusted bond
"""
atom1, atom2 = bond
vec = atom2.coord - atom1.coord
vec /= np.linalg.norm(vec)
atom2.coord = atom1.coord + new_length * vec
return bond
[docs]
def adjust_distance(coord1, coord2, new_length: float):
"""
Adjust the distance between two points.
Parameters
----------
coord1 : array-like
The first point
coord2 : array-like
The second point
new_length : float
The new distance
Returns
-------
new_coord2 : array-like
The new coordinates of the second point
"""
vec = coord2 - coord1
vec /= np.linalg.norm(vec)
return coord1 + new_length * vec
[docs]
def rotate_molecule(
molecule, angle: float, axis: np.ndarray, center: np.ndarray = None
):
"""
Rotate a molecule around an axis by a given angle.
Parameters
----------
molecule : Bio.PDB.Structure
The molecule to rotate
axis : array-like
The axis to rotate around
angle : float
The angle to rotate by (in radians)
Returns
-------
rotated_molecule : Bio.PDB.Structure
The rotated molecule
"""
atoms = list(molecule.get_atoms())
coords = np.array([a.coord for a in atoms])
if center is not None:
coords -= center
new_coords = rotate_coords(coords=coords, angle=angle, axis=axis)
if center is not None:
new_coords += center
for a, c in zip(atoms, new_coords):
a.set_coord(c)
return molecule
[docs]
def rotate_coords(
coords: np.ndarray,
angle: float,
axis: np.ndarray,
):
"""
Rotate a set of coordinates around an axis by a given angle.
Parameters
----------
coords : array-like
The coordinates to rotate
angle : float
The angle to rotate by (in radians)
axis : array-like
The axis to rotate around
Returns
-------
rotated_coords : array-like
The rotated coordinates
"""
rot = _rotation_matrix(axis, angle)
return np.dot(np.asarray(coords), rot.T)
[docs]
def superimpose_points(
coords: np.ndarray,
points1: tuple,
points2: tuple,
):
"""
Superimpose two structures by aligning two sets of points.
This will place the first point of points1 on the first point of points2 and so on, while transforming the
rest of the points accordingly.
Parameters
----------
coords : array-like
The coordinates of the atoms to move and align. The coordinates of the participants of Bond1 must be
part of the coordinates.
points1 : tuple
The first set of points. This must be a tuple of two or three coordinates (numpy.ndarray) which are part of the coords.
points2 : tuple
The second bond. This must be a tuple of two or three coordinates (numpy.ndarray), depending on how many were provided as points1 (the same number of points).
Returns
-------
new_coords : array-like
The new coordinates of the atoms
"""
_old_coords = np.array(points1)
_new_coords = np.array(points2)
if len(_old_coords) != len(_new_coords):
raise ValueError(
"The number of points in points1 and points2 must be the same."
)
# compute translation vector
old_centroid = _old_coords.mean(axis=0)
new_centroid = _new_coords.mean(axis=0)
_relative_old_coords = _old_coords - old_centroid
_relative_new_coords = _new_coords - new_centroid
H = (_relative_old_coords).T.dot(_relative_new_coords)
U, S, VT = np.linalg.svd(H)
R = VT.T @ U.T
# Check for reflection
if np.linalg.det(R) < 0:
VT[-1, :] *= -1
R = VT.T @ U.T
new_coords = (R @ (coords - old_centroid).T).T + new_centroid
return new_coords
# get the bond vectors
v1 = points1[1] - points1[0]
v2 = points2[1] - points2[0]
# normalize the bond vectors
v1 /= np.linalg.norm(v1)
v2 /= np.linalg.norm(v2)
# compute the rotation axis
axis = np.cross(v1, v2)
axis /= np.linalg.norm(axis)
# compute the angle between the bond vectors
angle = np.arctan(np.dot(v1, v2))
# rotate the coordinates
centroid = np.mean([points2[0], points2[1]], axis=0)
new_coords = coords + centroid
new_coords = rotate_coords(new_coords, angle, axis)
new_coords -= centroid
return new_coords
# # get the bond vectors
# bond1_atom1_idx = int(np.where(coords == bond1[0])[0])
# v0 = bond2[0] - bond1[0]
# v1 = bond1[1] - bond1[0] - v0
# v2 = bond2[1] - bond2[0] - v0
# # normalize the bond vectors
# v1 /= np.linalg.norm(v1)
# v2 /= np.linalg.norm(v2)
# # compute the rotation axis
# axis = np.cross(v1, v2)
# axis /= np.linalg.norm(axis)
# # compute the angle between the bond vectors
# angle = np.arctan(np.dot(v1, v2))
# # rotate the coordinates
# new_coords = coords - v0 - bond1[0]
# new_coords = rotate_coords(new_coords, angle, axis)
# new_coords += bond1[0]
# return new_coords
[docs]
def plane_of_points(points) -> np.ndarray:
"""
Compute the plane vector that best describes the plane defined by a set of points
Parameters
----------
points : array-like
The points that define the plane
Returns
-------
plane : np.ndarray
The plane vector
"""
points = np.array(points)
centroid = np.mean(points, axis=0)
points -= centroid
_, _, v = np.linalg.svd(points)
return v[2]
[docs]
def principal_axis(coords: np.ndarray) -> np.ndarray:
"""
Compute the principle axis of a set of coordinates
The principle axis is the eigenvector of the covariance matrix with the largest eigenvalue
Parameters
----------
coords : array-like
The coordinates of the atoms
Returns
-------
axis : np.ndarray
The principle axis
"""
coords = coords - np.mean(coords, axis=0)
_, _, v = np.linalg.svd(coords)
return v[0]
[docs]
def length_along_axis(coords, axis) -> float:
"""
Compute the length of a set of coordinates along an axis.
Parameters
----------
coords : array-like
The coordinates of the atoms
axis : array-like
The axis to compute the length along
Returns
-------
length : float
The length of the coordinates along the axis
"""
return np.dot(coords, axis).ptp()
@aux.njit
def _numba_wrapper_rotate_coords(
coords: np.ndarray,
angle: float,
axis: np.ndarray,
):
rot = _numba_wrapper_rotation_matrix(axis, angle)
return np.dot(np.asarray(coords), rot.T)
def _rotate_coords_base_classes(
obj,
angle: float,
axis: np.ndarray,
axis_is_absolute: bool = False,
):
"""
Rotate a set of coordinates around an axis by a given angle.
Parameters
----------
obj
The object to rotate
angle : float
The angle to rotate by (in radians)
axis : array-like
The axis to rotate around
axis_is_absolute : bool
Whether the axis is absolute or relative to the object.
If True, the axis is absolute and the object is rotated around the axis
which will also incur a translation of the object. If False, the axis
is relative to the object and the object is rotated around the axis
without translation.
Returns
-------
rotated_coords : array-like
The rotated coordinates
"""
if hasattr(obj, "get_atoms"):
coords = np.array([a.coord for a in obj.get_atoms()])
if axis_is_absolute:
center = np.zeros(3)
else:
center = coords.mean()
else:
if axis_is_absolute:
center = np.zeros(3)
else:
# since we only have one point
# and we are just rotating around it
# there is not going to be any change
# in the position of the point
return obj
center = obj.coord
coords = obj.coord
rot = _rotation_matrix(axis, angle)
new = np.dot(coords - center, rot.T) + center
if hasattr(obj, "get_atoms"):
for a, c in zip(obj.get_atoms(), new):
a.set_coord(c)
else:
obj.set_coord(new)
return obj
bio.Atom.Atom.rotate = _rotate_coords_base_classes
bio.Residue.Residue.rotate = _rotate_coords_base_classes
bio.Chain.Chain.rotate = _rotate_coords_base_classes
bio.Model.Model.rotate = _rotate_coords_base_classes
bio.Structure.Structure.rotate = _rotate_coords_base_classes
[docs]
def flip_molecule(mol, plane_vector: np.ndarray, center: np.ndarray = None):
"""
Flip a molecule around an axis.
Parameters
----------
mol : Molecule
The molecule to flip
plane_vector : array-like
The vector describing the plane to flip around
Returns
-------
flipped_molecule : Bio.PDB.Structure
The flipped molecule
"""
atoms = list(mol.get_atoms())
coords = np.array([a.coord for a in atoms])
plane_vector = np.array(plane_vector)
if center is not None:
center = np.array(center)
coords -= center
new_coords = flip_coords(coords=coords, plane_vector=plane_vector)
if center is not None:
new_coords += center
for a, c in zip(atoms, new_coords):
a.set_coord(c)
return mol
[docs]
def flip_coords(coords: np.ndarray, plane_vector: np.ndarray):
"""
Flip a set of coordinates around an axis.
Parameters
----------
coords : array-like
The coordinates to flip
plane_vector : array-like
The vector describing the plane to flip around
Returns
-------
flipped_coords : array-like
The flipped coordinates
"""
# project the coordinates onto the plane
# and then subtract the projection from the
# original coordinates to get the flipped coordinates
return coords - 2 * np.dot(coords, plane_vector.T)[:, None] * plane_vector
def _rotation_matrix(axis, angle):
"""
Compute the rotation matrix about an arbitrary axis in 3D
Source
------
Stackoverflow thread: http://stackoverflow.com/questions/6802577/python-rotation-of-3d-vector
Parameters
----------
axis : array-like
The axis to rotate around
angle : float
The angle to rotate by (in radians)
Returns
-------
rotation_matrix : array-like
The rotation matrix
"""
# axis = np.asarray(axis)
# angle = np.asarray(angle)
axis = axis / np.sqrt(np.dot(axis, axis))
a = np.cos(angle / 2.0)
b, c, d = -axis * np.sin(angle / 2.0)
aa, bb, cc, dd = a * a, b * b, c * c, d * d
bc, ad, ac, ab, bd, cd = b * c, a * d, a * c, a * b, b * d, c * d
return np.array(
[
[aa + bb - cc - dd, 2 * (bc + ad), 2 * (bd - ac)],
[2 * (bc - ad), aa + cc - bb - dd, 2 * (cd + ab)],
[2 * (bd + ac), 2 * (cd - ab), aa + dd - bb - cc],
]
)
_numba_wrapper_rotation_matrix = aux.njit(_rotation_matrix)
def _euclidean_distances(X, Y):
"""
Compute the euclidean distances between two sets of points.
"""
result = np.empty((X.shape[0], Y.shape[0]), dtype=X.dtype)
for i in range(X.shape[0]):
for j in range(Y.shape[0]):
result[i, j] = np.sqrt(np.sum((X[i] - Y[j]) ** 2))
return result
_numba_wrapper_euclidean_distances = aux.njit(_euclidean_distances)
def _IC_to_xyz(a, b, c, anchor, r, theta, dihedral):
"""
compute the coordinates of a fourth atom from a proper internal coordinate
system and the other three atom coordinates.
Parameters
----------
a, b, c : np.ndarray
coordinates of the other three atoms
anchor : np.ndarray
coordinates of the anchor atom relative to which the new coordinate should be calculated
r : float
bond length of the new atom relative to the anchor
theta : float
bond angle between the new atom and its plane partners
dihedral : float
dihedral angle of the internal coordinates
"""
ab = b - a
bc = c - b
# compute normalized bond vectors for available atoms
ab /= np.linalg.norm(ab)
bc /= np.linalg.norm(bc)
# compute plane vector for atoms 1-2-3
plane_abc = np.cross(ab, bc)
plane_abc /= np.linalg.norm(plane_abc)
# rotate the plane vector around the middle bond (2-3) to get the plane 2-3-4
_rot = _rotation_matrix(bc, dihedral)
plane_bcd = np.dot(_rot, plane_abc)
plane_bcd /= np.linalg.norm(plane_bcd)
# rotate the middle bond around the new plane
_rot = _rotation_matrix(plane_bcd, theta)
cd = np.dot(_rot, bc)
cd /= np.linalg.norm(cd)
# compute the coordinates of the fourth atom
d = anchor + r * cd
return d
@aux.njit
def _numba_wrapper_IC_to_xyz(a, b, c, anchor, r, theta, dihedral):
ab = b - a
bc = c - b
# compute normalized bond vectors for available atoms
ab /= np.linalg.norm(ab)
bc /= np.linalg.norm(bc)
# compute plane vector for atoms 1-2-3
plane_abc = np.cross(ab, bc)
plane_abc /= np.linalg.norm(plane_abc)
# rotate the plane vector around the middle bond (2-3) to get the plane 2-3-4
_rot = _numba_wrapper_rotation_matrix(bc, dihedral)
plane_bcd = np.dot(_rot, plane_abc)
plane_bcd /= np.linalg.norm(plane_bcd)
# rotate the middle bond around the new plane
_rot = _numba_wrapper_rotation_matrix(plane_bcd, theta)
cd = np.dot(_rot, bc)
cd /= np.linalg.norm(cd)
# compute the coordinates of the fourth atom
d = anchor + r * cd
return d