Source code for biobuild.utils.pdb

"""
Auxiliary tools for PDB files.
"""

from tabulate import tabulate

__amino_acids = set(
    (
        "ALA",
        "ARG",
        "ASN",
        "ASP",
        "CYS",
        "GLN",
        "GLU",
        "GLY",
        "HIS",
        "ILE",
        "LEU",
        "LYS",
        "MET",
        "PHE",
        "PRO",
        "SER",
        "THR",
        "TRP",
        "TYR",
        "VAL",
        "CSE",  # selenocysteines
        "SEC",
    )
)


[docs] def write_pdb(mol, filename, symmetric: bool = True): """ Write a molecule to a PDB file. Parameters ---------- mol : Molecule The molecule to write. filename : str The filename to write to. symmetric : bool, optional Whether to write the molecule in a symmetric way, by default True. """ with open(filename, "w") as f: f.write(make_atoms_table(mol)) f.write("\n") f.write(make_connect_table(mol, symmetric)) f.write("\nEND\n")
[docs] def write_connect_lines(mol, filename): """ Write "CONECT" lines to a PDB file. This is necessary since Biopython by default does not do that... Parameters ---------- mol : bb.Molecule The molecule to generate the connectivity for. filename : str The filename to write to. """ with open(filename, "r") as f: c = f.read() with open(filename, "w") as f: f.write(c.replace("END", "").rstrip()) f.write("\n") f.write(make_connect_table(mol)) f.write("\nEND\n")
[docs] def parse_connect_lines(filename): """ Parse "CONECT" lines from a PDB file. This is necessary since Biopython by default does not do that... Parameters ---------- filename : str The filename to parse. Returns ------- bonds: list A list of tuples of atom serial numbers that are bonded. """ with open(filename, "r") as f: lines = f.readlines() bonds = {} known_bonds = set() for line in lines: if line.startswith("CONECT"): # split the line into tokens of length 5 line = line[6:] tokens = [ line[i : i + 5].strip() for i in range(0, len(line), 5) if len(line[i : i + 5].strip()) > 0 ] atom_a = int(tokens[0]) for token in tokens[1:]: b = (atom_a, int(token)) # make sure we don't add the same bond twice if b[::-1] in known_bonds: continue bonds.setdefault(b, 0) bonds[b] += 1 known_bonds.add(b) return [(*k, v) for k, v in bonds.items()]
[docs] def make_connect_table(mol, symmetric=True): """ Make a "CONECT" table for a PDB file. This is necessary since Biopython by default does not do that... Parameters ---------- mol : Molecule The molecule to generate the connectivity for. symmetric : bool, optional Whether to generate symmetric bonds (i.e. if A is bonded to B, then B is bonded to A as well). Default is True. And both are written to the file. Returns ------- connect_lines : str The lines to add to the PDB file. """ connectivity = {} for bond in mol.get_bonds(): a = bond.atom1.serial_number b = bond.atom2.serial_number if a not in connectivity: connectivity[a] = [b] * bond.order else: connectivity[a].extend([b] * bond.order) if symmetric: if b not in connectivity: connectivity[b] = [a] * bond.order else: connectivity[b].extend([a] * bond.order) lines = [] for atom in connectivity: line = "CONECT" + left_adjust(str(atom), 5) for c in connectivity[atom]: line += left_adjust(str(c), 5) if len(line) > 70: lines.append(line) line = "CONECT" + left_adjust(str(atom), 5) lines.append(line) return "\n".join(lines)
# atom_line = "{prefix}{serial}{neg_adj}{element}{id}{altloc}{residue} {chain}{res_serial}{icode} {x}{y}{z}{occ}{temp} {seg}{element}{charge}" atom_line = "{prefix}{serial}{neg_adj}{id}{altloc}{residue} {chain}{res_serial}{icode} {x}{y}{z}{occ}{temp} {seg}{element}{charge}"
[docs] def make_atoms_table(mol): """ Make a PDB atom table Parameters ---------- mol : bb.Molecule The molecule to generate the table for. Returns ------- str The table """ lines = [] for atom in mol.get_atoms(): neg_adj = " " # if len(atom.id) > 3: # neg_adj = "" # altloc_len = 2 # else: # neg_adj = " " # altloc_len = 1 if atom.pqr_charge is None: charge = "" else: charge = str(int(atom.pqr_charge)) + ("-" if atom.pqr_charge < 0 else "+") if atom.get_parent().resname in __amino_acids: prefix = "ATOM " else: prefix = "HETATM" lines.append( atom_line.format( prefix=prefix, serial=left_adjust(str(atom.serial_number), 5), neg_adj=neg_adj, id=right_adjust(atom.id.upper(), 4), # id=right_adjust( # atom.id.replace(atom.element.upper(), "").replace( # atom.element.title(), "" # ), # 2, # ), altloc=right_adjust(atom.altloc, 1), residue=left_adjust(atom.get_parent().resname, 3), chain=atom.get_parent().get_parent().id, # resseq=left_adjust(" ", 3), res_serial=left_adjust(str(atom.get_parent().id[1]), 3), icode=" ", # atom.get_parent().id[2], x=left_adjust(f"{atom.coord[0]:.3f}", 8), y=left_adjust(f"{atom.coord[1]:.3f}", 8), z=left_adjust(f"{atom.coord[2]:.3f}", 8), # occ=left_adjust("1.00", 6), occ=left_adjust(f"{atom.occupancy:.2f}", 6), # temp=left_adjust("0.00", 6), temp=left_adjust(f"{atom.bfactor:.2f}", 6), seg=right_adjust("", 3), element=left_adjust(atom.element.upper(), 2), charge=left_adjust(charge, 2), ) ) return "\n".join(lines)
[docs] def right_adjust(s, n): """ Right adjust a string to a certain length. Parameters ---------- s : str The string to adjust. n : int The length to adjust to. Returns ------- str The adjusted string. """ return s + " " * (n - len(s))
[docs] def left_adjust(s, n): """ Left adjust a string to a certain length. Parameters ---------- s : str The string to adjust. n : int The length to adjust to. Returns ------- str The adjusted string. """ return " " * (n - len(s)) + s