Source code for buildamol.utils.pdb

"""
Auxiliary tools for PDB files.
"""

__amino_acids = set(
    (
        "ALA",
        "ARG",
        "ASN",
        "ASP",
        "CYS",
        "GLN",
        "GLU",
        "GLY",
        "HIS",
        "ILE",
        "LEU",
        "LYS",
        "MET",
        "PHE",
        "PRO",
        "SER",
        "THR",
        "TRP",
        "TYR",
        "VAL",
        "CSE",  # selenocysteines
        "SEC",
    )
)


[docs] def find_models(filename): """ Get the models from a PDB file. Parameters ---------- filename : str The filename to read. Returns ------- list A list of models. """ with open(filename, "r") as f: lines = f.readlines() models = [] for line in lines: if line.startswith("MODEL"): models.append(line.replace("MODEL", "").strip()) return models
[docs] def write_pdb(mol, filename, symmetric: bool = True): """ Write a molecule to a PDB file. Parameters ---------- mol : Molecule The molecule to write. filename : str The filename to write to. symmetric : bool, optional Whether to write the molecule in a symmetric way, by default True. """ with open(filename, "w") as f: if len(mol.models) > 1: for model in mol.get_models(): f.write(f"MODEL {model.id}\n") mol.set_model(model) f.write(make_atoms_table(mol)) f.write("\nENDMDL\n") f.write(make_connect_table(mol, symmetric)) f.write("\nEND\n") else: f.write(make_atoms_table(mol)) f.write("\n") f.write(make_connect_table(mol, symmetric)) f.write("\nEND\n")
[docs] def encode_pdb(mol, symmetric: bool = True) -> str: """ Encode a molecule to a PDB file without actually writing it to disk. Parameters ---------- mol : Molecule The molecule to write. symmetric : bool, optional Whether to write the molecule in a symmetric way, by default True. Returns ------- str The PDB file contents. """ lines = [] if len(mol.models) > 1: for model in mol.get_models(): lines.append(f"MODEL {model.id}") mol.set_model(model) lines.append(make_atoms_table(mol)) lines.append("ENDMDL") lines.append(make_connect_table(mol, symmetric)) lines.append("END") else: lines.append(make_atoms_table(mol)) lines.append(make_connect_table(mol, symmetric)) lines.append("END") return "\n".join(lines)
[docs] def decode_pdb(pdb: str): """ Decode the contents of a PDB file. Parameters ---------- pdb : str The PDB file contents. Returns ------- atoms_table: dict The per-model atom tables connect_table: list A list of tuples of atom serial numbers that are bonded. """ lines = pdb.split("\n") atoms_table = _parse_atom_lines(lines) connect_table = _parse_connect_lines(lines) return atoms_table, connect_table
[docs] def write_connect_lines(mol, filename): """ Write "CONECT" lines to a PDB file. This is necessary since Biopython by default does not do that... Parameters ---------- mol : bam.Molecule The molecule to generate the connectivity for. filename : str The filename to write to. """ with open(filename, "r") as f: c = f.read() with open(filename, "w") as f: f.write(c.replace("END", "").rstrip()) f.write("\n") f.write(make_connect_table(mol)) f.write("\nEND\n")
[docs] def parse_connect_lines(filename): """ Parse "CONECT" lines from a PDB file. This is necessary since Biopython by default does not do that... Parameters ---------- filename : str The filename to parse. Returns ------- bonds: list A list of tuples of atom serial numbers that are bonded. """ with open(filename, "r") as f: return _parse_connect_lines(f)
def _parse_connect_lines(lines): bonds = {} known_bonds = set() for line in lines: if line.startswith("CONECT"): # split the line into tokens of length 5 line = line[6:] tokens = [ line[i : i + 5].strip() for i in range(0, len(line), 5) if len(line[i : i + 5].strip()) > 0 ] atom_a = int(tokens[0]) for token in tokens[1:]: b = (atom_a, int(token)) # make sure we don't add the same bond twice if b[::-1] in known_bonds: continue bonds.setdefault(b, 0) bonds[b] += 1 known_bonds.add(b) return [(*k, v) for k, v in bonds.items()]
[docs] def parse_atom_lines(filename, model=None): with open(filename, "r") as f: return _parse_atom_lines(f, model)
def _parse_atom_lines(lines, model=None): atoms = {-1: []} _model = -1 _skip_lines = False for line in lines: if line.startswith("MODEL"): _model = int(line.split()[-1]) if model is not None and not _model == model: _skip_lines = True else: _skip_lines = False atoms[_model] = [] continue if _skip_lines: continue if line.startswith("ATOM") or line.startswith("HETATM"): atoms[_model].append(_split_atom_line(line)) return atoms def _split_atom_line(line) -> tuple: info = { "serial": int(line[6:11].strip()), "id": line[12:16].strip(), "alt_loc": line[16].strip(), "residue": line[17:20].strip(), "chain": line[21].strip(), "res_seq": int(line[22:26].strip()), "icode": line[26].strip(), "x": line[30:38].strip(), "y": line[38:46].strip(), "z": line[46:54].strip(), "occ": float(line[54:60].strip()), "temp": float(line[60:66].strip()), "element": line[76:78].strip(), "charge": _parse_charge(line), } return info _plusminus = ("+", "-") def _parse_charge(line): charge = line[78:80].strip() if len(charge) == 0: return 0 elif charge[0] in _plusminus: return int(charge) elif charge[-1] in _plusminus: return int(charge[::-1])
[docs] def make_connect_table(mol, symmetric=True): """ Make a "CONECT" table for a PDB file. This is necessary since Biopython by default does not do that... Parameters ---------- mol : Molecule The molecule to generate the connectivity for. symmetric : bool, optional Whether to generate symmetric bonds (i.e. if A is bonded to B, then B is bonded to A as well). Default is True. And both are written to the file. Returns ------- connect_lines : str The lines to add to the PDB file. """ connectivity = {} for bond in mol.get_bonds(): a = bond.atom1.serial_number b = bond.atom2.serial_number if a not in connectivity: connectivity[a] = [b] * bond.order else: connectivity[a].extend([b] * bond.order) if symmetric: if b not in connectivity: connectivity[b] = [a] * bond.order else: connectivity[b].extend([a] * bond.order) lines = [] for atom in connectivity: line = "CONECT" + left_adjust(str(atom), 5) for c in connectivity[atom]: line += left_adjust(str(c), 5) if len(line) > 70: lines.append(line) line = "CONECT" + left_adjust(str(atom), 5) lines.append(line) return "\n".join(lines)
# atom_line = "{prefix}{serial}{neg_adj}{element}{id}{altloc}{residue} {chain}{res_serial}{icode} {x}{y}{z}{occ}{temp} {seg}{element}{charge}" atom_line = "{prefix}{serial}{neg_adj}{id}{altloc}{residue} {chain}{res_serial}{icode} {x}{y}{z}{occ}{temp} {seg}{element}{charge}"
[docs] def make_atoms_table(mol): """ Make a PDB atom table Parameters ---------- mol : bam.Molecule The molecule to generate the table for. Returns ------- str The table """ lines = [] for atom in mol.get_atoms(): new_line = encode_atom(atom) lines.append(new_line) return "\n".join(lines)
[docs] def encode_atom(atom) -> str: """ Make an ATOM line for a PDB file. """ neg_adj = " " # if len(atom.id) > 3: # neg_adj = "" # altloc_len = 2 # else: # neg_adj = " " # altloc_len = 1 if atom.pqr_charge is None or atom.pqr_charge == 0: charge = "" else: charge = str(abs(int(atom.pqr_charge))) + ("-" if atom.pqr_charge < 0 else "+") if atom.get_parent().resname in __amino_acids: prefix = "ATOM " else: prefix = "HETATM" new_line = atom_line.format( prefix=prefix, serial=left_adjust(str(atom.serial_number), 5), neg_adj=neg_adj, id=right_adjust(atom.id.upper()[:4], 4), # id=right_adjust( # atom.id.replace(atom.element.upper(), "").replace( # atom.element.title(), "" # ), # 2, # ), altloc=right_adjust(atom.altloc, 1), residue=left_adjust(atom.get_parent().resname, 3), chain=atom.get_parent().get_parent().id or " ", # resseq=left_adjust(" ", 3), res_serial=left_adjust(str(atom.get_parent().serial_number), 4), icode="", # atom.get_parent().id[2], x=left_adjust(f"{atom.coord[0]:.3f}", 8), y=left_adjust(f"{atom.coord[1]:.3f}", 8), z=left_adjust(f"{atom.coord[2]:.3f}", 8), # occ=left_adjust("1.00", 6), occ=left_adjust(f"{atom.occupancy:.2f}", 6), # temp=left_adjust("0.00", 6), temp=left_adjust(f"{atom.bfactor:.2f}", 6), seg=right_adjust("", 3), element=right_adjust(atom.element.upper(), 2), charge=left_adjust(charge, 2), ) return new_line
[docs] def right_adjust(s, n): """ Right adjust a string to a certain length. Parameters ---------- s : str The string to adjust. n : int The length to adjust to. Returns ------- str The adjusted string. """ return s + " " * (n - len(s))
[docs] def left_adjust(s, n): """ Left adjust a string to a certain length. Parameters ---------- s : str The string to adjust. n : int The length to adjust to. Returns ------- str The adjusted string. """ return " " * (n - len(s)) + s
if __name__ == "__main__": import buildamol as bam out = parse_connect_lines( "/Users/noahhk/GIT/glycosylator/__projects__/solf2/solf2_man5_glycosylated_raw.pdb" ) out pass