Source code for biobuild.utils.pdb

"""
Auxiliary tools for PDB files.
"""

from tabulate import tabulate

__amino_acids = set(
    (
        "ALA",
        "ARG",
        "ASN",
        "ASP",
        "CYS",
        "GLN",
        "GLU",
        "GLY",
        "HIS",
        "ILE",
        "LEU",
        "LYS",
        "MET",
        "PHE",
        "PRO",
        "SER",
        "THR",
        "TRP",
        "TYR",
        "VAL",
        "CSE",  # selenocysteines
        "SEC",
    )
)



[docs]
def write_pdb(mol, filename, symmetric: bool = True):
    """
    Write a molecule to a PDB file.

    Parameters
    ----------
    mol : Molecule
        The molecule to write.
    filename : str
        The filename to write to.
    symmetric : bool, optional
        Whether to write the molecule in a symmetric way, by default True.
    """
    with open(filename, "w") as f:
        f.write(make_atoms_table(mol))
        f.write("\n")
        f.write(make_connect_table(mol, symmetric))
        f.write("\nEND\n")




[docs]
def write_connect_lines(mol, filename):
    """
    Write "CONECT" lines to a PDB file.
    This is necessary since Biopython by default does not do that...

    Parameters
    ----------
    mol : bb.Molecule
        The molecule to generate the connectivity for.
    filename : str
        The filename to write to.
    """
    with open(filename, "r") as f:
        c = f.read()
    with open(filename, "w") as f:
        f.write(c.replace("END", "").rstrip())
        f.write("\n")
        f.write(make_connect_table(mol))
        f.write("\nEND\n")




[docs]
def parse_connect_lines(filename):
    """
    Parse "CONECT" lines from a PDB file.
    This is necessary since Biopython by default does not do that...

    Parameters
    ----------
    filename : str
        The filename to parse.

    Returns
    -------
    bonds: list
        A list of tuples of atom serial numbers that are bonded.
    """
    with open(filename, "r") as f:
        lines = f.readlines()
    bonds = {}
    known_bonds = set()
    for line in lines:
        if line.startswith("CONECT"):
            # split the line into tokens of length 5
            line = line[6:]
            tokens = [
                line[i : i + 5].strip()
                for i in range(0, len(line), 5)
                if len(line[i : i + 5].strip()) > 0
            ]

            atom_a = int(tokens[0])
            for token in tokens[1:]:
                b = (atom_a, int(token))
                # make sure we don't add the same bond twice
                if b[::-1] in known_bonds:
                    continue
                bonds.setdefault(b, 0)
                bonds[b] += 1
                known_bonds.add(b)
    return [(*k, v) for k, v in bonds.items()]




[docs]
def make_connect_table(mol, symmetric=True):
    """
    Make a "CONECT" table for a PDB file.
    This is necessary since Biopython by default does not do that...

    Parameters
    ----------
    mol : Molecule
        The molecule to generate the connectivity for.
    symmetric : bool, optional
        Whether to generate symmetric bonds (i.e. if A is bonded to B, then
        B is bonded to A as well). Default is True. And both are written to the file.

    Returns
    -------
    connect_lines : str
        The lines to add to the PDB file.
    """
    connectivity = {}
    for bond in mol.get_bonds():
        a = bond.atom1.serial_number
        b = bond.atom2.serial_number
        if a not in connectivity:
            connectivity[a] = [b] * bond.order
        else:
            connectivity[a].extend([b] * bond.order)

        if symmetric:
            if b not in connectivity:
                connectivity[b] = [a] * bond.order
            else:
                connectivity[b].extend([a] * bond.order)

    lines = []
    for atom in connectivity:
        line = "CONECT" + left_adjust(str(atom), 5)
        for c in connectivity[atom]:
            line += left_adjust(str(c), 5)
            if len(line) > 70:
                lines.append(line)
                line = "CONECT" + left_adjust(str(atom), 5)
        lines.append(line)
    return "\n".join(lines)



# atom_line = "{prefix}{serial}{neg_adj}{element}{id}{altloc}{residue} {chain}{res_serial}{icode}    {x}{y}{z}{occ}{temp}       {seg}{element}{charge}"
atom_line = "{prefix}{serial}{neg_adj}{id}{altloc}{residue} {chain}{res_serial}{icode}    {x}{y}{z}{occ}{temp}       {seg}{element}{charge}"



[docs]
def make_atoms_table(mol):
    """
    Make a PDB atom table

    Parameters
    ----------
    mol : bb.Molecule
        The molecule to generate the table for.

    Returns
    -------
    str
        The table
    """
    lines = []
    for atom in mol.get_atoms():
        neg_adj = " "
        # if len(atom.id) > 3:
        #     neg_adj = ""
        #     altloc_len = 2
        # else:
        #     neg_adj = " "
        # altloc_len = 1
        if atom.pqr_charge is None:
            charge = ""
        else:
            charge = str(int(atom.pqr_charge)) + ("-" if atom.pqr_charge < 0 else "+")

        if atom.get_parent().resname in __amino_acids:
            prefix = "ATOM  "
        else:
            prefix = "HETATM"

        lines.append(
            atom_line.format(
                prefix=prefix,
                serial=left_adjust(str(atom.serial_number), 5),
                neg_adj=neg_adj,
                id=right_adjust(atom.id.upper(), 4),
                # id=right_adjust(
                #     atom.id.replace(atom.element.upper(), "").replace(
                #         atom.element.title(), ""
                #     ),
                #     2,
                # ),
                altloc=right_adjust(atom.altloc, 1),
                residue=left_adjust(atom.get_parent().resname, 3),
                chain=atom.get_parent().get_parent().id,
                # resseq=left_adjust(" ", 3),
                res_serial=left_adjust(str(atom.get_parent().id[1]), 3),
                icode=" ",  # atom.get_parent().id[2],
                x=left_adjust(f"{atom.coord[0]:.3f}", 8),
                y=left_adjust(f"{atom.coord[1]:.3f}", 8),
                z=left_adjust(f"{atom.coord[2]:.3f}", 8),
                # occ=left_adjust("1.00", 6),
                occ=left_adjust(f"{atom.occupancy:.2f}", 6),
                # temp=left_adjust("0.00", 6),
                temp=left_adjust(f"{atom.bfactor:.2f}", 6),
                seg=right_adjust("", 3),
                element=left_adjust(atom.element.upper(), 2),
                charge=left_adjust(charge, 2),
            )
        )
    return "\n".join(lines)




[docs]
def right_adjust(s, n):
    """
    Right adjust a string to a certain length.

    Parameters
    ----------
    s : str
        The string to adjust.
    n : int
        The length to adjust to.

    Returns
    -------
    str
        The adjusted string.
    """
    return s + " " * (n - len(s))




[docs]
def left_adjust(s, n):
    """
    Left adjust a string to a certain length.

    Parameters
    ----------
    s : str
        The string to adjust.
    n : int
        The length to adjust to.

    Returns
    -------
    str
        The adjusted string.
    """
    return " " * (n - len(s)) + s