Source code for buildamol.extensions.bio.glycans.glycan

from .iupac import IUPACParser
from buildamol import core, resources

resources.load_sugars()

__all__ = ["glycan"]


def glycan(iupac: str, id: str = "UNK") -> core.Molecule:
    """
    Build a glycan molecule from its IUPAC string representation.

    The string is split into segments by an ``IUPACParser`` and the
    segments are then assembled into a single molecule.

    Parameters
    ----------
    iupac : str
        The IUPAC string describing the glycan
    id : str
        The id to assign to the resulting molecule

    Returns
    -------
    Molecule
        The assembled glycan molecule
    """
    return _segments_to_mol(id, IUPACParser().parse(iupac))
def _load_residue(name):
    """
    Build the molecule for a single residue name.

    Parameters
    ----------
    name : str
        The residue name, with any ``@...`` suffix already removed

    Returns
    -------
    Molecule
        The molecule obtained from ``core.molecule`` for that name
    """
    # a leading "b-" in front of an all-uppercase name is dropped before the lookup
    if name.startswith("b-") and name[2:].isupper():
        name = name[2:]

    residue = core.molecule(name)
    # core.molecule may hand back a list, in which case the first entry is used
    if isinstance(residue, list):
        residue = residue[0]

    # if we did not get the compound from the PDBE compounds,
    # we probably got them from PubChem, in which case we need to autolabel them
    if not resources.has_compound(residue.id):
        residue.autolabel()
    return residue


def _segments_to_mol(id, glycan_segments, _topology=None):
    """
    Make a molecule from a list of glycan segments
    that were generated by the IUPACParser class

    Parameters
    ----------
    id : str
        The id of the molecule
    glycan_segments : list
        A list of glycan segments. Each segment is unpacked as
        ``(first, second, link)``, where ``first`` and ``second`` are residue
        identifiers (anything after an ``@`` is ignored when looking up the
        residue) and ``link`` is the id of the linkage between them.
    _topology
        The topology to take linkage patches from. If not given, the
        default topology from ``resources`` is used. Linkages that the
        topology does not know yet are created and added to it.

    Returns
    -------
    Molecule
        The molecule

    Raises
    ------
    ValueError
        If `glycan_segments` is empty, since there is nothing to build.
    """
    # NOTE(review): this is a truthiness test, so a falsy topology object is
    # replaced by the default as well -- kept as-is, confirm whether
    # `is None` is what is intended here.
    if not _topology:
        _topology = resources.get_default_topology()

    # Check that all segments have a known patch
    for segment in glycan_segments:
        link = segment[-1]
        if not _topology.has_patch(link):
            # make a linkage; atom names are derived from the first two
            # characters of the link id
            _link = core.linkage(
                f"O{link[0]}",
                f"C{link[1]}",
                [f"HO{link[0]}"],
                [f"O{link[1]}", f"HO{link[1]}"],
                id=link,
            )
            _topology.add_patch(_link)

    mol = None
    # maps each residue identifier already in `mol` to its 1-based index,
    # assigned in order of first appearance
    residue_id_mapping = {}
    for first, second, link in glycan_segments:
        if first in residue_id_mapping:
            # residue is already part of the growing molecule
            at_residue = residue_id_mapping[first]
            first_mol = mol
        else:
            first_mol = _load_residue(first.split("@")[0])
            residue_id_mapping[first] = len(residue_id_mapping) + 1
            at_residue = None

        if second in residue_id_mapping:
            other_residue = residue_id_mapping[second]
            second_mol = mol
        else:
            second_mol = _load_residue(second.split("@")[0])
            residue_id_mapping[second] = len(residue_id_mapping) + 1
            other_residue = None

        # the first residue of the first segment seeds the molecule;
        # compare against None so the check never depends on Molecule truthiness
        if mol is None:
            mol = first_mol

        mol.attach(
            second_mol,
            link,
            at_residue=at_residue,
            other_residue=other_residue,
            _topology=_topology,
        )

    if mol is None:
        raise ValueError(
            "Cannot build a glycan molecule from an empty list of segments"
        )

    mol.id = id
    return mol


if __name__ == "__main__":
    iupac = "Gal(a1-4)Gal(b1-4)GlcNAc(b1-6)GalNAc(b1-"
    mol = glycan(iupac)
    mol.show()