# Source code for buildamol.extensions.bio.nucleic_acids.simple_sequences
"""
Functions to work with simple DNA and RNA molecules
"""
import buildamol.core as core
import buildamol.resources as resources
# Runs at import time, before any compound is requested below.
# NOTE(review): presumably this makes the single-letter nucleotide compounds
# available to `resources.get_compound` -- confirm against buildamol.resources.
resources.load_nucleotides()
# Names exported by `from ... import *`; `_construct_from_seq` and
# `nucleotide_linkage` are intentionally not listed here.
__all__ = ["dna", "rna", "nucleic_acid", "get_5prime", "get_3prime"]
# Linkage with id "phosphodiester" defined on the atom ids C3' and OP3; the
# atoms O3' and HO3' are listed for deletion in the "target".
# NOTE(review): which molecule counts as "target" (and which side each atom id
# belongs to) is defined by `core.linkage` -- confirm there.
nucleotide_linkage = core.linkage(
"C3'", "OP3", delete_in_target=["O3'", "HO3'"], id="phosphodiester"
)
"""
The phosphodiester linkage between nucleotides
"""
# Hand the linkage object to the resources module.
# NOTE(review): presumably this registers it for lookup by id -- confirm.
resources.add_linkage(nucleotide_linkage)
# [docs] -- Sphinx "view source" link marker, not code
def get_5prime(mol: core.Molecule) -> core.Residue:
    """
    Find the residue at the 5' end of a nucleic acid.

    The 5' end is identified through the ``HOP3`` atom: the parent of the
    atom returned by ``mol.get_atom("HOP3", by="id")`` is handed back.

    Parameters
    ----------
    mol : Molecule
        The nucleic acid molecule

    Returns
    -------
    Residue
        The 5' residue (the parent of the ``HOP3`` atom)

    Raises
    ------
    ValueError
        If the ``HOP3`` lookup yields nothing (any falsy result)
    """
    terminal_atom = mol.get_atom("HOP3", by="id")
    if terminal_atom:
        return terminal_atom.parent
    raise ValueError("No 5' residue found based on HOP3 atom")
# [docs] -- Sphinx "view source" link marker, not code
def get_3prime(mol: core.Molecule) -> core.Residue:
    """
    Get the 3' residue of a nucleic acid

    The 3' end is identified through the ``HO3'`` atom: the parent of the
    atom returned by ``mol.get_atom("HO3'", by="id")`` is handed back.

    Parameters
    ----------
    mol : Molecule
        The nucleic acid molecule

    Returns
    -------
    Residue
        The 3' residue (the parent of the ``HO3'`` atom)

    Raises
    ------
    ValueError
        If the ``HO3'`` lookup yields nothing (any falsy result)
    """
    ho3 = mol.get_atom("HO3'", by="id")
    if not ho3:
        # Message fixed: the original read "HO3'atom" (missing space), which
        # was inconsistent with the wording used by get_5prime.
        raise ValueError("No 3' residue found based on HO3' atom")
    return ho3.parent
# [docs] -- Sphinx "view source" link marker, not code
def dna(sequence: str) -> core.Molecule:
    """
    Create a DNA molecule from a sequence

    The sequence is upper-cased, validated against the alphabet ``ACGT``
    and then assembled residue by residue. The upper-cased sequence is
    also used as the id of the returned molecule.

    Parameters
    ----------
    sequence : str
        The DNA sequence (case-insensitive, letters A, C, G, T only)

    Returns
    -------
    Molecule
        The DNA molecule

    Raises
    ------
    ValueError
        If the sequence is empty or contains characters other than
        A, C, G or T
    """
    sequence = sequence.upper()
    # all() is vacuously true for "", so an empty sequence used to slip
    # through validation and crash later with an IndexError on sequence[0].
    if not sequence:
        raise ValueError("Invalid DNA sequence: the sequence is empty")
    if not set(sequence).issubset("ACGT"):
        raise ValueError("Invalid DNA sequence")
    mol = _construct_from_seq(sequence)
    mol.id = sequence
    return mol
# [docs] -- Sphinx "view source" link marker, not code
def rna(sequence: str) -> core.Molecule:
    """
    Create an RNA molecule from a sequence

    The sequence is upper-cased, validated against the alphabet ``ACGU``
    and then assembled residue by residue. The upper-cased sequence is
    also used as the id of the returned molecule.

    Parameters
    ----------
    sequence : str
        The RNA sequence (case-insensitive, letters A, C, G, U only)

    Returns
    -------
    Molecule
        The RNA molecule

    Raises
    ------
    ValueError
        If the sequence is empty or contains characters other than
        A, C, G or U
    """
    sequence = sequence.upper()
    # all() is vacuously true for "", so an empty sequence used to slip
    # through validation and crash later with an IndexError on sequence[0].
    if not sequence:
        raise ValueError("Invalid RNA sequence: the sequence is empty")
    if not set(sequence).issubset("ACGU"):
        raise ValueError("Invalid RNA sequence")
    mol = _construct_from_seq(sequence)
    mol.id = sequence
    return mol
# [docs] -- Sphinx "view source" link marker, not code
def nucleic_acid(sequence: str) -> core.Molecule:
    """
    Create a generic nucleic acid molecule from a sequence (DNA or RNA)

    The sequence is upper-cased, validated against the combined alphabet
    ``ACGTU`` and then assembled residue by residue. The upper-cased
    sequence is also used as the id of the returned molecule.

    Parameters
    ----------
    sequence : str
        The nucleic acid sequence (case-insensitive, letters A, C, G, T, U)

    Returns
    -------
    Molecule
        The nucleic acid molecule

    Raises
    ------
    ValueError
        If the sequence is empty or contains characters other than
        A, C, G, T or U
    """
    sequence = sequence.upper()
    # all() is vacuously true for "", so an empty sequence used to slip
    # through validation and crash later with an IndexError on sequence[0].
    if not sequence:
        raise ValueError("Invalid nucleic acid sequence: the sequence is empty")
    if not set(sequence).issubset("ACGTU"):
        raise ValueError("Invalid nucleic acid sequence")
    mol = _construct_from_seq(sequence)
    mol.id = sequence
    return mol
def _construct_from_seq(sequence: str) -> core.Molecule:
    """
    Assemble a molecule by chaining one compound per sequence character.

    The compound for the first character becomes the growing chain and gets
    ``nucleotide_linkage`` set as its linkage; the compound for every
    following character is then attached to it in sequence order.

    Parameters
    ----------
    sequence : str
        The characters to look up via ``resources.get_compound``. No
        validation happens here; an empty string raises ``IndexError``
        on the first-character lookup.

    Returns
    -------
    Molecule
        The chain that started from the first character's compound
    """
    first, remainder = sequence[0], sequence[1:]
    chain = resources.get_compound(first)
    chain.set_linkage(nucleotide_linkage)
    for letter in remainder:
        chain.attach(resources.get_compound(letter))
    return chain