import numpy as np
import scipy.optimize as opt
from typing import Union
import biobuild.utils.auxiliary as aux
from biobuild.core import Molecule
[docs]
def scipy_optimize(
env,
steps: int = 1e5,
method: str = "L-BFGS-B",
**kws,
):
"""
Optimize a Rotatron environment through
a simple scipy optimization
Parameters
----------
env : biobuild.optimizers.environments.Rotatron
The environment to optimize
steps : int, optional
The number of steps to take.
method : str, optional
The optimizer to use, by default "L-BFGS-B".
This can be any optimizer from scipy.optimize.minimize
kws : dict, optional
Keyword arguments to pass as options to the optimizer
Returns
-------
solution, evaluation
The angles(s) and evaluation(s) of the best solution(s) found.
"""
x0 = env.action_space.sample()
def loss_fn(x):
state, _eval, *_ = env.step(x)
env.reset()
return _eval
if "bounds" in kws:
bounds = kws.pop("bounds")
elif hasattr(env, "_bounds_tuple"):
bounds = opt.Bounds(*env._bounds_tuple)
else:
bounds = None
kws["maxiter"] = int(steps)
result = opt.minimize(loss_fn, x0, method=method, bounds=bounds, options=kws)
return (
result.x,
result.fun,
)
[docs]
def genetic_optimize(
env,
max_generations: int = 5e2,
stop_if_done: bool = True,
threshold: float = 1e-6,
variation: float = 0.2,
population_size: int = 50,
parents: Union[int, float] = 0.25,
children: Union[int, float] = 0.3,
mutants: Union[int, float] = 0.3,
newcomers: Union[int, float] = 0.15,
variation_cooldown: float = 1,
n_best: int = 1,
):
"""
A simple genetic algorithm for optimizing a Rotatron environment.
Parameters
----------
env : biobuild.optimizers.environments.Rotatron
The environment to optimize
max_generations : int, optional
The maximum number of steps to take.
stop_if_done : bool, optional
Stop the optimization if the environment signals it is done or the solutions have converged.
threshold : float, optional
A thershold to use for convergence of the best solution found.
The algorithm will stop if the variation of the best solution evaluation history
is less than this threshold.
variation : float, optional
The variation to use for the initial action.
population_size : int, optional
The size of the population.
parents : int or float, optional
The number or fraction of parents (elites) to select. The parents are selected from the best solutions. Parents produce offspring and pass to the next generation.
children : int or float, optional
The number or fraction of children to generate from the parents. Children are generated by averaging the parents and adding some noise.
mutants : int or float, optional
The number or fraction of mutants to generate. Mutants are generated by adding noise to parents, therby
generating abarrent clones.
newcomers: int or float, optional
Newcomers are entirely new solution candidates.
variation_cooldown : float, optional
The rate at which the variation is reduced. The variation is reduced by this factor every generation. E.g. 0.95 will reduce the variation by 5% every generation.
n_best : int, optional
The number of best solutions to return at the end of the optimization.
Returns
-------
solution, evaluation
The angles(s) and evaluation(s) of the best solution(s) found.
"""
if isinstance(parents, float):
parents = round(parents * population_size)
if isinstance(children, float):
children = round(children * population_size)
if isinstance(mutants, float):
mutants = round(mutants * population_size)
if isinstance(newcomers, float):
newcomers = round(newcomers * population_size)
while children + mutants + parents + newcomers > population_size:
if newcomers > 1:
newcomers -= 1
elif children > 1:
children -= 1
elif mutants > 1:
mutants -= 1
elif parents > 1:
parents -= 1
while children + mutants + parents + newcomers < population_size:
mutants += 1
if any([n <= 0 for n in [children, mutants, parents, newcomers]]):
raise ValueError(
"n_children, n_mutations, n_parents, and n_newcomers must all be at least 1"
)
n_parents = parents
n_children = children
n_mutants = mutants
n_newcomers = newcomers
blank = env.blank()
if hasattr(env, "_bounds_tuple"):
min_angle, max_angle = env._bounds_tuple
else:
min_angle, max_angle = -np.pi, np.pi
population = np.stack([blank] * population_size)
evals = np.zeros(population_size)
bests = np.zeros(max(10, int(max_generations * 0.3)))
pop_range = np.arange(0, population_size)
parents_range = np.arange(0, n_parents)
children_range = np.arange(0, n_children)
mutations_range = np.arange(0, n_mutants)
newcomers_range = np.arange(0, n_newcomers)
for i in pop_range:
population[i] = env.action_space.sample()
_, evals[i], *_ = env.step(population[i])
env.reset()
steps = 0
while steps < max_generations:
sorting = np.argsort(evals)
parents = population[sorting[:n_parents]]
children = np.stack([blank] * n_children)
for i in children_range:
p1, p2 = np.random.choice(parents_range, size=2, replace=False)
p1, p2 = parents[int(p1)], parents[int(p2)]
children[i] = (p1 + p2) / 2 + np.random.uniform(
-variation / 3, variation / 3, size=blank.shape
)
mutations = np.stack([blank] * n_mutants)
for i in mutations_range:
mutations[i] = parents[np.random.randint(0, n_parents)] + np.random.uniform(
-variation, variation, size=blank.shape
)
newcomers = np.stack([blank] * n_newcomers)
for i in newcomers_range:
newcomers[i] = env.action_space.sample()
population = np.concatenate([parents, children, mutations, newcomers])
population = np.clip(population, min_angle, max_angle)
evals = np.zeros(population_size)
for i in pop_range:
_, evals[i], done, *_ = env.step(population[i])
env.reset(best=True) # (see comment below)
if done and stop_if_done:
break
if done and stop_if_done:
break
# -------------------------------------------------------------------------------------
# Generally I think this should not be here, however
# the very fact that the algorithm was running for 20min+
# for some setups, shows that this probably does not affect
# the results too much. A way of assuring this would be fine, is to also reset the best
# evaluation when resetting the environment...
bests = np.roll(bests, -1)
bests[-1] = np.min(evals)
if (
stop_if_done
and steps > max_generations / 10
and np.var(bests[bests != 0]) < threshold
):
break
# -------------------------------------------------------------------------------------
variation *= variation_cooldown
steps += 1
best = np.argsort(evals)[:n_best]
if n_best == 1:
return population[best[0]], evals[best[0]]
return population[best], evals[best]
class _Particle:
"""
Particle for particle swarm optimization and similar algorithms.
"""
bounds = None
variance = 0.2
def __init__(self, dim):
self.position = np.random.rand(dim) # Initialize position randomly
self.velocity = np.random.rand(dim) # Initialize velocity randomly
self.best_position = self.position.copy() # Personal best position
self.best_fitness = np.inf # Personal best fitness
self.fitness = np.inf # Current fitness
if self.bounds:
def update_position(self):
self.position += self.velocity
self.position = np.clip(self.position, self.bounds[0], self.bounds[1])
else:
def update_position(self):
self.position += self.velocity
self.update_position = update_position.__get__(self)
def update_velocity(self, inertia, cognitive, social, best_particle):
self.velocity = (
inertia * self.velocity
+ cognitive * np.random.rand() * (self.best_position - self.position)
+ social * np.random.rand() * (best_particle.position - self.position)
)
def random_scatter(self) -> np.ndarray:
new = self.position + np.random.uniform(
-self.variance, self.variance, size=self.position.shape
)
if self.bounds:
new = np.clip(new, self.bounds[0], self.bounds[1])
return new
[docs]
def swarm_optimize(
env,
n_particles: int = None,
max_steps: int = 30,
stop_if_done: bool = True,
threshold: float = 1e-6,
w: float = 0.9,
c1: float = 0.5,
c2: float = 0.3,
cooldown_rate: float = 0.99,
n_best: int = 1,
):
"""
Optimize a rotatron environment through a simple particle swarm optimization.
Parameters
----------
env : biobuild.optimizers.environments.Rotatron
The environment to optimize
n_particles : int, optional
The number of particles to use.
Set this to None in order to compute the number of particles
based on the number of rotatable edges in the environment.
max_steps : int, optional
The maximum number of steps to take.
stop_if_done : bool, optional
Stop the optimization if the environment signals it is done or the solutions have converged.
threshold : float, optional
A threshold to use for convergence of the best solution found.
The algorithm will stop if the variation of the best solution evaluation history
is less than this threshold.
w : float, optional
The inertia parameter for the particle swarm optimization.
c1 : float, optional
The cognitive parameter for the particle swarm optimization.
c2 : float, optional
The social parameter for the particle swarm optimization.
cooldown_rate : float, optional
The rate at which the inertia parameter is reduced. The inertia parameter is reduced by this factor every generation. E.g. 0.95 will reduce the inertia parameter by 5% every generation.
n_best : int, optional
The number of best solutions to return at the end of the optimization.
Returns
-------
solution, evaluation
The solution and evaluation for the solution
"""
if n_particles is None:
n_particles = max(10, len(env.rotatable_edges) // 3)
_Particle.bounds = env._bounds_tuple or None
population = [_Particle(env.action_space.shape[0]) for _ in range(n_particles)]
best_particle = population[0]
best_fitness = np.inf
best_solution = np.zeros(env.action_space.shape[0])
steps = 0
while steps < max_steps:
for particle in population:
fitness = env.step(particle.position)[1]
if fitness < particle.best_fitness:
particle.best_fitness = fitness
particle.best_position[:] = particle.position
if fitness < best_fitness:
best_fitness = fitness
best_particle = particle
best_solution[:] = particle.position
env.reset()
for particle in population:
particle.update_velocity(w, c1, c2, best_particle)
particle.update_position()
if stop_if_done:
if np.var([p.best_fitness for p in population]) < threshold:
break
w *= cooldown_rate
steps += 1
if n_best == 1:
return best_solution, best_fitness
else:
best = np.argsort([p.best_fitness for p in population])[:n_best]
return np.array([population[b].best_position for b in best]), np.array(
[population[b].best_fitness for b in best]
)
# THIS IS VERSION2 WHICH WORKS FINE OVERALL BUT IS NOT AS POWERFUL AS THE GENETIC ALGORITHM
# def swarm_optimize(
# env,
# n_particles: int = 30,
# max_steps: int = 30,
# stop_if_done: bool = True,
# threshold: float = 1e-6,
# variation: float = 0.3,
# recycle: float = 0.3,
# recycle_every: int = 5,
# n_best: int = 1,
# ):
# """
# Optimize a rotatron environment through a simple particle swarm optimization.
# Parameters
# ----------
# env : biobuild.optimizers.environments.Rotatron
# The environment to optimize
# n_particles : int, optional
# The number of particles to use.
# max_steps : int, optional
# The maximum number of steps to take.
# stop_if_done : bool, optional
# Stop the optimization if the environment signals it is done or the solutions have converged.
# threshold : float, optional
# A threshold to use for convergence of the best solution found.
# The algorithm will stop if the variation of the best solution evaluation history
# is less than this threshold.
# variation : float, optional
# The variation to use for updating particle positions.
# recycle : float, optional
# The fraction of particles to replace by variations of the best particle when updating the particle positions.
# This will remove the worst particles and replace them with the best particle + some noise.
# recycle_every : int, optional
# The number of steps to take before recycling particles.
# Returns
# -------
# solution, evaluation
# The solution and evaluation for the solution
# """
# blank = env.blank()
# particles = np.stack([blank] * n_particles)
# velocities = np.stack([blank] * n_particles)
# evals = np.zeros(n_particles)
# n_recycle = int(n_particles * recycle)
# particle_range = np.arange(0, n_particles)
# for i in particle_range:
# particles[i] = env.action_space.sample()
# _, evals[i], done, *_ = env.step(particles[i])
# env.reset()
# best = np.argmin(evals)
# best_eval = evals[best]
# best_solution = particles[best]
# steps = 0
# while steps < max_steps:
# for i in particle_range:
# velocities[i] += np.random.normal(0, variation, size=blank.shape)
# velocities[i] *= 0.9
# particles[i] += velocities[i]
# _, evals[i], done, *_ = env.step(particles[i])
# env.reset()
# if evals[i] < best_eval:
# best_eval = evals[i]
# best_solution = particles[i]
# if steps % recycle_every == 0:
# # remove the worst particles
# # and replace them with the best particle+noise
# particles[np.argsort(evals)[n_recycle:]] = best_solution + np.random.normal(
# 0, variation, size=blank.shape
# )
# if stop_if_done and done:
# break
# if stop_if_done and np.var(evals) < threshold:
# break
# steps += 1
# if n_best == 1:
# return best_solution, best_eval
# else:
# best = np.sort(evals)[:n_best]
# return particles[best], evals[best]
# -------------------------------------------------------------------------------------
# This is the original code for swarm_optimize using pyswarms.
# This used to work really well with the old implementations of Rotatron.
# However, it seems to be not adapting well to the new Rotatron so I decided
# to switch to the barebone implementation in numpy alone above.
# -------------------------------------------------------------------------------------
# import pyswarms as ps
# def swarm_optimize(
# env,
# steps: int = 30,
# n: int = 10,
# molecule: "Molecule" = None,
# bounds=(-np.pi, np.pi),
# **kws,
# ):
# """
# Optimize a Rotatron environment through
# a pyswarms swarm optimization
# Parameters
# ----------
# env : biobuild.optimizers.environments.Rotatron
# The environment to optimize
# steps : int, optional
# The number of steps to take, by default 1000
# n : int, optional
# The number of particles to use, by default 10
# molecule : Molecule, optional
# A molecule to apply the optimized solution to directly.
# If None, the solution is returned, by default None. The solution is
# applied in-place to the molecule directly.
# bounds : tuple, optional
# The bounds to use for solutions as a tuple of size 2 with a minimal and maximal angle to allow.
# kws : dict, optional
# Keyword arguments to pass as options to the optimizer
# Returns
# -------
# tuple or Molecule
# If molecule is None, a tuple of the optimized action and the reward for the optimized action.
# If molecule is not None, the optimized molecule.
# """
# x0 = env.action_space.sample()
# if bounds:
# bounds = (np.full(x0.shape, bounds[0]), np.full(x0.shape, bounds[1]))
# def loss_fn(sol):
# costs = np.zeros(n)
# i = 0
# for s in sol:
# costs[i] = env.step(s)[1]
# i += 1
# return costs
# # options = kws.pop("options", {})
# # options.setdefault("c1", 0.5)
# # options.setdefault("c2", 0.3)
# # options.setdefault("w", 0.9)
# # verbose = kws.pop("verbose", False)
# optimizer = ps.single.GlobalBestPSO(
# n_particles=n, dimensions=x0.shape[0], bounds=bounds, options=options, **kws
# )
# reward, result = optimizer.optimize(loss_fn, iters=steps, verbose=verbose)
# if molecule:
# molecule = outils.apply_solution(result, env, molecule)
# return molecule
# return result, reward
[docs]
def anneal_optimize(
env,
n_particles: int = None,
max_steps: int = 100,
stop_if_done: bool = True,
threshold: float = 1e-6,
variance: float = 0.3,
cooldown_rate: float = 0.98,
n_best: int = 1,
):
"""
Optimize a rotatron environment through a simple simulated annealing.
Parameters
----------
env : biobuild.optimizers.environments.Rotatron
The environment to optimize
n_particles : int, optional
The number of particles to use. Set to None in order to compute the number of particles
based on the number of rotatable edges in the environment, where one particle is used per two rotatable edges.
max_steps : int, optional
The maximum number of steps to take.
stop_if_done : bool, optional
Stop the optimization if the environment signals it is done or the solutions have converged.
threshold : float, optional
A threshold to use for convergence of the best solution found.
The algorithm will stop if the variation of the best solution evaluation history
is less than this threshold.
variance : float, optional
The variation to use for updating particle positions.
n_best : int, optional
The number of best solutions to return at the end of the optimization.
Returns
-------
solution, evaluation
The solution and evaluation for the solution
"""
if n_particles is None:
n_particles = max(3, len(env.rotatable_edges) // 2)
_Particle.variance = variance
particles = [_Particle(env.n_edges) for _ in range(n_particles)]
best_particle = particles[0]
best_fitness = np.inf
best_solution = np.zeros(env.n_edges)
for particle in particles:
particle.fitness = env.step(particle.position)[1]
temperature = 1.0
accept = lambda old, new, temp: np.exp((old - new) / temp) > np.random.rand()
steps = 0
while steps < max_steps:
for particle in particles:
position = particle.random_scatter()
fitness = env.step(position)[1]
if accept(particle.fitness, fitness, temperature):
particle.fitness = fitness
particle.position[:] = position
if fitness < best_fitness:
best_fitness = fitness
best_particle = particle
best_solution[:] = particle.position
if fitness < particle.best_fitness:
particle.best_fitness = fitness
particle.best_position[:] = position
env.reset()
temperature *= cooldown_rate
_Particle.variance *= cooldown_rate * 0.95
steps += 1
if stop_if_done:
if np.var([p.best_fitness for p in particles]) < threshold:
break
if n_best == 1:
return best_solution, best_fitness
else:
best = np.argsort([p.best_fitness for p in particles])[:n_best]
return np.array([particles[b].best_position for b in best]), np.array(
[particles[b].best_fitness for b in best]
)
def rdkit_optimize(mol, steps=1000):
"""
Optimize a molecule using RDKit's force field optimization.
Parameters
----------
mol : Molecule
The molecule to optimize
steps : int, optional
The number of steps to take, by default 1000
Returns
-------
Molecule
The optimized molecule
"""
cls = mol.__class__
if not aux.HAS_RDKIT:
raise ImportError("RDKit is not installed")
rdmol = mol.to_rdkit()
aux.AllChem.MMFFOptimizeMolecule(rdmol, maxIters=steps)
return cls.from_rdkit(rdmol)
__all__ = [
"swarm_optimize",
"genetic_optimize",
"scipy_optimize",
"rdkit_optimize",
"anneal_optimize",
]
if __name__ == "__main__":
import biobuild as bb
import time
mol = bb.molecule("/Users/noahhk/GIT/biobuild/__figure_makery/MAN8.json")
env = bb.optimizers.OverlapRotatron(mol.get_residue_graph(True))
for i in range(10):
t1 = time.time()
sol, eval = genetic_optimize(env, variation_cooldown=0.98, variation=0.1)
out = bb.optimizers.apply_solution(sol, env, mol.copy())
print(time.time() - t1, out.count_clashes())
out.show()
pass