evox.algorithms.so.es_variants.noise_reuse_es 源代码

from typing import Literal

import torch

from evox.core import Algorithm, Mutable, Parameter

from .adam_step import adam_single_tensor



[文档]
class NoiseReuseES(Algorithm):
    """The implementation of the Noise-Reuse-ES algorithm.

    Reference:
    Noise-Reuse in Online Evolution Strategies
    (https://arxiv.org/pdf/2304.12180.pdf)

    This code has been inspired by or utilizes the algorithmic implementation from evosax.
    More information about evosax can be found at the following URL:
    GitHub Link: https://github.com/RobertTLange/evosax
    """

    def __init__(
        self,
        pop_size: int,
        center_init: torch.Tensor,
        optimizer: Literal["adam"] | None = None,
        lr: float = 0.05,
        sigma: float = 0.03,
        T: int = 100,  # inner problem length
        K: int = 10,
        sigma_decay: float = 1.0,
        sigma_limit: float = 0.01,
        device: torch.device | None = None,
    ):
        """Initialize the Noise-Reuse-ES algorithm with the given parameters.

        :param pop_size: The size of the population. Must be an even number greater than 1,
            because the population is built from antithetic (+/-) perturbation pairs.
        :param center_init: The initial center of the population. Must be a 1D tensor.
        :param optimizer: The optimizer to use. Defaults to None. Currently, only "adam" or None is supported.
        :param lr: The learning rate for the optimizer. Defaults to 0.05.
        :param sigma: The standard deviation of the noise. Defaults to 0.03.
        :param T: The inner problem length. Once the inner step counter reaches this value it is
            reset to zero and fresh noise is used on the next step. Defaults to 100.
        :param K: The amount by which the inner step counter advances on every call to
            :meth:`step`. Defaults to 10.
        :param sigma_decay: The decay factor for the standard deviation. Defaults to 1.0.
        :param sigma_limit: The minimum value for the standard deviation. Defaults to 0.01.
        :param device: The device to use for the tensors. Defaults to None.
        """
        super().__init__()
        assert pop_size > 1
        # `step` samples `pop_size // 2` perturbations and mirrors them; an odd
        # `pop_size` would yield `pop_size - 1` rows, which cannot be combined
        # with the `(pop_size, dim)` perturbation buffer.
        assert pop_size % 2 == 0, "pop_size must be even (antithetic sampling)"
        # Any other value would reach the Adam branch of `step` without the
        # Adam state having been created.
        assert optimizer in (None, "adam"), 'optimizer must be "adam" or None'
        dim = center_init.shape[0]
        # set hyperparameters
        self.lr = Parameter(lr, device=device)
        self.T = Parameter(T, device=device)
        self.K = Parameter(K, device=device)
        self.sigma_decay = Parameter(sigma_decay, device=device)
        self.sigma_limit = Parameter(sigma_limit, device=device)
        # set value
        self.dim = dim
        self.pop_size = pop_size
        self.optimizer = optimizer
        # setup
        center_init = center_init.to(device=device)
        self.center = Mutable(center_init)
        # Created on `device` like every other piece of state.
        self.sigma = Mutable(torch.tensor(sigma, device=device))
        self.inner_step_counter = Mutable(torch.tensor(0.0, device=device))
        # Perturbations currently being reused; overwritten with fresh noise
        # whenever the inner step counter is zero.
        self.unroll_pert = Mutable(torch.zeros(pop_size, self.dim, device=device))

        if optimizer == "adam":
            self.exp_avg = Mutable(torch.zeros_like(self.center))
            self.exp_avg_sq = Mutable(torch.zeros_like(self.center))
            self.beta1 = Parameter(0.9, device=device)
            self.beta2 = Parameter(0.999, device=device)

    def step(self) -> None:
        """
        Take a single step of the NoiseReuseES algorithm.

        The step proceeds as follows:

        1. Sample antithetic Gaussian perturbations scaled by ``sigma``. They are only
           used when the inner step counter is zero; otherwise the perturbations
           stored from the previous step are reused.
        2. Evaluate the population ``center + perturbations``.
        3. Estimate the gradient as the mean of ``perturbation * fitness / sigma**2``.
        4. Update the center, either by plain gradient descent with learning rate
           ``lr`` or via ``adam_single_tensor`` when the optimizer is ``"adam"``.
        5. Advance the inner step counter by ``K`` (resetting it to zero once it
           reaches ``T``) and decay ``sigma``, clamped from below by ``sigma_limit``.
        """
        device = self.center.device

        # Fresh noise is sampled on every call, but only takes effect at the
        # start of an inner problem (counter == 0).
        positive_perturbations = torch.randn(self.pop_size // 2, self.dim, device=device) * self.sigma
        negative_perturbations = -positive_perturbations
        perturbations = torch.cat([positive_perturbations, negative_perturbations], dim=0)
        unroll_pert = torch.where(self.inner_step_counter == 0, perturbations, self.unroll_pert)
        # Persist the perturbations so that subsequent steps of the same inner
        # problem actually reuse them; without this the stored buffer would
        # stay at its all-zero initial value.
        self.unroll_pert = unroll_pert

        population = self.center + unroll_pert

        fitness = self.evaluate(population)

        theta_grad = torch.mean(unroll_pert * fitness.reshape(-1, 1) / (self.sigma**2), dim=0)

        if self.optimizer is None:
            center = self.center - self.lr * theta_grad
        else:
            center, self.exp_avg, self.exp_avg_sq = adam_single_tensor(
                self.center,
                theta_grad,
                self.exp_avg,
                self.exp_avg_sq,
                self.beta1,
                self.beta2,
                self.lr,
            )
        self.center = center

        # Advance by K; wrap back to 0 once the inner problem length T is reached.
        next_counter = self.inner_step_counter + self.K
        self.inner_step_counter = torch.where(next_counter >= self.T, 0, next_counter)

        self.sigma = torch.maximum(self.sigma_decay * self.sigma, self.sigma_limit)

    def record_step(self) -> dict[str, torch.Tensor]:
        """Return the current center and noise standard deviation.

        :return: A dictionary with the keys ``"center"`` and ``"sigma"``.
        """
        return {"center": self.center, "sigma": self.sigma}