# Source code for digideep.agent.noises

"""
This module is dedicated to noise models used in other methods.

Each noise class should implement the ``__call__`` method. See the examples :class:`EGreedyNoise` and :class:`OrnsteinUhlenbeckNoise`.
"""
import numpy as np

class EGreedyNoise:
    """Epsilon-greedy exploration noise for batched, bounded actions.

    Every call perturbs the given actions in two stages:

    1. Zero-mean Gaussian noise with standard deviation ``std * lim`` is added
       and the result is clipped to ``[-lim, lim]``.
    2. Independently for each entry along the first (batch) axis, with
       probability ``e`` the perturbed action is replaced by an action drawn
       uniformly from ``[-lim, lim)``; otherwise it is kept.

    Args:
        std (float): Standard deviation of the Gaussian noise, as a fraction of ``lim``.
        e (float): The probability of replacing an action by a uniformly random one.
        lim (float): Boundary of the action (values are clipped beyond ``[-lim, lim]``).

    Note:
        This class is not dependent on its history.
    """

    def __init__(self, **params):
        """Store the keyword parameters (``std``, ``e``, ``lim``) for later calls."""
        self.params = params
        self.state = {}

    def reset(self, action=None):
        """Do nothing: this noise model keeps no history.

        Args:
            action: Ignored. Accepted only so that ``reset`` can be called the
                same way as :meth:`OrnsteinUhlenbeckNoise.reset`.
        """
        pass

    def __call__(self, action):
        """Return a noisy version of ``action`` without modifying the input.

        Args:
            action (np.ndarray): Actions with the batch on the first axis. It
                must have at least one axis and a floating-point dtype (the
                noise is added in place to a copy of it).

        Returns:
            np.ndarray: A new array with the same shape as ``action``.
        """
        std = self.params["std"]
        e = self.params["e"]
        lim = self.params["lim"]  # A scalar

        # Work on a private copy. Adding the noise to ``action`` itself would
        # leave the caller's array holding un-clipped Gaussian noise while a
        # different array is returned.
        noisy = np.array(action, copy=True)

        # Stage 1: Gaussian perturbation, then clip back into the valid range.
        noisy += std * lim * np.random.randn(*noisy.shape)
        noisy = np.clip(noisy, a_min=-lim, a_max=lim)

        # Stage 2: by chance e, choose a completely random action; by chance
        # 1-e, let the current noised action survive. Multiplying the
        # difference by a 0/1 mask does both without branching.
        random_action = np.random.uniform(low=-lim, high=lim, size=noisy.shape)

        # One Bernoulli draw per batch entry. The trailing singleton axes make
        # the mask broadcast over every remaining axis, whatever the rank.
        mask_shape = (noisy.shape[0],) + (1,) * (noisy.ndim - 1)
        choice = np.random.binomial(1, e, size=mask_shape)
        noisy += choice * (random_action - noisy)
        return noisy

    def state_dict(self):
        """Return ``None``; there is no state to serialize."""
        return None

    def load_state_dict(self, state_dict):
        """Ignore ``state_dict``; there is no state to restore."""
        pass



class OrnsteinUhlenbeckNoise:
    r"""An implementation of the `Ornstein-Uhlenbeck noise <https://en.wikipedia.org/wiki/Ornstein%E2%80%93Uhlenbeck_process>`_.

    The noise model is :math:`{\displaystyle dx_{t}=\theta (\mu -x_{t})\,dt+\sigma \,dW_{t}}`.
    Each call advances the process by one step with no explicit time step
    (i.e. :math:`dt = 1`) and adds ``x * lim`` to the action.

    Args:
        mu: Parameter :math:`\mu` which indicates the final value that :math:`x` will converge to.
        theta: Parameter :math:`\theta`.
        sigma: Parameter :math:`\sigma` which is the std of the additional normal noise.
        lim: The action limit, which can be a :obj:`np.array` for a vector of actions.

    Note:
        This class is state serializable.
    """

    def __init__(self, **params):
        """ Ornstein-Uhlenbeck process noise generator
        params: mu, theta, sigma, lim
        states: X, needs_reset
        """

        self.params = params
        self.state = {}
        # The process value X needs the action's shape, so it is created
        # lazily on the first call.
        self.state['needs_reset'] = True

    def reset(self, action=None):
        """Restart the process at ``mu``.

        Args:
            action: An array whose shape the process state should take. When
                omitted, the restart is deferred: the state is re-created from
                the action passed to the next call.
        """
        if action is None:
            self.state['needs_reset'] = True
            return
        self.state['X'] = np.ones_like(action) * self.params['mu']
        self.state['needs_reset'] = False

    def __call__(self, action):
        """Advance the process one step and add the scaled noise to ``action``.

        Args:
            action (np.ndarray): The actions to perturb.

        Returns:
            np.ndarray: ``action`` itself; the noise is added in place.
        """
        if self.state['needs_reset']:
            self.reset(action)

        previous = self.state['X']
        # Mean-reverting drift plus a Gaussian kick.
        drift = self.params['theta'] * (self.params['mu'] - previous)
        kick = self.params['sigma'] * np.random.randn(*previous.shape)
        self.state['X'] = previous + (drift + kick)

        action += self.state['X'] * self.params["lim"]
        return action

    def state_dict(self):
        """Return the live state dictionary (keys ``X`` and ``needs_reset``)."""
        return self.state

    def load_state_dict(self, state_dict):
        """Merge the entries of ``state_dict`` into the current state."""
        self.state.update(state_dict)