Source code for garage.np.exploration_policies.add_gaussian_noise

"""Gaussian exploration strategy."""
import gym
import numpy as np

from import ExplorationPolicy

class AddGaussianNoise(ExplorationPolicy):
    """Add Gaussian noise to the action taken by the deterministic policy.

    The noise standard deviation starts at ``max_sigma`` and is decayed
    linearly towards ``min_sigma`` each time :meth:`reset` is called. Once
    ``decay_period`` resets have happened it stays at ``min_sigma``.

    Args:
        env_spec (EnvSpec): Environment spec to explore.
        policy (garage.Policy): Policy to wrap.
        max_sigma (float): Action noise standard deviation at the start of
            exploration.
        min_sigma (float): Action noise standard deviation at the end of the
            decay period.
        decay_period (int): Number of paths over which to linearly decay sigma
            from max_sigma to min_sigma.

    """

    def __init__(self,
                 env_spec,
                 policy,
                 max_sigma=1.0,
                 min_sigma=0.1,
                 decay_period=1000000):
        # Only flat, continuous (Box) action spaces are supported: the noise
        # is clipped against the per-dimension low/high bounds of the space.
        assert isinstance(env_spec.action_space, gym.spaces.Box)
        assert len(env_spec.action_space.shape) == 1
        super().__init__(policy)
        self._max_sigma = max_sigma
        self._min_sigma = min_sigma
        self._decay_period = decay_period
        self._action_space = env_spec.action_space
        # Count of reset() calls so far; drives the sigma decay schedule.
        self._iteration = 0

    def _sigma(self):
        """Return the current noise standard deviation.

        Returns:
            float: ``max_sigma`` linearly interpolated towards ``min_sigma``
                by the fraction ``iteration / decay_period``, capped at 1.

        """
        progress = min(1.0, self._iteration * 1.0 / self._decay_period)
        return self._max_sigma - (self._max_sigma - self._min_sigma) * progress

    def _perturb(self, actions):
        """Add Gaussian noise to actions and clip them to the action space.

        Args:
            actions (numpy.ndarray): A single action or a batch of actions.

        Returns:
            numpy.ndarray: Noisy actions, clipped to the bounds of the
                action space.

        """
        # Draw one independent sample per action element. Using the full
        # shape (rather than len(actions)) keeps this correct for batches,
        # where the leading axis is the batch size and not the action
        # dimension.
        noise = np.random.normal(size=np.shape(actions)) * self._sigma()
        return np.clip(actions + noise, self._action_space.low,
                       self._action_space.high)

    def reset(self, dones=None):
        """Reset the state of the exploration.

        Every call advances the decay schedule by one step before delegating
        to the parent class.

        Args:
            dones (List[bool] or numpy.ndarray or None): Which vectorization
                states to reset.

        """
        self._iteration += 1
        super().reset(dones)

    def get_action(self, observation):
        """Get action from this policy for the input observation.

        Args:
            observation(numpy.ndarray): Observation from the environment.

        Returns:
            np.ndarray: Actions with noise.
            List[dict]: Arbitrary policy state information (agent_info).

        """
        action, agent_info = self.policy.get_action(observation)
        return self._perturb(action), agent_info

    def get_actions(self, observations):
        """Get actions from this policy for the input observation.

        Args:
            observations(list): Observations from the environment.

        Returns:
            np.ndarray: Actions with noise.
            List[dict]: Arbitrary policy state information (agent_info).

        """
        actions, agent_infos = self.policy.get_actions(observations)
        return self._perturb(actions), agent_infos