Source code for garage.np.policies.uniform_random_policy
"""Uniform random exploration strategy."""
import gym
from garage.np.policies.policy import Policy
[docs]class UniformRandomPolicy(Policy):
"""Action taken is uniformly random.
Args:
env_spec (EnvSpec): Environment spec to explore.
"""
def __init__(
self,
env_spec,
):
assert isinstance(env_spec.action_space, gym.spaces.Box)
assert len(env_spec.action_space.shape) == 1
self._env_spec = env_spec
self._action_space = env_spec.action_space
self._iteration = 0
[docs] def reset(self, do_resets=None):
"""Reset the state of the exploration.
Args:
do_resets (List[bool] or numpy.ndarray or None): Which
vectorization states to reset.
"""
self._iteration += 1
super().reset(do_resets)
[docs] def get_action(self, observation):
"""Get action from this policy for the input observation.
Args:
observation(numpy.ndarray): Observation from the environment.
Returns:
np.ndarray: Actions with noise.
List[dict]: Arbitrary policy state information (agent_info).
"""
return self._env_spec.action_space.sample(), dict()
[docs] def get_actions(self, observations):
"""Get actions from this policy for the input observation.
Args:
observations(list): Observations from the environment.
Returns:
np.ndarray: Actions with noise.
List[dict]: Arbitrary policy state information (agent_info).
"""
return [self._env_spec.action_space.sample()
for obs in observations], dict()