Source code for garage.np.policies.policy

"""Base class for policies based on numpy."""
import abc


class Policy(abc.ABC):
    """Base class for policies based on numpy.

    Concrete policies must implement :meth:`get_action` and
    :meth:`get_actions`. The ``name`` and ``env_spec`` properties have no
    body in this base class, so they evaluate to ``None`` unless a subclass
    overrides them; ``observation_space`` and ``action_space`` are read from
    ``env_spec``.
    """

    @abc.abstractmethod
    def get_action(self, observation):
        """Get action sampled from the policy.

        Args:
            observation (np.ndarray): Observation from the environment.

        Returns:
            Tuple[np.ndarray, dict[str,np.ndarray]]: Actions and extra agent
                infos.

        """

    @abc.abstractmethod
    def get_actions(self, observations):
        """Get actions given observations.

        Args:
            observations (np.ndarray): Observations from the environment.

        Returns:
            Tuple[np.ndarray, dict[str,np.ndarray]]: Actions and extra agent
                infos.

        """

    def reset(self, do_resets=None):
        """Reset the policy.

        This is effective only to recurrent policies; the base
        implementation does nothing and returns ``None``.

        do_resets is an array of booleans indicating which internal states
        should be reset. The length of do_resets should be equal to the
        length of inputs, i.e. batch size.

        Args:
            do_resets (numpy.ndarray): Bool array indicating which states
                to be reset.

        """

    @property
    def name(self):
        """Name of policy.

        The base implementation has no body and therefore yields ``None``;
        presumably subclasses override it to supply the actual name.

        Returns:
            str: Name of policy

        """

    @property
    def env_spec(self):
        """Policy environment specification.

        The base implementation has no body and therefore yields ``None``;
        presumably subclasses override it to supply the actual
        specification.

        Returns:
            garage.EnvSpec: Environment specification.

        """

    @property
    def observation_space(self):
        """Observation space.

        Read from ``self.env_spec``, so ``env_spec`` must provide an
        ``observation_space`` attribute.

        Returns:
            akro.Space: The observation space of the environment.

        """
        return self.env_spec.observation_space

    @property
    def action_space(self):
        """Action space.

        Read from ``self.env_spec``, so ``env_spec`` must provide an
        ``action_space`` attribute.

        Returns:
            akro.Space: The action space of the environment.

        """
        return self.env_spec.action_space