Source code for garage.sampler._dtypes

"""Datatypes used by multiple Samplers or Workers."""
import collections

import numpy as np

from garage import EpisodeBatch, StepType


class InProgressEpisode:
    """An in-progress episode.

    Compared to EpisodeBatch, this datatype does less checking, only
    contains one episode, and uses lists instead of numpy arrays to make
    stepping faster.

    Args:
        env (Environment): The environment the episode is being collected
            in.
        initial_observation (np.ndarray): The first observation. If None,
            the environment will be reset to generate this observation.
        episode_info (dict[str, np.ndarray]): Info for this episode.

    Raises:
        ValueError: If one of initial_observation and episode_info is
            passed in but not the other. Either both or neither should be
            passed in.

    """

    def __init__(self, env, initial_observation=None, episode_info=None):
        if initial_observation is None and episode_info is not None:
            raise ValueError(
                'Initial observation and episode info must be both or '
                'neither provided, but only episode info was passed in')
        if initial_observation is not None and episode_info is None:
            raise ValueError(
                'Initial observation and episode info must be both or '
                'neither provided, but only initial observation was '
                'passed in')
        if initial_observation is None:
            initial_observation, episode_info = env.reset()
        self.env = env
        self.episode_info = episode_info
        self.observations = [initial_observation]
        self.actions = []
        self.rewards = []
        self.step_types = []
        self.agent_infos = collections.defaultdict(list)
        self.env_infos = collections.defaultdict(list)
    def step(self, action, agent_info):
        """Step the episode using an action from an agent.

        Args:
            action (np.ndarray): The action taken by the agent.
            agent_info (dict[str, np.ndarray]): Extra agent information.

        Returns:
            np.ndarray: The new observation from the environment.

        """
        es = self.env.step(action)
        self.observations.append(es.observation)
        self.rewards.append(es.reward)
        self.actions.append(es.action)
        for k, v in agent_info.items():
            self.agent_infos[k].append(v)
        for k, v in es.env_info.items():
            self.env_infos[k].append(v)
        self.step_types.append(es.step_type)
        return es.observation
    def to_batch(self):
        """Convert this in-progress episode into an EpisodeBatch.

        Returns:
            EpisodeBatch: This episode as a batch.

        Raises:
            AssertionError: If this episode contains no time steps.

        """
        assert len(self.rewards) > 0
        env_infos = dict(self.env_infos)
        agent_infos = dict(self.agent_infos)
        episode_infos = dict(self.episode_info)
        for k, v in env_infos.items():
            env_infos[k] = np.asarray(v)
        for k, v in agent_infos.items():
            agent_infos[k] = np.asarray(v)
        for k, v in episode_infos.items():
            episode_infos[k] = np.asarray([v])
        return EpisodeBatch(episode_infos=episode_infos,
                            env_spec=self.env.spec,
                            observations=np.asarray(self.observations[:-1]),
                            last_observations=np.asarray([self.last_obs]),
                            actions=np.asarray(self.actions),
                            rewards=np.asarray(self.rewards),
                            step_types=np.asarray(self.step_types,
                                                  dtype=StepType),
                            env_infos=env_infos,
                            agent_infos=agent_infos,
                            lengths=np.asarray([len(self.rewards)],
                                               dtype='l'))
    @property
    def last_obs(self):
        """np.ndarray: The last observation in the episode."""
        return self.observations[-1]
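
A minimal usage sketch of the class above. It assumes a garage-compatible environment instance `env` and an agent exposing `get_action(observation)` that returns an action and an agent-info dict; `env`, `agent`, and the step budget of 100 are illustrative assumptions, not part of this module:

    from garage import StepType

    # Reset env (done inside __init__) and record the first observation.
    episode = InProgressEpisode(env)
    obs = episode.last_obs
    for _ in range(100):
        # Hypothetical agent API: returns (action, agent_info).
        action, agent_info = agent.get_action(obs)
        # Appends to the internal lists and returns the new observation.
        obs = episode.step(action, agent_info)
        if episode.step_types[-1] == StepType.TERMINAL:
            break
    # Convert the accumulated lists into a single-episode EpisodeBatch.
    batch = episode.to_batch()

Because stepping only appends to Python lists, the numpy conversions and shape checks are deferred to the single to_batch() call at the end, which is the speedup the class docstring refers to.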