Source code for garage.sampler.vec_env_executor

"""Environment wrapper that runs multiple environments."""
import copy
import warnings

import numpy as np

from garage.misc import tensor_utils


class VecEnvExecutor:
    """Environment wrapper that runs multiple environments.

    Keeps a list of environments together with one elapsed-step counter per
    environment, so that paths can be cut off at ``max_path_length``.

    Args:
        envs (list[gym.Env]): List of environments to batch together. Must
            contain at least one environment, because the action and
            observation spaces are read from ``envs[0]``.
        max_path_length (int): Maximum length of any path.

    """

    def __init__(self, envs, max_path_length):
        self.envs = envs
        # The first environment's spaces are exposed as the spaces of the
        # whole batch.
        self._action_space = envs[0].action_space
        self._observation_space = envs[0].observation_space
        # One step counter per environment, all starting at zero.
        self.ts = np.zeros(len(self.envs), dtype='int')
        self.max_path_length = max_path_length
        # stacklevel=2 attributes the warning to the code that constructs the
        # executor instead of to this module, so the report points at the
        # call site that actually needs to migrate.
        warnings.warn(
            DeprecationWarning(
                'VecEnvExecutor is deprecated, and will be removed in the '
                'next release. Please use VecWorker and one of the new '
                'samplers which implement garage.sampler.Sampler, such as '
                'RaySampler'),
            stacklevel=2)
[docs] def step(self, action_n): """Step all environments using the provided actions. Inserts an environment infor 'vec_env_executor.complete' containing the episode end signal (time limit reached or done signal from environment). Args: action_n (np.ndarray): Array of actions. Returns: tuple: Tuple containing: * observations (np.ndarray) * rewards (np.ndarray) * dones (np.ndarray): The done signal from the environment. * env_infos (dict[str, np.ndarray]) * completes (np.ndarray): whether or not the path is complete. A path is complete at some time-step N if the done signal has been received at that or before N, or if max_path_length N >= max_path_length. """ all_results = [env.step(a) for (a, env) in zip(action_n, self.envs)] obs, rewards, dones, env_infos = list( map(list, list(zip(*all_results)))) dones = np.asarray(dones) rewards = np.asarray(rewards) self.ts += 1 completes = copy.deepcopy(dones) if self.max_path_length is not None: completes[self.ts >= self.max_path_length] = True for (i, complete) in enumerate(completes): if complete: obs[i] = self.envs[i].reset() self.ts[i] = 0 env_infos[i]['vec_env_executor.complete'] = completes return (obs, rewards, dones, tensor_utils.stack_tensor_dict_list(env_infos), completes)
[docs] def reset(self): """Reset all environments. Returns: np.ndarray: Observations of shape :math:`(K, O*)` """ results = [env.reset() for env in self.envs] self.ts[:] = 0 return results
@property def num_envs(self): """Read the number of environments. Returns: int: Number of environments """ return len(self.envs) @property def action_space(self): """Read the action space. Returns: gym.Space: The action space. """ return self._action_space @property def observation_space(self): """Read the observation space. Returns: gym.Space: The observation space. """ return self._observation_space
[docs] def close(self): """Close all environments."""