Source code for garage.experiment.meta_evaluator

"""Evaluator which tests Meta-RL algorithms on test environments."""

from dowel import logger, tabular

from garage import log_multitask_performance, TrajectoryBatch
from garage.experiment.deterministic import get_seed
from garage.sampler import DefaultWorker
from garage.sampler import LocalSampler
from garage.sampler import WorkerFactory


class MetaEvaluator:
    """Evaluates Meta-RL algorithms on test environments.

    Args:
        test_task_sampler (garage.experiment.TaskSampler): Sampler for test
            tasks. To demonstrate the effectiveness of a meta-learning method,
            these should be different from the training tasks.
        max_path_length (int): Maximum path length used for evaluation
            trajectories.
        n_test_tasks (int or None): Number of test tasks to sample each time
            evaluation is performed. Note that tasks are sampled "without
            replacement". If None, is set to `test_task_sampler.n_tasks`.
        n_exploration_traj (int): Number of trajectories to gather from the
            exploration policy before requesting the meta algorithm to
            produce an adapted policy.
        n_test_rollouts (int): Number of rollouts to use for each adapted
            policy. The adapted policy should forget previous rollouts when
            `.reset()` is called.
        prefix (str): Prefix to use when logging. Defaults to MetaTest. For
            example, this results in logging the key 'MetaTest/SuccessRate'.
            If not set to `MetaTest`, it should probably be set to
            `MetaTrain`.
        test_task_names (list[str]): List of task names to test. Should be in
            an order consistent with the `task_id` env_info, if that is
            present.
        worker_class (type): Type of worker the Sampler should use.
        worker_args (dict or None): Additional arguments that should be
            passed to the worker.

    """

    # pylint: disable=too-few-public-methods

    def __init__(self,
                 *,
                 test_task_sampler,
                 max_path_length,
                 n_exploration_traj=10,
                 n_test_tasks=None,
                 n_test_rollouts=1,
                 prefix='MetaTest',
                 test_task_names=None,
                 worker_class=DefaultWorker,
                 worker_args=None):
        self._test_task_sampler = test_task_sampler
        self._worker_class = worker_class
        if worker_args is None:
            self._worker_args = {}
        else:
            self._worker_args = worker_args
        if n_test_tasks is None:
            n_test_tasks = test_task_sampler.n_tasks
        self._n_test_tasks = n_test_tasks
        self._n_test_rollouts = n_test_rollouts
        self._n_exploration_traj = n_exploration_traj
        self._max_path_length = max_path_length
        self._eval_itr = 0
        self._prefix = prefix
        self._test_task_names = test_task_names
        self._test_sampler = None

    def evaluate(self, algo, test_rollouts_per_task=None):
        """Evaluate the Meta-RL algorithm on the test tasks.

        Args:
            algo (garage.np.algos.MetaRLAlgorithm): The algorithm to
                evaluate.
            test_rollouts_per_task (int or None): Number of rollouts per
                task.

        """
        if test_rollouts_per_task is None:
            test_rollouts_per_task = self._n_test_rollouts
        adapted_trajectories = []
        logger.log('Sampling for adaptation and meta-testing...')
        if self._test_sampler is None:
            # Lazily construct a single-worker sampler the first time
            # evaluation is performed.
            self._test_sampler = LocalSampler.from_worker_factory(
                WorkerFactory(seed=get_seed(),
                              max_path_length=self._max_path_length,
                              n_workers=1,
                              worker_class=self._worker_class,
                              worker_args=self._worker_args),
                agents=algo.get_exploration_policy(),
                envs=self._test_task_sampler.sample(1))
        for env_up in self._test_task_sampler.sample(self._n_test_tasks):
            # Gather exploration trajectories with the exploration policy,
            # then ask the algorithm to produce a task-adapted policy.
            policy = algo.get_exploration_policy()
            traj = TrajectoryBatch.concatenate(*[
                self._test_sampler.obtain_samples(self._eval_itr, 1, policy,
                                                  env_up)
                for _ in range(self._n_exploration_traj)
            ])
            adapted_policy = algo.adapt_policy(policy, traj)
            # Roll out the adapted policy and keep its trajectories for
            # logging.
            adapted_traj = self._test_sampler.obtain_samples(
                self._eval_itr,
                test_rollouts_per_task * self._max_path_length,
                adapted_policy)
            adapted_trajectories.append(adapted_traj)
        logger.log('Finished meta-testing...')
        if self._test_task_names is not None:
            name_map = dict(enumerate(self._test_task_names))
        else:
            name_map = None
        with tabular.prefix(self._prefix + '/' if self._prefix else ''):
            log_multitask_performance(
                self._eval_itr,
                TrajectoryBatch.concatenate(*adapted_trajectories),
                getattr(algo, 'discount', 1.0),
                name_map=name_map)
        self._eval_itr += 1
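
For context, the snippet below is a minimal usage sketch and is not part of this module. It assumes `test_task_sampler` is an already-constructed `garage.experiment.TaskSampler` over held-out tasks and `algo` is a `garage.np.algos.MetaRLAlgorithm` being trained; those two names and the keyword values shown are illustrative placeholders.

from garage.experiment import MetaEvaluator

meta_evaluator = MetaEvaluator(test_task_sampler=test_task_sampler,
                               max_path_length=200,
                               n_test_tasks=10,
                               n_test_rollouts=5)

# Typically called once per epoch of meta-training; each call samples fresh
# test tasks, adapts the exploration policy per task, and logs results under
# the 'MetaTest/' prefix.
meta_evaluator.evaluate(algo)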