Source code for garage._functions

"""Functions exposed directly in the garage namespace."""
from collections import defaultdict

from dowel import tabular
import numpy as np

import garage
from garage.misc.tensor_utils import discount_cumsum


class _Default:  # pylint: disable=too-few-public-methods
    """A wrapper class to represent default arguments.

    Args:
        val (object): Argument value.

    """

    def __init__(self, val):
        self.val = val
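
    # A minimal sketch of the wrapper in use: wrapping a value marks it as
    # an overridable default, so code such as make_optimizer (below) can
    # tell framework defaults apart from arguments a user passed explicitly.
    #
    #     default_lr = _Default(1e-3)
    #     assert isinstance(default_lr, _Default)
    #     assert default_lr.val == 1e-3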


def make_optimizer(optimizer_type, module=None, **kwargs):
    """Create an optimizer for PyTorch and TensorFlow algos.

    Args:
        optimizer_type (Union[type, tuple[type, dict]]): Type of optimizer.
            This can be an optimizer type such as `torch.optim.Adam`, or a
            tuple of a type and a dictionary, where the dictionary contains
            arguments used to initialize the optimizer, e.g.
            (torch.optim.Adam, {'lr': 1e-3}).
        module (optional): If the optimizer type is a `torch.optim.Optimizer`
            type, the `torch.nn.Module` whose parameters need to be optimized
            must be specified.
        kwargs (dict): Other keyword arguments used to initialize the
            optimizer. Not used when `optimizer_type` is a tuple; passing
            explicit (non-default) values then raises a ValueError.

    Returns:
        torch.optim.Optimizer: Constructed optimizer.

    Raises:
        ValueError: If `optimizer_type` is a tuple and a non-default argument
            is passed in `kwargs`.

    """
    if isinstance(optimizer_type, tuple):
        opt_type, opt_args = optimizer_type
        for name, arg in kwargs.items():
            if not isinstance(arg, _Default):
                raise ValueError('Should not specify {} and explicit '
                                 'optimizer args at the same '
                                 'time'.format(name))
        if module is not None:
            return opt_type(module.parameters(), **opt_args)
        else:
            return opt_type(**opt_args)

    opt_args = {
        k: v.val if isinstance(v, _Default) else v
        for k, v in kwargs.items()
    }
    if module is not None:
        return optimizer_type(module.parameters(), **opt_args)
    else:
        return optimizer_type(**opt_args)

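# A minimal usage sketch of both calling conventions, assuming PyTorch is
# installed; `policy` is a hypothetical stand-in for a real network module.

import torch

policy = torch.nn.Linear(4, 2)  # hypothetical module whose params we optimize

# Type plus keyword arguments; _Default marks a value as an overridable
# default rather than an explicit user argument.
adam_a = make_optimizer(torch.optim.Adam, module=policy, lr=_Default(1e-3))

# Tuple form: the argument dict travels with the optimizer type, and any
# explicit (non-_Default) kwargs would raise a ValueError.
adam_b = make_optimizer((torch.optim.Adam, {'lr': 1e-3}), module=policy)
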
def log_multitask_performance(itr, batch, discount, name_map=None):
    r"""Log performance of trajectories from multiple tasks.

    Args:
        itr (int): Iteration number to be logged.
        batch (garage.TrajectoryBatch): Batch of trajectories. The
            trajectories should have either the "task_name" or "task_id"
            `env_infos`. If "task_name" is not present, then `name_map` is
            required and should map from task IDs to task names.
        discount (float): Discount used in computing returns.
        name_map (dict[int, str] or None): Mapping from task IDs to task
            names. Optional if the "task_name" environment info is present.
            Note that if provided, all tasks listed in this map will be
            logged, even if there are no trajectories present for them.

    Returns:
        numpy.ndarray: Undiscounted returns averaged across all tasks. Has
            shape :math:`(N \bullet [T])`.

    """
    traj_by_name = defaultdict(list)
    for trajectory in batch.split():
        task_name = '__unnamed_task__'
        if 'task_name' in trajectory.env_infos:
            task_name = trajectory.env_infos['task_name'][0]
        elif 'task_id' in trajectory.env_infos:
            name_map = {} if name_map is None else name_map
            task_id = trajectory.env_infos['task_id'][0]
            task_name = name_map.get(task_id, 'Task #{}'.format(task_id))
        traj_by_name[task_name].append(trajectory)

    if name_map is None:
        task_names = traj_by_name.keys()
    else:
        task_names = name_map.values()
    for task_name in task_names:
        if task_name in traj_by_name:
            trajectories = traj_by_name[task_name]
            log_performance(itr,
                            garage.TrajectoryBatch.concatenate(*trajectories),
                            discount,
                            prefix=task_name)
        else:
            with tabular.prefix(task_name + '/'):
                tabular.record('Iteration', itr)
                tabular.record('NumTrajs', 0)
                tabular.record('AverageDiscountedReturn', np.nan)
                tabular.record('AverageReturn', np.nan)
                tabular.record('StdReturn', np.nan)
                tabular.record('MaxReturn', np.nan)
                tabular.record('MinReturn', np.nan)
                tabular.record('CompletionRate', np.nan)
                tabular.record('SuccessRate', np.nan)

    return log_performance(itr, batch, discount=discount, prefix='Average')

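# A minimal, standalone sketch of the task-name resolution above, using a
# plain dict in place of a trajectory's env_infos (the task ids here are
# hypothetical). A task id missing from name_map falls back to 'Task #<id>'.

name_map = {0: 'push', 1: 'reach'}
env_infos = {'task_id': np.array([2, 2, 2])}  # per-step task ids
task_id = env_infos['task_id'][0]
print(name_map.get(task_id, 'Task #{}'.format(task_id)))  # -> Task #2
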
def log_performance(itr, batch, discount, prefix='Evaluation'):
    """Evaluate the performance of an algorithm on a batch of trajectories.

    Args:
        itr (int): Iteration number.
        batch (TrajectoryBatch): The trajectories to evaluate with.
        discount (float): Discount value, from algorithm's property.
        prefix (str): Prefix to add to all logged keys.

    Returns:
        numpy.ndarray: Undiscounted returns.

    """
    returns = []
    undiscounted_returns = []
    completion = []
    success = []
    for trajectory in batch.split():
        returns.append(discount_cumsum(trajectory.rewards, discount))
        undiscounted_returns.append(sum(trajectory.rewards))
        completion.append(float(trajectory.terminals.any()))
        if 'success' in trajectory.env_infos:
            success.append(float(trajectory.env_infos['success'].any()))

    average_discounted_return = np.mean([rtn[0] for rtn in returns])

    with tabular.prefix(prefix + '/'):
        tabular.record('Iteration', itr)
        tabular.record('NumTrajs', len(returns))
        tabular.record('AverageDiscountedReturn', average_discounted_return)
        tabular.record('AverageReturn', np.mean(undiscounted_returns))
        tabular.record('StdReturn', np.std(undiscounted_returns))
        tabular.record('MaxReturn', np.max(undiscounted_returns))
        tabular.record('MinReturn', np.min(undiscounted_returns))
        tabular.record('CompletionRate', np.mean(completion))
        if success:
            tabular.record('SuccessRate', np.mean(success))

    return undiscounted_returns

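# A worked sketch of the statistics computed above: discount_cumsum yields
# per-step discounted returns-to-go, so element 0 is the discounted return
# of the whole trajectory. log_performance averages these across the batch.

rewards = np.array([1., 1., 1.])
returns_to_go = discount_cumsum(rewards, 0.9)
# returns_to_go == [1 + 0.9 + 0.81, 1 + 0.9, 1] == [2.71, 1.9, 1.]
print(returns_to_go[0])  # logged (averaged) as AverageDiscountedReturn
print(sum(rewards))      # logged (averaged) as AverageReturn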