# Source code for garage.np.algos.meta_rl_algorithm

"""Interface of Meta-RL Algorithms."""
import abc

from garage.np.algos.rl_algorithm import RLAlgorithm


class MetaRLAlgorithm(RLAlgorithm, abc.ABC):
    """Base class for Meta-RL Algorithms.

    In addition to whatever RLAlgorithm requires, concrete subclasses must
    implement the two abstract methods declared here:
    get_exploration_policy() and adapt_policy().

    """

    @abc.abstractmethod
    def get_exploration_policy(self):
        """Return a policy used before adaptation to a specific task.

        Every policy obtained from this method should be evaluated in a
        single task only; retrieve a new one for each additional task.

        Returns:
            garage.Policy: The policy used to obtain samples that are later
                used for meta-RL adaptation.

        """

    @abc.abstractmethod
    def adapt_policy(self, exploration_policy, exploration_trajectories):
        """Produce a policy adapted for a task.

        Args:
            exploration_policy (garage.Policy): A policy which was returned
                from get_exploration_policy(), and which generated
                exploration_trajectories by interacting with an environment.
                The caller may not use this object after passing it into this
                method.
            exploration_trajectories (garage.TrajectoryBatch): Trajectories to
                adapt to, generated by exploration_policy exploring the
                environment.

        Returns:
            garage.Policy: A policy adapted to the task represented by the
                exploration_trajectories.

        """