"""Simple 2D environment containing a point and a goal location."""
import gym
import numpy as np

from garage.envs.step import Step


class PointEnv(gym.Env):
"""A simple 2D point environment.
Attributes:
observation_space (gym.spaces.Box): The observation space
action_space (gym.spaces.Box): The action space
Args:
goal (np.ndarray): A 2D array representing the goal position
arena_size (float): The size of arena where the point is constrained
within (-arena_size, arena_size) in each dimension
done_bonus (float): A numerical bonus added to the reward
once the point as reached the goal
never_done (bool): Never send a `done` signal, even if the
agent achieves the goal
"""

    def __init__(
            self,
            goal=np.array((1., 1.), dtype=np.float32),
            arena_size=5.,
            done_bonus=0.,
            never_done=False,
    ):
        goal = np.array(goal, dtype=np.float32)
        self._goal = goal
        self._done_bonus = done_bonus
        self._never_done = never_done
        self._arena_size = arena_size

        assert ((goal >= -arena_size) & (goal <= arena_size)).all()

        self._point = np.zeros_like(self._goal)
        self._task = {'goal': self._goal}

    @property
    def observation_space(self):
        """gym.spaces.Box: The observation space."""
        return gym.spaces.Box(low=-np.inf,
                              high=np.inf,
                              shape=(3, ),
                              dtype=np.float32)

    @property
    def action_space(self):
        """gym.spaces.Box: The action space."""
        return gym.spaces.Box(low=-0.1,
                              high=0.1,
                              shape=(2, ),
                              dtype=np.float32)

    def reset(self):
        """Reset the environment.

        Returns:
            np.ndarray: Observation of the environment.

        """
        self._point = np.zeros_like(self._goal)
        dist = np.linalg.norm(self._point - self._goal)
        return np.concatenate([self._point, (dist, )])

    def step(self, action):
        """Step the environment state.

        Args:
            action (np.ndarray): The action to take in the environment.

        Returns:
            np.ndarray: Observation. The observation of the environment.
            float: Reward. The reward acquired at this time step.
            boolean: Done. Whether the point has reached the goal at this
                time step. Always False if `never_done` is set.

        """
        # enforce action space
        a = action.copy()  # NOTE: we MUST copy the action before modifying it
        a = np.clip(a, self.action_space.low, self.action_space.high)

        self._point = np.clip(self._point + a, -self._arena_size,
                              self._arena_size)
        dist = np.linalg.norm(self._point - self._goal)
        # success when the point is within one maximum-length action of the goal
        succ = dist < np.linalg.norm(self.action_space.low)

        # dense reward
        reward = -dist
        # done bonus
        if succ:
            reward += self._done_bonus

        # sometimes we don't want to terminate
        done = succ and not self._never_done

        obs = np.concatenate([self._point, (dist, )])

        return Step(obs, reward, done, task=self._task, success=succ)

    def render(self, mode='human'):
        """Draw the environment.

        Not implemented.

        Args:
            mode (str): Ignored.

        """
        # pylint: disable=no-self-use

    def sample_tasks(self, num_tasks):
        """Sample a list of `num_tasks` tasks.

        Args:
            num_tasks (int): Number of tasks to sample.

        Returns:
            list[dict[str, np.ndarray]]: A list of "tasks", where each task is
                a dictionary containing a single key, "goal", mapping to a
                point in 2D space.

        """
        goals = np.random.uniform(-2, 2, size=(num_tasks, 2))
        tasks = [{'goal': goal} for goal in goals]
        return tasks

    def set_task(self, task):
        """Reset with a task.

        Args:
            task (dict[str, np.ndarray]): A task (a dictionary containing a
                single key, "goal", which should be a point in 2D space).

        """
        self._task = task
        self._goal = task['goal']
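

# Minimal usage sketch (not part of the original module; purely illustrative).
# It samples a goal with `sample_tasks`, installs it with `set_task`, then
# rolls out a few random actions, assuming the `Step` returned by `step()`
# unpacks like the usual gym 4-tuple of (observation, reward, done, info).
if __name__ == '__main__':
    env = PointEnv()
    env.set_task(env.sample_tasks(1)[0])

    obs = env.reset()
    for _ in range(10):
        # random exploration within the action space [-0.1, 0.1]^2
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        print('obs={}, reward={:.3f}, done={}'.format(obs, reward, done))
        if done:
            break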