"""Simple 2D environment containing a point and a goal location."""
import gym
import numpy as np

from garage.envs.step import Step


class PointEnv(gym.Env):
"""A simple 2D point environment.
Attributes:
observation_space (gym.spaces.Box): The observation space
action_space (gym.spaces.Box): The action space
Args:
goal (np.ndarray): A 2D array representing the goal position
arena_size (float): The size of arena where the point is constrained
within (-arena_size, arena_size) in each dimension
done_bonus (float): A numerical bonus added to the reward
once the point as reached the goal
never_done (bool): Never send a `done` signal, even if the
agent achieves the goal
"""

    def __init__(
            self,
            goal=np.array((1., 1.), dtype=np.float32),
            arena_size=5.,
            done_bonus=0.,
            never_done=False,
    ):
        goal = np.array(goal, dtype=np.float32)
        self._goal = goal
        self._done_bonus = done_bonus
        self._never_done = never_done
        self._arena_size = arena_size

        assert ((goal >= -arena_size) & (goal <= arena_size)).all()

        self._point = np.zeros_like(self._goal)
        self._task = {'goal': self._goal}

    @property
    def observation_space(self):
        """gym.spaces.Box: The observation space."""
        return gym.spaces.Box(low=-np.inf,
                              high=np.inf,
                              shape=(3, ),
                              dtype=np.float32)

    @property
    def action_space(self):
        """gym.spaces.Box: The action space."""
        return gym.spaces.Box(low=-0.1,
                              high=0.1,
                              shape=(2, ),
                              dtype=np.float32)

    def reset(self):
        """Reset the environment.

        Returns:
            np.ndarray: Observation of the environment.

        """
        self._point = np.zeros_like(self._goal)
        dist = np.linalg.norm(self._point - self._goal)
        return np.concatenate([self._point, (dist, )])

    def step(self, action):
        """Step the environment state.

        Args:
            action (np.ndarray): The action to take in the environment.

        Returns:
            np.ndarray: Observation. The observation of the environment.
            float: Reward. The reward acquired at this time step.
            boolean: Done. Whether the point has reached the goal at this
                time step. Always False if `never_done` is set.

        """
        # enforce action space
        a = action.copy()  # NOTE: we MUST copy the action before modifying it
        a = np.clip(a, self.action_space.low, self.action_space.high)

        self._point = np.clip(self._point + a, -self._arena_size,
                              self._arena_size)
        dist = np.linalg.norm(self._point - self._goal)
        # success when the point is within one maximum-length action of the goal
        succ = dist < np.linalg.norm(self.action_space.low)

        # dense reward
        reward = -dist
        # done bonus
        if succ:
            reward += self._done_bonus

        # sometimes we don't want to terminate
        done = succ and not self._never_done

        obs = np.concatenate([self._point, (dist, )])

        return Step(obs, reward, done, task=self._task, success=succ)

    def render(self, mode='human'):
        """Draw the environment.

        Not implemented.

        Args:
            mode (str): Ignored.

        """
        # pylint: disable=no-self-use

    def sample_tasks(self, num_tasks):
        """Sample a list of `num_tasks` tasks.

        Args:
            num_tasks (int): Number of tasks to sample.

        Returns:
            list[dict[str, np.ndarray]]: A list of "tasks", where each task is
                a dictionary containing a single key, "goal", mapping to a
                point in 2D space.

        """
        goals = np.random.uniform(-2, 2, size=(num_tasks, 2))
        tasks = [{'goal': goal} for goal in goals]
        return tasks

    def set_task(self, task):
        """Reset with a task.

        Args:
            task (dict[str, np.ndarray]): A task (a dictionary containing a
                single key, "goal", which should be a point in 2D space).

        """
        self._task = task
        self._goal = task['goal']
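

# Minimal usage sketch (not part of the original module; purely illustrative).
# It samples a goal with `sample_tasks`, installs it with `set_task`, then
# rolls out a few random actions, assuming the `Step` returned by `step()`
# unpacks like the usual gym 4-tuple of (observation, reward, done, info).
if __name__ == '__main__':
    env = PointEnv()
    env.set_task(env.sample_tasks(1)[0])

    obs = env.reset()
    for _ in range(10):
        # random exploration within the action space [-0.1, 0.1]^2
        action = env.action_space.sample()
        obs, reward, done, info = env.step(action)
        print('obs={}, reward={:.3f}, done={}'.format(obs, reward, done))
        if done:
            break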