Source code for garage.envs.point_env

import gym
import numpy as np

from garage.envs.base import Step


class PointEnv(gym.Env):
    """A simple 2D point environment.

    Attributes:
        observation_space (:obj:`gym.spaces.Box`): The observation space
        action_space (:obj:`gym.spaces.Box`): The action space

    Args:
        goal (:obj:`np.ndarray`, optional): A 2D array representing the goal
            position
        done_bonus (float, optional): A numerical bonus added to the reward
            once the point has reached the goal
        never_done (bool, optional): Never send a `done` signal, even if the
            agent achieves the goal

    """

    def __init__(
            self,
            goal=np.array((1., 1.), dtype=np.float32),
            done_bonus=0.,
            never_done=False,
    ):
        self._goal = np.array(goal, dtype=np.float32)
        self._done_bonus = done_bonus
        self._never_done = never_done

        # The point starts at the origin
        self._point = np.zeros_like(self._goal)

    @property
    def observation_space(self):
        return gym.spaces.Box(
            low=-np.inf, high=np.inf, shape=(2, ), dtype=np.float32)

    @property
    def action_space(self):
        return gym.spaces.Box(
            low=-0.1, high=0.1, shape=(2, ), dtype=np.float32)
    def reset(self):
        self._point = np.zeros_like(self._goal)
        return np.copy(self._point)
    def step(self, action):
        # enforce action space
        a = action.copy()  # NOTE: we MUST copy the action before modifying it
        a = np.clip(a, self.action_space.low, self.action_space.high)

        # move the point by the clipped action
        self._point = self._point + a

        dist = np.linalg.norm(self._point - self._goal)
        # the goal counts as reached once the point is within one
        # maximum-magnitude action of it
        done = dist < np.linalg.norm(self.action_space.low)

        # dense reward
        reward = -dist
        # done bonus
        if done:
            reward += self._done_bonus

        # sometimes we don't want to terminate
        done = done and not self._never_done

        return Step(np.copy(self._point), reward, done)
    def render(self, mode='human'):
        pass
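
As a quick sanity check, the environment can be driven with a hand-coded policy that always moves toward the goal. The sketch below is illustrative only and not part of the garage source; it assumes the `Step` namedtuple returned by `step()` exposes `observation`, `reward`, and `done` fields, and that the goal is the default `(1., 1.)` so the point reaches it in a handful of steps.

import numpy as np

from garage.envs.point_env import PointEnv

env = PointEnv(goal=np.array((1., 1.)), done_bonus=5.)
obs = env.reset()

for t in range(200):
    # greedy action: head straight for the goal; step() clips it to [-0.1, 0.1]
    action = np.array((1., 1.), dtype=np.float32) - obs
    step = env.step(action)
    obs, reward, done = step.observation, step.reward, step.done
    print(t, obs, reward, done)
    if done:
        break

Because the action space is bounded at 0.1 per axis, the point advances along the diagonal by about 0.14 per step, so the dense reward climbs toward zero and the `done_bonus` is added on the step that lands within the goal threshold.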