Source code for garage.tf.baselines.gaussian_cnn_baseline

"""Gaussian CNN Baseline."""
import akro
import numpy as np

from garage.misc.tensor_utils import normalize_pixel_batch
from garage.np.baselines import Baseline
from garage.tf.regressors import GaussianCNNRegressor


[docs]class GaussianCNNBaseline(Baseline):
    """GaussianCNNBaseline With Model.

    It fits the input data to a gaussian distribution estimated by a CNN.

    Args:
        env_spec (garage.envs.env_spec.EnvSpec): Environment specification.
        subsample_factor (float): The factor to subsample the data. By
            default it is 1.0, which means using all the data.
        regressor_args (dict): Arguments for regressor.
        name (str): Name of baseline.

    """

    def __init__(
            self,
            env_spec,
            subsample_factor=1.,
            regressor_args=None,
            name='GaussianCNNBaseline',
    ):
        if not isinstance(env_spec.observation_space, akro.Box) or \
                not len(env_spec.observation_space.shape) in (2, 3):
            raise ValueError(
                '{} can only process 2D, 3D akro.Image or'
                ' akro.Box observations, but received an env_spec with '
                'observation_space of type {} and shape {}'.format(
                    type(self).__name__,
                    type(env_spec.observation_space).__name__,
                    env_spec.observation_space.shape))

        super().__init__(env_spec)
        if regressor_args is None:
            regressor_args = dict()

        self._regressor = GaussianCNNRegressor(
            input_shape=(env_spec.observation_space.shape),
            output_dim=1,
            subsample_factor=subsample_factor,
            name=name,
            **regressor_args)
        self.name = name
        self.env_spec = env_spec

[docs]    def fit(self, paths):
        """Fit regressor based on paths.

        Args:
            paths (dict[numpy.ndarray]): Sample paths.

        """
        observations = np.concatenate([p['observations'] for p in paths])
        if isinstance(self.env_spec.observation_space, akro.Image):
            observations = normalize_pixel_batch(observations)

        returns = np.concatenate([p['returns'] for p in paths])
        self._regressor.fit(observations, returns.reshape((-1, 1)))

[docs]    def predict(self, path):
        """Predict value based on paths.

        Args:
            path (dict[numpy.ndarray]): Sample paths.

        Returns:
            numpy.ndarray: Predicted value.

        """
        observations = path['observations']
        if isinstance(self.env_spec.observation_space, akro.Image):
            observations = normalize_pixel_batch(observations)

        return self._regressor.predict(observations).flatten()

[docs]    def get_param_values(self):
        """Get parameter values.

        Returns:
            List[np.ndarray]: A list of values of each parameter.

        """
        return self._regressor.get_param_values()

[docs]    def set_param_values(self, flattened_params):
        """Set param values.

        Args:
            flattened_params (np.ndarray): A numpy array of parameter values.

        """
        self._regressor.set_param_values(flattened_params)

[docs]    def get_params_internal(self):
        """Get the params, which are the trainable variables.

        Returns:
            List[tf.Variable]: A list of trainable variables in the current
            variable scope.

        """
        return self._regressor.get_params_internal()