Source code for garage.tf.regressors.gaussian_mlp_regressor_model

"""GaussianMLPRegressorModel."""
import numpy as np
import tensorflow as tf
import tensorflow_probability as tfp

from garage.tf.models import GaussianMLPModel


[docs]class GaussianMLPRegressorModel(GaussianMLPModel): """GaussianMLPRegressor based on garage.tf.models.Model class. This class can be used to perform regression by fitting a Gaussian distribution to the outputs. Args: input_shape (tuple[int]): Input shape of the training data. output_dim (int): Output dimension of the model. name (str): Model name, also the variable scope. hidden_sizes (list[int]): Output dimension of dense layer(s) for the MLP for mean. For example, (32, 32) means the MLP consists of two hidden layers, each with 32 hidden units. hidden_nonlinearity (callable): Activation function for intermediate dense layer(s). It should return a tf.Tensor. Set it to None to maintain a linear activation. hidden_w_init (callable): Initializer function for the weight of intermediate dense layer(s). The function should return a tf.Tensor. hidden_b_init (callable): Initializer function for the bias of intermediate dense layer(s). The function should return a tf.Tensor. output_nonlinearity (callable): Activation function for output dense layer. It should return a tf.Tensor. Set it to None to maintain a linear activation. output_w_init (callable): Initializer function for the weight of output dense layer(s). The function should return a tf.Tensor. output_b_init (callable): Initializer function for the bias of output dense layer(s). The function should return a tf.Tensor. learn_std (bool): Is std trainable. init_std (float): Initial value for std. adaptive_std (bool): Is std a neural network. If False, it will be a parameter. std_share_network (bool): Boolean for whether mean and std share the same network. std_hidden_sizes (list[int]): Output dimension of dense layer(s) for the MLP for std. For example, (32, 32) means the MLP consists of two hidden layers, each with 32 hidden units. min_std (float): If not None, the std is at least the value of min_std, to avoid numerical issues. max_std (float): If not None, the std is at most the value of max_std, to avoid numerical issues. std_hidden_nonlinearity (callable): Nonlinearity for each hidden layer in the std network. std_hidden_w_init (callable): Initializer function for the weight of intermediate dense layer(s) in the std network. std_hidden_b_init (callable): Initializer function for the bias of intermediate dense layer(s) in the std network. std_output_nonlinearity (callable): Activation function for output dense layer in the std network. It should return a tf.Tensor. Set it to None to maintain a linear activation. std_output_w_init (callable): Initializer function for the weight of output dense layer(s) in the std network. std_parameterization (str): How the std should be parametrized. There are two options: - exp: the logarithm of the std will be stored, and applied a exponential transformation - softplus: the std will be computed as log(1+exp(x)) layer_normalization (bool): Bool for using layer normalization or not. """ def __init__(self, input_shape, output_dim, name='GaussianMLPRegressorModel', hidden_sizes=(32, 32), hidden_nonlinearity=tf.nn.tanh, hidden_w_init=tf.initializers.glorot_uniform(), hidden_b_init=tf.zeros_initializer(), output_nonlinearity=None, output_w_init=tf.initializers.glorot_uniform(), output_b_init=tf.zeros_initializer(), learn_std=True, adaptive_std=False, std_share_network=False, init_std=1.0, min_std=1e-6, max_std=None, std_hidden_sizes=(32, 32), std_hidden_nonlinearity=tf.nn.tanh, std_hidden_w_init=tf.initializers.glorot_uniform(), std_hidden_b_init=tf.zeros_initializer(), std_output_nonlinearity=None, std_output_w_init=tf.initializers.glorot_uniform(), std_parameterization='exp', layer_normalization=False): super().__init__(output_dim=output_dim, name=name, hidden_sizes=hidden_sizes, hidden_nonlinearity=hidden_nonlinearity, hidden_w_init=hidden_w_init, hidden_b_init=hidden_b_init, output_nonlinearity=output_nonlinearity, output_w_init=output_w_init, output_b_init=output_b_init, learn_std=learn_std, adaptive_std=adaptive_std, std_share_network=std_share_network, init_std=init_std, min_std=min_std, max_std=max_std, std_hidden_sizes=std_hidden_sizes, std_hidden_nonlinearity=std_hidden_nonlinearity, std_output_nonlinearity=std_output_nonlinearity, std_parameterization=std_parameterization, layer_normalization=layer_normalization) self._input_shape = input_shape
[docs] def network_output_spec(self): """Network output spec. Return: list[str]: List of key(str) for the network outputs. """ return [ 'normalized_dist', 'normalized_mean', 'normalized_log_std', 'dist', 'mean', 'log_std', 'x_mean', 'x_std', 'y_mean', 'y_std' ]
def _build(self, state_input, name=None): """Build model given input placeholder(s). Args: state_input (tf.Tensor): Place holder for state input. name (str): Inner model name, also the variable scope of the inner model, if exist. One example is garage.tf.models.Sequential. Return: tfp.distributions.MultivariateNormalDiag: Normlizaed distribution. tf.Tensor: Normalized mean. tf.Tensor: Normalized log_std. tfp.distributions.MultivariateNormalDiag: Vanilla distribution. tf.Tensor: Vanilla mean. tf.Tensor: Vanilla log_std. tf.Tensor: Mean for data. tf.Tensor: log_std for data. tf.Tensor: Mean for label. tf.Tensor: log_std for label. """ with tf.compat.v1.variable_scope('normalized_vars'): x_mean_var = tf.compat.v1.get_variable( name='x_mean', shape=(1, ) + self._input_shape, dtype=np.float32, initializer=tf.zeros_initializer(), trainable=False) x_std_var = tf.compat.v1.get_variable( name='x_std_var', shape=(1, ) + self._input_shape, dtype=np.float32, initializer=tf.ones_initializer(), trainable=False) y_mean_var = tf.compat.v1.get_variable( name='y_mean_var', shape=(1, self._output_dim), dtype=np.float32, initializer=tf.zeros_initializer(), trainable=False) y_std_var = tf.compat.v1.get_variable( name='y_std_var', shape=(1, self._output_dim), dtype=np.float32, initializer=tf.ones_initializer(), trainable=False) normalized_xs_var = (state_input - x_mean_var) / x_std_var _, normalized_dist_mean, normalized_dist_log_std = super()._build( normalized_xs_var) # Since regressor expects [N, *dims], we need to squeeze the extra # dimension normalized_dist_log_std = tf.squeeze(normalized_dist_log_std, 1) with tf.name_scope('mean_network'): means_var = normalized_dist_mean * y_std_var + y_mean_var with tf.name_scope('std_network'): log_stds_var = normalized_dist_log_std + tf.math.log(y_std_var) normalized_dist = tfp.distributions.MultivariateNormalDiag( loc=normalized_dist_mean, scale_diag=tf.exp(normalized_dist_log_std)) vanilla_dist = tfp.distributions.MultivariateNormalDiag( loc=means_var, scale_diag=tf.exp(log_stds_var)) return (normalized_dist, normalized_dist_mean, normalized_dist_log_std, vanilla_dist, means_var, log_stds_var, x_mean_var, x_std_var, y_mean_var, y_std_var)
[docs] def clone(self, name): """Return a clone of the model. It only copies the configuration of the primitive, not the parameters. Args: name (str): Name of the newly created model. It has to be different from source model if cloned under the same computational graph. Returns: garage.tf.policies.GaussianMLPModel: Newly cloned model. """ return self.__class__( name=name, input_shape=self._input_shape, output_dim=self._output_dim, hidden_sizes=self._hidden_sizes, hidden_nonlinearity=self._hidden_nonlinearity, hidden_w_init=self._hidden_w_init, hidden_b_init=self._hidden_b_init, output_nonlinearity=self._output_nonlinearity, output_w_init=self._output_w_init, output_b_init=self._output_b_init, learn_std=self._learn_std, adaptive_std=self._adaptive_std, std_share_network=self._std_share_network, init_std=self._init_std, min_std=self._min_std, max_std=self._max_std, std_hidden_sizes=self._std_hidden_sizes, std_hidden_nonlinearity=self._std_hidden_nonlinearity, std_hidden_w_init=self._std_hidden_w_init, std_hidden_b_init=self._std_hidden_b_init, std_output_nonlinearity=self._std_output_nonlinearity, std_output_w_init=self._std_output_w_init, std_parameterization=self._std_parameterization, layer_normalization=self._layer_normalization)