
"""GaussianMLPModule."""
import abc

import torch
from torch import nn
from torch.distributions import Normal
from torch.distributions.independent import Independent

from garage.torch.distributions import TanhNormal
from garage.torch.modules.mlp_module import MLPModule
from garage.torch.modules.multi_headed_mlp_module import MultiHeadedMLPModule


class GaussianMLPBaseModule(nn.Module):
    """Base class for Gaussian MLP modules.

    Args:
        input_dim (int): Input dimension of the model.
        output_dim (int): Output dimension of the model.
        hidden_sizes (list[int]): Output dimension of dense layer(s) for
            the MLP for mean. For example, (32, 32) means the MLP consists
            of two hidden layers, each with 32 hidden units.
        hidden_nonlinearity (callable): Activation function for intermediate
            dense layer(s). It should return a torch.Tensor. Set it to
            None to maintain a linear activation.
        hidden_w_init (callable): Initializer function for the weight
            of intermediate dense layer(s). The function should return a
            torch.Tensor.
        hidden_b_init (callable): Initializer function for the bias
            of intermediate dense layer(s). The function should return a
            torch.Tensor.
        output_nonlinearity (callable): Activation function for output dense
            layer. It should return a torch.Tensor. Set it to None to
            maintain a linear activation.
        output_w_init (callable): Initializer function for the weight
            of output dense layer(s). The function should return a
            torch.Tensor.
        output_b_init (callable): Initializer function for the bias
            of output dense layer(s). The function should return a
            torch.Tensor.
        learn_std (bool): Whether the std is trainable.
        init_std (float): Initial value for std
            (plain value - not log or exponentiated).
        std_hidden_sizes (list[int]): Output dimension of dense layer(s) for
            the MLP for std. For example, (32, 32) means the MLP consists
            of two hidden layers, each with 32 hidden units.
        min_std (float): If not None, the std is at least the value of
            min_std, to avoid numerical issues
            (plain value - not log or exponentiated).
        max_std (float): If not None, the std is at most the value of
            max_std, to avoid numerical issues
            (plain value - not log or exponentiated).
        std_hidden_nonlinearity (callable): Nonlinearity for each hidden layer
            in the std network.
        std_hidden_w_init (callable): Initializer function for the weight
            of hidden layer(s) in the std network.
        std_hidden_b_init (callable): Initializer function for the bias
            of hidden layer(s) in the std network.
        std_output_nonlinearity (callable): Activation function for output
            dense layer in the std network. It should return a torch.Tensor.
            Set it to None to maintain a linear activation.
        std_output_w_init (callable): Initializer function for the weight
            of output dense layer(s) in the std network.
        std_parameterization (str): How the std should be parametrized. There
            are two options:
            - exp: the logarithm of the std will be stored, and an
                exponential transformation will be applied to recover it.
            - softplus: the std will be computed as log(1+exp(x)).
        layer_normalization (bool): Bool for using layer normalization or not.
        normal_distribution_cls (torch.distribution): normal distribution
            class to be constructed and returned by a call to forward. By
            default, is `torch.distributions.Normal`.

    """
""" def __init__(self, input_dim, output_dim, hidden_sizes=(32, 32), hidden_nonlinearity=torch.tanh, hidden_w_init=nn.init.xavier_uniform_, hidden_b_init=nn.init.zeros_, output_nonlinearity=None, output_w_init=nn.init.xavier_uniform_, output_b_init=nn.init.zeros_, learn_std=True, init_std=1.0, min_std=1e-6, max_std=None, std_hidden_sizes=(32, 32), std_hidden_nonlinearity=torch.tanh, std_hidden_w_init=nn.init.xavier_uniform_, std_hidden_b_init=nn.init.zeros_, std_output_nonlinearity=None, std_output_w_init=nn.init.xavier_uniform_, std_parameterization='exp', layer_normalization=False, normal_distribution_cls=Normal): super().__init__() self._input_dim = input_dim self._hidden_sizes = hidden_sizes self._action_dim = output_dim self._learn_std = learn_std self._std_hidden_sizes = std_hidden_sizes self._min_std = min_std self._max_std = max_std self._std_hidden_nonlinearity = std_hidden_nonlinearity self._std_hidden_w_init = std_hidden_w_init self._std_hidden_b_init = std_hidden_b_init self._std_output_nonlinearity = std_output_nonlinearity self._std_output_w_init = std_output_w_init self._std_parameterization = std_parameterization self._hidden_nonlinearity = hidden_nonlinearity self._hidden_w_init = hidden_w_init self._hidden_b_init = hidden_b_init self._output_nonlinearity = output_nonlinearity self._output_w_init = output_w_init self._output_b_init = output_b_init self._layer_normalization = layer_normalization self._norm_dist_class = normal_distribution_cls if self._std_parameterization not in ('exp', 'softplus'): raise NotImplementedError init_std_param = torch.Tensor([init_std]).log() if self._learn_std: self._init_std = torch.nn.Parameter(init_std_param) else: self._init_std = init_std_param self.register_buffer('init_std', self._init_std) self._min_std_param = self._max_std_param = None if min_std is not None: self._min_std_param = torch.Tensor([min_std]).log() self.register_buffer('min_std_param', self._min_std_param) if max_std is not None: self._max_std_param = torch.Tensor([max_std]).log() self.register_buffer('max_std_param', self._max_std_param)
    def to(self, *args, **kwargs):
        """Move the module to the specified device.

        Args:
            *args: args to pytorch to function.
            **kwargs: keyword args to pytorch to function.

        """
        super().to(*args, **kwargs)
        buffers = dict(self.named_buffers())
        if not isinstance(self._init_std, torch.nn.Parameter):
            self._init_std = buffers['init_std']
        self._min_std_param = buffers['min_std_param']
        self._max_std_param = buffers['max_std_param']
    @abc.abstractmethod
    def _get_mean_and_log_std(self, *inputs):
        pass
    def forward(self, *inputs):
        """Forward method.

        Args:
            *inputs: Input to the module.

        Returns:
            torch.distributions.independent.Independent: Independent
                distribution.

        """
        mean, log_std_uncentered = self._get_mean_and_log_std(*inputs)

        if self._min_std_param or self._max_std_param:
            log_std_uncentered = log_std_uncentered.clamp(
                min=(None if self._min_std_param is None else
                     self._min_std_param.item()),
                max=(None if self._max_std_param is None else
                     self._max_std_param.item()))

        if self._std_parameterization == 'exp':
            std = log_std_uncentered.exp()
        else:
            std = log_std_uncentered.exp().exp().add(1.).log()

        dist = self._norm_dist_class(mean, std)
        # This control flow is needed because if a TanhNormal distribution is
        # wrapped by torch.distributions.Independent, then custom functions
        # such as rsample_with_pretanh_value of the TanhNormal distribution
        # are not accessible.
        if not isinstance(dist, TanhNormal):
            # Makes it so that a sample from the distribution is treated as a
            # single sample and not dist.batch_shape samples.
            dist = Independent(dist, 1)

        return dist
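
# A minimal sketch (not part of the library source) of how a concrete subclass
# plugs into GaussianMLPBaseModule.forward(): it only needs to provide
# _get_mean_and_log_std(); the base class then clamps the log std, applies the
# chosen parameterization, and wraps the result in a distribution. The class
# name, hidden sizes, and fixed log std below are illustrative assumptions.
#
#     class FixedStdGaussianModule(GaussianMLPBaseModule):
#         def __init__(self, input_dim, output_dim):
#             super().__init__(input_dim, output_dim)
#             self._mean_module = MLPModule(input_dim, output_dim, (32, 32))
#
#         def _get_mean_and_log_std(self, *inputs):
#             mean = self._mean_module(*inputs)
#             # Constant log std of 0 -> std of 1 under the 'exp'
#             # parameterization.
#             return mean, torch.zeros_like(mean)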
class GaussianMLPModule(GaussianMLPBaseModule):
    """GaussianMLPModule in which the mean and std share the same network.

    Args:
        input_dim (int): Input dimension of the model.
        output_dim (int): Output dimension of the model.
        hidden_sizes (list[int]): Output dimension of dense layer(s) for
            the MLP for mean. For example, (32, 32) means the MLP consists
            of two hidden layers, each with 32 hidden units.
        hidden_nonlinearity (callable): Activation function for intermediate
            dense layer(s). It should return a torch.Tensor. Set it to
            None to maintain a linear activation.
        hidden_w_init (callable): Initializer function for the weight
            of intermediate dense layer(s). The function should return a
            torch.Tensor.
        hidden_b_init (callable): Initializer function for the bias
            of intermediate dense layer(s). The function should return a
            torch.Tensor.
        output_nonlinearity (callable): Activation function for output dense
            layer. It should return a torch.Tensor. Set it to None to
            maintain a linear activation.
        output_w_init (callable): Initializer function for the weight
            of output dense layer(s). The function should return a
            torch.Tensor.
        output_b_init (callable): Initializer function for the bias
            of output dense layer(s). The function should return a
            torch.Tensor.
        learn_std (bool): Whether the std is trainable.
        init_std (float): Initial value for std
            (plain value - not log or exponentiated).
        min_std (float): If not None, the std is at least the value of
            min_std, to avoid numerical issues
            (plain value - not log or exponentiated).
        max_std (float): If not None, the std is at most the value of
            max_std, to avoid numerical issues
            (plain value - not log or exponentiated).
        std_parameterization (str): How the std should be parametrized. There
            are two options:
            - exp: the logarithm of the std will be stored, and an
                exponential transformation will be applied to recover it.
            - softplus: the std will be computed as log(1+exp(x)).
        layer_normalization (bool): Bool for using layer normalization or not.
        normal_distribution_cls (torch.distribution): normal distribution
            class to be constructed and returned by a call to forward. By
            default, is `torch.distributions.Normal`.

    """

    def __init__(self,
                 input_dim,
                 output_dim,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=torch.tanh,
                 hidden_w_init=nn.init.xavier_uniform_,
                 hidden_b_init=nn.init.zeros_,
                 output_nonlinearity=None,
                 output_w_init=nn.init.xavier_uniform_,
                 output_b_init=nn.init.zeros_,
                 learn_std=True,
                 init_std=1.0,
                 min_std=1e-6,
                 max_std=None,
                 std_parameterization='exp',
                 layer_normalization=False,
                 normal_distribution_cls=Normal):
        super(GaussianMLPModule, self).__init__(
            input_dim=input_dim,
            output_dim=output_dim,
            hidden_sizes=hidden_sizes,
            hidden_nonlinearity=hidden_nonlinearity,
            hidden_w_init=hidden_w_init,
            hidden_b_init=hidden_b_init,
            output_nonlinearity=output_nonlinearity,
            output_w_init=output_w_init,
            output_b_init=output_b_init,
            learn_std=learn_std,
            init_std=init_std,
            min_std=min_std,
            max_std=max_std,
            std_parameterization=std_parameterization,
            layer_normalization=layer_normalization,
            normal_distribution_cls=normal_distribution_cls)

        self._mean_module = MLPModule(
            input_dim=self._input_dim,
            output_dim=self._action_dim,
            hidden_sizes=self._hidden_sizes,
            hidden_nonlinearity=self._hidden_nonlinearity,
            hidden_w_init=self._hidden_w_init,
            hidden_b_init=self._hidden_b_init,
            output_nonlinearity=self._output_nonlinearity,
            output_w_init=self._output_w_init,
            output_b_init=self._output_b_init,
            layer_normalization=self._layer_normalization)

    def _get_mean_and_log_std(self, *inputs):
        """Get mean and std of Gaussian distribution given inputs.
        Args:
            *inputs: Input to the module.

        Returns:
            torch.Tensor: The mean of the Gaussian distribution.
            torch.Tensor: The log standard deviation of the Gaussian
                distribution.

        """
        assert len(inputs) == 1
        mean = self._mean_module(*inputs)

        broadcast_shape = list(inputs[0].shape[:-1]) + [self._action_dim]
        uncentered_log_std = torch.zeros(*broadcast_shape) + self._init_std

        return mean, uncentered_log_std
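
# A minimal usage sketch (not part of the library source), assuming a
# 4-dimensional input and a 2-dimensional output; the sizes and init_std
# value are illustrative.
#
#     module = GaussianMLPModule(input_dim=4, output_dim=2, init_std=0.5)
#     dist = module(torch.ones(3, 4))      # Independent(Normal) over 2 dims
#     actions = dist.rsample()             # shape (3, 2), differentiable
#     log_probs = dist.log_prob(actions)   # shape (3,), one value per row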
class GaussianMLPIndependentStdModule(GaussianMLPBaseModule):
    """GaussianMLPModule with separate networks for the mean and the std.

    Args:
        input_dim (int): Input dimension of the model.
        output_dim (int): Output dimension of the model.
        hidden_sizes (list[int]): Output dimension of dense layer(s) for
            the MLP for mean. For example, (32, 32) means the MLP consists
            of two hidden layers, each with 32 hidden units.
        hidden_nonlinearity (callable): Activation function for intermediate
            dense layer(s). It should return a torch.Tensor. Set it to
            None to maintain a linear activation.
        hidden_w_init (callable): Initializer function for the weight
            of intermediate dense layer(s). The function should return a
            torch.Tensor.
        hidden_b_init (callable): Initializer function for the bias
            of intermediate dense layer(s). The function should return a
            torch.Tensor.
        output_nonlinearity (callable): Activation function for output dense
            layer. It should return a torch.Tensor. Set it to None to
            maintain a linear activation.
        output_w_init (callable): Initializer function for the weight
            of output dense layer(s). The function should return a
            torch.Tensor.
        output_b_init (callable): Initializer function for the bias
            of output dense layer(s). The function should return a
            torch.Tensor.
        learn_std (bool): Whether the std is trainable.
        init_std (float): Initial value for std
            (plain value - not log or exponentiated).
        min_std (float): If not None, the std is at least the value of
            min_std, to avoid numerical issues
            (plain value - not log or exponentiated).
        max_std (float): If not None, the std is at most the value of
            max_std, to avoid numerical issues
            (plain value - not log or exponentiated).
        std_hidden_sizes (list[int]): Output dimension of dense layer(s) for
            the MLP for std. For example, (32, 32) means the MLP consists
            of two hidden layers, each with 32 hidden units.
        std_hidden_nonlinearity (callable): Nonlinearity for each hidden layer
            in the std network.
        std_hidden_w_init (callable): Initializer function for the weight
            of hidden layer(s) in the std network.
        std_hidden_b_init (callable): Initializer function for the bias
            of hidden layer(s) in the std network.
        std_output_nonlinearity (callable): Activation function for output
            dense layer in the std network. It should return a torch.Tensor.
            Set it to None to maintain a linear activation.
        std_output_w_init (callable): Initializer function for the weight
            of output dense layer(s) in the std network.
        std_parameterization (str): How the std should be parametrized. There
            are two options:
            - exp: the logarithm of the std will be stored, and an
                exponential transformation will be applied to recover it.
            - softplus: the std will be computed as log(1+exp(x)).
        layer_normalization (bool): Bool for using layer normalization or not.
        normal_distribution_cls (torch.distribution): normal distribution
            class to be constructed and returned by a call to forward. By
            default, is `torch.distributions.Normal`.

    """
""" def __init__(self, input_dim, output_dim, hidden_sizes=(32, 32), hidden_nonlinearity=torch.tanh, hidden_w_init=nn.init.xavier_uniform_, hidden_b_init=nn.init.zeros_, output_nonlinearity=None, output_w_init=nn.init.xavier_uniform_, output_b_init=nn.init.zeros_, learn_std=True, init_std=1.0, min_std=1e-6, max_std=None, std_hidden_sizes=(32, 32), std_hidden_nonlinearity=torch.tanh, std_hidden_w_init=nn.init.xavier_uniform_, std_hidden_b_init=nn.init.zeros_, std_output_nonlinearity=None, std_output_w_init=nn.init.xavier_uniform_, std_parameterization='exp', layer_normalization=False, normal_distribution_cls=Normal): super(GaussianMLPIndependentStdModule, self).__init__(input_dim=input_dim, output_dim=output_dim, hidden_sizes=hidden_sizes, hidden_nonlinearity=hidden_nonlinearity, hidden_w_init=hidden_w_init, hidden_b_init=hidden_b_init, output_nonlinearity=output_nonlinearity, output_w_init=output_w_init, output_b_init=output_b_init, learn_std=learn_std, init_std=init_std, min_std=min_std, max_std=max_std, std_hidden_sizes=std_hidden_sizes, std_hidden_nonlinearity=std_hidden_nonlinearity, std_hidden_w_init=std_hidden_w_init, std_hidden_b_init=std_hidden_b_init, std_output_nonlinearity=std_output_nonlinearity, std_output_w_init=std_output_w_init, std_parameterization=std_parameterization, layer_normalization=layer_normalization, normal_distribution_cls=normal_distribution_cls) self._mean_module = MLPModule( input_dim=self._input_dim, output_dim=self._action_dim, hidden_sizes=self._hidden_sizes, hidden_nonlinearity=self._hidden_nonlinearity, hidden_w_init=self._hidden_w_init, hidden_b_init=self._hidden_b_init, output_nonlinearity=self._output_nonlinearity, output_w_init=self._output_w_init, output_b_init=self._output_b_init, layer_normalization=self._layer_normalization) self._log_std_module = MLPModule( input_dim=self._input_dim, output_dim=self._action_dim, hidden_sizes=self._std_hidden_sizes, hidden_nonlinearity=self._std_hidden_nonlinearity, hidden_w_init=self._std_hidden_w_init, hidden_b_init=self._std_hidden_b_init, output_nonlinearity=self._std_output_nonlinearity, output_w_init=self._std_output_w_init, output_b_init=self._init_std_b, layer_normalization=self._layer_normalization) def _init_std_b(self, b): """Default bias initialization function. Args: b (torch.Tensor): The bias tensor. Returns: torch.Tensor: The bias tensor itself. """ return nn.init.constant_(b, self._init_std.item()) def _get_mean_and_log_std(self, *inputs): """Get mean and std of Gaussian distribution given inputs. Args: *inputs: Input to the module. Returns: torch.Tensor: The mean of Gaussian distribution. torch.Tensor: The variance of Gaussian distribution. """ return self._mean_module(*inputs), self._log_std_module(*inputs)
class GaussianMLPTwoHeadedModule(GaussianMLPBaseModule):
    """GaussianMLPModule with a single shared network with two output heads,
    one for the mean and one for the std.

    Args:
        input_dim (int): Input dimension of the model.
        output_dim (int): Output dimension of the model.
        hidden_sizes (list[int]): Output dimension of dense layer(s) for
            the MLP for mean. For example, (32, 32) means the MLP consists
            of two hidden layers, each with 32 hidden units.
        hidden_nonlinearity (callable): Activation function for intermediate
            dense layer(s). It should return a torch.Tensor. Set it to
            None to maintain a linear activation.
        hidden_w_init (callable): Initializer function for the weight
            of intermediate dense layer(s). The function should return a
            torch.Tensor.
        hidden_b_init (callable): Initializer function for the bias
            of intermediate dense layer(s). The function should return a
            torch.Tensor.
        output_nonlinearity (callable): Activation function for output dense
            layer. It should return a torch.Tensor. Set it to None to
            maintain a linear activation.
        output_w_init (callable): Initializer function for the weight
            of output dense layer(s). The function should return a
            torch.Tensor.
        output_b_init (callable): Initializer function for the bias
            of output dense layer(s). The function should return a
            torch.Tensor.
        learn_std (bool): Whether the std is trainable.
        init_std (float): Initial value for std
            (plain value - not log or exponentiated).
        min_std (float): If not None, the std is at least the value of
            min_std, to avoid numerical issues
            (plain value - not log or exponentiated).
        max_std (float): If not None, the std is at most the value of
            max_std, to avoid numerical issues
            (plain value - not log or exponentiated).
        std_parameterization (str): How the std should be parametrized. There
            are two options:
            - exp: the logarithm of the std will be stored, and an
                exponential transformation will be applied to recover it.
            - softplus: the std will be computed as log(1+exp(x)).
        layer_normalization (bool): Bool for using layer normalization or not.
        normal_distribution_cls (torch.distribution): normal distribution
            class to be constructed and returned by a call to forward. By
            default, is `torch.distributions.Normal`.

    """
""" def __init__(self, input_dim, output_dim, hidden_sizes=(32, 32), hidden_nonlinearity=torch.tanh, hidden_w_init=nn.init.xavier_uniform_, hidden_b_init=nn.init.zeros_, output_nonlinearity=None, output_w_init=nn.init.xavier_uniform_, output_b_init=nn.init.zeros_, learn_std=True, init_std=1.0, min_std=1e-6, max_std=None, std_parameterization='exp', layer_normalization=False, normal_distribution_cls=Normal): super(GaussianMLPTwoHeadedModule, self).__init__(input_dim=input_dim, output_dim=output_dim, hidden_sizes=hidden_sizes, hidden_nonlinearity=hidden_nonlinearity, hidden_w_init=hidden_w_init, hidden_b_init=hidden_b_init, output_nonlinearity=output_nonlinearity, output_w_init=output_w_init, output_b_init=output_b_init, learn_std=learn_std, init_std=init_std, min_std=min_std, max_std=max_std, std_parameterization=std_parameterization, layer_normalization=layer_normalization, normal_distribution_cls=normal_distribution_cls) self._shared_mean_log_std_network = MultiHeadedMLPModule( n_heads=2, input_dim=self._input_dim, output_dims=self._action_dim, hidden_sizes=self._hidden_sizes, hidden_nonlinearity=self._hidden_nonlinearity, hidden_w_init=self._hidden_w_init, hidden_b_init=self._hidden_b_init, output_nonlinearities=self._output_nonlinearity, output_w_inits=self._output_w_init, output_b_inits=[ nn.init.zeros_, lambda x: nn.init.constant_(x, self._init_std.item()) ], layer_normalization=self._layer_normalization) def _get_mean_and_log_std(self, *inputs): """Get mean and std of Gaussian distribution given inputs. Args: *inputs: Input to the module. Returns: torch.Tensor: The mean of Gaussian distribution. torch.Tensor: The variance of Gaussian distribution. """ return self._shared_mean_log_std_network(*inputs)