"""GaussianMLPModule."""
import abc
import torch
from torch import nn
from torch.distributions import Normal
from torch.distributions.independent import Independent
from garage.torch.distributions import TanhNormal
from garage.torch.modules.mlp_module import MLPModule
from garage.torch.modules.multi_headed_mlp_module import MultiHeadedMLPModule


class GaussianMLPBaseModule(nn.Module):
    """Base of GaussianMLPModule.

    Args:
input_dim (int): Input dimension of the model.
output_dim (int): Output dimension of the model.
hidden_sizes (list[int]): Output dimension of dense layer(s) for
the MLP for mean. For example, (32, 32) means the MLP consists
of two hidden layers, each with 32 hidden units.
hidden_nonlinearity (callable): Activation function for intermediate
dense layer(s). It should return a torch.Tensor. Set it to
None to maintain a linear activation.
hidden_w_init (callable): Initializer function for the weight
of intermediate dense layer(s). The function should return a
torch.Tensor.
hidden_b_init (callable): Initializer function for the bias
of intermediate dense layer(s). The function should return a
torch.Tensor.
output_nonlinearity (callable): Activation function for output dense
layer. It should return a torch.Tensor. Set it to None to
maintain a linear activation.
output_w_init (callable): Initializer function for the weight
of output dense layer(s). The function should return a
torch.Tensor.
output_b_init (callable): Initializer function for the bias
of output dense layer(s). The function should return a
torch.Tensor.
        learn_std (bool): Whether the std is trainable.
init_std (float): Initial value for std.
(plain value - not log or exponentiated).
std_hidden_sizes (list[int]): Output dimension of dense layer(s) for
the MLP for std. For example, (32, 32) means the MLP consists
of two hidden layers, each with 32 hidden units.
min_std (float): If not None, the std is at least the value of min_std,
to avoid numerical issues (plain value - not log or exponentiated).
max_std (float): If not None, the std is at most the value of max_std,
to avoid numerical issues (plain value - not log or exponentiated).
std_hidden_nonlinearity (callable): Nonlinearity for each hidden layer
in the std network.
        std_hidden_w_init (callable): Initializer function for the weight
            of hidden layer(s) in the std network.
        std_hidden_b_init (callable): Initializer function for the bias
            of intermediate dense layer(s) in the std network.
std_output_nonlinearity (callable): Activation function for output
dense layer in the std network. It should return a torch.Tensor.
Set it to None to maintain a linear activation.
std_output_w_init (callable): Initializer function for the weight
of output dense layer(s) in the std network.
        std_parameterization (str): How the std should be parametrized. There
            are two options:
            - exp: the logarithm of the std will be stored, and an
              exponential transformation will be applied to recover it.
            - softplus: the std will be computed as log(1 + exp(x)).
        layer_normalization (bool): Bool for using layer normalization or not.
        normal_distribution_cls (torch.distributions.Distribution): Normal
            distribution class to be constructed and returned by a call to
            forward. By default, it is `torch.distributions.Normal`.
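
    Example:
        A worked sketch of the two parameterizations (values illustrative,
        not from the source): with ``init_std=2.0`` the module stores
        ``log(2.0)`` internally. Under ``exp`` the std is recovered as
        ``exp(log(2.0)) == 2.0``; under ``softplus`` it becomes
        ``log(1 + exp(2.0)) ~= 2.13``.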
"""
def __init__(self,
input_dim,
output_dim,
hidden_sizes=(32, 32),
hidden_nonlinearity=torch.tanh,
hidden_w_init=nn.init.xavier_uniform_,
hidden_b_init=nn.init.zeros_,
output_nonlinearity=None,
output_w_init=nn.init.xavier_uniform_,
output_b_init=nn.init.zeros_,
learn_std=True,
init_std=1.0,
min_std=1e-6,
max_std=None,
std_hidden_sizes=(32, 32),
std_hidden_nonlinearity=torch.tanh,
std_hidden_w_init=nn.init.xavier_uniform_,
std_hidden_b_init=nn.init.zeros_,
std_output_nonlinearity=None,
std_output_w_init=nn.init.xavier_uniform_,
std_parameterization='exp',
layer_normalization=False,
normal_distribution_cls=Normal):
super().__init__()
self._input_dim = input_dim
self._hidden_sizes = hidden_sizes
self._action_dim = output_dim
self._learn_std = learn_std
self._std_hidden_sizes = std_hidden_sizes
self._min_std = min_std
self._max_std = max_std
self._std_hidden_nonlinearity = std_hidden_nonlinearity
self._std_hidden_w_init = std_hidden_w_init
self._std_hidden_b_init = std_hidden_b_init
self._std_output_nonlinearity = std_output_nonlinearity
self._std_output_w_init = std_output_w_init
self._std_parameterization = std_parameterization
self._hidden_nonlinearity = hidden_nonlinearity
self._hidden_w_init = hidden_w_init
self._hidden_b_init = hidden_b_init
self._output_nonlinearity = output_nonlinearity
self._output_w_init = output_w_init
self._output_b_init = output_b_init
self._layer_normalization = layer_normalization
self._norm_dist_class = normal_distribution_cls
        if self._std_parameterization not in ('exp', 'softplus'):
            raise NotImplementedError(
                'std_parameterization must be one of "exp" or "softplus".')
        init_std_param = torch.Tensor([init_std]).log()
        if self._learn_std:
            self._init_std = torch.nn.Parameter(init_std_param)
        else:
            self._init_std = init_std_param
            self.register_buffer('init_std', self._init_std)

        self._min_std_param = self._max_std_param = None
        if min_std is not None:
            self._min_std_param = torch.Tensor([min_std]).log()
            self.register_buffer('min_std_param', self._min_std_param)
        if max_std is not None:
            self._max_std_param = torch.Tensor([max_std]).log()
            self.register_buffer('max_std_param', self._max_std_param)

    def to(self, *args, **kwargs):
        """Move the module to the specified device.

        Args:
            *args: args to pytorch to function.
            **kwargs: keyword args to pytorch to function.

        Returns:
            torch.nn.Module: This module on the specified device.

        """
        ret = super().to(*args, **kwargs)
        buffers = dict(self.named_buffers())
        if not isinstance(self._init_std, torch.nn.Parameter):
            self._init_std = buffers['init_std']
        # The min/max buffers exist only when the corresponding bound was
        # given, so fall back to None instead of raising a KeyError.
        self._min_std_param = buffers.get('min_std_param')
        self._max_std_param = buffers.get('max_std_param')
        return ret

    @abc.abstractmethod
    def _get_mean_and_log_std(self, *inputs):
        """Return the mean and uncentered log std given inputs."""

    def forward(self, *inputs):
        """Forward method.

        Args:
            *inputs: Input to the module.

        Returns:
            torch.distributions.independent.Independent: Independent
                distribution. When ``normal_distribution_cls`` is
                ``TanhNormal``, the ``TanhNormal`` distribution is returned
                unwrapped instead.

        """
        mean, log_std_uncentered = self._get_mean_and_log_std(*inputs)

        if (self._min_std_param is not None
                or self._max_std_param is not None):
            log_std_uncentered = log_std_uncentered.clamp(
                min=(None if self._min_std_param is None else
                     self._min_std_param.item()),
                max=(None if self._max_std_param is None else
                     self._max_std_param.item()))

        if self._std_parameterization == 'exp':
            std = log_std_uncentered.exp()
        else:
            # softplus: exp() recovers the raw value from its stored log,
            # then log(1 + exp(x)) is applied to it.
            std = log_std_uncentered.exp().exp().add(1.).log()
        dist = self._norm_dist_class(mean, std)
        # This control flow is needed because if a TanhNormal distribution is
        # wrapped by torch.distributions.Independent, then custom functions
        # such as rsample_with_pretanh_value of the TanhNormal distribution
        # are not accessible.
        if not isinstance(dist, TanhNormal):
            # Makes it so that a sample from the distribution is treated as a
            # single sample and not dist.batch_shape samples.
            dist = Independent(dist, 1)
        return dist


class GaussianMLPModule(GaussianMLPBaseModule):
    """GaussianMLPModule where the std is a parameter, not a network output.

    The mean is produced by an MLP, while the (log) std is a single value
    broadcast across inputs; it is trainable when ``learn_std`` is True.

    Args:
input_dim (int): Input dimension of the model.
output_dim (int): Output dimension of the model.
hidden_sizes (list[int]): Output dimension of dense layer(s) for
the MLP for mean. For example, (32, 32) means the MLP consists
of two hidden layers, each with 32 hidden units.
hidden_nonlinearity (callable): Activation function for intermediate
dense layer(s). It should return a torch.Tensor. Set it to
None to maintain a linear activation.
hidden_w_init (callable): Initializer function for the weight
of intermediate dense layer(s). The function should return a
torch.Tensor.
hidden_b_init (callable): Initializer function for the bias
of intermediate dense layer(s). The function should return a
torch.Tensor.
output_nonlinearity (callable): Activation function for output dense
layer. It should return a torch.Tensor. Set it to None to
maintain a linear activation.
output_w_init (callable): Initializer function for the weight
of output dense layer(s). The function should return a
torch.Tensor.
output_b_init (callable): Initializer function for the bias
of output dense layer(s). The function should return a
torch.Tensor.
        learn_std (bool): Whether the std is trainable.
init_std (float): Initial value for std.
(plain value - not log or exponentiated).
min_std (float): If not None, the std is at least the value of min_std,
to avoid numerical issues (plain value - not log or exponentiated).
max_std (float): If not None, the std is at most the value of max_std,
to avoid numerical issues (plain value - not log or exponentiated).
        std_parameterization (str): How the std should be parametrized. There
            are two options:
            - exp: the logarithm of the std will be stored, and an
              exponential transformation will be applied to recover it.
            - softplus: the std will be computed as log(1 + exp(x)).
        layer_normalization (bool): Bool for using layer normalization or not.
        normal_distribution_cls (torch.distributions.Distribution): Normal
            distribution class to be constructed and returned by a call to
            forward. By default, it is `torch.distributions.Normal`.
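
    Example:
        A minimal usage sketch (the dimensions here are illustrative, not
        part of the API)::

            module = GaussianMLPModule(input_dim=4, output_dim=2)
            dist = module(torch.ones(3, 4))
            actions = dist.sample()             # shape: (3, 2)
            log_probs = dist.log_prob(actions)  # shape: (3,)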
"""
def __init__(self,
input_dim,
output_dim,
hidden_sizes=(32, 32),
hidden_nonlinearity=torch.tanh,
hidden_w_init=nn.init.xavier_uniform_,
hidden_b_init=nn.init.zeros_,
output_nonlinearity=None,
output_w_init=nn.init.xavier_uniform_,
output_b_init=nn.init.zeros_,
learn_std=True,
init_std=1.0,
min_std=1e-6,
max_std=None,
std_parameterization='exp',
layer_normalization=False,
normal_distribution_cls=Normal):
super(GaussianMLPModule,
self).__init__(input_dim=input_dim,
output_dim=output_dim,
hidden_sizes=hidden_sizes,
hidden_nonlinearity=hidden_nonlinearity,
hidden_w_init=hidden_w_init,
hidden_b_init=hidden_b_init,
output_nonlinearity=output_nonlinearity,
output_w_init=output_w_init,
output_b_init=output_b_init,
learn_std=learn_std,
init_std=init_std,
min_std=min_std,
max_std=max_std,
std_parameterization=std_parameterization,
layer_normalization=layer_normalization,
normal_distribution_cls=normal_distribution_cls)
self._mean_module = MLPModule(
input_dim=self._input_dim,
output_dim=self._action_dim,
hidden_sizes=self._hidden_sizes,
hidden_nonlinearity=self._hidden_nonlinearity,
hidden_w_init=self._hidden_w_init,
hidden_b_init=self._hidden_b_init,
output_nonlinearity=self._output_nonlinearity,
output_w_init=self._output_w_init,
output_b_init=self._output_b_init,
layer_normalization=self._layer_normalization)
def _get_mean_and_log_std(self, *inputs):
"""Get mean and std of Gaussian distribution given inputs.
Args:
*inputs: Input to the module.
Returns:
torch.Tensor: The mean of Gaussian distribution.
torch.Tensor: The variance of Gaussian distribution.
"""
assert len(inputs) == 1
mean = self._mean_module(*inputs)
        broadcast_shape = list(inputs[0].shape[:-1]) + [self._action_dim]
        # Broadcast the single (log) std parameter across the batch and
        # action dimensions.
        uncentered_log_std = torch.zeros(*broadcast_shape) + self._init_std
return mean, uncentered_log_std


class GaussianMLPIndependentStdModule(GaussianMLPBaseModule):
    """GaussianMLPModule which uses separate networks for the mean and std.

Args:
input_dim (int): Input dimension of the model.
output_dim (int): Output dimension of the model.
hidden_sizes (list[int]): Output dimension of dense layer(s) for
the MLP for mean. For example, (32, 32) means the MLP consists
of two hidden layers, each with 32 hidden units.
hidden_nonlinearity (callable): Activation function for intermediate
dense layer(s). It should return a torch.Tensor. Set it to
None to maintain a linear activation.
hidden_w_init (callable): Initializer function for the weight
of intermediate dense layer(s). The function should return a
torch.Tensor.
hidden_b_init (callable): Initializer function for the bias
of intermediate dense layer(s). The function should return a
torch.Tensor.
output_nonlinearity (callable): Activation function for output dense
layer. It should return a torch.Tensor. Set it to None to
maintain a linear activation.
output_w_init (callable): Initializer function for the weight
of output dense layer(s). The function should return a
torch.Tensor.
output_b_init (callable): Initializer function for the bias
of output dense layer(s). The function should return a
torch.Tensor.
        learn_std (bool): Whether the std is trainable.
init_std (float): Initial value for std.
(plain value - not log or exponentiated).
min_std (float): If not None, the std is at least the value of min_std,
to avoid numerical issues (plain value - not log or exponentiated).
max_std (float): If not None, the std is at most the value of max_std,
to avoid numerical issues (plain value - not log or exponentiated).
std_hidden_sizes (list[int]): Output dimension of dense layer(s) for
the MLP for std. For example, (32, 32) means the MLP consists
of two hidden layers, each with 32 hidden units.
std_hidden_nonlinearity (callable): Nonlinearity for each hidden layer
in the std network.
        std_hidden_w_init (callable): Initializer function for the weight
            of hidden layer(s) in the std network.
        std_hidden_b_init (callable): Initializer function for the bias
            of intermediate dense layer(s) in the std network.
std_output_nonlinearity (callable): Activation function for output
dense layer in the std network. It should return a torch.Tensor.
Set it to None to maintain a linear activation.
std_output_w_init (callable): Initializer function for the weight
of output dense layer(s) in the std network.
        std_parameterization (str): How the std should be parametrized. There
            are two options:
            - exp: the logarithm of the std will be stored, and an
              exponential transformation will be applied to recover it.
            - softplus: the std will be computed as log(1 + exp(x)).
        layer_normalization (bool): Bool for using layer normalization or not.
        normal_distribution_cls (torch.distributions.Distribution): Normal
            distribution class to be constructed and returned by a call to
            forward. By default, it is `torch.distributions.Normal`.
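
    Example:
        A minimal usage sketch (dimensions illustrative); here the std comes
        from its own MLP, so it depends on the input::

            module = GaussianMLPIndependentStdModule(input_dim=4,
                                                     output_dim=2)
            dist = module(torch.ones(3, 4))
            assert dist.sample().shape == (3, 2)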
"""
def __init__(self,
input_dim,
output_dim,
hidden_sizes=(32, 32),
hidden_nonlinearity=torch.tanh,
hidden_w_init=nn.init.xavier_uniform_,
hidden_b_init=nn.init.zeros_,
output_nonlinearity=None,
output_w_init=nn.init.xavier_uniform_,
output_b_init=nn.init.zeros_,
learn_std=True,
init_std=1.0,
min_std=1e-6,
max_std=None,
std_hidden_sizes=(32, 32),
std_hidden_nonlinearity=torch.tanh,
std_hidden_w_init=nn.init.xavier_uniform_,
std_hidden_b_init=nn.init.zeros_,
std_output_nonlinearity=None,
std_output_w_init=nn.init.xavier_uniform_,
std_parameterization='exp',
layer_normalization=False,
normal_distribution_cls=Normal):
super(GaussianMLPIndependentStdModule,
self).__init__(input_dim=input_dim,
output_dim=output_dim,
hidden_sizes=hidden_sizes,
hidden_nonlinearity=hidden_nonlinearity,
hidden_w_init=hidden_w_init,
hidden_b_init=hidden_b_init,
output_nonlinearity=output_nonlinearity,
output_w_init=output_w_init,
output_b_init=output_b_init,
learn_std=learn_std,
init_std=init_std,
min_std=min_std,
max_std=max_std,
std_hidden_sizes=std_hidden_sizes,
std_hidden_nonlinearity=std_hidden_nonlinearity,
std_hidden_w_init=std_hidden_w_init,
std_hidden_b_init=std_hidden_b_init,
std_output_nonlinearity=std_output_nonlinearity,
std_output_w_init=std_output_w_init,
std_parameterization=std_parameterization,
layer_normalization=layer_normalization,
normal_distribution_cls=normal_distribution_cls)
self._mean_module = MLPModule(
input_dim=self._input_dim,
output_dim=self._action_dim,
hidden_sizes=self._hidden_sizes,
hidden_nonlinearity=self._hidden_nonlinearity,
hidden_w_init=self._hidden_w_init,
hidden_b_init=self._hidden_b_init,
output_nonlinearity=self._output_nonlinearity,
output_w_init=self._output_w_init,
output_b_init=self._output_b_init,
layer_normalization=self._layer_normalization)
self._log_std_module = MLPModule(
input_dim=self._input_dim,
output_dim=self._action_dim,
hidden_sizes=self._std_hidden_sizes,
hidden_nonlinearity=self._std_hidden_nonlinearity,
hidden_w_init=self._std_hidden_w_init,
hidden_b_init=self._std_hidden_b_init,
output_nonlinearity=self._std_output_nonlinearity,
output_w_init=self._std_output_w_init,
output_b_init=self._init_std_b,
layer_normalization=self._layer_normalization)

    def _init_std_b(self, b):
        """Default bias initialization for the std network's output layer.

        Args:
            b (torch.Tensor): The bias tensor.

        Returns:
            torch.Tensor: The bias tensor, filled in-place with the initial
                log std value.

        """
        return nn.init.constant_(b, self._init_std.item())
def _get_mean_and_log_std(self, *inputs):
"""Get mean and std of Gaussian distribution given inputs.
Args:
*inputs: Input to the module.
Returns:
torch.Tensor: The mean of Gaussian distribution.
torch.Tensor: The variance of Gaussian distribution.
"""
return self._mean_module(*inputs), self._log_std_module(*inputs)


class GaussianMLPTwoHeadedModule(GaussianMLPBaseModule):
    """GaussianMLPModule which shares one network between the mean and std.

    The shared network has two output heads: one for the mean and one for
    the log std.

Args:
input_dim (int): Input dimension of the model.
output_dim (int): Output dimension of the model.
hidden_sizes (list[int]): Output dimension of dense layer(s) for
the MLP for mean. For example, (32, 32) means the MLP consists
of two hidden layers, each with 32 hidden units.
hidden_nonlinearity (callable): Activation function for intermediate
dense layer(s). It should return a torch.Tensor. Set it to
None to maintain a linear activation.
hidden_w_init (callable): Initializer function for the weight
of intermediate dense layer(s). The function should return a
torch.Tensor.
hidden_b_init (callable): Initializer function for the bias
of intermediate dense layer(s). The function should return a
torch.Tensor.
output_nonlinearity (callable): Activation function for output dense
layer. It should return a torch.Tensor. Set it to None to
maintain a linear activation.
output_w_init (callable): Initializer function for the weight
of output dense layer(s). The function should return a
torch.Tensor.
output_b_init (callable): Initializer function for the bias
of output dense layer(s). The function should return a
torch.Tensor.
        learn_std (bool): Whether the std is trainable.
init_std (float): Initial value for std.
(plain value - not log or exponentiated).
min_std (float): If not None, the std is at least the value of min_std,
to avoid numerical issues (plain value - not log or exponentiated).
max_std (float): If not None, the std is at most the value of max_std,
to avoid numerical issues (plain value - not log or exponentiated).
        std_parameterization (str): How the std should be parametrized. There
            are two options:
            - exp: the logarithm of the std will be stored, and an
              exponential transformation will be applied to recover it.
            - softplus: the std will be computed as log(1 + exp(x)).
        layer_normalization (bool): Bool for using layer normalization or not.
        normal_distribution_cls (torch.distributions.Distribution): Normal
            distribution class to be constructed and returned by a call to
            forward. By default, it is `torch.distributions.Normal`.
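
    Example:
        A minimal usage sketch (dimensions illustrative); the mean and log
        std are the two heads of one shared network::

            module = GaussianMLPTwoHeadedModule(input_dim=4, output_dim=2)
            dist = module(torch.ones(3, 4))
            assert dist.sample().shape == (3, 2)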
"""
def __init__(self,
input_dim,
output_dim,
hidden_sizes=(32, 32),
hidden_nonlinearity=torch.tanh,
hidden_w_init=nn.init.xavier_uniform_,
hidden_b_init=nn.init.zeros_,
output_nonlinearity=None,
output_w_init=nn.init.xavier_uniform_,
output_b_init=nn.init.zeros_,
learn_std=True,
init_std=1.0,
min_std=1e-6,
max_std=None,
std_parameterization='exp',
layer_normalization=False,
normal_distribution_cls=Normal):
super(GaussianMLPTwoHeadedModule,
self).__init__(input_dim=input_dim,
output_dim=output_dim,
hidden_sizes=hidden_sizes,
hidden_nonlinearity=hidden_nonlinearity,
hidden_w_init=hidden_w_init,
hidden_b_init=hidden_b_init,
output_nonlinearity=output_nonlinearity,
output_w_init=output_w_init,
output_b_init=output_b_init,
learn_std=learn_std,
init_std=init_std,
min_std=min_std,
max_std=max_std,
std_parameterization=std_parameterization,
layer_normalization=layer_normalization,
normal_distribution_cls=normal_distribution_cls)
self._shared_mean_log_std_network = MultiHeadedMLPModule(
n_heads=2,
input_dim=self._input_dim,
output_dims=self._action_dim,
hidden_sizes=self._hidden_sizes,
hidden_nonlinearity=self._hidden_nonlinearity,
hidden_w_init=self._hidden_w_init,
hidden_b_init=self._hidden_b_init,
output_nonlinearities=self._output_nonlinearity,
output_w_inits=self._output_w_init,
output_b_inits=[
nn.init.zeros_,
lambda x: nn.init.constant_(x, self._init_std.item())
],
layer_normalization=self._layer_normalization)
def _get_mean_and_log_std(self, *inputs):
"""Get mean and std of Gaussian distribution given inputs.
Args:
*inputs: Input to the module.
Returns:
torch.Tensor: The mean of Gaussian distribution.
torch.Tensor: The variance of Gaussian distribution.
"""
return self._shared_mean_log_std_network(*inputs)
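

# Illustrative sketch (not part of the library API): all three variants
# return a distribution with the same event shape; they differ only in how
# the std is produced (a standalone parameter, a second MLP, or a second
# head of a shared network).
if __name__ == '__main__':
    for cls in (GaussianMLPModule, GaussianMLPIndependentStdModule,
                GaussianMLPTwoHeadedModule):
        module = cls(input_dim=4, output_dim=2)
        dist = module(torch.ones(3, 4))
        print(cls.__name__, dist.sample().shape)  # torch.Size([3, 2])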