# Source code for garage.torch.embeddings.mlp_encoder

"""An MLP network for encoding context of RL tasks."""
import akro
import numpy as np

from garage import InOutSpec
from garage.np.embeddings import Encoder
from garage.torch.modules import MLPModule


class MLPEncoder(MLPModule, Encoder):
    """This MLP network encodes context of RL tasks.

    Context is stored in terms of observation, action, and reward, and this
    network uses an MLP module for encoding it.

    The class adds no constructor of its own; the arguments below are the
    ones documented for the MLP module it derives from.

    Args:
        input_dim (int) : Dimension of the network input.
        output_dim (int): Dimension of the network output.
        hidden_sizes (list[int]): Output dimension of dense layer(s).
            For example, (32, 32) means this MLP consists of two
            hidden layers, each with 32 hidden units.
        hidden_nonlinearity (callable or torch.nn.Module): Activation
            function for intermediate dense layer(s). It should return a
            torch.Tensor. Set it to None to maintain a linear activation.
        hidden_w_init (callable): Initializer function for the weight
            of intermediate dense layer(s). The function should return a
            torch.Tensor.
        hidden_b_init (callable): Initializer function for the bias
            of intermediate dense layer(s). The function should return a
            torch.Tensor.
        output_nonlinearity (callable or torch.nn.Module): Activation
            function for output dense layer. It should return a
            torch.Tensor. Set it to None to maintain a linear activation.
        output_w_init (callable): Initializer function for the weight
            of output dense layer(s). The function should return a
            torch.Tensor.
        output_b_init (callable): Initializer function for the bias
            of output dense layer(s). The function should return a
            torch.Tensor.
        layer_normalization (bool): Bool for using layer normalization or not.

    """

    # NOTE(review): `_input_dim` and `_output_dim` are read below but never
    # assigned in this class; they are presumably set by the parent module's
    # constructor -- confirm against MLPModule.

    @property
    def spec(self):
        """garage.InOutSpec: Input and output space.

        Both spaces are unbounded 1-D boxes whose lengths are the encoder's
        input and output dimensions.
        """
        # The shape must be a sequence: gym's Box converts it with
        # `tuple(shape)`, which raises TypeError for a bare int. Wrap the
        # scalar dimension in a 1-tuple explicitly.
        input_space = akro.Box(-np.inf, np.inf, shape=(self._input_dim, ))
        output_space = akro.Box(-np.inf, np.inf, shape=(self._output_dim, ))
        return InOutSpec(input_space, output_space)

    @property
    def input_dim(self):
        """int: Dimension of the encoder input."""
        return self._input_dim

    @property
    def output_dim(self):
        """int: Dimension of the encoder output (embedding)."""
        return self._output_dim

    def reset(self, do_resets=None):
        """Reset the encoder.

        This is effective only for a recurrent encoder, and do_resets is
        effective only for a vectorized encoder. This implementation has
        no body beyond this docstring, so calling it does nothing and
        returns None.

        For a vectorized encoder, do_resets is an array of booleans indicating
        which internal states are to be reset. The length of do_resets should
        be equal to the length of inputs.

        Args:
            do_resets (numpy.ndarray): Bool array indicating which states
                to be reset.

        """