Source code for garage.tf.models.mlp_dueling_model

"""MLP Dueling Model."""
import tensorflow as tf

from garage.tf.models.base import Model
from garage.tf.models.mlp import mlp


class MLPDuelingModel(Model):
    """MLP Model with a dueling network structure.

    Args:
        output_dim (int): Dimension of the network output.
        hidden_sizes (list[int]): Output dimension of dense layer(s).
            For example, (32, 32) means this MLP consists of two
            hidden layers, each with 32 hidden units.
        name (str): Model name, also the variable scope.
        hidden_nonlinearity (callable): Activation function for intermediate
            dense layer(s). It should return a tf.Tensor. Set it to None to
            maintain a linear activation.
        hidden_w_init (callable): Initializer function for the weight of
            intermediate dense layer(s). The function should return a
            tf.Tensor.
        hidden_b_init (callable): Initializer function for the bias of
            intermediate dense layer(s). The function should return a
            tf.Tensor.
        output_nonlinearity (callable): Activation function for output dense
            layer. It should return a tf.Tensor. Set it to None to maintain
            a linear activation.
        output_w_init (callable): Initializer function for the weight of
            output dense layer(s). The function should return a tf.Tensor.
        output_b_init (callable): Initializer function for the bias of
            output dense layer(s). The function should return a tf.Tensor.
        layer_normalization (bool): Bool for using layer normalization or
            not.

    """

    def __init__(self,
                 output_dim,
                 name=None,
                 hidden_sizes=(32, 32),
                 hidden_nonlinearity=tf.nn.relu,
                 hidden_w_init=tf.contrib.layers.xavier_initializer(),
                 hidden_b_init=tf.zeros_initializer(),
                 output_nonlinearity=None,
                 output_w_init=tf.contrib.layers.xavier_initializer(),
                 output_b_init=tf.zeros_initializer(),
                 layer_normalization=False):
        super().__init__(name)
        self._output_dim = output_dim
        self._hidden_sizes = hidden_sizes
        self._hidden_nonlinearity = hidden_nonlinearity
        self._hidden_w_init = hidden_w_init
        self._hidden_b_init = hidden_b_init
        self._output_nonlinearity = output_nonlinearity
        self._output_w_init = output_w_init
        self._output_b_init = output_b_init
        self._layer_normalization = layer_normalization

    def _build(self, state_input, name=None):
        action_out = mlp(input_var=state_input,
                         output_dim=self._output_dim,
                         hidden_sizes=self._hidden_sizes,
                         name='action_value',
                         hidden_nonlinearity=self._hidden_nonlinearity,
                         hidden_w_init=self._hidden_w_init,
                         hidden_b_init=self._hidden_b_init,
                         output_nonlinearity=self._output_nonlinearity,
                         output_w_init=self._output_w_init,
                         output_b_init=self._output_b_init,
                         layer_normalization=self._layer_normalization)
        state_out = mlp(input_var=state_input,
                        output_dim=1,
                        hidden_sizes=self._hidden_sizes,
                        name='state_value',
                        hidden_nonlinearity=self._hidden_nonlinearity,
                        hidden_w_init=self._hidden_w_init,
                        hidden_b_init=self._hidden_b_init,
                        output_nonlinearity=self._output_nonlinearity,
                        output_w_init=self._output_w_init,
                        output_b_init=self._output_b_init,
                        layer_normalization=self._layer_normalization)

        action_out_mean = tf.reduce_mean(action_out, 1)
        # Calculate the advantage of performing a certain action over the
        # other actions in a particular state:
        # A(s, a) = Q_head(s, a) - mean_a' Q_head(s, a')
        action_out_advantage = action_out - tf.expand_dims(action_out_mean, 1)
        # Dueling aggregation: Q(s, a) = V(s) + A(s, a)
        q_func_out = state_out + action_out_advantage

        return q_func_out
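
A minimal usage sketch follows. It assumes TF1-style graph execution and that Model.build here returns the output tensor of _build (in some garage versions build returns a network object whose outputs attribute holds that tensor); the state dimension (4), action count (2), and the name 'dueling_q' are placeholders chosen for illustration, not values from this module.

import numpy as np
import tensorflow as tf

from garage.tf.models.mlp_dueling_model import MLPDuelingModel

# Hypothetical sizes: 4-dimensional observations, 2 discrete actions.
state_input = tf.placeholder(tf.float32, shape=(None, 4))
model = MLPDuelingModel(output_dim=2, hidden_sizes=(32, 32), name='dueling_q')
q_values = model.build(state_input)  # assumed to return the Q-value tensor

with tf.Session() as sess:
    sess.run(tf.global_variables_initializer())
    q = sess.run(q_values, feed_dict={state_input: np.zeros((1, 4))})
    print(q.shape)  # (1, 2): one Q-value per action for the single input state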