Source code for garage.tf.models.cnn

"""CNN in TensorFlow."""

import tensorflow as tf


def cnn(input_var,
        filter_dims,
        num_filters,
        strides,
        name,
        padding,
        hidden_nonlinearity=tf.nn.relu,
        hidden_w_init=tf.glorot_uniform_initializer(),
        hidden_b_init=tf.zeros_initializer()):
    """Convolutional neural network (CNN).

    Stacks one convolutional layer per entry of ``filter_dims`` /
    ``num_filters`` / ``strides`` and flattens the result to 2-D.

    Note:
        Based on 'NHWC' data format: [batch, height, width, channel].

        The three per-layer tuples are combined with ``zip``, so if their
        lengths differ the extra entries of the longer ones are ignored.

        All non-batch dimensions of the final feature map must be statically
        known, because the flattened size is computed while the graph is
        being built.

    Args:
        input_var (tf.Tensor): Input tf.Tensor to the CNN.
        filter_dims (tuple[int]): Dimension of the filters. For example,
            (3, 5) means there are two convolutional layers. The filter
            for first layer is of dimension (3 x 3) and the second one is
            of dimension (5 x 5).
        num_filters (tuple[int]): Number of filters (output channels) of
            each layer. For example, (3, 32) means there are two
            convolutional layers. The first layer outputs 3 channels and
            the second one outputs 32 channels.
        strides (tuple[int]): The stride of the sliding window. For
            example, (1, 2) means there are two convolutional layers. The
            stride of the filter for first layer is 1 and that of the
            second layer is 2. The same stride is used for height and
            width.
        name (str): Network name, also the variable scope.
        padding (str): The type of padding algorithm to use,
            either 'SAME' or 'VALID'.
        hidden_nonlinearity (callable): Activation function applied to the
            output of every convolutional layer. It should return a
            tf.Tensor. Set it to None to maintain a linear activation.
        hidden_w_init (callable): Initializer for the weight of the
            convolutional layer(s).
        hidden_b_init (callable): Initializer for the bias of the
            convolutional layer(s).

    Return:
        tf.Tensor: The output of the last layer, reshaped to
            [batch, height * width * channel].

    """
    with tf.compat.v1.variable_scope(name):
        h = input_var
        for index, (filter_dim, num_filter, stride) in enumerate(
                zip(filter_dims, num_filters, strides)):
            # NHWC strides: never stride over batch or channel.
            _stride = [1, stride, stride, 1]
            h = _conv(h, 'h{}'.format(index), filter_dim, num_filter,
                      _stride, hidden_w_init, hidden_b_init, padding)
            if hidden_nonlinearity is not None:
                h = hidden_nonlinearity(h)

        # flatten
        # The non-batch dimensions are plain Python ints here, so multiply
        # them directly instead of adding a reduce op to the graph.
        flat_dim = 1
        for size in h.get_shape()[1:].as_list():
            flat_dim *= size
        return tf.reshape(h, [-1, flat_dim])
def cnn_with_max_pooling(input_var,
                         filter_dims,
                         num_filters,
                         strides,
                         name,
                         pool_shapes,
                         pool_strides,
                         padding,
                         hidden_nonlinearity=tf.nn.relu,
                         hidden_w_init=tf.glorot_uniform_initializer(),
                         hidden_b_init=tf.zeros_initializer()):
    """Convolutional neural network (CNN) with max-pooling.

    Stacks one convolutional layer per entry of ``filter_dims`` /
    ``num_filters`` / ``strides``, applies max-pooling after every
    convolutional layer, and flattens the result to 2-D.

    Note:
        Based on 'NHWC' data format: [batch, height, width, channel].

        The three per-layer tuples are combined with ``zip``, so if their
        lengths differ the extra entries of the longer ones are ignored.

        All non-batch dimensions of the final feature map must be statically
        known, because the flattened size is computed while the graph is
        being built.

    Args:
        input_var (tf.Tensor): Input tf.Tensor to the CNN.
        filter_dims (tuple[int]): Dimension of the filters. For example,
            (3, 5) means there are two convolutional layers. The filter
            for first layer is of dimension (3 x 3) and the second one is
            of dimension (5 x 5).
        num_filters (tuple[int]): Number of filters (output channels) of
            each layer. For example, (3, 32) means there are two
            convolutional layers. The first layer outputs 3 channels and
            the second one outputs 32 channels.
        strides (tuple[int]): The stride of the sliding window. For
            example, (1, 2) means there are two convolutional layers. The
            stride of the filter for first layer is 1 and that of the
            second layer is 2. The same stride is used for height and
            width.
        name (str): Model name, also the variable scope of the cnn.
        pool_shapes (tuple[int]): Dimension (height, width) of the pooling
            window. For example, (2, 2) means that all the pooling layers
            have shape (2, 2).
        pool_strides (tuple[int]): The strides (height, width) of the
            pooling layer(s). For example, (2, 2) means that all the
            pooling layers have strides (2, 2).
        padding (str): The type of padding algorithm to use,
            either 'SAME' or 'VALID'. Used for both the convolution and
            the pooling.
        hidden_nonlinearity (callable): Activation function applied to the
            output of every convolutional layer, before pooling. It should
            return a tf.Tensor. Set it to None to maintain a linear
            activation.
        hidden_w_init (callable): Initializer for the weight of the
            convolutional layer(s).
        hidden_b_init (callable): Initializer for the bias of the
            convolutional layer(s).

    Return:
        tf.Tensor: The output of the last pooling layer, reshaped to
            [batch, height * width * channel].

    """
    # Expand the 2-D pooling settings to NHWC form; batch and channel are
    # never pooled. Separate names keep the caller-facing parameters intact.
    pool_stride_nhwc = [1, pool_strides[0], pool_strides[1], 1]
    pool_ksize_nhwc = [1, pool_shapes[0], pool_shapes[1], 1]

    with tf.compat.v1.variable_scope(name):
        h = input_var
        for index, (filter_dim, num_filter, stride) in enumerate(
                zip(filter_dims, num_filters, strides)):
            _stride = [1, stride, stride, 1]
            h = _conv(h, 'h{}'.format(index), filter_dim, num_filter,
                      _stride, hidden_w_init, hidden_b_init, padding)
            if hidden_nonlinearity is not None:
                h = hidden_nonlinearity(h)
            h = tf.nn.max_pool2d(h,
                                 ksize=pool_ksize_nhwc,
                                 strides=pool_stride_nhwc,
                                 padding=padding)

        # flatten
        # The non-batch dimensions are plain Python ints here, so multiply
        # them directly instead of adding a reduce op to the graph.
        flat_dim = 1
        for size in h.get_shape()[1:].as_list():
            flat_dim *= size
        return tf.reshape(h, [-1, flat_dim])
def _conv(input_var, name, filter_size, num_filter, strides, hidden_w_init,
          hidden_b_init, padding):
    """Build a single 2-D convolutional layer with a per-channel bias.

    Creates (or reuses, depending on the enclosing variable scope) the
    variables ``weight`` and ``bias`` under the variable scope ``name``.

    Args:
        input_var (tf.Tensor): Input tensor; its last dimension is used as
            the number of input channels and must be statically known.
        name (str): Variable scope of this layer.
        filter_size (int): Height and width of the (square) filter.
        num_filter (int): Number of output channels.
        strides (list[int]): Strides passed to ``tf.nn.conv2d``.
        hidden_w_init (callable): Initializer for the filter weights.
        hidden_b_init (callable): Initializer for the bias.
        padding (str): The type of padding algorithm to use,
            either 'SAME' or 'VALID'.

    Return:
        tf.Tensor: ``conv2d(input_var, weight) + bias``.

    """
    # channel from input
    # `as_list()` yields plain ints (or None) whether or not TensorShape v2
    # behaviour is enabled, unlike `Dimension.value`, which only exists in
    # v1 behaviour.
    in_channels = input_var.get_shape().as_list()[-1]
    # [filter_height, filter_width, in_channels, out_channels]
    w_shape = [filter_size, filter_size, in_channels, num_filter]
    # One bias per output channel, broadcast over batch, height and width.
    b_shape = [1, 1, 1, num_filter]

    with tf.compat.v1.variable_scope(name):
        weight = tf.compat.v1.get_variable('weight',
                                           w_shape,
                                           initializer=hidden_w_init)
        bias = tf.compat.v1.get_variable('bias',
                                         b_shape,
                                         initializer=hidden_b_init)

        return tf.nn.conv2d(
            input_var, weight, strides=strides, padding=padding) + bias