Source code for garage.tf.distributions.categorical

import numpy as np
import tensorflow as tf

from garage.tf.distributions.base import Distribution
from garage.tf.misc.tensor_utils import compile_function

TINY = 1e-8


def from_onehot(x_var):
    """Convert a batch of one-hot rows into integer category indices.

    Args:
        x_var: 2-D array-like; each row is expected to be a one-hot vector.

    Returns:
        numpy.ndarray: ``int32`` array with one entry per row of ``x_var``
            holding the column index of that row's non-zero element. Rows
            without any non-zero element keep the initial value 0.
    """
    indices = np.zeros((len(x_var), ), dtype='int32')
    # np.nonzero yields parallel (row, column) coordinates of the non-zero
    # entries; scatter each column index into its row's slot.
    row_ids, col_ids = np.nonzero(x_var)
    indices[row_ids] = col_ids
    return indices


class Categorical(Distribution):
    """Categorical distribution described by per-category probabilities.

    Distribution-info dictionaries carry the probabilities under the
    ``'prob'`` key. All reductions are taken over the last axis, i.e. the
    category axis of an ``N * A`` layout.

    Args:
        dim (int): Number of categories.
        name (str): Name of the variable scope in which the sampling graph
            is built. When None, the default scope name 'Categorical' is
            used.
    """

    def __init__(self, dim, name=None):
        with tf.compat.v1.variable_scope(name, 'Categorical'):
            self._dim = dim
            self._name = name
            weights_var = tf.compat.v1.placeholder(
                dtype=tf.float32, shape=(None, dim), name='weights')
            # tf.random.categorical expects unnormalized log-probabilities;
            # TINY keeps the log finite for zero-probability categories.
            # num_samples=1 gives shape (N, 1), so column 0 is selected.
            self._f_sample = compile_function(
                inputs=[weights_var],
                outputs=tf.random.categorical(
                    tf.math.log(weights_var + TINY), num_samples=1)[:, 0],
            )

    @property
    def dim(self):
        """int: Number of categories."""
        return self._dim

    def kl_sym(self, old_dist_info_vars, new_dist_info_vars, name=None):
        """Compute the symbolic KL divergence of two categorical distributions.

        Args:
            old_dist_info_vars (dict): Holds the old probabilities tensor
                under ``'prob'``.
            new_dist_info_vars (dict): Holds the new probabilities tensor
                under ``'prob'``.
            name (str): Name of the op scope; defaults to 'kl_sym'.

        Returns:
            tf.Tensor: KL(old || new), summed over the last axis.
        """
        with tf.compat.v1.name_scope(
                name, 'kl_sym', [old_dist_info_vars, new_dist_info_vars]):
            old_prob_var = old_dist_info_vars['prob']
            new_prob_var = new_dist_info_vars['prob']
            # Assume layout is N * A; axis=-1 also works when the static
            # rank of the tensor is unknown.
            return tf.reduce_sum(
                old_prob_var * (tf.math.log(old_prob_var + TINY) -
                                tf.math.log(new_prob_var + TINY)),
                axis=-1)

    def kl(self, old_dist_info, new_dist_info):
        """Compute the KL divergence of two categorical distributions.

        Args:
            old_dist_info (dict): Holds the old probabilities array under
                ``'prob'``.
            new_dist_info (dict): Holds the new probabilities array under
                ``'prob'``.

        Returns:
            numpy.ndarray: KL(old || new), summed over the last axis.
        """
        old_prob = old_dist_info['prob']
        new_prob = new_dist_info['prob']
        return np.sum(
            old_prob * (np.log(old_prob + TINY) - np.log(new_prob + TINY)),
            axis=-1)

    def likelihood_ratio_sym(self,
                             x_var,
                             old_dist_info_vars,
                             new_dist_info_vars,
                             name=None):
        """Compute the symbolic likelihood ratio new(x) / old(x).

        Args:
            x_var (tf.Tensor): Cast to float32 and used as a multiplicative
                mask over the category axis, so a one-hot encoding selects
                the probability of the chosen category.
            old_dist_info_vars (dict): Holds the old probabilities tensor
                under ``'prob'``.
            new_dist_info_vars (dict): Holds the new probabilities tensor
                under ``'prob'``.
            name (str): Name of the op scope; defaults to
                'likelihood_ratio_sym'.

        Returns:
            tf.Tensor: Ratio of the selected new probability to the selected
                old probability; TINY is added to both to avoid 0 / 0.
        """
        with tf.compat.v1.name_scope(
                name, 'likelihood_ratio_sym',
            [x_var, old_dist_info_vars, new_dist_info_vars]):
            old_prob_var = old_dist_info_vars['prob']
            new_prob_var = new_dist_info_vars['prob']
            x_var = tf.cast(x_var, tf.float32)
            # Assume layout is N * A
            new_likelihood = tf.reduce_sum(new_prob_var * x_var, axis=-1)
            old_likelihood = tf.reduce_sum(old_prob_var * x_var, axis=-1)
            return (new_likelihood + TINY) / (old_likelihood + TINY)

    def entropy_sym(self, dist_info_vars, name=None):
        """Compute the symbolic entropy of the distribution.

        Args:
            dist_info_vars (dict): Holds the probabilities tensor under
                ``'prob'``.
            name (str): Name of the op scope; defaults to 'entropy_sym'.

        Returns:
            tf.Tensor: Entropy, summed over the last axis.
        """
        with tf.compat.v1.name_scope(name, 'entropy_sym', [dist_info_vars]):
            probs = dist_info_vars['prob']
            # Reduce over the last (category) axis, like the sibling
            # methods; identical to axis=1 for the N * A layout.
            return -tf.reduce_sum(probs * tf.math.log(probs + TINY), axis=-1)

    def cross_entropy_sym(self,
                          old_dist_info_vars,
                          new_dist_info_vars,
                          name=None):
        """Compute the symbolic cross entropy of two distributions.

        Args:
            old_dist_info_vars (dict): Holds the old probabilities tensor
                under ``'prob'``.
            new_dist_info_vars (dict): Holds the new probabilities tensor
                under ``'prob'``.
            name (str): Name of the op scope; defaults to
                'cross_entropy_sym'.

        Returns:
            tf.Tensor: ``-sum(old * log(new + TINY))`` over the last axis.
        """
        with tf.compat.v1.name_scope(
                name, 'cross_entropy_sym',
            [old_dist_info_vars, new_dist_info_vars]):
            old_prob_var = old_dist_info_vars['prob']
            new_prob_var = new_dist_info_vars['prob']
            # Assume layout is N * A
            return tf.reduce_sum(
                old_prob_var * (-tf.math.log(new_prob_var + TINY)), axis=-1)

    def entropy(self, info):
        """Compute the entropy of the distribution.

        Args:
            info (dict): Holds the probabilities array under ``'prob'``.

        Returns:
            numpy.ndarray: Entropy, summed over the last axis.
        """
        probs = info['prob']
        return -np.sum(probs * np.log(probs + TINY), axis=-1)

    def log_likelihood_sym(self, x_var, dist_info_vars, name=None):
        """Compute the symbolic log likelihood of ``x_var``.

        Args:
            x_var (tf.Tensor): Cast to float32 and used as a multiplicative
                mask over the category axis, so a one-hot encoding selects
                the probability of the chosen category.
            dist_info_vars (dict): Holds the probabilities tensor under
                ``'prob'``.
            name (str): Name of the op scope; defaults to
                'log_likelihood_sym'.

        Returns:
            tf.Tensor: Log of the selected probability plus TINY.
        """
        with tf.compat.v1.name_scope(name, 'log_likelihood_sym',
                                     [x_var, dist_info_vars]):
            probs = dist_info_vars['prob']
            selected = tf.reduce_sum(
                probs * tf.cast(x_var, tf.float32), axis=-1)
            return tf.math.log(selected + TINY)

    def log_likelihood(self, xs, dist_info):
        """Compute the log likelihood of ``xs``.

        Args:
            xs (numpy.ndarray): Used as a multiplicative mask over the
                category axis, so a one-hot encoding selects the probability
                of the chosen category.
            dist_info (dict): Holds the probabilities array under ``'prob'``.

        Returns:
            numpy.ndarray: Log of the selected probability plus TINY.
        """
        probs = dist_info['prob']
        # Assume layout is N * A
        return np.log(np.sum(probs * xs, axis=-1) + TINY)

    @property
    def dist_info_specs(self):
        """list[tuple]: ``(key, shape)`` pairs of the distribution info."""
        return [('prob', (self.dim, ))]

    def sample(self, dist_info):
        """Draw one category index per row of probabilities.

        Args:
            dist_info (dict): Holds the probabilities under ``'prob'``; they
                are fed to the ``(None, dim)`` float32 placeholder created in
                the constructor.

        Returns:
            The result of the compiled sampling function (one sampled
            category index per row).
        """
        return self._f_sample(dist_info['prob'])

    def sample_sym(self, dist_info, name=None):
        """Build a symbolic op sampling one-hot encoded categories.

        Args:
            dist_info (dict): Holds the probabilities tensor under
                ``'prob'``; the sampler indexes its result as ``[:, 0]``, so
                the tensor is expected to be 2-D (N * A).
            name (str): Name of the op scope; defaults to 'sample_sym'.

        Returns:
            tf.Tensor: float32 rows of the ``dim x dim`` identity matrix
                selected by the sampled category indices.
        """
        with tf.compat.v1.name_scope(name, 'sample_sym', [dist_info]):
            probs = dist_info['prob']
            # tf.random.categorical supersedes the deprecated tf.multinomial
            # and matches the sampler built in the constructor.
            samples = tf.random.categorical(
                tf.math.log(probs + TINY), num_samples=1)[:, 0]

            # Looking up rows of the identity matrix turns each sampled
            # index into its one-hot encoding.
            return tf.nn.embedding_lookup(
                np.eye(self.dim, dtype=np.float32), samples)