Source code for garage.tf.distributions.bernoulli

import numpy as np
import tensorflow as tf

from garage.tf.distributions.base import Distribution

# Small positive constant added inside every log() argument and to the
# likelihood-ratio denominators below, so that probabilities of exactly 0 or 1
# do not produce log(0) or a division by zero.
TINY = 1e-8


class Bernoulli(Distribution):
    """Factorised Bernoulli distribution parameterised by probabilities ``p``.

    Every method reads the success probabilities from the ``'p'`` entry of a
    distribution-info dict and reduces over the last axis of that entry.
    Methods ending in ``_sym`` build TensorFlow ops; the others operate on
    NumPy arrays.

    Args:
        dim: Value exposed through the :attr:`dim` property. Presumably the
            number of independent Bernoulli variables (size of the last axis
            of ``p``) -- not enforced anywhere in this class.
        name (str): Name stored on the instance.

    """

    def __init__(self, dim, name='Bernoulli'):
        self._name = name
        self._dim = dim

    @property
    def dim(self):
        """Return the ``dim`` value passed to the constructor."""
        return self._dim

    def kl_sym(self, old_dist_info_vars, new_dist_info_vars, name=None):
        """Build the symbolic KL divergence KL(old || new).

        Args:
            old_dist_info_vars (dict): Holds the old probabilities under
                ``'p'`` as a tensor.
            new_dist_info_vars (dict): Holds the new probabilities under
                ``'p'`` as a tensor.
            name (str): Optional name for the TensorFlow name scope.

        Returns:
            tf.Tensor: Element-wise KL terms summed over the last axis.

        """
        # NOTE(review): the three-argument name_scope call is the TF1-style
        # signature -- confirm the TensorFlow version this file targets.
        with tf.name_scope(name, 'kl_sym',
                           [old_dist_info_vars, new_dist_info_vars]):
            old_p = old_dist_info_vars['p']
            new_p = new_dist_info_vars['p']
            # TINY keeps the logs finite when a probability is 0 or 1.
            kl = (
                old_p * (tf.math.log(old_p + TINY) - tf.math.log(new_p + TINY))
                + (1 - old_p) *
                (tf.math.log(1 - old_p + TINY) - tf.math.log(1 - new_p + TINY))
            )
            ndims = kl.get_shape().ndims
            return tf.reduce_sum(kl, axis=ndims - 1)

    def kl(self, old_dist_info, new_dist_info):
        """Compute the KL divergence KL(old || new) with NumPy.

        Args:
            old_dist_info (dict): Holds the old probabilities under ``'p'``.
            new_dist_info (dict): Holds the new probabilities under ``'p'``.

        Returns:
            numpy.ndarray: Element-wise KL terms summed over the last axis.

        """
        # Coerce so that plain lists work, consistent with `sample`.
        old_p = np.asarray(old_dist_info['p'])
        new_p = np.asarray(new_dist_info['p'])
        kl = old_p * (np.log(old_p + TINY) - np.log(new_p + TINY)) \
            + (1 - old_p) \
            * (np.log(1 - old_p + TINY) - np.log(1 - new_p + TINY))
        return np.sum(kl, axis=-1)

    def sample(self, dist_info):
        """Draw one 0/1 sample per entry of ``p``.

        Args:
            dist_info (dict): Holds the probabilities under ``'p'``.

        Returns:
            numpy.ndarray: Integer array with the same shape as ``p``; an
                entry is 1 where a uniform draw from [0, 1) fell below the
                corresponding probability, else 0.

        """
        p = np.asarray(dist_info['p'])
        draws = np.random.uniform(low=0., high=1., size=p.shape)
        # `np.cast` was removed in NumPy 2.0; asarray(...).astype(int) is the
        # equivalent conversion and also works on older NumPy releases.
        return np.asarray(draws < p).astype(int)

    def likelihood_ratio_sym(self,
                             x_var,
                             old_dist_info_vars,
                             new_dist_info_vars,
                             name=None):
        """Build the symbolic likelihood ratio p_new(x) / p_old(x).

        Args:
            x_var (tf.Tensor): Observed outcomes; the formula treats each
                entry as 0 or 1.
            old_dist_info_vars (dict): Holds the old probabilities under
                ``'p'`` as a tensor.
            new_dist_info_vars (dict): Holds the new probabilities under
                ``'p'`` as a tensor.
            name (str): Optional name for the TensorFlow name scope.

        Returns:
            tf.Tensor: Per-element ratios multiplied together along axis
                ``rank(old_p) - 1``.

        """
        with tf.name_scope(name, 'likelihood_ratio_sym',
                           [x_var, old_dist_info_vars, new_dist_info_vars]):
            old_p = old_dist_info_vars['p']
            new_p = new_dist_info_vars['p']
            ndims = old_p.get_shape().ndims
            # TINY in the denominators guards against division by zero.
            return tf.reduce_prod(
                x_var * new_p / (old_p + TINY) +
                (1 - x_var) * (1 - new_p) / (1 - old_p + TINY),
                axis=ndims - 1)

    def log_likelihood_sym(self, x_var, dist_info_vars, name=None):
        """Build the symbolic log-likelihood of ``x_var``.

        Args:
            x_var (tf.Tensor): Observed outcomes; the formula treats each
                entry as 0 or 1.
            dist_info_vars (dict): Holds the probabilities under ``'p'`` as a
                tensor.
            name (str): Optional name for the TensorFlow name scope.

        Returns:
            tf.Tensor: Per-element log-probabilities summed along axis
                ``rank(p) - 1``.

        """
        with tf.name_scope(name, 'log_likelihood_sym',
                           [x_var, dist_info_vars]):
            p = dist_info_vars['p']
            ndims = p.get_shape().ndims
            return tf.reduce_sum(
                x_var * tf.math.log(p + TINY) +
                (1 - x_var) * tf.math.log(1 - p + TINY),
                axis=ndims - 1)

    def log_likelihood(self, xs, dist_info):
        """Compute the log-likelihood of ``xs`` with NumPy.

        Args:
            xs (array-like): Observed outcomes; the formula treats each entry
                as 0 or 1.
            dist_info (dict): Holds the probabilities under ``'p'``.

        Returns:
            numpy.ndarray: Per-element log-probabilities summed over the last
                axis.

        """
        # Coerce so that plain lists work, consistent with `sample`.
        xs = np.asarray(xs)
        p = np.asarray(dist_info['p'])
        return np.sum(
            xs * np.log(p + TINY) + (1 - xs) * np.log(1 - p + TINY), axis=-1)

    def entropy(self, dist_info):
        """Compute the entropy with NumPy.

        Args:
            dist_info (dict): Holds the probabilities under ``'p'``.

        Returns:
            numpy.ndarray: Per-element entropies (natural log) summed over
                the last axis.

        """
        # Coerce so that plain lists work, consistent with `sample`.
        p = np.asarray(dist_info['p'])
        return np.sum(
            -p * np.log(p + TINY) - (1 - p) * np.log(1 - p + TINY), axis=-1)

    @property
    def dist_info_keys(self):
        """Return the list of keys expected in a distribution-info dict."""
        return ['p']