import numpy as np
import tensorflow as tf
from garage.tf.distributions.base import Distribution
TINY = 1e-8
[docs]class Bernoulli(Distribution):
def __init__(self, dim, name='Bernoulli'):
self._name = name
self._dim = dim
@property
def dim(self):
return self._dim
[docs] def kl_sym(self, old_dist_info_vars, new_dist_info_vars, name=None):
with tf.name_scope(name, 'kl_sym',
[old_dist_info_vars, new_dist_info_vars]):
old_p = old_dist_info_vars['p']
new_p = new_dist_info_vars['p']
kl = (
old_p * (tf.math.log(old_p + TINY) - tf.math.log(new_p + TINY))
+ (1 - old_p) *
(tf.math.log(1 - old_p + TINY) - tf.math.log(1 - new_p + TINY))
)
ndims = kl.get_shape().ndims
return tf.reduce_sum(kl, axis=ndims - 1)
[docs] def kl(self, old_dist_info, new_dist_info):
old_p = old_dist_info['p']
new_p = new_dist_info['p']
kl = old_p * (np.log(old_p + TINY) - np.log(new_p + TINY)) \
+ (1 - old_p) \
* (np.log(1 - old_p + TINY) - np.log(1 - new_p + TINY))
return np.sum(kl, axis=-1)
[docs] def sample(self, dist_info):
p = np.asarray(dist_info['p'])
return np.cast['int'](
np.random.uniform(low=0., high=1., size=p.shape) < p)
[docs] def likelihood_ratio_sym(self,
x_var,
old_dist_info_vars,
new_dist_info_vars,
name=None):
with tf.name_scope(name, 'likelihood_ratio_sym',
[x_var, old_dist_info_vars, new_dist_info_vars]):
old_p = old_dist_info_vars['p']
new_p = new_dist_info_vars['p']
ndims = old_p.get_shape().ndims
return tf.reduce_prod(
x_var * new_p / (old_p + TINY) +
(1 - x_var) * (1 - new_p) / (1 - old_p + TINY),
axis=ndims - 1)
[docs] def log_likelihood_sym(self, x_var, dist_info_vars, name=None):
with tf.name_scope(name, 'log_likelihood_sym',
[x_var, dist_info_vars]):
p = dist_info_vars['p']
ndims = p.get_shape().ndims
return tf.reduce_sum(
x_var * tf.math.log(p + TINY) +
(1 - x_var) * tf.math.log(1 - p + TINY),
axis=ndims - 1)
[docs] def log_likelihood(self, xs, dist_info):
p = dist_info['p']
return np.sum(
xs * np.log(p + TINY) + (1 - xs) * np.log(1 - p + TINY), axis=-1)
[docs] def entropy(self, dist_info):
p = dist_info['p']
return np.sum(
-p * np.log(p + TINY) - (1 - p) * np.log(1 - p + TINY), axis=-1)
@property
def dist_info_keys(self):
return ['p']