import numpy as np
import tensorflow as tf
from garage.tf.distributions.base import Distribution


class DiagonalGaussian(Distribution):
    """Multivariate Gaussian with a diagonal covariance matrix,
    parameterized by 'mean' and 'log_std' entries in its dist_info
    dictionaries.
    """

    def __init__(self, dim, name='DiagonalGaussian'):
        self._dim = dim
        self._name = name

    @property
    def dim(self):
        return self._dim
    def kl(self, old_dist_info, new_dist_info):
        """Compute the KL divergence between two multivariate Gaussian
        distributions with diagonal covariance matrices.
        """
        old_means = old_dist_info['mean']
        old_log_stds = old_dist_info['log_std']
        new_means = new_dist_info['mean']
        new_log_stds = new_dist_info['log_std']
        old_std = np.exp(old_log_stds)
        new_std = np.exp(new_log_stds)
        # means: (N, A)
        # std: (N, A)
        # Per-dimension KL, summed over the last axis:
        # { (\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2 } / (2\sigma_2^2) +
        # ln(\sigma_2/\sigma_1)
        numerator = np.square(old_means - new_means) + \
            np.square(old_std) - np.square(new_std)
        # The small constant guards against division by zero for tiny stds.
        denominator = 2 * np.square(new_std) + 1e-8
        return np.sum(
            numerator / denominator + new_log_stds - old_log_stds, axis=-1)
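
    # Illustrative check (not in the original source): in one dimension,
    # for old N(0, 1) (log_std = 0) and new N(1, e) (log_std = 0.5), the
    # expression above evaluates to
    #     ((0 - 1)^2 + 1 - e) / (2e) + 0.5 = 1/e ≈ 0.3679,
    # matching the closed-form Gaussian KL
    #     ln(s2/s1) + (s1^2 + (m1 - m2)^2) / (2 s2^2) - 1/2.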

    def kl_sym(self, old_dist_info_vars, new_dist_info_vars, name=None):
        """Symbolically compute the KL divergence between two multivariate
        Gaussian distributions with diagonal covariance matrices.
        """
        with tf.name_scope(name, 'kl_sym',
                           [old_dist_info_vars, new_dist_info_vars]):
            old_means = old_dist_info_vars['mean']
            old_log_stds = old_dist_info_vars['log_std']
            new_means = new_dist_info_vars['mean']
            new_log_stds = new_dist_info_vars['log_std']
            old_std = tf.exp(old_log_stds)
            new_std = tf.exp(new_log_stds)
            # means: (N, A)
            # std: (N, A)
            # Same per-dimension formula as kl() above:
            # { (\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2 } / (2\sigma_2^2) +
            # ln(\sigma_2/\sigma_1)
            numerator = tf.square(old_means - new_means) + \
                tf.square(old_std) - tf.square(new_std)
            denominator = 2 * tf.square(new_std) + 1e-8
            return tf.reduce_sum(
                numerator / denominator + new_log_stds - old_log_stds,
                axis=-1)

    def likelihood_ratio_sym(self,
                             x_var,
                             old_dist_info_vars,
                             new_dist_info_vars,
                             name=None):
        with tf.name_scope(name, 'likelihood_ratio_sym',
                           [x_var, old_dist_info_vars, new_dist_info_vars]):
            # p_new(x) / p_old(x), computed in log space for numerical
            # stability.
            logli_new = self.log_likelihood_sym(x_var, new_dist_info_vars)
            logli_old = self.log_likelihood_sym(x_var, old_dist_info_vars)
            return tf.exp(logli_new - logli_old)

    def log_likelihood_sym(self, x_var, dist_info_vars, name=None):
        with tf.name_scope(name, 'log_likelihood_sym',
                           [x_var, dist_info_vars]):
            means = dist_info_vars['mean']
            log_stds = dist_info_vars['log_std']
            # Standardize, then evaluate the diagonal Gaussian log-density:
            # -sum(log_stds) - 0.5 * ||z||^2 - (dim / 2) * log(2 * pi)
            zs = (x_var - means) / tf.exp(log_stds)
            return - tf.reduce_sum(log_stds, axis=-1) - \
                0.5 * tf.reduce_sum(tf.square(zs), axis=-1) - \
                0.5 * self.dim * np.log(2 * np.pi)

    def sample(self, dist_info):
        means = dist_info['mean']
        log_stds = dist_info['log_std']
        # Reparameterized draw: x = mu + sigma * eps with eps ~ N(0, I).
        rnd = np.random.normal(size=means.shape)
        return rnd * np.exp(log_stds) + means

    def log_likelihood(self, xs, dist_info):
        means = dist_info['mean']
        log_stds = dist_info['log_std']
        # NumPy mirror of log_likelihood_sym above.
        zs = (xs - means) / np.exp(log_stds)
        return - np.sum(log_stds, axis=-1) - \
            0.5 * np.sum(np.square(zs), axis=-1) - \
            0.5 * self.dim * np.log(2 * np.pi)
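
    # Illustrative check (not in the original source): with dim = 1,
    # mean = 0, log_std = 0 and x = 0, this is the standard normal density
    # at its mode, so log_likelihood = -0.5 * log(2 * pi) ≈ -0.9189.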

    def entropy(self, dist_info):
        log_stds = dist_info['log_std']
        # Per-dimension entropy of a Gaussian: log(sigma) + 0.5*log(2*pi*e).
        return np.sum(log_stds + np.log(np.sqrt(2 * np.pi * np.e)), axis=-1)
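
    # Illustrative check (not in the original source): a unit Gaussian
    # (log_std = 0) contributes 0.5 * log(2 * pi * e) ≈ 1.4189 nats per
    # dimension, so the total entropy scales linearly with dim.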

    def entropy_sym(self, dist_info_var, name=None):
        with tf.name_scope(name, 'entropy_sym', [dist_info_var]):
            log_std_var = dist_info_var['log_std']
            return tf.reduce_sum(
                log_std_var + np.log(np.sqrt(2 * np.pi * np.e)), axis=-1)

    @property
    def dist_info_specs(self):
        return [('mean', (self.dim, )), ('log_std', (self.dim, ))]
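

# ---------------------------------------------------------------------------
# Usage sketch (not part of the original module): exercises the NumPy-based
# methods only. Assumes this module is importable, i.e. garage and a
# TF1-style TensorFlow are installed; shapes follow dist_info_specs with a
# leading batch axis, i.e. (N, dim).
# ---------------------------------------------------------------------------
if __name__ == '__main__':
    dist = DiagonalGaussian(dim=2)
    old_info = dict(mean=np.zeros((3, 2)), log_std=np.zeros((3, 2)))
    new_info = dict(mean=np.ones((3, 2)), log_std=0.5 * np.ones((3, 2)))

    xs = dist.sample(old_info)  # one draw per batch row
    print('log_likelihood:', dist.log_likelihood(xs, old_info))
    print('kl:', dist.kl(old_info, new_info))  # ≈ 2/e ≈ 0.7358 per row
    print('entropy:', dist.entropy(old_info))  # 2 * 1.4189 ≈ 2.8379 per row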