Source code for garage.tf.distributions.diagonal_gaussian

import numpy as np
import tensorflow as tf

from garage.tf.distributions.base import Distribution


class DiagonalGaussian(Distribution):
    def __init__(self, dim, name='DiagonalGaussian'):
        self._dim = dim
        self._name = name

    @property
    def dim(self):
        return self._dim

    def kl(self, old_dist_info, new_dist_info):
        """Compute the KL divergence of two multivariate Gaussian
        distributions with diagonal covariance matrices.
        """
        old_means = old_dist_info['mean']
        old_log_stds = old_dist_info['log_std']
        new_means = new_dist_info['mean']
        new_log_stds = new_dist_info['log_std']
        old_std = np.exp(old_log_stds)
        new_std = np.exp(new_log_stds)
        # means: (N*A)
        # std: (N*A)
        # formula:
        # { (\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2 } / (2\sigma_2^2) +
        # ln(\sigma_2/\sigma_1)
        numerator = np.square(old_means - new_means) + \
            np.square(old_std) - np.square(new_std)
        denominator = 2 * np.square(new_std) + 1e-8
        return np.sum(
            numerator / denominator + new_log_stds - old_log_stds, axis=-1)
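
    # Worked check of the formula above (illustrative note, not part of the
    # original source): for two 1-D Gaussians N(0, 1) and N(1, 1), both
    # log-stds are 0, so per dimension
    #   KL = ((0 - 1)^2 + 1 - 1) / (2 * 1) + (0 - 0) = 0.5,
    # and kl() sums this over the last axis.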

    def kl_sym(self, old_dist_info_vars, new_dist_info_vars, name=None):
        """Compute the KL divergence of two multivariate Gaussian
        distributions with diagonal covariance matrices, symbolically.
        """
        with tf.name_scope(name, 'kl_sym',
                           [old_dist_info_vars, new_dist_info_vars]):
            old_means = old_dist_info_vars['mean']
            old_log_stds = old_dist_info_vars['log_std']
            new_means = new_dist_info_vars['mean']
            new_log_stds = new_dist_info_vars['log_std']
            old_std = tf.exp(old_log_stds)
            new_std = tf.exp(new_log_stds)
            # means: (N*A)
            # std: (N*A)
            # formula:
            # { (\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2 } / (2\sigma_2^2)
            # + ln(\sigma_2/\sigma_1)
            numerator = tf.square(old_means - new_means) + \
                tf.square(old_std) - tf.square(new_std)
            denominator = 2 * tf.square(new_std) + 1e-8
            return tf.reduce_sum(
                numerator / denominator + new_log_stds - old_log_stds,
                axis=-1)
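
    # In closed form, for diagonal Gaussians p = N(mu1, diag(sigma1^2)) and
    # q = N(mu2, diag(sigma2^2)):
    #   KL(p || q) = sum_i [ ((mu1_i - mu2_i)^2 + sigma1_i^2 - sigma2_i^2)
    #                        / (2 * sigma2_i^2) + log(sigma2_i / sigma1_i) ]
    # which is what both kl() and kl_sym() compute, up to the 1e-8 term
    # added to the denominator for numerical stability (a note, not part of
    # the original source).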

    def likelihood_ratio_sym(self, x_var, old_dist_info_vars,
                             new_dist_info_vars, name=None):
        with tf.name_scope(name, 'likelihood_ratio_sym',
                           [x_var, old_dist_info_vars, new_dist_info_vars]):
            logli_new = self.log_likelihood_sym(x_var, new_dist_info_vars)
            logli_old = self.log_likelihood_sym(x_var, old_dist_info_vars)
            return tf.exp(logli_new - logli_old)
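
    # Sketch of a typical use (assumed, not from the original source): in a
    # surrogate policy-gradient objective, the ratio pi_new(x) / pi_old(x)
    # is formed in log space, which avoids dividing two potentially tiny
    # densities. The action_var and advantage_var names are hypothetical:
    #   ratio = dist.likelihood_ratio_sym(action_var, old_info_vars,
    #                                     new_info_vars)
    #   surr_loss = -tf.reduce_mean(ratio * advantage_var)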

    def log_likelihood_sym(self, x_var, dist_info_vars, name=None):
        with tf.name_scope(name, 'log_likelihood_sym',
                           [x_var, dist_info_vars]):
            means = dist_info_vars['mean']
            log_stds = dist_info_vars['log_std']
            zs = (x_var - means) / tf.exp(log_stds)
            return - tf.reduce_sum(log_stds, axis=-1) - \
                0.5 * tf.reduce_sum(tf.square(zs), axis=-1) - \
                0.5 * self.dim * np.log(2 * np.pi)
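
    # Derivation note (not part of the original source): for a diagonal
    # Gaussian the log-density factorizes per dimension as
    #   log p(x) = sum_i [ -log sigma_i - 0.5 * z_i^2 ] - 0.5 * d * log(2*pi)
    # with z_i = (x_i - mu_i) / sigma_i, which is exactly the expression
    # returned above.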

    def sample(self, dist_info):
        means = dist_info['mean']
        log_stds = dist_info['log_std']
        rnd = np.random.normal(size=means.shape)
        return rnd * np.exp(log_stds) + means
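
    # Example (assumed usage, not part of the original source): sampling
    # uses the reparameterization x = mu + sigma * eps with eps ~ N(0, I):
    #   dist = DiagonalGaussian(dim=2)
    #   info = {'mean': np.zeros(2), 'log_std': np.log(0.5) * np.ones(2)}
    #   x = dist.sample(info)  # one draw from N(0, 0.25 * I)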

    def log_likelihood(self, xs, dist_info):
        means = dist_info['mean']
        log_stds = dist_info['log_std']
        zs = (xs - means) / np.exp(log_stds)
        return - np.sum(log_stds, axis=-1) - \
            0.5 * np.sum(np.square(zs), axis=-1) - \
            0.5 * self.dim * np.log(2 * np.pi)
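
    # Worked check (illustrative, not part of the original source): with
    # dim = 1, mean = 0, log_std = 0 and xs = 0, zs = 0, so
    #   log p(0) = -0 - 0 - 0.5 * log(2*pi) ~= -0.9189,
    # the log of the standard normal density at its mode.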

    def entropy(self, dist_info):
        log_stds = dist_info['log_std']
        return np.sum(log_stds + np.log(np.sqrt(2 * np.pi * np.e)), axis=-1)
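
    # Worked check (illustrative, not part of the original source): each
    # dimension contributes log(sigma) + 0.5 * log(2*pi*e), so a standard
    # normal (log_std = 0) has entropy 0.5 * log(2*pi*e) ~= 1.4189 nats
    # per dimension.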

    def entropy_sym(self, dist_info_var, name=None):
        with tf.name_scope(name, 'entropy_sym', [dist_info_var]):
            log_std_var = dist_info_var['log_std']
            return tf.reduce_sum(
                log_std_var + np.log(np.sqrt(2 * np.pi * np.e)), axis=-1)

    @property
    def dist_info_specs(self):
        return [('mean', (self.dim, )), ('log_std', (self.dim, ))]
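

# A minimal, self-contained usage sketch (illustrative; not part of the
# original garage source). It assumes a TF1-style environment where
# tf.placeholder and tf.Session are available, and cross-checks the
# symbolic KL against the NumPy implementation.
if __name__ == '__main__':
    dist = DiagonalGaussian(dim=2)

    # Two batched distributions, batch size 1: N(0, I) and N(1, I).
    old_info = {'mean': np.zeros((1, 2)), 'log_std': np.zeros((1, 2))}
    new_info = {'mean': np.ones((1, 2)), 'log_std': np.zeros((1, 2))}

    # NumPy path: sampling, density, entropy, KL.
    x = dist.sample(old_info)
    print('log p(x):', dist.log_likelihood(x, old_info))
    print('entropy :', dist.entropy(old_info))       # 2 * 0.5*log(2*pi*e)
    print('KL      :', dist.kl(old_info, new_info))  # 2 * 0.5 = 1.0

    # Symbolic path: kl_sym should agree with the NumPy result.
    old_vars = {'mean': tf.placeholder(tf.float32, (None, 2)),
                'log_std': tf.placeholder(tf.float32, (None, 2))}
    new_vars = {'mean': tf.placeholder(tf.float32, (None, 2)),
                'log_std': tf.placeholder(tf.float32, (None, 2))}
    kl_op = dist.kl_sym(old_vars, new_vars)
    with tf.Session() as sess:
        print('KL (sym):', sess.run(kl_op, feed_dict={
            old_vars['mean']: old_info['mean'],
            old_vars['log_std']: old_info['log_std'],
            new_vars['mean']: new_info['mean'],
            new_vars['log_std']: new_info['log_std'],
        }))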