Source code for garage.tf.distributions.diagonal_gaussian

"""Diagonal Gaussian Distribution."""
import numpy as np
import tensorflow as tf

from garage.experiment import deterministic
from garage.tf.distributions.distribution import Distribution


class DiagonalGaussian(Distribution):
    """Diagonal Gaussian Distribution.

    Args:
        dim (int): Dimension of the distribution.
        name (str): Name (scope) of the distribution.

    """

    def __init__(self, dim, name='DiagonalGaussian'):
        self._dim = dim
        self._name = name

    @property
    def dim(self):
        """int: Dimension of the distribution."""
        return self._dim
    def kl(self, old_dist_info, new_dist_info):
        """KL divergence between the old and the new distribution.

        Args:
            old_dist_info (dict): Parameters of the old distribution.
            new_dist_info (dict): Parameters of the new distribution.

        Returns:
            np.ndarray: KL divergence between the two distributions,
                summed over the last axis.

        """
        old_means = old_dist_info['mean']
        old_log_stds = old_dist_info['log_std']
        new_means = new_dist_info['mean']
        new_log_stds = new_dist_info['log_std']
        # Compute the KL divergence of two multivariate Gaussian
        # distributions with diagonal covariance matrices
        old_std = np.exp(old_log_stds)
        new_std = np.exp(new_log_stds)
        # means: (N*A)
        # std: (N*A)
        # formula:
        # {(\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2} / (2\sigma_2^2) +
        # ln(\sigma_2/\sigma_1)
        numerator = np.square(old_means - new_means) + \
            np.square(old_std) - np.square(new_std)
        denominator = 2 * np.square(new_std) + 1e-8
        return np.sum(numerator / denominator + new_log_stds - old_log_stds,
                      axis=-1)
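For two Gaussians with equal standard deviation, the formula above reduces to :math:`(\mu_1 - \mu_2)^2 / (2\sigma^2)` per dimension, which makes a convenient sanity check. A minimal sketch, assuming garage is installed::

    import numpy as np

    from garage.tf.distributions.diagonal_gaussian import DiagonalGaussian

    dist = DiagonalGaussian(dim=1)
    # N(0, 1) vs. N(1, 1): KL = (0 - 1)^2 / (2 * 1^2) = 0.5
    old = {'mean': np.array([[0.0]]), 'log_std': np.array([[0.0]])}
    new = {'mean': np.array([[1.0]]), 'log_std': np.array([[0.0]])}
    print(dist.kl(old, new))  # -> [0.5] (up to the 1e-8 stabilizer)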
    def kl_sym(self, old_dist_info_vars, new_dist_info_vars, name='kl_sym'):
        """Symbolic KL divergence between the old and the new distribution.

        Args:
            old_dist_info_vars (dict): Symbolic parameters of the old
                distribution.
            new_dist_info_vars (dict): Symbolic parameters of the new
                distribution.
            name (str): TensorFlow scope name.

        Returns:
            tf.Tensor: Symbolic KL divergence between the two distributions.

        """
        with tf.name_scope(name):
            old_means = old_dist_info_vars['mean']
            old_log_stds = old_dist_info_vars['log_std']
            new_means = new_dist_info_vars['mean']
            new_log_stds = new_dist_info_vars['log_std']
            # Compute the KL divergence of two multivariate Gaussian
            # distributions with diagonal covariance matrices
            old_std = tf.exp(old_log_stds)
            new_std = tf.exp(new_log_stds)
            # means: (N*A)
            # std: (N*A)
            # formula:
            # {(\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2} / (2\sigma_2^2) +
            # ln(\sigma_2/\sigma_1)
            numerator = tf.square(old_means - new_means) + \
                tf.square(old_std) - tf.square(new_std)
            denominator = 2 * tf.square(new_std) + 1e-8
            return tf.reduce_sum(numerator / denominator +
                                 new_log_stds - old_log_stds,
                                 axis=-1)
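The symbolic variant computes the same quantity on tensors. Under TF2 eager execution the ops below evaluate immediately; in garage's usual graph-mode setup the dict values would instead be placeholders fed at session run time. A sketch with constant tensors::

    import tensorflow as tf

    from garage.tf.distributions.diagonal_gaussian import DiagonalGaussian

    dist = DiagonalGaussian(dim=2)
    old = {'mean': tf.zeros((1, 2)), 'log_std': tf.zeros((1, 2))}
    new = {'mean': tf.ones((1, 2)), 'log_std': tf.zeros((1, 2))}
    print(dist.kl_sym(old, new))  # -> [1.0]: 0.5 per dimension, summed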
    def likelihood_ratio_sym(self,
                             x_var,
                             old_dist_info_vars,
                             new_dist_info_vars,
                             name='likelihood_ratio_sym'):
        """Symbolic likelihood ratio.

        Args:
            x_var (tf.Tensor): Input placeholder.
            old_dist_info_vars (dict): Old distribution tensors.
            new_dist_info_vars (dict): New distribution tensors.
            name (str): TensorFlow scope name.

        Returns:
            tf.Tensor: Symbolic likelihood ratio.

        """
        with tf.name_scope(name):
            logli_new = self.log_likelihood_sym(x_var, new_dist_info_vars)
            logli_old = self.log_likelihood_sym(x_var, old_dist_info_vars)
            return tf.exp(logli_new - logli_old)
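The ratio :math:`\exp(\log p_{new}(x) - \log p_{old}(x))` is the importance weight that appears in surrogate objectives such as TRPO's; when the two distributions coincide it is exactly 1, which gives a quick check. A sketch, again with eager constant tensors::

    import tensorflow as tf

    from garage.tf.distributions.diagonal_gaussian import DiagonalGaussian

    dist = DiagonalGaussian(dim=2)
    info = {'mean': tf.zeros((1, 2)), 'log_std': tf.zeros((1, 2))}
    x = tf.constant([[0.3, -0.7]])
    ratio = dist.likelihood_ratio_sym(x, info, info)
    print(ratio)  # -> [1.0] for identical old/new distributions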
    def log_likelihood_sym(self, x_var, dist_info_vars,
                           name='log_likelihood_sym'):
        """Symbolic log likelihood.

        Args:
            x_var (tf.Tensor): Input placeholder.
            dist_info_vars (dict): Parameters of a distribution.
            name (str): TensorFlow scope name.

        Returns:
            tf.Tensor: Symbolic log likelihood.

        """
        with tf.name_scope(name):
            means = dist_info_vars['mean']
            log_stds = dist_info_vars['log_std']
            zs = (x_var - means) / tf.exp(log_stds)
            return - tf.reduce_sum(log_stds, axis=-1) - \
                0.5 * tf.reduce_sum(tf.square(zs), axis=-1) - \
                0.5 * self.dim * np.log(2 * np.pi)
    def sample(self, dist_info):
        """Sample a value given a distribution.

        Args:
            dist_info (dict): Parameters of a distribution.

        Returns:
            np.ndarray: A sample from the distribution.

        """
        # pylint: disable=no-self-use
        means = dist_info['mean']
        log_stds = dist_info['log_std']
        rnd = np.random.normal(size=means.shape)
        return rnd * np.exp(log_stds) + means
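Sampling uses the reparameterization :math:`x = \mu + \sigma \cdot \epsilon` with :math:`\epsilon \sim N(0, I)`, so the shape of ``mean`` determines the batch shape. A minimal sketch::

    import numpy as np

    from garage.tf.distributions.diagonal_gaussian import DiagonalGaussian

    dist = DiagonalGaussian(dim=2)
    info = {'mean': np.zeros((4, 2)),
            'log_std': np.log(0.1) * np.ones((4, 2))}
    samples = dist.sample(info)
    print(samples.shape)  # -> (4, 2), one draw per row of `mean`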
    def sample_sym(self, dist_info_vars):
        """Sample a symbolic value given a distribution.

        Args:
            dist_info_vars (dict): Symbolic parameters of a distribution.

        Returns:
            tf.Tensor: A symbolic sample from the distribution.

        """
        # pylint: disable=no-self-use
        means = dist_info_vars['mean']
        log_stds = dist_info_vars['log_std']
        rnd = tf.random.normal(shape=tf.shape(means),
                               seed=deterministic.get_tf_seed_stream())
        return rnd * tf.math.exp(log_stds) + means
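The symbolic variant draws its op-level seed from garage's deterministic seed stream, so sampling is reproducible once the experiment-wide seed is set. A sketch, assuming ``deterministic.set_seed`` has been called first (as garage experiments normally do)::

    import tensorflow as tf

    from garage.experiment import deterministic
    from garage.tf.distributions.diagonal_gaussian import DiagonalGaussian

    deterministic.set_seed(1)
    dist = DiagonalGaussian(dim=2)
    info = {'mean': tf.zeros((4, 2)), 'log_std': tf.zeros((4, 2))}
    print(dist.sample_sym(info))  # same values on every seeded run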
    def log_likelihood(self, xs, dist_info):
        """Log likelihood of a sample under a distribution.

        Args:
            xs (np.ndarray): Input value.
            dist_info (dict): Parameters of a distribution.

        Returns:
            float: Log likelihood of a sample under the distribution.

        """
        means = dist_info['mean']
        log_stds = dist_info['log_std']
        zs = (xs - means) / np.exp(log_stds)
        return - np.sum(log_stds, axis=-1) - \
            0.5 * np.sum(np.square(zs), axis=-1) - \
            0.5 * self.dim * np.log(2 * np.pi)
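Because the covariance is diagonal, the log likelihood is the sum of independent univariate Gaussian log densities, which can be cross-checked against scipy. A sketch, assuming scipy is available::

    import numpy as np
    from scipy.stats import norm

    from garage.tf.distributions.diagonal_gaussian import DiagonalGaussian

    dist = DiagonalGaussian(dim=2)
    info = {'mean': np.zeros(2), 'log_std': np.log(np.array([1.0, 2.0]))}
    x = np.array([0.5, -1.0])
    print(dist.log_likelihood(x, info))               # ~ -2.781
    print(norm.logpdf(x, loc=0.0, scale=[1.0, 2.0]).sum())  # should agree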
    def entropy(self, dist_info):
        """Entropy of a distribution.

        Args:
            dist_info (dict): Parameters of a distribution.

        Returns:
            float: Entropy of the distribution.

        """
        log_stds = dist_info['log_std']
        return np.sum(log_stds + np.log(np.sqrt(2 * np.pi * np.e)), axis=-1)
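Each dimension contributes :math:`\log\sigma + \frac{1}{2}\log(2\pi e)` to the entropy (about 1.4189 nats when :math:`\sigma = 1`), so a unit Gaussian gives an easy closed-form check::

    import numpy as np

    from garage.tf.distributions.diagonal_gaussian import DiagonalGaussian

    dist = DiagonalGaussian(dim=3)
    info = {'mean': np.zeros(3), 'log_std': np.zeros(3)}
    print(dist.entropy(info))  # -> ~4.2568 = 3 * 0.5 * log(2 * pi * e)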
    def entropy_sym(self, dist_info_vars, name='entropy_sym'):
        """Symbolic entropy of a distribution.

        Args:
            dist_info_vars (dict): Symbolic parameters of a distribution.
            name (str): TensorFlow scope name.

        Returns:
            tf.Tensor: Symbolic entropy of the distribution.

        """
        with tf.name_scope(name):
            log_std_var = dist_info_vars['log_std']
            return tf.reduce_sum(
                log_std_var + np.log(np.sqrt(2 * np.pi * np.e)), axis=-1)
    @property
    def dist_info_specs(self):
        """list: Specification of the parameters of a distribution."""
        return [('mean', (self.dim, )), ('log_std', (self.dim, ))]
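The specs describe the per-sample shape of each parameter tensor; callers such as policies use them to build the ``dist_info`` dicts passed to the methods above. A sketch::

    from garage.tf.distributions.diagonal_gaussian import DiagonalGaussian

    dist = DiagonalGaussian(dim=2)
    print(dist.dist_info_specs)  # -> [('mean', (2,)), ('log_std', (2,))]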