Source code for garage.tf.distributions.diagonal_gaussian

"""Diagonal Gaussian Distribution."""
import numpy as np
import tensorflow as tf

from garage.experiment import deterministic
from garage.tf.distributions.distribution import Distribution


[docs]class DiagonalGaussian(Distribution):
    """Diagonal Gaussian Distribution.

    Args:
        dim (int): Dimension of the distribution.
        name (str): Name (scope) of the distribution.

    """

    def __init__(self, dim, name='DiagonalGaussian'):
        self._dim = dim
        self._name = name

    @property
    def dim(self):
        """int: Dimension of the distribution."""
        return self._dim

[docs]    def kl(self, old_dist_info, new_dist_info):
        """KL Divergence between the old and the new distribution.

        Args:
            old_dist_info (dict): Parameters of the old distribution.
            new_dist_info (dict): Parameters of the new distribution.

        Returns:
            float: KL Divergence between two distributions.

        """
        old_means = old_dist_info['mean']
        old_log_stds = old_dist_info['log_std']
        new_means = new_dist_info['mean']
        new_log_stds = new_dist_info['log_std']
        # Compute the KL divergence of two multivariate Gaussian distribution
        # with diagonal covariance matrices
        old_std = np.exp(old_log_stds)
        new_std = np.exp(new_log_stds)
        # means: (N*A)
        # std: (N*A)
        # formula:
        # { (\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2 } / (2\sigma_2^2) +
        # ln(\sigma_2/\sigma_1)
        numerator = np.square(old_means - new_means) + \
            np.square(old_std) - np.square(new_std)
        denominator = 2 * np.square(new_std) + 1e-8
        return np.sum(numerator / denominator + new_log_stds - old_log_stds,
                      axis=-1)

[docs]    def kl_sym(self, old_dist_info_vars, new_dist_info_vars, name='kl_sym'):
        """Symbolic KL between the old and the new distribution.

        Args:
            old_dist_info_vars (tf.Tensor): Symbolic parameters of
                the old distribution.
            new_dist_info_vars (tf.Tensor): Symbolic parameters of
                the new distribution.
            name (str): TensorFlow scope name.

        Returns:
            tf.Tensor: Symbolic KL divergence between the two distributions.

        """
        with tf.name_scope(name):
            old_means = old_dist_info_vars['mean']
            old_log_stds = old_dist_info_vars['log_std']
            new_means = new_dist_info_vars['mean']
            new_log_stds = new_dist_info_vars['log_std']
            # Compute the KL divergence of two multivariate
            # Gaussian distribution with diagonal covariance matrices
            old_std = tf.exp(old_log_stds)
            new_std = tf.exp(new_log_stds)
            # means: (N*A)
            # std: (N*A)
            # formula:
            # { (\mu_1 - \mu_2)^2 + \sigma_1^2 - \sigma_2^2 } / (2\sigma_2^2) +
            # ln(\sigma_2/\sigma_1)
            numerator = tf.square(old_means - new_means) + \
                tf.square(old_std) - tf.square(new_std)
            denominator = 2 * tf.square(new_std) + 1e-8
            return tf.reduce_sum(numerator / denominator + new_log_stds -
                                 old_log_stds,
                                 axis=-1)

[docs]    def likelihood_ratio_sym(self,
                             x_var,
                             old_dist_info_vars,
                             new_dist_info_vars,
                             name='likelihood_ratio_sym'):
        """Symbolic likelihood ratio.

        Args:
            x_var (tf.Tensor): Input placeholder.
            old_dist_info_vars (dict): Old distribution tensors.
            new_dist_info_vars (dict): New distribution tensors.
            name (str): TensorFlow scope name.

        Returns:
            tf.Tensor: Symbolic likelihood ratio.

        """
        with tf.name_scope(name):
            logli_new = self.log_likelihood_sym(x_var, new_dist_info_vars)
            logli_old = self.log_likelihood_sym(x_var, old_dist_info_vars)
            return tf.exp(logli_new - logli_old)

[docs]    def log_likelihood_sym(self,
                           x_var,
                           dist_info_vars,
                           name='log_likelihood_sym'):
        """Symbolic log likelihood.

        Args:
            x_var (tf.Tensor): Input placeholder.
            dist_info_vars (dict): Parameters of a distribution.
            name (str): TensorFlow scope name.

        Returns:
            tf.Tensor: Symbolic log likelihood.

        """
        with tf.name_scope(name):
            means = dist_info_vars['mean']
            log_stds = dist_info_vars['log_std']
            zs = (x_var - means) / tf.exp(log_stds)
            return - tf.reduce_sum(log_stds, axis=-1) - \
                0.5 * tf.reduce_sum(tf.square(zs), axis=-1) - \
                0.5 * self.dim * np.log(2 * np.pi)

[docs]    def sample(self, dist_info):
        """Sample a value given a distribution.

        Args:
            dist_info (dict): Parameters of a distribution.

        Returns:
            np.ndarray: A sample from the distribution.

        """
        # pylint: disable=no-self-use
        means = dist_info['mean']
        log_stds = dist_info['log_std']
        rnd = np.random.normal(size=means.shape)
        return rnd * np.exp(log_stds) + means

[docs]    def sample_sym(self, dist_info_vars):
        """Sample a symbolic value given a distribution.

        Args:
            dist_info_vars (dict): Symbolic parameters of a distribution.

        Returns:
            tf.Tensor: A symbolic sample from the distribution.

        """
        # pylint: disable=no-self-use
        means = dist_info_vars['mean']
        log_stds = dist_info_vars['log_std']
        rnd = tf.random.normal(shape=tf.shape(means),
                               seed=deterministic.get_tf_seed_stream())
        return rnd * tf.math.exp(log_stds) + means

[docs]    def log_likelihood(self, xs, dist_info):
        """Log likelihood of a sample under a distribution.

        Args:
            xs (np.ndarray): Input value.
            dist_info (dict): Parameters of a distribution.

        Returns:
            float: Log likelihood of a sample under the distribution.

        """
        means = dist_info['mean']
        log_stds = dist_info['log_std']
        zs = (xs - means) / np.exp(log_stds)
        return - np.sum(log_stds, axis=-1) - \
            0.5 * np.sum(np.square(zs), axis=-1) - \
            0.5 * self.dim * np.log(2 * np.pi)

[docs]    def entropy(self, dist_info):
        """Entropy of a distribution.

        Args:
            dist_info (dict): Parameters of a distribution.

        Returns:
            float: Entropy of the distribution.

        """
        log_stds = dist_info['log_std']
        return np.sum(log_stds + np.log(np.sqrt(2 * np.pi * np.e)), axis=-1)

[docs]    def entropy_sym(self, dist_info_vars, name='entropy_sym'):
        """Symbolic entropy of a distribution.

        Args:
            dist_info_vars (dict): Symbolic parameters of a distribution.
            name (str): TensorFlow scope name.

        Returns:
            tf.Tensor: Symbolic entropy of the distribution.

        """
        with tf.name_scope(name):
            log_std_var = dist_info_vars['log_std']
            return tf.reduce_sum(log_std_var +
                                 np.log(np.sqrt(2 * np.pi * np.e)),
                                 axis=-1)

    @property
    def dist_info_specs(self):
        """list: Specification of the parameter of a distribution."""
        return [('mean', (self.dim, )), ('log_std', (self.dim, ))]