Source code for garage.np.baselines.linear_feature_baseline

import numpy as np

from garage.np.baselines.base import Baseline


class LinearFeatureBaseline(Baseline):
    """Baseline that regresses path returns on hand-built linear features.

    The features of a path are its clipped observations, their squares,
    a scaled time index raised to the powers 1, 2 and 3, and a constant
    column. The weights are obtained from an L2-regularized least-squares
    problem solved in :meth:`fit`.

    Args:
        env_spec: Environment specification. It is accepted for interface
            compatibility but is not read anywhere in this class.
        reg_coeff (float): Initial coefficient of the identity matrix
            added to the normal equations.
        name (str): Name stored on the instance as ``self.name``.

    """

    def __init__(self, env_spec, reg_coeff=1e-5,
                 name='LinearFeatureBaseline'):
        # NOTE(review): ``Baseline.__init__`` is not invoked and
        # ``env_spec`` is unused here -- confirm this is intentional.
        self.name = name
        self._reg_coeff = reg_coeff
        # Stays ``None`` until ``fit`` or ``set_param_values`` is called.
        self._coeffs = None

    def get_param_values(self, **tags):
        """Return the current regression coefficients.

        Args:
            **tags: Accepted but ignored.

        Returns:
            The stored coefficients, or ``None`` if none have been set.

        """
        return self._coeffs

    def set_param_values(self, val, **tags):
        """Overwrite the regression coefficients.

        Args:
            val: New coefficients; stored as-is without validation.
            **tags: Accepted but ignored.

        """
        self._coeffs = val

    def _features(self, path):
        """Build the feature matrix for a single path.

        Args:
            path (dict): Must provide ``'observations'`` and ``'rewards'``.
                Presumably observations are 2-D with one row per reward,
                since they are concatenated column-wise with
                ``(len(rewards), 1)`` arrays -- verify against callers.

        Returns:
            numpy.ndarray: Column-wise concatenation of the clipped
            observations, their squares, ``t``, ``t**2``, ``t**3`` and a
            column of ones, where ``t`` is the step index divided by 100.

        """
        clipped = np.clip(path['observations'], -10, 10)
        n_steps = len(path['rewards'])
        t = np.arange(n_steps).reshape(-1, 1) / 100.0
        bias = np.ones((n_steps, 1))
        columns = [clipped, clipped**2, t, t**2, t**3, bias]
        return np.concatenate(columns, axis=1)

    def fit(self, paths):
        """Fit the coefficients to the returns of the given paths.

        Solves ``(F.T F + reg * I) w = F.T r`` with ``numpy.linalg.lstsq``,
        where ``F`` stacks the features of all paths and ``r`` stacks
        their returns. If the solution contains NaN, the regularization
        is multiplied by 10 and the solve is repeated, for at most five
        attempts in total; the result of the last attempt is kept either
        way.

        Args:
            paths (list[dict]): Paths providing ``'observations'``,
                ``'rewards'`` and ``'returns'``.

        """
        features = np.concatenate([self._features(p) for p in paths])
        targets = np.concatenate([p['returns'] for p in paths])

        # These terms do not depend on the regularization strength, so
        # they are computed once and reused across retries.
        gram = features.T.dot(features)
        moment = features.T.dot(targets)
        eye = np.identity(features.shape[1])

        penalty = self._reg_coeff
        for _ in range(5):
            solution = np.linalg.lstsq(gram + penalty * eye, moment,
                                       rcond=-1)
            self._coeffs = solution[0]
            if not np.isnan(self._coeffs).any():
                break
            # NaN in the solution: retry with stronger regularization.
            penalty *= 10

    def predict(self, path):
        """Predict a value for every step of a path.

        Args:
            path (dict): Must provide ``'rewards'``, and additionally
                ``'observations'`` once coefficients are available.

        Returns:
            numpy.ndarray: Zeros of length ``len(path['rewards'])`` when
            no coefficients are set, otherwise the feature matrix of the
            path multiplied by the coefficients.

        """
        if self._coeffs is not None:
            return self._features(path).dot(self._coeffs)
        return np.zeros(len(path['rewards']))