Source code for lagom.transform.running_mean_var

import numpy as np


class RunningMeanVar(object):
    r"""Estimates sample mean and variance by using `Chan's method`_.

    It supports both scalar and multi-dimensional data, however, the input is
    expected to be batched. The first dimension is always treated as the batch
    dimension.

    .. note::

        For better precision, we handle the data with `np.float64`.

    .. warning::

        To use estimated moments for standardization, remember to keep the
        precision `np.float64` and calculate it as
        :math:`\frac{x - \mu}{\sqrt{\sigma^2 + 10^{-8}}}`.

    Example:

        >>> f = RunningMeanVar(shape=())
        >>> f([1, 2])
        >>> f([3])
        >>> f([4])
        >>> f.mean
        2.499937501562461
        >>> f.var
        1.2501499923440393

    .. _Chan's method:
        https://en.wikipedia.org/wiki/Algorithms_for_calculating_variance#Parallel_algorithm

    Args:
        shape (tuple): shape of a single (un-batched) sample, e.g. ``()`` for
            scalars. Its length is used to validate the rank of each batch.
    """

    def __init__(self, shape):
        self.shape = shape
        self.mean = np.zeros(shape, dtype=np.float64)
        self.var = np.ones(shape, dtype=np.float64)
        # Tiny pseudo-count: the initial mean=0 / var=1 act as a prior whose
        # weight is 1e-4 of a sample, which is why the doctest values above
        # are slightly off the exact sample statistics.
        self.N = 1e-4  # or zero ?

    def __call__(self, x):
        r"""Update the mean and variance given an additional batched data.

        An empty batch (zero samples along the first dimension) leaves the
        running statistics unchanged.

        Args:
            x (object): additional batched data; converted to a `np.float64`
                array whose number of dimensions must be ``len(shape) + 1``.

        Raises:
            AssertionError: if the number of dimensions of ``x`` does not
                match ``len(shape) + 1``.
        """
        x = np.asarray(x, dtype=np.float64)
        assert x.ndim == len(self.shape) + 1, f'expected {len(self.shape) + 1}, got {x.ndim}'

        batch_N = x.shape[0]
        if batch_N == 0:
            # mean/var of an empty slice are NaN and would permanently
            # poison the running estimates; nothing to merge, so skip.
            return

        batch_mean = x.mean(axis=0)
        batch_var = x.var(axis=0)

        # Chan's parallel update: merge the (N, mean, var) summary of the
        # data seen so far with the summary of the new batch.
        new_N = self.N + batch_N
        delta = batch_mean - self.mean
        new_mean = self.mean + delta*(batch_N/new_N)

        # M_* are sums of squared deviations (count * variance).
        M_A = self.N*self.var
        M_B = batch_N*batch_var
        M_X = M_A + M_B + (delta**2)*((self.N*batch_N)/new_N)
        new_var = M_X/new_N

        self.mean = new_mean
        self.var = new_var
        self.N = new_N

    @property
    def n(self):
        r"""Returns the total number of samples so far. """
        # int() truncates the fractional initial pseudo-count held in N.
        return int(self.N)