From 653891e2544cd8065ea4afead12f5b9e37d6d38e Mon Sep 17 00:00:00 2001 From: Adam Fontenot Date: Sat, 3 Jun 2023 15:47:09 -0400 Subject: [PATCH] Improve description of initial value behavior in estimator Rather than describing this as "debiasing", instead make clear that we are properly normalizing a weighted average with a sum of weights less than 1. --- src/state.rs | 56 +++++++++++++++++++++++++++++----------------------- 1 file changed, 31 insertions(+), 25 deletions(-) diff --git a/src/state.rs b/src/state.rs index a1e65219..c9127675 100644 --- a/src/state.rs +++ b/src/state.rs @@ -430,15 +430,19 @@ impl Estimator { self.smoothed_steps_per_sec = self.smoothed_steps_per_sec * weight + new_steps_per_second * (1.0 - weight); - // Get an unbiased estimate of `smoothed_steps_per_sec` to serve as the data source for the - // double smoothed estimate. See comment on debiasing in `steps_per_second` for details. + // An iterative estimate like `smoothed_steps_per_sec` is supposed to be an exponentially + // weighted average from t=0 back to t=-inf. Since we initialize it to 0, we neglect the + // (non-existent) samples in the weighted average prior to the first one, so the resulting + // average must be normalized. We normalize the single estimate here in order to use it as + // a source for the double smoothed estimate. See comment on normalization in + // `steps_per_second` for details. 
 let delta_t_start = duration_to_secs(now - self.start_time); - let debias = 1.0 - estimator_weight(delta_t_start); - let debiased_smoothed_steps_per_sec = self.smoothed_steps_per_sec / debias; + let total_weight = 1.0 - estimator_weight(delta_t_start); + let normalized_smoothed_steps_per_sec = self.smoothed_steps_per_sec / total_weight; // determine the double smoothed value (EWA smoothing of the single EWA) self.double_smoothed_steps_per_sec = self.double_smoothed_steps_per_sec * weight - + debiased_smoothed_steps_per_sec * (1.0 - weight); + + normalized_smoothed_steps_per_sec * (1.0 - weight); self.prev_steps = new_steps; self.prev_time = now; @@ -464,32 +468,34 @@ impl Estimator { let delta_t = duration_to_secs(now - self.prev_time); let reweight = estimator_weight(delta_t); - // Debiasing: + // Normalization of estimates: // - // Our exponentially weighted estimate is a single value (smoothed_steps_per_second) that - // is iteratively updated. At each update, the previous value of the estimate is - // re-weighted according its age. At any point in time, the raw value of this estimate - // reflects the assumption that it contains properly weighted sample values going back - // indefinitely in time. But this assumption is false. + // The raw estimate is a single value (smoothed_steps_per_sec) that is iteratively + // updated. At each update, the previous value of the estimate is downweighted according to + // its age, receiving the iterative weight W(t) = 0.1 ^ (t/15). // - // The value is initialized with some value when the estimator starts. The raw value of the - // estimator treats this as an appropriately weighted sample average across all times - // before t=0. Of course, the value is actually arbitrary. In other words, because the raw - // estimate gives a positive weight to this initial value, the resulting estimate will be - // *biased* towards the initial value. 
+ // Since W(Sum(t_n)) = Prod(W(t_n)), the total weight of a sample after a series of + // iterative steps is simply W(t_e) - W(t_b), where t_e is the time since the end of the + // sample, and t_b is the time since the beginning. The resulting estimate is therefore a + // weighted average with sample weights W(t_e) - W(t_b). // - // A debiased estimate is the result of correcting the raw estimate by assigning 0 weight - // to the initial value. We can do this with a simple trick: set the initial value to 0, - // and then divide the raw estimate by the estimator weight for all time *since* t=0. + // Notice that the weighting function generates sample weights that sum to 1 only when the + // sample times span from t=0 to t=inf; but this is not the case. We have a first sample + // with finite, positive t_b = t_f. In the raw estimate, we handle times prior to t_f by + // setting an initial value of 0, meaning that these (non-existent) samples have no weight. + // + // Therefore, the raw estimate must be normalized by dividing it by the sum of the weights + // in the weighted average. This sum is just W(0) - W(t_f), where t_f is the time since the + // first sample, and W(0) = 1. let delta_t_start = duration_to_secs(now - self.start_time); - let debias = 1.0 - estimator_weight(delta_t_start); + let total_weight = 1.0 - estimator_weight(delta_t_start); // Generate updated values for `smoothed_steps_per_sec` and `double_smoothed_steps_per_sec` - // (sps and dsps) without storing them. Note that we debias sps when using it as a source - // to update dsps, and then debias dsps itself before returning it. - let sps = self.smoothed_steps_per_sec * reweight / debias; + // (sps and dsps) without storing them. Note that we normalize sps when using it as a + // source to update dsps, and then normalize dsps itself before returning it. 
+ let sps = self.smoothed_steps_per_sec * reweight / total_weight; let dsps = self.double_smoothed_steps_per_sec * reweight + sps * (1.0 - reweight); - dsps / debias + dsps / total_weight } } @@ -713,7 +719,7 @@ mod tests { // The first level EWA: // -> 90% weight @ 0 eps, 9% weight @ 1 eps, 1% weight @ 0 eps - // -> then debiased by deweighting the 1% weight (before -30 seconds) + // -> then normalized by deweighting the 1% weight (before -30 seconds) let single_target = 0.09 / 0.99; // The second level EWA: