Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: add exponential moving average to vega-lite #9225

Open
wants to merge 6 commits into
base: main
Choose a base branch
from
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
57 changes: 55 additions & 2 deletions build/vega-lite-schema.json
Expand Up @@ -12,6 +12,19 @@
},
{
"$ref": "#/definitions/ArgminDef"
},
{
"$ref": "#/definitions/ExponentialDef"
}
]
},
"AggregateFieldOp": {
"anyOf": [
{
"$ref": "#/definitions/NonArgAggregateFieldOp"
},
{
"$ref": "#/definitions/ExponentialDef"
}
]
},
Expand Down Expand Up @@ -80,7 +93,7 @@
"description": "The data field for which to compute aggregate function. This is required for all aggregation operations except `\"count\"`."
},
"op": {
"$ref": "#/definitions/AggregateOp",
"$ref": "#/definitions/AggregateFieldOp",
Copy link
Member

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Did you test the docs as well? I see we use AggregateOp in https://github.com/julieg18/vega-lite/blob/261728bbeadb674041266dc3276ffd46a4c7510d/site/_data/link.yml#L61 so we need to make sure the links are still correct.

"description": "The aggregation operation to apply to the fields (e.g., `\"sum\"`, `\"average\"`, or `\"count\"`). See the [full list of supported aggregation operations](https://vega.github.io/vega-lite/docs/aggregate.html#ops) for more information."
}
},
Expand Down Expand Up @@ -8797,6 +8810,18 @@
],
"type": "string"
},
"ExponentialDef": {
"additionalProperties": false,
"properties": {
"exponential": {
"type": "number"
}
},
"required": [
"exponential"
],
"type": "object"
},
"Expr": {
"type": "string"
},
Expand Down Expand Up @@ -17862,6 +17887,35 @@
],
"type": "object"
},
"NonArgAggregateFieldOp": {
"enum": [
"argmax",
"argmin",
"average",
"count",
"distinct",
"max",
"mean",
"median",
"min",
"missing",
"product",
"q1",
"q3",
"ci0",
"ci1",
"stderr",
"stdev",
"stdevp",
"sum",
"valid",
"values",
"variance",
"variancep",
"exponentialb"
],
"type": "string"
},
"NonArgAggregateOp": {
"enum": [
"average",
Expand All @@ -17885,7 +17939,6 @@
"values",
"variance",
"variancep",
"exponential",
"exponentialb"
],
"type": "string"
Expand Down
Binary file added examples/compiled/layer_line_exponential.png
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
1 change: 1 addition & 0 deletions examples/compiled/layer_line_exponential.svg
Sorry, something went wrong. Reload?
Sorry, we cannot display this file.
Sorry, this file is invalid so it cannot be displayed.
137 changes: 137 additions & 0 deletions examples/compiled/layer_line_exponential.vg.json
@@ -0,0 +1,137 @@
{
"$schema": "https://vega.github.io/schema/vega/v5.json",
"background": "white",
"padding": 5,
"width": 400,
"height": 200,
"style": "cell",
"data": [
{
"name": "source_0",
"values": [
{"price": 9.2, "year": 2020},
{"price": 10.76, "year": 2020},
{"price": 36.88, "year": 2021},
{"price": 3.44, "year": 2021},
{"price": 10.55, "year": 2022},
{"price": 9.65, "year": 2022},
{"price": 7.15, "year": 2023},
{"price": 15, "year": 2023},
{"price": 10.19, "year": 2024},
{"price": 8.86, "year": 2024}
]
},
{
"name": "data_0",
"source": "source_0",
"transform": [
{
"type": "aggregate",
"groupby": ["year"],
"ops": ["exponential", "mean"],
"fields": ["price", "price"],
"as": ["exponential_price", "mean_price"],
"aggregate_params": [0.5, null]
}
]
}
],
"marks": [
{
"name": "layer_0_marks",
"type": "line",
"style": ["line"],
"sort": {"field": "datum[\"year\"]"},
"from": {"data": "data_0"},
"encode": {
"update": {
"stroke": {"value": "#4c78a8"},
"description": {
"signal": "\"year: \" + (isValid(datum[\"year\"]) ? datum[\"year\"] : \"\"+datum[\"year\"]) + \"; Avg Price: \" + (format(datum[\"mean_price\"], \"\"))"
},
"x": {"scale": "x", "field": "year"},
"y": {"scale": "y", "field": "mean_price"},
"defined": {
"signal": "isValid(datum[\"mean_price\"]) && isFinite(+datum[\"mean_price\"])"
}
}
}
},
{
"name": "layer_1_marks",
"type": "line",
"style": ["line"],
"sort": {"field": "datum[\"year\"]"},
"from": {"data": "data_0"},
"encode": {
"update": {
"opacity": {"value": 0.5},
"stroke": {"value": "#4c78a8"},
"description": {
"signal": "\"year: \" + (isValid(datum[\"year\"]) ? datum[\"year\"] : \"\"+datum[\"year\"]) + \"; Exponential of price: \" + (format(datum[\"exponential_price\"], \"\"))"
},
"x": {"scale": "x", "field": "year"},
"y": {"scale": "y", "field": "exponential_price"},
"defined": {
"signal": "isValid(datum[\"exponential_price\"]) && isFinite(+datum[\"exponential_price\"])"
}
}
}
}
],
"scales": [
{
"name": "x",
"type": "point",
"domain": {"data": "data_0", "field": "year", "sort": true},
"range": [0, {"signal": "width"}],
"padding": 0.5
},
{
"name": "y",
"type": "linear",
"domain": {
"data": "data_0",
"fields": ["mean_price", "exponential_price"]
},
"range": [{"signal": "height"}, 0],
"nice": true,
"zero": true
}
],
"axes": [
{
"scale": "y",
"orient": "left",
"gridScale": "x",
"grid": true,
"tickCount": {"signal": "ceil(height/40)"},
"domain": false,
"labels": false,
"aria": false,
"maxExtent": 0,
"minExtent": 0,
"ticks": false,
"zindex": 0
},
{
"scale": "x",
"orient": "bottom",
"grid": false,
"title": "year",
"labelAlign": "right",
"labelAngle": 270,
"labelBaseline": "middle",
"zindex": 0
},
{
"scale": "y",
"orient": "left",
"grid": false,
"title": "Avg Price",
"labelOverlap": true,
"tickCount": {"signal": "ceil(height/40)"},
"zindex": 0
}
]
}
52 changes: 52 additions & 0 deletions examples/specs/layer_line_exponential.vl.json
@@ -0,0 +1,52 @@
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"width": 400,
"data": {
"values": [
{"price": 9.2, "year": 2020},
{"price": 10.76, "year": 2020},
{"price": 36.88, "year": 2021},
{"price": 3.44, "year": 2021},
{"price": 10.55, "year": 2022},
{"price": 9.65, "year": 2022},
{"price": 7.15, "year": 2023},
{"price": 15.0, "year": 2023},
{"price": 10.19, "year": 2024},
{"price": 8.86, "year": 2024}
]
Copy link
Author

@julieg18 julieg18 Feb 13, 2024

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

Originally, I was planning on using some advanced data like data/stocks.csv for an example, but exponential was returning NaN whenever I wanted to use timeUnit in the x encoding.

Since the generated Vega spec looked correct, could there be an error in how `exponential` works in Vega, or am I misunderstanding how to use `timeUnit`? Here's what I'm seeing in my local editor (Vega 5.27.0):

image image

},
"layer": [
{
"mark": "line",
"encoding": {
"x": {
"field": "year"
},
"y": {
"field": "price",
"aggregate": "mean",
"type": "quantitative",
"title": "Avg Price"
}
}
},
{
"mark": {
"type": "line",
"opacity": 0.5
},
"encoding": {
"x": {
"field": "year"
},
"y": {
"field": "price",
"aggregate": {
"exponential": 0.5
},
"type": "quantitative"
}
}
}
]
}
9 changes: 9 additions & 0 deletions site/docs/transform/aggregate.md
Expand Up @@ -121,6 +121,7 @@ The supported **aggregation operations** are:
| max | The maximum field value. |
| argmin | An input data object containing the minimum field value. <br/> **Note:** When used inside encoding, `argmin` must be specified as an object. (See below for an example.) |
| argmax | An input data object containing the maximum field value. <br/> **Note:** When used inside encoding, `argmax` must be specified as an object. (See below for an example.) |
| exponential | The exponential moving average of field values. <br/> **Note:** `exponential` must be specified as an object. (See below for an example.) |

{:#argmax}

Expand All @@ -141,3 +142,11 @@ This is equivalent to specifying argmax in an aggregate transform and encode its
`argmax` can be useful for getting the last value in a line for label placement.

<span class="vl-example" data-name="line_color_label"></span>

## Exponential

You can use the `exponential` aggregate to compute the exponential moving average of a field, which is a smooth alternative to a simple moving average. It is commonly used when you want to weight recent values more heavily but don't want a discontinuous drop-off when values fall out of an averaging window.

To use it, set the aggregate operation to an object whose `exponential` property gives the weight (a number between 0 and 1) to use in the transformation.

<div class="vl-example" data-name="layer_line_exponential"></div>
14 changes: 11 additions & 3 deletions src/aggregate.ts
Expand Up @@ -45,9 +45,13 @@ export interface ArgmaxDef {
argmax: FieldName;
}

export type NonArgAggregateOp = Exclude<AggregateOp, 'argmin' | 'argmax'>;
/**
 * Object form of the `exponential` aggregate, which yields the exponential
 * moving average of a field's values.
 */
export interface ExponentialDef {
/**
 * The weight to use in the exponential moving average
 * (documented as a number between 0 and 1).
 */
exponential: number;
}

export type NonArgAggregateOp = Exclude<AggregateOp, 'argmin' | 'argmax' | 'exponential'>;

export type Aggregate = NonArgAggregateOp | ArgmaxDef | ArgminDef;
export type Aggregate = NonArgAggregateOp | ArgmaxDef | ArgminDef | ExponentialDef;

export function isArgminDef(a: Aggregate | string): a is ArgminDef {
return !!a && !!a['argmin'];
Expand All @@ -57,7 +61,11 @@ export function isArgmaxDef(a: Aggregate | string): a is ArgmaxDef {
return !!a && !!a['argmax'];
}

export function isAggregateOp(a: string | ArgminDef | ArgmaxDef): a is AggregateOp {
/**
 * Type guard for the object form of the `exponential` aggregate.
 *
 * @param a - An aggregate definition or a plain string.
 * @returns `true` when `a` is an object carrying a numeric `exponential`
 *   weight, `false` for strings, `null`/`undefined`, and any other object.
 */
export function isExponentialDef(a: Aggregate | string): a is ExponentialDef {
  // Test the property's type rather than its truthiness: a weight of 0 is
  // falsy but still a legitimate `{exponential: 0}` definition.
  return typeof a === 'object' && a !== null && 'exponential' in a && typeof a.exponential === 'number';
}

/**
 * Type guard for string aggregate operations.
 *
 * @param a - A string or one of the object-form aggregate definitions.
 * @returns `true` only when `a` is a string that is an own key of
 *   `AGGREGATE_OP_INDEX` with a truthy value; object-form definitions
 *   always yield `false`.
 */
export function isAggregateOp(a: string | ArgminDef | ArgmaxDef | ExponentialDef): a is AggregateOp {
  // Require an own property so that names inherited from Object.prototype
  // (e.g. "constructor", "toString") are not accepted as aggregate ops
  // in case the index is a plain object literal.
  return isString(a) && Object.prototype.hasOwnProperty.call(AGGREGATE_OP_INDEX, a) && !!AGGREGATE_OP_INDEX[a];
}

Expand Down
22 changes: 16 additions & 6 deletions src/channeldef.ts
@@ -1,6 +1,6 @@
import {Gradient, ScaleType, SignalRef, Text} from 'vega';
import {isArray, isBoolean, isNumber, isString} from 'vega-util';
import {Aggregate, isAggregateOp, isArgmaxDef, isArgminDef, isCountingAggregateOp} from './aggregate';
import {Aggregate, isAggregateOp, isArgmaxDef, isArgminDef, isCountingAggregateOp, isExponentialDef} from './aggregate';
import {Axis} from './axis';
import {autoMaxBins, Bin, BinParams, binToString, isBinned, isBinning} from './bin';
import {
Expand Down Expand Up @@ -805,7 +805,7 @@ export function vgField(

if (!opt.nofn) {
if (isOpFieldDef(fieldDef)) {
fn = fieldDef.op;
fn = isExponentialDef(fieldDef.op) ? 'exponential' : fieldDef.op;
} else {
const {bin, aggregate, timeUnit} = fieldDef;
if (isBinning(bin)) {
Expand All @@ -819,7 +819,7 @@ export function vgField(
argAccessor = `["${field}"]`;
field = `argmin_${aggregate.argmin}`;
} else {
fn = String(aggregate);
fn = isExponentialDef(aggregate) ? 'exponential' : String(aggregate);
}
} else if (timeUnit && !isBinnedTimeUnit(timeUnit)) {
fn = timeUnitToString(timeUnit);
Expand Down Expand Up @@ -893,7 +893,8 @@ export function verbalTitleFormatter(fieldDef: FieldDefBase<string>, config: Con
} else if (isArgminDef(aggregate)) {
return `${field} for min ${aggregate.argmin}`;
} else {
return `${titleCase(aggregate)} of ${field}`;
const aggregateOp = isExponentialDef(aggregate) ? 'exponential' : aggregate;
Copy link
Author

Choose a reason for hiding this comment

The reason will be displayed to describe this comment to others. Learn more.

I ended up adding a mixture of ternary operators and if statements to the code to handle `exponential` being an object, which I know can increase code complexity. Please let me know if there are cleaner ways to handle this :)

return `${titleCase(aggregateOp)} of ${field}`;
}
}
return field;
Expand All @@ -909,7 +910,9 @@ export function functionalTitleFormatter(fieldDef: FieldDefBase<string>) {

const timeUnitParams = timeUnit && !isBinnedTimeUnit(timeUnit) ? normalizeTimeUnit(timeUnit) : undefined;

const fn = aggregate || timeUnitParams?.unit || (timeUnitParams?.maxbins && 'timeunit') || (isBinning(bin) && 'bin');
const aggregateOp = isExponentialDef(aggregate) ? 'exponential' : aggregate;
const fn =
aggregateOp || timeUnitParams?.unit || (timeUnitParams?.maxbins && 'timeunit') || (isBinning(bin) && 'bin');
if (fn) {
return `${fn.toUpperCase()}(${field})`;
} else {
Expand Down Expand Up @@ -1136,7 +1139,14 @@ export function initFieldDef(
const fieldDef = {...fd};

// Drop invalid aggregate
if (!compositeMark && aggregate && !isAggregateOp(aggregate) && !isArgmaxDef(aggregate) && !isArgminDef(aggregate)) {
if (
!compositeMark &&
aggregate &&
!isAggregateOp(aggregate) &&
!isArgmaxDef(aggregate) &&
!isArgminDef(aggregate) &&
!isExponentialDef(aggregate)
) {
log.warn(log.message.invalidAggregate(aggregate));
delete fieldDef.aggregate;
}
Expand Down