Skip to content

Commit

Permalink
feat: more options for handling invalid
Browse files Browse the repository at this point in the history
  • Loading branch information
kanitw committed May 7, 2024
1 parent 386df68 commit e28256c
Show file tree
Hide file tree
Showing 35 changed files with 1,673 additions and 526 deletions.
956 changes: 759 additions & 197 deletions build/vega-lite-schema.json

Large diffs are not rendered by default.

91 changes: 91 additions & 0 deletions examples/specs/test_invalid_include.vl.json
@@ -0,0 +1,91 @@
{
"$schema": "https://vega.github.io/schema/vega-lite/v5.json",
"description": "Testing invalid",
"data": {
"values": [
{"a": null, "b": 1000},
{"a": -10, "b": null},
{"a": -5, "b": 25},
{"a": -1, "b": 20},
{"a": 0, "b": null},
{"a": 1, "b": 30},
{"a": 5, "b": 40},
{"a": 10, "b": null}
]
},

"config": {
"mark": {"invalid": "include"}
},
"hconcat": [{
"title": "Quantitative X",
"vconcat": [{
"width": 100,
"height": 100,
"mark": "point",
"encoding": {
"x": {"field": "a", "type": "quantitative"},
"y": {"field": "b", "type": "quantitative"}
}
}, {
"width": 100,
"height": 100,
"mark": "bar",
"encoding": {
"x": {"field": "a", "type": "quantitative"},
"y": {"field": "b", "type": "quantitative"}
}
}, {
"width": 100,
"height": 100,
"mark": "line",
"encoding": {
"x": {"field": "a", "type": "quantitative"},
"y": {"field": "b", "type": "quantitative"}
}
}, {
"width": 100,
"height": 100,
"mark": "area",
"encoding": {
"x": {"field": "a", "type": "quantitative"},
"y": {"field": "b", "type": "quantitative"}
}
}]
},{
"title": "Ordinal X",
"vconcat": [{
"width": 100,
"height": 100,
"mark": "point",
"encoding": {
"x": {"field": "a", "type": "ordinal"},
"y": {"field": "b", "type": "quantitative"}
}
}, {
"width": 100,
"height": 100,
"mark": "bar",
"encoding": {
"x": {"field": "a", "type": "ordinal"},
"y": {"field": "b", "type": "quantitative"}
}
}, {
"width": 100,
"height": 100,
"mark": "line",
"encoding": {
"x": {"field": "a", "type": "ordinal"},
"y": {"field": "b", "type": "quantitative"}
}
}, {
"width": 100,
"height": 100,
"mark": "area",
"encoding": {
"x": {"field": "a", "type": "ordinal"},
"y": {"field": "b", "type": "quantitative"}
}
}]
}]
}
2 changes: 1 addition & 1 deletion src/channel.ts
Expand Up @@ -480,7 +480,7 @@ const SCALE_CHANNEL_INDEX = {
export const SCALE_CHANNELS = keys(SCALE_CHANNEL_INDEX);
export type ScaleChannel = (typeof SCALE_CHANNELS)[number];

export function isScaleChannel(channel: Channel): channel is ScaleChannel {
/**
 * Type guard: true when the given channel is backed by a scale,
 * narrowing its type to ScaleChannel.
 */
export function isScaleChannel(channel: Channel | ExtendedChannel): channel is ScaleChannel {
  // A channel supports scales exactly when it is registered in SCALE_CHANNEL_INDEX.
  return Boolean(SCALE_CHANNEL_INDEX[channel]);
}

Expand Down
35 changes: 24 additions & 11 deletions src/compile/data/filterinvalid.ts
@@ -1,12 +1,12 @@
import {FilterTransform as VgFilterTransform} from 'vega';
import {isScaleChannel} from '../../channel';
import {TypedFieldDef, vgField as fieldRef} from '../../channeldef';
import {isPathMark} from '../../mark';
import {hasContinuousDomain} from '../../scale';
import {Dict, hash, keys} from '../../util';
import {getMarkPropOrConfig} from '../common';
import {getScaleInvalidDataMode, isScaleInvalidDataInclude} from '../invalid/ScaleInvalidDataMode';
import {DataSourcesForHandlingInvalidValues} from '../invalid/datasources';
import {UnitModel} from '../unit';
import {DataFlowNode} from './dataflow';
import {isCountingAggregateOp} from '../../aggregate';

export class FilterInvalidNode extends DataFlowNode {
public clone() {
Expand All @@ -20,27 +20,40 @@ export class FilterInvalidNode extends DataFlowNode {
super(parent);
}

public static make(parent: DataFlowNode, model: UnitModel): FilterInvalidNode {
const {config, mark, markDef} = model;
public static make(
parent: DataFlowNode,
model: UnitModel,
dataSourcesForHandlingInvalidValues: DataSourcesForHandlingInvalidValues
): FilterInvalidNode {
const {config, markDef} = model;

const invalid = getMarkPropOrConfig('invalid', markDef, config);
if (invalid !== 'filter') {
const {marks, scales} = dataSourcesForHandlingInvalidValues;
if (marks === 'pre-filter' && scales === 'pre-filter') {
      // If neither marks nor scale domains need a data source that filters null values, then don't add the filter.
return null;
}

const filter = model.reduceFieldDef(
(aggregator: Dict<TypedFieldDef<string>>, fieldDef, channel) => {
const scaleComponent = isScaleChannel(channel) && model.getScaleComponent(channel);

if (scaleComponent) {
const scaleType = scaleComponent.get('type');
const {aggregate} = fieldDef;
const invalidDataMode = getScaleInvalidDataMode({
scaleChannel: channel,
markDef,
config,
scaleType,
isCountAggregate: isCountingAggregateOp(aggregate)
});

// While discrete domain scales can handle invalid values, continuous scales can't.
// Thus, for non-path marks, we have to filter null for scales with continuous domains.
// (For path marks, we will use "defined" property and skip these values instead.)
if (hasContinuousDomain(scaleType) && fieldDef.aggregate !== 'count' && !isPathMark(mark)) {
// If the invalid data mode is include or always-valid, we don't need to filter invalid values as the scale can handle invalid values.
if (!isScaleInvalidDataInclude(invalidDataMode) && invalidDataMode !== 'always-valid') {
aggregator[fieldDef.field] = fieldDef as any; // we know that the fieldDef is a typed field def
}
}

return aggregator;
},
{} as Dict<TypedFieldDef<string>>
Expand Down
10 changes: 10 additions & 0 deletions src/compile/data/index.ts
Expand Up @@ -27,6 +27,16 @@ export interface DataComponent {
*/
raw?: OutputNode;

/**
* The output node for scale domain before filter invalid.
*/
preFilterInvalid?: OutputNode;

/**
* The output node for scale domain after filter invalid.
*/
postFilterInvalid?: OutputNode;

/**
* The main output node.
*/
Expand Down
53 changes: 49 additions & 4 deletions src/compile/data/parse.ts
Expand Up @@ -10,7 +10,9 @@ import {
DataSourceType,
ParseValue
} from '../../data';
import {getDataSourcesForHandlingInvalidValues, DataSourcesForHandlingInvalidValues} from '../invalid/datasources';
import * as log from '../../log';
import {isPathMark} from '../../mark';
import {
isAggregate,
isBin,
Expand All @@ -33,6 +35,7 @@ import {
isWindow
} from '../../transform';
import {deepEqual, mergeDeep} from '../../util';
import {getMarkPropOrConfig} from '../common';
import {isFacetModel, isLayerModel, isUnitModel, Model} from '../model';
import {requiresSelectionId} from '../selection';
import {materializeSelections} from '../selection/parse';
Expand Down Expand Up @@ -285,14 +288,26 @@ Formula From Sort Array
Stack (in `encoding`)
|
v
Invalid Filter
+- - - - - - - - - - -+
| PreFilterInvalid | - - - -> scale domains
|(when scales need it)|
+- - - - - - - - - - -+
|
v
Invalid Filter (if the main data source needs it)
|
v
+----------+
| Main |
| Main | - - - -> scale domains
+----------+
|
v
+- - - - - - - - - - -+
| PostFilterInvalid | - - - -> scale domains
|(when scales need it)|
+- - - - - - - - - - -+
|
v
+-------+
| Facet |----> "column", "column-layout", and "row"
+-------+
Expand Down Expand Up @@ -384,13 +399,41 @@ export function parseData(model: Model): DataComponent {
head = StackNode.makeFromEncoding(head, model) ?? head;
}

let preFilterInvalid: OutputNode | undefined;
let dataSourcesForHandlingInvalidValues: DataSourcesForHandlingInvalidValues | undefined;
if (isUnitModel(model)) {
head = FilterInvalidNode.make(head, model) ?? head;
const {markDef, mark, config} = model;
const invalid = getMarkPropOrConfig('invalid', markDef, config);

const {marks, scales} = (dataSourcesForHandlingInvalidValues = getDataSourcesForHandlingInvalidValues({
invalid,
isPath: isPathMark(mark)
}));

if (marks !== scales && scales === 'pre-filter') {
      // Create a separate preFilterInvalid dataSource if scales need pre-filter data but marks need post-filter.
preFilterInvalid = head = makeOutputNode(DataSourceType.PreFilterInvalid, model, head);
}

if (marks === 'post-filter') {
head = FilterInvalidNode.make(head, model, dataSourcesForHandlingInvalidValues) ?? head;
}
}

// output node for marks
const main = (head = makeOutputNode(DataSourceType.Main, model, head));

let postFilterInvalid: OutputNode | undefined;
if (isUnitModel(model) && dataSourcesForHandlingInvalidValues) {
const {marks, scales} = dataSourcesForHandlingInvalidValues;
if (marks === 'pre-filter' && scales === 'post-filter') {
      // Create a separate postFilterInvalid dataSource if scales need post-filter data but marks need pre-filter.
head = FilterInvalidNode.make(head, model, dataSourcesForHandlingInvalidValues) ?? head;

postFilterInvalid = head = makeOutputNode(DataSourceType.PostFilterInvalid, model, head);
}
}

if (isUnitModel(model)) {
materializeSelections(model, main);
}
Expand All @@ -415,7 +458,9 @@ export function parseData(model: Model): DataComponent {
raw,
main,
facetRoot,
ancestorParse
ancestorParse,
preFilterInvalid,
postFilterInvalid
};
}

Expand Down
11 changes: 8 additions & 3 deletions src/compile/guide.ts
Expand Up @@ -2,15 +2,20 @@ import {GuideEncodingEntry} from '../guide';
import {keys} from '../util';
import {VgEncodeChannel} from '../vega.schema';
import {signalOrValueRef} from './common';
import {wrapCondition} from './mark/encode';
import {wrapCondition} from './mark/encode/conditional';
import {UnitModel} from './unit';

export function guideEncodeEntry(encoding: GuideEncodingEntry, model: UnitModel) {
return keys(encoding).reduce((encode, channel: VgEncodeChannel) => {
const valueDef = encoding[channel];
return {
...encode,
...wrapCondition(model, valueDef, channel, def => signalOrValueRef(def.value))
...wrapCondition({
model,
channelDef: encoding[channel],
vgChannel: channel,
mainRefFn: def => signalOrValueRef(def.value),
invalidValueRef: undefined // guide encoding won't show invalid values for the scale
})
};
}, {});
}
68 changes: 68 additions & 0 deletions src/compile/invalid/ScaleInvalidDataMode.ts
@@ -0,0 +1,68 @@
import {SignalRef} from 'vega';
import {ScaleChannel, isPolarPositionChannel, isXorY} from '../../channel';
import {Config} from '../../config';
import {ScaleInvalidDataIncludeAs} from '../../invalid';
import {MarkDef, isPathMark} from '../../mark';
import {ScaleType, hasContinuousDomain} from '../../scale';
import {getMarkPropOrConfig} from '../common';
import {normalizeInvalidDataMode} from './normalizeInvalidDataMode';

/**
 * How invalid data should be handled for a scale channel: either one of the
 * string modes, or the object form ({includeAs: ...}) that keeps invalid
 * values and maps them to a designated scale output.
 */
export type ScaleInvalidDataMode<C extends ScaleChannel> =
  | 'always-valid'
  | 'filter'
  | 'break-paths'
  | 'break-paths-keep-domains'
  | ScaleInvalidDataInclude<C>;

/**
 * Object form of ScaleInvalidDataMode: invalid values are included and
 * rendered as the given output value (e.g. 'min' or 'zero-or-min' — see
 * ScaleInvalidDataIncludeAs).
 */
export interface ScaleInvalidDataInclude<C extends ScaleChannel> {
  includeAs: ScaleInvalidDataIncludeAs<C>;
}

/**
 * Type guard for the object form of ScaleInvalidDataMode: only the
 * {includeAs: ...} variant carries an `includeAs` payload; the string
 * modes ('filter', 'break-paths', ...) never do.
 */
export function isScaleInvalidDataInclude<C extends ScaleChannel>(
  invalidDataMode: ScaleInvalidDataMode<C>
): invalidDataMode is ScaleInvalidDataInclude<C> {
  const {includeAs} = invalidDataMode as Partial<ScaleInvalidDataInclude<C>>;
  return includeAs !== undefined;
}

/**
 * Determines how invalid data should be handled for one scale channel, based
 * on the scale type, the mark-level "invalid" property/config, and any
 * channel-specific default output in config.scale.invalid.
 *
 * @returns 'always-valid' when the scale cannot receive invalid values
 *   (discrete domain or count aggregate); an {includeAs: ...} object when
 *   invalid values should be rendered at a designated output; otherwise the
 *   normalized mark-level invalid mode.
 */
export function getScaleInvalidDataMode<C extends ScaleChannel>({
  markDef,
  config,
  scaleChannel,
  scaleType,
  isCountAggregate
}: {
  markDef: MarkDef;
  config: Config<SignalRef>;
  scaleChannel: C;
  scaleType: ScaleType;
  isCountAggregate: boolean;
}): ScaleInvalidDataMode<C> {
  // Discrete scales can always display null as another category, and count
  // aggregates cannot output null — in both cases no special handling is needed.
  const neverInvalid = !scaleType || !hasContinuousDomain(scaleType) || isCountAggregate;
  if (neverInvalid) {
    return 'always-valid';
  }

  const mode = normalizeInvalidDataMode(getMarkPropOrConfig('invalid', markDef, config), {
    isPath: isPathMark(markDef.type)
  });

  // A channel-specific default output (config.scale.invalid.<channel>) overrides
  // the current invalid mode: the field is then considered valid with that output.
  const channelDefaultForInvalid = config.scale?.invalid?.[scaleChannel];
  if (channelDefaultForInvalid !== undefined) {
    return {includeAs: channelDefaultForInvalid};
  }

  if (mode !== 'include') {
    return mode;
  }

  // TODO: it's arguable if we should make the behavior inconsistent between position and non-position.
  // But this initial PR, we keep the behavior consistent (no breaking changes).
  const isPositionChannel = isXorY(scaleChannel) || isPolarPositionChannel(scaleChannel);
  return {includeAs: isPositionChannel ? 'min' : 'zero-or-min'};
}
/**
 * Whether the given invalid-data mode requires path marks to break at
 * invalid points (both break-path variants do; they differ only in how
 * scale domains treat the invalid values).
 */
export function shouldBreakPath(mode: ScaleInvalidDataMode<any>): boolean {
  switch (mode) {
    case 'break-paths':
    case 'break-paths-keep-domains':
      return true;
    default:
      return false;
  }
}

0 comments on commit e28256c

Please sign in to comment.