From d05672a43301610f0c88254856af5cf5516ba14e Mon Sep 17 00:00:00 2001 From: Mike Bostock Date: Sun, 19 Sep 2021 11:27:22 -0700 Subject: [PATCH] adopt interning for set operations (#231) --- README.md | 6 +++--- src/difference.js | 4 +++- src/disjoint.js | 4 +++- src/intersection.js | 8 ++++++-- src/set.js | 3 --- src/superset.js | 12 +++++++++--- src/union.js | 4 +++- test/asserts.js | 15 ++++++--------- test/difference-test.js | 14 ++++++++++---- test/disjoint-test.js | 6 ++++++ test/intersection-test.js | 14 +++++++++----- test/subset-test.js | 6 ++++++ test/superset-test.js | 6 ++++++ test/union-test.js | 15 ++++++++++----- yarn.lock | 6 +++--- 15 files changed, 83 insertions(+), 40 deletions(-) delete mode 100644 src/set.js diff --git a/README.md b/README.md index b0f60be7..d51686a9 100644 --- a/README.md +++ b/README.md @@ -765,7 +765,7 @@ This methods implement basic set operations for any iterable. # d3.difference(iterable, ...others) · [Source](https://github.com/d3/d3-array/blob/master/src/difference.js) -Returns a new Set containing every value in *iterable* that is not in any of the *others* iterables. +Returns a new InternSet containing every value in *iterable* that is not in any of the *others* iterables. ```js d3.difference([0, 1, 2, 0], [1]) // Set {0, 2} @@ -773,7 +773,7 @@ d3.difference([0, 1, 2, 0], [1]) // Set {0, 2} # d3.union(...iterables) · [Source](https://github.com/d3/d3-array/blob/master/src/union.js) -Returns a new Set containing every (distinct) value that appears in any of the given *iterables*. The order of values in the returned Set is based on their first occurrence in the given *iterables*. +Returns a new InternSet containing every (distinct) value that appears in any of the given *iterables*. The order of values in the returned set is based on their first occurrence in the given *iterables*. ```js d3.union([0, 2, 1, 0], [1, 3]) // Set {0, 2, 1, 3} @@ -781,7 +781,7 @@ d3.union([0, 2, 1, 0], [1, 3]) // Set {0, 2, 1, 3} # d3.intersection(...iterables) · [Source](https://github.com/d3/d3-array/blob/master/src/intersection.js) -Returns a new Set containing every (distinct) value that appears in all of the given *iterables*. The order of values in the returned Set is based on their first occurrence in the given *iterables*. +Returns a new InternSet containing every (distinct) value that appears in all of the given *iterables*. The order of values in the returned set is based on their first occurrence in the given *iterables*. ```js d3.intersection([0, 2, 1, 0], [1, 3]) // Set {1} diff --git a/src/difference.js b/src/difference.js index 066334ba..c19ba788 100644 --- a/src/difference.js +++ b/src/difference.js @@ -1,5 +1,7 @@ +import {InternSet} from "internmap"; + export default function difference(values, ...others) { - values = new Set(values); + values = new InternSet(values); for (const other of others) { for (const value of other) { values.delete(value); diff --git a/src/disjoint.js b/src/disjoint.js index 02dfd03a..d62a6701 100644 --- a/src/disjoint.js +++ b/src/disjoint.js @@ -1,5 +1,7 @@ +import {InternSet} from "internmap"; + export default function disjoint(values, other) { - const iterator = other[Symbol.iterator](), set = new Set(); + const iterator = other[Symbol.iterator](), set = new InternSet(); for (const v of values) { if (set.has(v)) return false; let value, done; diff --git a/src/intersection.js b/src/intersection.js index 2c6af2ab..43aff399 100644 --- a/src/intersection.js +++ b/src/intersection.js @@ -1,7 +1,7 @@ -import set from "./set.js"; +import {InternSet} from "internmap"; export default function intersection(values, ...others) { - values = new Set(values); + values = new InternSet(values); others = others.map(set); out: for (const value of values) { for (const other of others) { @@ -13,3 +13,7 @@ export default function intersection(values, ...others) { } return values; } + +function set(values) { + return values instanceof InternSet ? values : new InternSet(values); +} diff --git a/src/set.js b/src/set.js deleted file mode 100644 index a115f9a4..00000000 --- a/src/set.js +++ /dev/null @@ -1,3 +0,0 @@ -export default function set(values) { - return values instanceof Set ? values : new Set(values); -} diff --git a/src/superset.js b/src/superset.js index 1097f262..d178e9c2 100644 --- a/src/superset.js +++ b/src/superset.js @@ -1,13 +1,19 @@ export default function superset(values, other) { const iterator = values[Symbol.iterator](), set = new Set(); for (const o of other) { - if (set.has(o)) continue; + const io = intern(o); + if (set.has(io)) continue; let value, done; while (({value, done} = iterator.next())) { if (done) return false; - set.add(value); - if (Object.is(o, value)) break; + const ivalue = intern(value); + set.add(ivalue); + if (Object.is(io, ivalue)) break; } } return true; } + +function intern(value) { + return value !== null && typeof value === "object" ? value.valueOf() : value; +} diff --git a/src/union.js b/src/union.js index eb0856e5..57c20b10 100644 --- a/src/union.js +++ b/src/union.js @@ -1,5 +1,7 @@ +import {InternSet} from "internmap"; + export default function union(...others) { - const set = new Set(); + const set = new InternSet(); for (const other of others) { for (const o of other) { set.add(o); diff --git a/test/asserts.js b/test/asserts.js index 9c2222cf..4a3c4f8f 100644 --- a/test/asserts.js +++ b/test/asserts.js @@ -1,12 +1,9 @@ import assert from "assert"; +import {InternSet} from "internmap"; -export function assertSetEqual(A, B) { - assert(setEqual(A, B)); -} - -function setEqual(A, B) { - if (!(A instanceof Set)) throw new Error("not a set"); - for (const a of A) if (!B.has(a)) return false; - for (const b of B) if (!A.has(b)) return false; - return true; +export function assertSetEqual(actual, expected) { + assert(actual instanceof Set); + expected = new InternSet(expected); + for (const a of actual) assert(expected.has(a), `unexpected ${a}`); + for (const e of expected) assert(actual.has(e), `expected ${e}`); } diff --git a/test/difference-test.js b/test/difference-test.js index e7d94b2c..b74f21b0 100644 --- a/test/difference-test.js +++ b/test/difference-test.js @@ -2,11 +2,17 @@ import {difference} from "../src/index.js"; import {assertSetEqual} from "./asserts.js"; it("difference(values, other) returns a set of values", () => { - assertSetEqual(difference([1, 2, 3], [2, 1]), new Set([3])); - assertSetEqual(difference([1, 2], [2, 3, 1]), new Set([])); - assertSetEqual(difference([2, 1, 3], [4, 3, 1]), new Set([2])); + assertSetEqual(difference([1, 2, 3], [2, 1]), [3]); + assertSetEqual(difference([1, 2], [2, 3, 1]), []); + assertSetEqual(difference([2, 1, 3], [4, 3, 1]), [2]); }); it("difference(...values) accepts iterables", () => { - assertSetEqual(difference(new Set([1, 2, 3]), new Set([1])), new Set([2, 3])); + assertSetEqual(difference(new Set([1, 2, 3]), new Set([1])), [2, 3]); +}); + +it("difference(values, other) performs interning", () => { + assertSetEqual(difference([new Date("2021-01-01"), new Date("2021-01-02"), new Date("2021-01-03")], [new Date("2021-01-02"), new Date("2021-01-01")]), [new Date("2021-01-03")]); + assertSetEqual(difference([new Date("2021-01-01"), new Date("2021-01-02")], [new Date("2021-01-02"), new Date("2021-01-03"), new Date("2021-01-01")]), []); + assertSetEqual(difference([new Date("2021-01-02"), new Date("2021-01-01"), new Date("2021-01-03")], [new Date("2021-01-04"), new Date("2021-01-03"), new Date("2021-01-01")]), [new Date("2021-01-02")]); }); diff --git a/test/disjoint-test.js b/test/disjoint-test.js index c918314b..cc01c359 100644 --- a/test/disjoint-test.js +++ b/test/disjoint-test.js @@ -15,6 +15,12 @@ it("disjoint(values, other) allows other to be infinite", () => { assert.strictEqual(disjoint([2], repeat(1, 3, 2)), false); }); +it("disjoint(values, other) performs interning", () => { + assert.strictEqual(disjoint([new Date("2021-01-01")], [new Date("2021-01-02")]), true); + assert.strictEqual(disjoint([new Date("2021-01-02"), new Date("2021-01-03")], [new Date("2021-01-03"), new Date("2021-01-04")]), false); + assert.strictEqual(disjoint([new Date("2021-01-01")], []), true); +}); + function* odds() { for (let i = 1; true; i += 2) { yield i; diff --git a/test/intersection-test.js b/test/intersection-test.js index b4d08fdb..eb0cfccf 100644 --- a/test/intersection-test.js +++ b/test/intersection-test.js @@ -2,18 +2,22 @@ import {intersection} from "../src/index.js"; import {assertSetEqual} from "./asserts.js"; it("intersection(values) returns a set of values", () => { - assertSetEqual(intersection([1, 2, 3, 2, 1]), new Set([1, 2, 3])); + assertSetEqual(intersection([1, 2, 3, 2, 1]), [1, 2, 3]); }); it("intersection(values, other) returns a set of values", () => { - assertSetEqual(intersection([1, 2], [2, 3, 1]), new Set([1, 2])); - assertSetEqual(intersection([2, 1, 3], [4, 3, 1]), new Set([1, 3])); + assertSetEqual(intersection([1, 2], [2, 3, 1]), [1, 2]); + assertSetEqual(intersection([2, 1, 3], [4, 3, 1]), [1, 3]); }); it("intersection(...values) returns a set of values", () => { - assertSetEqual(intersection([1, 2], [2, 1], [2, 3]), new Set([2])); + assertSetEqual(intersection([1, 2], [2, 1], [2, 3]), [2]); }); it("intersection(...values) accepts iterables", () => { - assertSetEqual(intersection(new Set([1, 2, 3])), new Set([1, 2, 3])); + assertSetEqual(intersection(new Set([1, 2, 3])), [1, 2, 3]); +}); + +it("intersection(...values) performs interning", () => { + assertSetEqual(intersection([new Date("2021-01-01"), new Date("2021-01-03")], [new Date("2021-01-01"), new Date("2021-01-02")]), [new Date("2021-01-01")]); }); diff --git a/test/subset-test.js b/test/subset-test.js index 3fa801b1..53ecfbdb 100644 --- a/test/subset-test.js +++ b/test/subset-test.js @@ -6,3 +6,9 @@ it("subset(values, other) returns true if values is a subset of others", () => { assert.strictEqual(subset([3, 4], [2, 3]), false); assert.strictEqual(subset([], [1]), true); }); + +it("subset(values, other) performs interning", () => { + assert.strictEqual(subset([new Date("2021-01-02")], [new Date("2021-01-01"), new Date("2021-01-02")]), true); + assert.strictEqual(subset([new Date("2021-01-03"), new Date("2021-01-04")], [new Date("2021-01-02"), new Date("2021-01-03")]), false); + assert.strictEqual(subset([], [new Date("2021-01-01")]), true); +}); diff --git a/test/superset-test.js b/test/superset-test.js index ffac4b4a..e1b1a6c0 100644 --- a/test/superset-test.js +++ b/test/superset-test.js @@ -15,6 +15,12 @@ it("superset(values, other) allows other to be infinite", () => { assert.strictEqual(superset([1, 3, 5], repeat(1, 3, 2)), false); }); +it("superset(values, other) performs interning", () => { + assert.strictEqual(superset([new Date("2021-01-01"), new Date("2021-01-02")], [new Date("2021-01-02")]), true); + assert.strictEqual(superset([new Date("2021-01-02"), new Date("2021-01-03")], [new Date("2021-01-03"), new Date("2021-01-04")]), false); + assert.strictEqual(superset([new Date("2021-01-01")], []), true); +}); + function* odds() { for (let i = 1; true; i += 2) { yield i; diff --git a/test/union-test.js b/test/union-test.js index 00635ae2..39796ce5 100644 --- a/test/union-test.js +++ b/test/union-test.js @@ -2,18 +2,23 @@ import {union} from "../src/index.js"; import {assertSetEqual} from "./asserts.js"; it("union(values) returns a set of values", () => { - assertSetEqual(union([1, 2, 3, 2, 1]), new Set([1, 2, 3])); + assertSetEqual(union([1, 2, 3, 2, 1]), [1, 2, 3]); }); it("union(values, other) returns a set of values", () => { - assertSetEqual(union([1, 2], [2, 3, 1]), new Set([1, 2, 3])); + assertSetEqual(union([1, 2], [2, 3, 1]), [1, 2, 3]); }); it("union(...values) returns a set of values", () => { - assertSetEqual(union([1], [2], [2, 3], [1]), new Set([1, 2, 3])); + assertSetEqual(union([1], [2], [2, 3], [1]), [1, 2, 3]); }); it("union(...values) accepts iterables", () => { - assertSetEqual(union(new Set([1, 2, 3])), new Set([1, 2, 3])); - assertSetEqual(union(Uint8Array.of(1, 2, 3)), new Set([1, 2, 3])); + assertSetEqual(union(new Set([1, 2, 3])), [1, 2, 3]); + assertSetEqual(union(Uint8Array.of(1, 2, 3)), [1, 2, 3]); +}); + +it("union(...values) performs interning", () => { + assertSetEqual(union([new Date("2021-01-01"), new Date("2021-01-01"), new Date("2021-01-02")]), [new Date("2021-01-01"), new Date("2021-01-02")]); + assertSetEqual(union([new Date("2021-01-01"), new Date("2021-01-03")], [new Date("2021-01-01"), new Date("2021-01-02")]), [new Date("2021-01-01"), new Date("2021-01-02"), new Date("2021-01-03")]); }); diff --git a/yarn.lock b/yarn.lock index 8cb366d7..a53a7dd4 100644 --- a/yarn.lock +++ b/yarn.lock @@ -824,9 +824,9 @@ inherits@2: integrity sha512-k/vGaX4/Yla3WzyMCvTQOXYeIHvqOKtnqBduzTHpzpQZzAskKMhZ2K+EnBiSM9zGSoIFeMpXKxa4dYeZIQqewQ== "internmap@1 - 2": - version "2.0.1" - resolved "https://registry.yarnpkg.com/internmap/-/internmap-2.0.1.tgz#33d0fa016185397549fb1a14ea3dbe5a2949d1cd" - integrity sha512-Ujwccrj9FkGqjbY3iVoxD1VV+KdZZeENx0rphrtzmRXbFvkFO88L80BL/zeSIguX/7T+y8k04xqtgWgS5vxwxw== + version "2.0.2" + resolved "https://registry.yarnpkg.com/internmap/-/internmap-2.0.2.tgz#3efa1165209cc56133df1400df9c34a73e0dad93" + integrity sha512-6O4dJQZN4+83kg9agi21fbasiAn7V2JRvLv29/YT1Kz8f+ngakB1hMG+AP0mYquLOtjWhNO8CvKhhXT/7Tla/g== is-binary-path@~2.1.0: version "2.1.0"