Skip to content

Commit

Permalink
fix: support indexing of quoted triples (#369)
Browse files Browse the repository at this point in the history
* fix: support deeply nested triples in termFromId and termToId

* chore: extend dataset tests for deeply nested triples

* perf: index terms of quoted triples

* chore: add performance test for quoted triples

* Update test/N3Store-test.js
  • Loading branch information
jeswr committed Oct 27, 2023
1 parent 3b25596 commit e1cc8dd
Show file tree
Hide file tree
Showing 6 changed files with 585 additions and 121 deletions.
118 changes: 118 additions & 0 deletions perf/N3StoreStar-perf.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#!/usr/bin/env node
const N3 = require('..');
const assert = require('assert');

console.log('N3Store performance test');

const prefix = 'http://example.org/#';

/* Test triples */
// Number of distinct values used per term position (first CLI argument, default 22).
const dim = Number.parseInt(process.argv[2], 10) || 22;
const dimSquared = dim * dim;
const dimCubed = dimSquared * dim;
const dimToTheFour = dimCubed * dim;
const dimToTheFive = dimToTheFour * dim;

// Creates the named node whose IRI is `prefix` followed by `n`.
const node = n => N3.DataFactory.namedNode(prefix + n);
// Creates the quoted triple (s, p, o) that the tests use in the subject position.
const quoted = (s, p, o) => N3.DataFactory.quad(node(s), node(p), node(o));

// The store holds dim^3 quoted-triple subjects, each combined with
// dim predicates and dim objects: dim^5 quads in total.
const store = new N3.Store();
let TEST = `- Adding ${dimToTheFive} triples to the default graph`;
console.time(TEST);
for (let i = 0; i < dim; i++)
  for (let j = 0; j < dim; j++)
    for (let k = 0; k < dim; k++)
      for (let l = 0; l < dim; l++)
        for (let m = 0; m < dim; m++)
          store.addQuad(quoted(i, j, k), node(l), node(m));
console.timeEnd(TEST);

console.log(`* Memory usage for triples: ${Math.round(process.memoryUsage().rss / 1024 / 1024)}MB`);

// Fully bound pattern: every stored quad is looked up exactly once.
TEST = `- Finding 1 triple in the default graph ${dimToTheFive} times (0 variables)`;
console.time(TEST);
for (let i = 0; i < dim; i++)
  for (let j = 0; j < dim; j++)
    for (let k = 0; k < dim; k++)
      for (let l = 0; l < dim; l++)
        for (let m = 0; m < dim; m++)
          assert.strictEqual(store.getQuads(quoted(i, j, k), node(l), node(m)).length, 1);
console.timeEnd(TEST);

// Unbound subject: each (predicate, object) pair matches all dim^3 quoted subjects.
TEST = `- Finding all ${dimCubed} triples in the default graph ${dimSquared} times (1 variable subject)`;
console.time(TEST);
for (let i = 0; i < dim; i++)
  for (let j = 0; j < dim; j++)
    assert.strictEqual(store.getQuads(null, node(i), node(j)).length, dimCubed);
console.timeEnd(TEST);

// Named-node subjects were never added (all subjects are quoted triples), so nothing matches.
TEST = `- Finding all ${0} triples in the default graph ${dimSquared} times (1 variable predicate)`;
console.time(TEST);
for (let i = 0; i < dim; i++)
  for (let j = 0; j < dim; j++)
    assert.strictEqual(store.getQuads(node(i), null, node(j)).length, 0);
console.timeEnd(TEST);

// Unbound predicate with a quoted-triple subject: one match per predicate.
TEST = `- Finding all ${dim} triples in the default graph ${dimToTheFour} times (1 variable predicate)`;
console.time(TEST);
for (let i = 0; i < dim; i++)
  for (let j = 0; j < dim; j++)
    for (let k = 0; k < dim; k++)
      for (let l = 0; l < dim; l++)
        assert.strictEqual(store.getQuads(quoted(i, j, k), null, node(l)).length, dim);
console.timeEnd(TEST);

// Named-node subjects again: no matches expected.
TEST = `- Finding all ${0} triples in the default graph ${dimSquared} times (1 variable object)`;
console.time(TEST);
for (let i = 0; i < dim; i++)
  for (let j = 0; j < dim; j++)
    assert.strictEqual(store.getQuads(node(i), node(j), null).length, 0);
console.timeEnd(TEST);

// Unbound object with a quoted-triple subject: one match per object.
TEST = `- Finding all ${dim} triples in the default graph ${dimToTheFour} times (1 variable object)`;
console.time(TEST);
for (let i = 0; i < dim; i++)
  for (let j = 0; j < dim; j++)
    for (let k = 0; k < dim; k++)
      for (let l = 0; l < dim; l++)
        assert.strictEqual(store.getQuads(quoted(i, j, k), node(l), null).length, dim);
console.timeEnd(TEST);

// Only the quoted-triple subject is bound: dim predicates x dim objects match.
TEST = `- Finding all ${dimSquared} triples in the default graph ${dimCubed} times (2 variables)`;
console.time(TEST);
for (let i = 0; i < dim; i++)
  for (let j = 0; j < dim; j++)
    for (let k = 0; k < dim; k++)
      assert.strictEqual(store.getQuads(quoted(i, j, k), null, null).length, dimSquared);
console.timeEnd(TEST);
118 changes: 118 additions & 0 deletions perf/N3StoreStarViews-perf.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,118 @@
#!/usr/bin/env node
const N3 = require('../lib');
const assert = require('assert');

console.log('N3Store performance test');

const prefix = 'http://example.org/#';

/* Test triples */
// Number of distinct values per component of the quoted triples
// (first CLI argument, default 64).
const dim = Number.parseInt(process.argv[2], 10) || 64;
const dimSquared = dim * dim;
const dimCubed = dimSquared * dim;
// Number of distinct predicates and of distinct objects attached to each quoted triple.
const fanOut = 3;
const fanOutSquared = fanOut * fanOut;
// Total number of quads stored: dim^3 quoted subjects x fanOut predicates x fanOut objects.
const total = dimCubed * fanOutSquared;

// Creates the named node whose IRI is `prefix` followed by `n`.
const node = n => N3.DataFactory.namedNode(prefix + n);
// Creates the quoted triple (s, p, o) that the tests use in the subject position.
const quoted = (s, p, o) => N3.DataFactory.quad(node(s), node(p), node(o));

const store = new N3.Store();
let TEST = `- Adding ${total} triples to the default graph`;
console.time(TEST);
for (let i = 0; i < dim; i++)
  for (let j = 0; j < dim; j++)
    for (let k = 0; k < dim; k++)
      for (let l = 0; l < fanOut; l++)
        for (let m = 0; m < fanOut; m++)
          store.addQuad(quoted(i, j, k), node(l), node(m));
console.timeEnd(TEST);

console.log(`* Memory usage for triples: ${Math.round(process.memoryUsage().rss / 1024 / 1024)}MB`);

// Fully bound pattern: every stored quad is looked up exactly once.
TEST = `- Finding 1 triple in the default graph ${total} times (0 variables)`;
console.time(TEST);
for (let i = 0; i < dim; i++)
  for (let j = 0; j < dim; j++)
    for (let k = 0; k < dim; k++)
      for (let l = 0; l < fanOut; l++)
        for (let m = 0; m < fanOut; m++)
          assert.strictEqual(store.getQuads(quoted(i, j, k), node(l), node(m)).length, 1);
console.timeEnd(TEST);

// Unbound subject: each stored (predicate, object) pair matches all dim^3 quoted subjects.
TEST = `- Finding all ${dimCubed} triples in the default graph ${fanOutSquared} times (1 variable subject)`;
console.time(TEST);
for (let i = 0; i < fanOut; i++)
  for (let j = 0; j < fanOut; j++)
    assert.strictEqual(store.getQuads(null, node(i), node(j)).length, dimCubed);
console.timeEnd(TEST);

// Named-node subjects were never added (all subjects are quoted triples), so nothing matches.
TEST = `- Finding all ${0} triples in the default graph ${dimSquared} times (1 variable predicate)`;
console.time(TEST);
for (let i = 0; i < dim; i++)
  for (let j = 0; j < dim; j++)
    assert.strictEqual(store.getQuads(node(i), null, node(j)).length, 0);
console.timeEnd(TEST);

// Unbound predicate with a quoted-triple subject: one match per predicate.
TEST = `- Finding all ${fanOut} triples in the default graph ${dimCubed * fanOut} times (1 variable predicate)`;
console.time(TEST);
for (let i = 0; i < dim; i++)
  for (let j = 0; j < dim; j++)
    for (let k = 0; k < dim; k++)
      for (let l = 0; l < fanOut; l++)
        assert.strictEqual(store.getQuads(quoted(i, j, k), null, node(l)).length, fanOut);
console.timeEnd(TEST);

// Named-node subjects again: no matches expected.
TEST = `- Finding all ${0} triples in the default graph ${dimSquared} times (1 variable object)`;
console.time(TEST);
for (let i = 0; i < dim; i++)
  for (let j = 0; j < dim; j++)
    assert.strictEqual(store.getQuads(node(i), node(j), null).length, 0);
console.timeEnd(TEST);

// Unbound object with a quoted-triple subject: one match per object.
TEST = `- Finding all ${fanOut} triples in the default graph ${dimCubed * fanOut} times (1 variable object)`;
console.time(TEST);
for (let i = 0; i < dim; i++)
  for (let j = 0; j < dim; j++)
    for (let k = 0; k < dim; k++)
      for (let l = 0; l < fanOut; l++)
        assert.strictEqual(store.getQuads(quoted(i, j, k), node(l), null).length, fanOut);
console.timeEnd(TEST);

// Only the quoted-triple subject is bound: fanOut predicates x fanOut objects match.
TEST = `- Finding all ${fanOutSquared} triples in the default graph ${dimCubed} times (2 variables)`;
console.time(TEST);
for (let i = 0; i < dim; i++)
  for (let j = 0; j < dim; j++)
    for (let k = 0; k < dim; k++)
      assert.strictEqual(store.getQuads(quoted(i, j, k), null, null).length, fanOutSquared);
console.timeEnd(TEST);
55 changes: 31 additions & 24 deletions src/N3DataFactory.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,7 +10,6 @@ let DEFAULTGRAPH;
let _blankNodeCounter = 0;

const escapedLiteral = /^"(.*".*)(?="[^"]*$)/;
const quadId = /^<<("(?:""|[^"])*"[^ ]*|[^ ]+) ("(?:""|[^"])*"[^ ]*|[^ ]+) ("(?:""|[^"])*"[^ ]*|[^ ]+) ?("(?:""|[^"])*"[^ ]*|[^ ]+)?>>$/;

// ## DataFactory singleton
const DataFactory = {
Expand Down Expand Up @@ -188,9 +187,12 @@ export class DefaultGraph extends Term {
// ## DefaultGraph singleton
DEFAULTGRAPH = new DefaultGraph();


// ### Constructs a term from the given internal string ID
export function termFromId(id, factory) {
// The third 'nested' parameter of this function is to aid
// with recursion over nested terms. It should not be used
// by consumers of this library.
// See https://github.com/rdfjs/N3.js/pull/311#discussion_r1061042725
export function termFromId(id, factory, nested) {
factory = factory || DataFactory;

// Falsy value or empty string indicate the default graph
Expand All @@ -215,21 +217,28 @@ export function termFromId(id, factory) {
return factory.literal(id.substr(1, endPos - 1),
id[endPos + 1] === '@' ? id.substr(endPos + 2)
: factory.namedNode(id.substr(endPos + 3)));
case '<':
const components = quadId.exec(id);
return factory.quad(
termFromId(unescapeQuotes(components[1]), factory),
termFromId(unescapeQuotes(components[2]), factory),
termFromId(unescapeQuotes(components[3]), factory),
components[4] && termFromId(unescapeQuotes(components[4]), factory)
);
case '[':
id = JSON.parse(id);
break;
default:
return factory.namedNode(id);
if (!nested || !Array.isArray(id)) {
return factory.namedNode(id);
}
}
return factory.quad(
termFromId(id[0], factory, true),
termFromId(id[1], factory, true),
termFromId(id[2], factory, true),
id[3] && termFromId(id[3], factory, true)
);
}

// ### Constructs an internal string ID from the given term or ID string
export function termToId(term) {
// The second 'nested' parameter of this function is to aid
// with recursion over nested terms. It should not be used
// by consumers of this library.
// See https://github.com/rdfjs/N3.js/pull/311#discussion_r1061042725
export function termToId(term, nested) {
if (typeof term === 'string')
return term;
if (term instanceof Term && term.termType !== 'Quad')
Expand All @@ -247,17 +256,15 @@ export function termToId(term) {
term.language ? `@${term.language}` :
(term.datatype && term.datatype.value !== xsd.string ? `^^${term.datatype.value}` : '')}`;
case 'Quad':
// To identify RDF* quad components, we escape quotes by doubling them.
// This avoids the overhead of backslash parsing of Turtle-like syntaxes.
return `<<${
escapeQuotes(termToId(term.subject))
} ${
escapeQuotes(termToId(term.predicate))
} ${
escapeQuotes(termToId(term.object))
}${
(isDefaultGraph(term.graph)) ? '' : ` ${termToId(term.graph)}`
}>>`;
const res = [
termToId(term.subject, true),
termToId(term.predicate, true),
termToId(term.object, true),
];
if (!isDefaultGraph(term.graph)) {
res.push(termToId(term.graph, true));
}
return nested ? res : JSON.stringify(res);
default: throw new Error(`Unexpected termType: ${term.termType}`);
}
}
Expand Down

0 comments on commit e1cc8dd

Please sign in to comment.