From 0acf88b6b524bc56a9d5dff327045c51293cd8a5 Mon Sep 17 00:00:00 2001 From: Toni Lastre Date: Wed, 11 May 2022 11:44:26 +0200 Subject: [PATCH] Add Monarch config and tokenizer for query language Cypher --- .../cypher/cypher.contribution.ts | 24 ++ src/basic-languages/cypher/cypher.test.ts | 327 ++++++++++++++++++ src/basic-languages/cypher/cypher.ts | 274 +++++++++++++++ src/basic-languages/monaco.contribution.ts | 1 + 4 files changed, 626 insertions(+) create mode 100644 src/basic-languages/cypher/cypher.contribution.ts create mode 100644 src/basic-languages/cypher/cypher.test.ts create mode 100644 src/basic-languages/cypher/cypher.ts diff --git a/src/basic-languages/cypher/cypher.contribution.ts b/src/basic-languages/cypher/cypher.contribution.ts new file mode 100644 index 0000000000..e9d0643051 --- /dev/null +++ b/src/basic-languages/cypher/cypher.contribution.ts @@ -0,0 +1,24 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +import { registerLanguage } from '../_.contribution'; +/* Ambient globals: the loader below branches on AMD and only calls require in the AMD branch. */ +declare var AMD: any; +declare var require: any; +/* Registers the 'cypher' language id with its file extensions and aliases; ./cypher is loaded only when the loader callback runs. */ +registerLanguage({ + id: 'cypher', + extensions: ['.cypher', '.cyp'], + aliases: ['Cypher', 'OpenCypher'], + loader: () => { + if (AMD) { /* AMD is truthy: load through the callback-style require and adapt it to a Promise */ + return new Promise((resolve, reject) => { + require(['vs/basic-languages/cypher/cypher'], resolve, reject); + }); + } else { /* otherwise: dynamic import of the sibling module */ + return import('./cypher'); + } + } +}); diff --git a/src/basic-languages/cypher/cypher.test.ts b/src/basic-languages/cypher/cypher.test.ts new file mode 100644 index 0000000000..51b9daf06e --- /dev/null +++ b/src/basic-languages/cypher/cypher.test.ts @@ -0,0 +1,327 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +import { testTokenization } from '../test/testRunner'; +/* Each inner array is one test case; each object in it is one input line with the expected token start offsets and types. */ +testTokenization('cypher', [ + // Comments: a line comment, an inline block comment, and a block comment spanning three lines + [ + { + line: '// Single line comment', + tokens: [{ startIndex: 0, type: 'comment.cypher' }] + } + ], + [ + { + line: 'MATCH /* comment part */ xyz', + tokens: [ + { startIndex: 0, type: 'keyword.cypher' }, + { startIndex: 5, type: 'white.cypher' }, + { startIndex: 6, type: 'comment.cypher' }, + { startIndex: 24, type: 'white.cypher' }, + { startIndex: 25, type: 'identifier.cypher' } + ] + } + ], + [ + { + line: '/* multi line comment', + tokens: [{ startIndex: 0, type: 'comment.cypher' }] + }, + { + line: 'comment continues MATCH // not done yet', + tokens: [{ startIndex: 0, type: 'comment.cypher' }] + }, + { + line: 'comment ends */ MATCH', + tokens: [ + { startIndex: 0, type: 'comment.cypher' }, + { startIndex: 15, type: 'white.cypher' }, + { startIndex: 16, type: 'keyword.cypher' } + ] + } + ], + + // Numbers: A decimal (integer or float) literal, optionally signed, with optional fraction and exponent: + [ + { + line: '13', + tokens: [{ startIndex: 0, type: 'number.cypher' }] + } + ], + [ + { + line: '-40000', + tokens: [{ startIndex: 0, type: 'number.cypher' }] + } + ], + [ + { + line: '3.14', + tokens: [{ startIndex: 0, type: 'number.float.cypher' }] + } + ], + [ + { + line: '.314', + tokens: [{ startIndex: 0, type: 'number.float.cypher' }] + } + ], + [ + { + line: '-.314', + tokens: [{ startIndex: 0, type: 'number.float.cypher' }] + } + ], + [ + { + line: '6.022E23', + tokens: [{ startIndex: 0, type: 'number.float.cypher' }] + } + ], + [ + { + line: '-6.022e23', + tokens: [{ startIndex: 0, type: 'number.float.cypher' }] + } + ], + [ + { + line: '12E10', + tokens: [{ startIndex: 0, type: 'number.float.cypher' }] + } + ], + [ + { + line: '12e10', + tokens: [{ startIndex: 0, type: 'number.float.cypher' }] + } + ], + [ + { + line: '12e-10', + tokens: [{ startIndex: 0, type: 'number.float.cypher' }] + } + ], + [ + { 
+ line: '12E-10', + tokens: [{ startIndex: 0, type: 'number.float.cypher' }] + } + ], + + // Numbers: A hexadecimal integer literal (starting with 0x), optionally signed + [ + { + line: '0x13af', + tokens: [{ startIndex: 0, type: 'number.hex.cypher' }] + } + ], + [ + { + line: '0xFC3A9', + tokens: [{ startIndex: 0, type: 'number.hex.cypher' }] + } + ], + [ + { + line: '-0x66eff', + tokens: [{ startIndex: 0, type: 'number.hex.cypher' }] + } + ], + + // Numbers: An octal integer literal (starting with 0), optionally signed + [ + { + line: '01372', + tokens: [{ startIndex: 0, type: 'number.octal.cypher' }] + } + ], + [ + { + line: '02127', + tokens: [{ startIndex: 0, type: 'number.octal.cypher' }] + } + ], + [ + { + line: '-05671', + tokens: [{ startIndex: 0, type: 'number.octal.cypher' }] + } + ], + + // Strings: A String literal ('', ""), escaped and non-escaped; each whole literal is expected as one string token + [ + { + line: '"two \'words\'"', + tokens: [{ startIndex: 0, type: 'string.cypher' }] + } + ], + [ + { + line: '"two \\"words\\""', + tokens: [{ startIndex: 0, type: 'string.cypher' }] + } + ], + [ + { + line: '\'two "words"\'', + tokens: [{ startIndex: 0, type: 'string.cypher' }] + } + ], + [ + { + line: "'two \\'words\\''", + tokens: [{ startIndex: 0, type: 'string.cypher' }] + } + ], + + // Identifiers wrapped with backtick (``), including escaped backticks inside the name + [ + { + line: '`variable`', + tokens: [{ startIndex: 0, type: 'identifier.escape.cypher' }] + } + ], + [ + { + line: '`A variable with weird stuff in it[]!`', + tokens: [{ startIndex: 0, type: 'identifier.escape.cypher' }] + } + ], + [ + { + line: '`Escaped \\`variable\\``', + tokens: [{ startIndex: 0, type: 'identifier.escape.cypher' }] + } + ], + + // Operators: a listed operator is a delimiter; an unlisted run such as ++ is expected as the default (empty) token type + [ + { + line: '1+2', + tokens: [ + { startIndex: 0, type: 'number.cypher' }, + { startIndex: 1, type: 'delimiter.cypher' }, + { startIndex: 2, type: 'number.cypher' } + ] + } + ], + [ + { + line: '1++2', + tokens: [ + { startIndex: 0, type: 'number.cypher' }, + { startIndex: 1, type: '' }, + { startIndex: 3, type: 'number.cypher' } + ] + } + ], + + // 
Builtin literals: A boolean literal (true | false), in lower and upper case + [ + { + line: 'true', + tokens: [{ startIndex: 0, type: 'predefined.literal.cypher' }] + } + ], + [ + { + line: 'false', + tokens: [{ startIndex: 0, type: 'predefined.literal.cypher' }] + } + ], + [ + { + line: 'TRUE', + tokens: [{ startIndex: 0, type: 'predefined.literal.cypher' }] + } + ], + [ + { + line: 'FALSE', + tokens: [{ startIndex: 0, type: 'predefined.literal.cypher' }] + } + ], + + // Builtin literals: A null literal, in lower and upper case + [ + { + line: 'null', + tokens: [{ startIndex: 0, type: 'predefined.literal.cypher' }] + } + ], + [ + { + line: 'NULL', + tokens: [{ startIndex: 0, type: 'predefined.literal.cypher' }] + } + ], + + // Builtin functions (a builtin name directly followed by an opening parenthesis), plain and nested + [ + { + line: 'properties(node)', + tokens: [ + { startIndex: 0, type: 'predefined.function.cypher' }, + { startIndex: 10, type: 'delimiter.parenthesis.cypher' }, + { startIndex: 11, type: 'identifier.cypher' }, + { startIndex: 15, type: 'delimiter.parenthesis.cypher' } + ] + } + ], + [ + { + line: 'left(right("Hello Cypher"))', + tokens: [ + { startIndex: 0, type: 'predefined.function.cypher' }, + { startIndex: 4, type: 'delimiter.parenthesis.cypher' }, + { startIndex: 5, type: 'predefined.function.cypher' }, + { startIndex: 10, type: 'delimiter.parenthesis.cypher' }, + { startIndex: 11, type: 'string.cypher' }, + { startIndex: 25, type: 'delimiter.parenthesis.cypher' } + ] + } + ], + + // Keywords, with the whitespace, parenthesis and identifier tokens between them + [ + { + line: 'MATCH (n) RETURN n', + tokens: [ + { startIndex: 0, type: 'keyword.cypher' }, + { startIndex: 5, type: 'white.cypher' }, + { startIndex: 6, type: 'delimiter.parenthesis.cypher' }, + { startIndex: 7, type: 'identifier.cypher' }, + { startIndex: 8, type: 'delimiter.parenthesis.cypher' }, + { startIndex: 9, type: 'white.cypher' }, + { startIndex: 10, type: 'keyword.cypher' }, + { startIndex: 16, type: 'white.cypher' }, + { startIndex: 17, type: 'identifier.cypher' } + ] + } + ], + + // Labels on nodes and relationships (the leading colon belongs to the label token; chained labels are expected as one run) + [ + { + line: 
'(n:NodeLabel1)-[:RelationshipType]->(:NodeLabel2:NodeLabel3)', + tokens: [ + { startIndex: 0, type: 'delimiter.parenthesis.cypher' }, + { startIndex: 1, type: 'identifier.cypher' }, + { startIndex: 2, type: 'type.identifier.cypher' }, + { startIndex: 13, type: 'delimiter.parenthesis.cypher' }, + { startIndex: 14, type: 'delimiter.cypher' }, + { startIndex: 15, type: 'delimiter.bracket.cypher' }, + { startIndex: 16, type: 'type.identifier.cypher' }, + { startIndex: 33, type: 'delimiter.bracket.cypher' }, + { startIndex: 34, type: 'delimiter.cypher' }, + { startIndex: 36, type: 'delimiter.parenthesis.cypher' }, + { startIndex: 37, type: 'type.identifier.cypher' }, + { startIndex: 59, type: 'delimiter.parenthesis.cypher' } + ] + } + ] +]); diff --git a/src/basic-languages/cypher/cypher.ts b/src/basic-languages/cypher/cypher.ts new file mode 100644 index 0000000000..c790151701 --- /dev/null +++ b/src/basic-languages/cypher/cypher.ts @@ -0,0 +1,274 @@ +/*--------------------------------------------------------------------------------------------- + * Copyright (c) Microsoft Corporation. All rights reserved. + * Licensed under the MIT License. See License.txt in the project root for license information. 
+ *--------------------------------------------------------------------------------------------*/ + +import { languages } from '../../fillers/monaco-editor-core'; +/** Editor configuration for Cypher: comment markers, bracket pairs, and the auto-closing / surrounding pairs. */ +export const conf: languages.LanguageConfiguration = { + comments: { + lineComment: '//', + blockComment: ['/*', '*/'] + }, + brackets: [ + ['{', '}'], + ['[', ']'], + ['(', ')'] + ], + autoClosingPairs: [ + { open: '{', close: '}' }, + { open: '[', close: ']' }, + { open: '(', close: ')' }, + { open: '"', close: '"' }, + { open: "'", close: "'" }, + { open: '`', close: '`' } + ], + surroundingPairs: [ + { open: '{', close: '}' }, + { open: '[', close: ']' }, + { open: '(', close: ')' }, + { open: '"', close: '"' }, + { open: "'", close: "'" }, + { open: '`', close: '`' } + ] +}; +/** Monarch tokenizer definition for Cypher; every token type gets the `.cypher` postfix and the empty string is the default token. */ +// Ref: Cypher Query Language Reference, Version 9 (https://opencypher.org/resources/) +export const language = { + defaultToken: '', + tokenPostfix: `.cypher`, + ignoreCase: true, + + brackets: [ + { open: '{', close: '}', token: 'delimiter.curly' }, + { open: '[', close: ']', token: 'delimiter.bracket' }, + { open: '(', close: ')', token: 'delimiter.parenthesis' } + ], + + keywords: [ + 'ALL', + 'AND', + 'AS', + 'ASC', + 'ASCENDING', + 'BY', + 'CALL', + 'CASE', + 'CONTAINS', + 'CREATE', + 'DELETE', + 'DESC', + 'DESCENDING', + 'DETACH', + 'DISTINCT', + 'ELSE', + 'END', + 'ENDS', + 'EXISTS', + 'IN', + 'IS', + 'LIMIT', + 'MANDATORY', + 'MATCH', + 'MERGE', + 'NOT', + 'ON', + /* 'ON' (listed once, above) also introduces the ON CREATE / ON MATCH sub-clauses of MERGE */ + 'OPTIONAL', + 'OR', + 'ORDER', + 'REMOVE', + 'RETURN', + 'SET', + 'SKIP', + 'STARTS', + 'THEN', + 'UNION', + 'UNWIND', + 'WHEN', + 'WHERE', + 'WITH', + 'XOR', + 'YIELD' + ], + builtinLiterals: ['true', 'TRUE', 'false', 'FALSE', 'null', 'NULL'], + builtinFunctions: [ + 'abs', + 'acos', + 'asin', + 'atan', + 'atan2', + 'avg', + 'ceil', + 'coalesce', + 'collect', + 'cos', + 'cot', + 'count', + 'degrees', + 'e', + 'endNode', + 'exists', + 'exp', + 'floor', + 'head', + 'id', + 'keys', + 'labels', + 'last', + 'left', + 'length', + 'log', + 'log10', 
'lTrim', + 'max', + 'min', + 'nodes', + 'percentileCont', + 'percentileDisc', + 'pi', + 'properties', + 'radians', + 'rand', + 'range', + 'relationships', + 'replace', + 'reverse', + 'right', + 'round', + 'rTrim', + 'sign', + 'sin', + 'size', + 'split', + 'sqrt', + 'startNode', + 'stDev', + 'stDevP', + 'substring', + 'sum', + 'tail', + 'tan', + 'timestamp', + 'toBoolean', + 'toFloat', + 'toInteger', + 'toLower', + 'toString', + 'toUpper', + 'trim', + 'type' + ], + + operators: [ + // Math operators + '+', + '-', + '*', + '/', + '%', + '^', + // Comparison operators + '=', + '<>', + '<', + '>', + '<=', + '>=', + // Pattern operators (directed arrows plus the undirected --) + '->', + '<-', + '-->', + '<--', '--' + ], + + escapes: /\\(?:[tbnrf\\"'`]|u[0-9A-Fa-f]{4}|U[0-9A-Fa-f]{8})/, + digits: /\d+/, + octaldigits: /[0-7]+/, + hexdigits: /[0-9a-fA-F]+/, + + tokenizer: { + root: [[/[{}[\]()]/, '@brackets'], { include: 'common' }], + common: [ + { include: '@whitespace' }, + { include: '@numbers' }, + { include: '@strings' }, + + // Cypher labels on nodes/relationships, e.g. 
(n:NodeLabel)-[e:RelationshipLabel] + [/:[a-zA-Z_][\w]*/, 'type.identifier'], + [ + /[a-zA-Z_][\w]*(?=\()/, /* a name directly followed by an opening parenthesis, e.g. count( or MATCH( */ + { + cases: { + '@builtinFunctions': 'predefined.function', '@keywords': 'keyword', '@default': 'identifier' /* fallbacks: without them MATCH(n) or a user-defined function name would get the default (empty) token */ + } + } + ], + [ + /[a-zA-Z_$][\w$]*/, + { + cases: { + '@keywords': 'keyword', + '@builtinLiterals': 'predefined.literal', + '@default': 'identifier' + } + } + ], + [/`/, 'identifier.escape', '@identifierBacktick'], + + // delimiter and operator after number because of `.\d` floats and `:` in labels + [/[;,.:|]/, 'delimiter'], + [ + /[<>=%+\-*/^]+/, /* a whole run of operator characters is looked up at once, so an unlisted run such as ++ falls to the default */ + { + cases: { + '@operators': 'delimiter', + '@default': '' + } + } + ] + ], + numbers: [ + [/-?(@digits)[eE]-?(@digits)/, 'number.float'], /* exponent digits are required, so a dangling 12e is not a float */ + [/-?(@digits)?\.(@digits)([eE]-?(@digits))?/, 'number.float'], + [/-?0x(@hexdigits)/, 'number.hex'], + [/-?0(@octaldigits)/, 'number.octal'], + [/-?(@digits)/, 'number'] + ], + strings: [ + [/"([^"\\]|\\.)*$/, 'string.invalid'], // non-terminated string + [/'([^'\\]|\\.)*$/, 'string.invalid'], // non-terminated string + [/"/, 'string', '@stringDouble'], + [/'/, 'string', '@stringSingle'] + ], + whitespace: [ + [/[ \t\r\n]+/, 'white'], + [/\/\*/, 'comment', '@comment'], + [/\/\/.*$/, 'comment'] + ], + comment: [ + /* no dedicated rule for a double slash here: it would consume a closing marker later on the same line and leave the block comment open */ + [/[^/*]+/, 'comment'], + [/\*\//, 'comment', '@pop'], + [/[/*]/, 'comment'] + ], + stringDouble: [ + [/[^\\"]+/, 'string'], + [/@escapes/, 'string'], + [/\\./, 'string.invalid'], + [/"/, 'string', '@pop'] + ], + stringSingle: [ + [/[^\\']+/, 'string'], + [/@escapes/, 'string'], + [/\\./, 'string.invalid'], + [/'/, 'string', '@pop'] + ], + identifierBacktick: [ + [/[^\\`]+/, 'identifier.escape'], + [/@escapes/, 'identifier.escape'], + [/\\./, 'identifier.escape.invalid'], + [/`/, 'identifier.escape', '@pop'] + ] + } +}; diff --git a/src/basic-languages/monaco.contribution.ts b/src/basic-languages/monaco.contribution.ts index 56b078c1de..db51ca61e5 100644 --- a/src/basic-languages/monaco.contribution.ts +++ b/src/basic-languages/monaco.contribution.ts @@ -15,6 +15,7 @@ import 
'./cpp/cpp.contribution'; import './csharp/csharp.contribution'; import './csp/csp.contribution'; import './css/css.contribution'; +import './cypher/cypher.contribution'; import './dart/dart.contribution'; import './dockerfile/dockerfile.contribution'; import './ecl/ecl.contribution';