From ca540b0d03fd6fc0bdff7344992485fbb688ae17 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Thu, 8 Oct 2020 11:30:17 -0400 Subject: [PATCH 01/30] fix(autodetect) swift should not get double relevance for `import` --- src/languages/swift.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/languages/swift.js b/src/languages/swift.js index ad38d8777f..1b829584ee 100644 --- a/src/languages/swift.js +++ b/src/languages/swift.js @@ -156,7 +156,8 @@ export default function(hljs) { }, { beginKeywords: 'import', end: /$/, - contains: [hljs.C_LINE_COMMENT_MODE, BLOCK_COMMENT] + contains: [hljs.C_LINE_COMMENT_MODE, BLOCK_COMMENT], + relevance: 0 } ] }; From d6e9d2251932fa2fef4f375fadd355dcc19ae853 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Thu, 8 Oct 2020 11:31:55 -0400 Subject: [PATCH 02/30] fix(autodetect) css can include a forward slash --- src/languages/css.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/languages/css.js b/src/languages/css.js index 2ef3976c1b..2e336e575a 100644 --- a/src/languages/css.js +++ b/src/languages/css.js @@ -58,7 +58,7 @@ export default function(hljs) { return { name: 'CSS', case_insensitive: true, - illegal: /[=\/|'\$]/, + illegal: /[=|'\$]/, contains: [ hljs.C_BLOCK_COMMENT_MODE, { From f55bc37ca037c576d6a019c6292084c6ea06f208 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Thu, 8 Oct 2020 11:32:23 -0400 Subject: [PATCH 03/30] fix(autodetect) css class selectors must be valid identifiers --- src/languages/css.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/languages/css.js b/src/languages/css.js index 2e336e575a..2b381c091f 100644 --- a/src/languages/css.js +++ b/src/languages/css.js @@ -65,7 +65,7 @@ export default function(hljs) { className: 'selector-id', begin: /#[A-Za-z0-9_-]+/ }, { - className: 'selector-class', begin: /\.[A-Za-z0-9_-]+/ + className: 'selector-class', begin: '\\.' 
+ IDENT_RE }, { className: 'selector-attr', From dd0295329eef1ecc60f04a3a83b3aa321698e41e Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Thu, 8 Oct 2020 11:32:43 -0400 Subject: [PATCH 04/30] fix(autodetect) css: allow extra ; --- src/languages/css.js | 1 + 1 file changed, 1 insertion(+) diff --git a/src/languages/css.js b/src/languages/css.js index 2b381c091f..4913c59c36 100644 --- a/src/languages/css.js +++ b/src/languages/css.js @@ -125,6 +125,7 @@ export default function(hljs) { illegal: /\S/, contains: [ hljs.C_BLOCK_COMMENT_MODE, + { begin: /;/ }, // empty ; rule RULE, ] } From 5bdd357ca72610bdee7a37c15e07fed296c1975c Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Thu, 8 Oct 2020 11:33:12 -0400 Subject: [PATCH 05/30] fix(autodetect) improve rule matcher - can start with `*` (css hacks) - can include a comment after attribute name before : (css hacks) --- src/languages/css.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/languages/css.js b/src/languages/css.js index 4913c59c36..50ef1982f1 100644 --- a/src/languages/css.js +++ b/src/languages/css.js @@ -49,7 +49,7 @@ export default function(hljs) { var AT_PROPERTY_RE = /@-?\w[\w]*(-\w+)*/ // @-webkit-keyframes var IDENT_RE = '[a-zA-Z-][a-zA-Z0-9_-]*'; var RULE = { - begin: /(?:[A-Z_.-]+|--[a-zA-Z0-9_-]+)\s*:/, returnBegin: true, end: ';', endsWithParent: true, + begin: /([*]\s?)?(?:[A-Z_.\-\\]+|--[a-zA-Z0-9_-]+)\s*(\/\*\*\/)?:/, returnBegin: true, end: ';', endsWithParent: true, contains: [ ATTRIBUTE ] From dc6bbaf9af476b8d60a818bc3b042b9facf86432 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Thu, 8 Oct 2020 16:14:06 -0400 Subject: [PATCH 06/30] enh(autodetect) csharp: improve autodetection - `value` is too common variable name to score points as keyword - reduce 2x relevance for beginKeywords - bump csharp relevance slightly --- src/languages/csharp.js | 19 ++++++++++++++----- test/detect/csharp/default.txt | 1 + 2 files changed, 15 insertions(+), 5 deletions(-) diff --git 
a/src/languages/csharp.js b/src/languages/csharp.js index 5d90471271..7e002ee832 100644 --- a/src/languages/csharp.js +++ b/src/languages/csharp.js @@ -139,7 +139,7 @@ export default function(hljs) { 'select', 'set', 'unmanaged', - 'value', + 'value|0', 'var', 'when', 'where', @@ -274,7 +274,9 @@ export default function(hljs) { STRING, NUMBERS, { - beginKeywords: 'class interface', end: /[{;=]/, + beginKeywords: 'class interface', + relevance: 0, + end: /[{;=]/, illegal: /[^\s:,]/, contains: [ { beginKeywords: "where class" }, @@ -285,7 +287,9 @@ export default function(hljs) { ] }, { - beginKeywords: 'namespace', end: /[{;=]/, + beginKeywords: 'namespace', + relevance: 0, + end: /[{;=]/, illegal: /[^\s:]/, contains: [ TITLE_MODE, @@ -294,7 +298,9 @@ export default function(hljs) { ] }, { - beginKeywords: 'record', end: /[{;=]/, + beginKeywords: 'record', + relevance: 0, + end: /[{;=]/, illegal: /[^\s:]/, contains: [ TITLE_MODE, @@ -324,7 +330,10 @@ export default function(hljs) { keywords: KEYWORDS, contains: [ // prevents these from being highlighted `title` - { beginKeywords: FUNCTION_MODIFIERS.join(" ")}, + { + beginKeywords: FUNCTION_MODIFIERS.join(" "), + relevance: 0 + }, { begin: hljs.IDENT_RE + '\\s*(<.+>)?\\s*\\(', returnBegin: true, contains: [ diff --git a/test/detect/csharp/default.txt b/test/detect/csharp/default.txt index 14c36769c2..5302057983 100644 --- a/test/detect/csharp/default.txt +++ b/test/detect/csharp/default.txt @@ -9,6 +9,7 @@ namespace MyApplication { public static List JustDoIt(int count) { + Span numbers = stackalloc int[length]; Console.WriteLine($"Hello {Name}!"); return new List(new int[] { 1, 2, 3 }) } From 8e7b5822139df5969f042380484fec659ae9a2aa Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Thu, 8 Oct 2020 16:14:50 -0400 Subject: [PATCH 07/30] enh(autodetect) clojure: reduce runaway relevance - operators get 0 relevance (consistency: no other grammars score them) - "name" gets 0 relevance since almost any identifier will 
match This reduces false positives in the language-detection.el rosetta data set significantly. --- src/languages/clojure.js | 6 ++++-- 1 file changed, 4 insertions(+), 2 deletions(-) diff --git a/src/languages/clojure.js b/src/languages/clojure.js index a243e5805f..22df9dd21e 100644 --- a/src/languages/clojure.js +++ b/src/languages/clojure.js @@ -16,7 +16,7 @@ export default function(hljs) { 'builtin-name': // Clojure keywords globals + ' ' + - 'cond apply if-not if-let if not not= = < > <= >= == + / * - rem ' + + 'cond apply if-not if-let if not not= =|0 <|0 >|0 <=|0 >=|0 ==|0 +|0 /|0 *|0 -|0 rem ' + 'quot neg? pos? delay? symbol? keyword? true? false? integer? empty? coll? list? ' + 'set? ifn? fn? associative? sequential? sorted? counted? reversible? number? decimal? ' + 'class? distinct? isa? float? rational? reduced? ratio? odd? even? char? seq? vector? ' + @@ -88,7 +88,9 @@ export default function(hljs) { }; var NAME = { keywords: keywords, - className: 'name', begin: SYMBOL_RE, + className: 'name', + begin: SYMBOL_RE, + relevance: 0, starts: BODY }; var DEFAULT_CONTAINS = [LIST, STRING, HINT, HINT_COL, COMMENT, KEY, COLLECTION, NUMBER, LITERAL, SYMBOL]; From aa45193178bf2b47167e4fc191764bb811fbaa4c Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Thu, 8 Oct 2020 16:38:11 -0400 Subject: [PATCH 08/30] enh(autodetect) matlab: remove relevancy from `i` and `j` --- src/languages/matlab.js | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/src/languages/matlab.js b/src/languages/matlab.js index b89a5de22e..06f99fe23e 100644 --- a/src/languages/matlab.js +++ b/src/languages/matlab.js @@ -38,8 +38,8 @@ export default function(hljs) { 'eye repmat rand randn linspace logspace freqspace meshgrid accumarray size length ' + 'ndims numel disp isempty isequal isequalwithequalnans cat reshape diag blkdiag tril ' + 'triu fliplr flipud flipdim rot90 find sub2ind ind2sub bsxfun ndgrid permute ipermute ' + - 'shiftdim circshift squeeze isscalar isvector ans 
eps realmax realmin pi i inf nan ' + - 'isnan isinf isfinite j why compan gallery hadamard hankel hilb invhilb magic pascal ' + + 'shiftdim circshift squeeze isscalar isvector ans eps realmax realmin pi i|0 inf nan ' + + 'isnan isinf isfinite j|0 why compan gallery hadamard hankel hilb invhilb magic pascal ' + 'rosser toeplitz vander wilkinson max min nanmax nanmin mean nanmean type table ' + 'readtable writetable sortrows sort figure plot plot3 scatter scatter3 cellfun ' + 'legend intersect ismember procrustes hold num2cell ' From ad2aea4e8ba4bae14a3f6c78622248d7150e8413 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Fri, 9 Oct 2020 08:16:33 -0400 Subject: [PATCH 09/30] enh(autodetect) groovy - Add relevance for groovy shebang line - Ternary should not grant extra relevance --- src/languages/groovy.js | 6 +++++- 1 file changed, 5 insertions(+), 1 deletion(-) diff --git a/src/languages/groovy.js b/src/languages/groovy.js index 898f258e28..97b80a9fa9 100644 --- a/src/languages/groovy.js +++ b/src/languages/groovy.js @@ -78,7 +78,10 @@ export default function(hljs) { 'throw throws try catch finally implements extends new import package return instanceof' }, contains: [ - hljs.SHEBANG(), + hljs.SHEBANG({ + binary: "groovy", + relevance: 10 + }), COMMENT, STRING, REGEXP, @@ -104,6 +107,7 @@ export default function(hljs) { // to avoid highlight it as a label, named parameter, or map key begin: /\?/, end: /:/, + relevance: 0, contains: [ COMMENT, STRING, From 659f794f568427704e4df293fe2c8a957d663444 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Fri, 9 Oct 2020 08:17:58 -0400 Subject: [PATCH 10/30] enh(autodetect) lisp: tune relevancy - "name" gets 0 relevance since almost any identifier will match Applying same logic as used with Clojure. 
--- src/languages/lisp.js | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/languages/lisp.js b/src/languages/lisp.js index 365e8dda9e..971ff069a8 100644 --- a/src/languages/lisp.js +++ b/src/languages/lisp.js @@ -80,7 +80,10 @@ export default function(hljs) { { className: 'name', variants: [ - {begin: LISP_IDENT_RE}, + { + begin: LISP_IDENT_RE, + relevance: 0, + }, {begin: MEC_RE} ] }, From c4062f35547cdfa90d56f72c00479617d04a14e8 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Fri, 9 Oct 2020 13:03:42 -0400 Subject: [PATCH 11/30] enh(autodetect) php: improve auto-detection - only count => in `fn` context - prevent beginKeywords double relevancy - reduce relevance of `match` --- src/languages/php.js | 26 +++++++++++++++++++------- 1 file changed, 19 insertions(+), 7 deletions(-) diff --git a/src/languages/php.js b/src/languages/php.js index ef35c47f6c..c06651ad56 100644 --- a/src/languages/php.js +++ b/src/languages/php.js @@ -71,7 +71,11 @@ export default function(hljs) { // Other keywords: // // - 'array abstract and as binary bool boolean break callable case catch class clone const continue declare default do double else elseif empty enddeclare endfor endforeach endif endswitch endwhile eval extends final finally float for foreach from global goto if implements instanceof insteadof int integer interface isset iterable list match new object or private protected public real return string switch throw trait try unset use var void while xor yield', + 'array abstract and as binary bool boolean break callable case catch class clone const continue declare ' + + 'default do double else elseif empty enddeclare endfor endforeach endif endswitch endwhile eval extends ' + + 'final finally float for foreach from global goto if implements instanceof insteadof int integer interface ' + + 'isset iterable list match|0 new object or private protected public real return string switch throw trait ' + + 'try unset use var void while xor yield', literal: 
'false null true', built_in: // Standard PHP library: @@ -123,10 +127,14 @@ export default function(hljs) { }, { className: 'function', + relevance: 0, beginKeywords: 'fn function', end: /[;{]/, excludeEnd: true, illegal: '[$%\\[]', contains: [ hljs.UNDERSCORE_TITLE_MODE, + { + begin: '=>' // No markup, just a relevance booster + }, { className: 'params', begin: '\\(', end: '\\)', @@ -145,7 +153,10 @@ export default function(hljs) { }, { className: 'class', - beginKeywords: 'class interface', end: /\{/, excludeEnd: true, + beginKeywords: 'class interface', + relevance: 0, + end: /\{/, + excludeEnd: true, illegal: /[:($"]/, contains: [ {beginKeywords: 'extends implements'}, @@ -153,17 +164,18 @@ export default function(hljs) { ] }, { - beginKeywords: 'namespace', end: ';', + beginKeywords: 'namespace', + relevance: 0, + end: ';', illegal: /[.']/, contains: [hljs.UNDERSCORE_TITLE_MODE] }, { - beginKeywords: 'use', end: ';', + beginKeywords: 'use', + relevance: 0, + end: ';', contains: [hljs.UNDERSCORE_TITLE_MODE] }, - { - begin: '=>' // No markup, just a relevance booster - }, STRING, NUMBER ] From 36ab9a2171bc0589a4f90f9bb07fcf9885dd4109 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Fri, 9 Oct 2020 13:10:58 -0400 Subject: [PATCH 12/30] enh(autodetect) add additional common keywords --- src/lib/mode_compiler.js | 13 ++++++++++++- 1 file changed, 12 insertions(+), 1 deletion(-) diff --git a/src/lib/mode_compiler.js b/src/lib/mode_compiler.js index 9265be22aa..a048478088 100644 --- a/src/lib/mode_compiler.js +++ b/src/lib/mode_compiler.js @@ -2,7 +2,18 @@ import * as regex from './regex.js'; import { inherit } from './utils.js'; // keywords that should have no default relevance value -const COMMON_KEYWORDS = 'of and for in not or if then'.split(' '); +const COMMON_KEYWORDS = [ + 'of', + 'and', + 'for', + 'in', + 'not', + 'or', + 'if', + 'then', + 'parent', // common variable name + 'list' // common variable name +]; // compilation From 
e36c1612863729a2771de5d88c8329cafdaa29f8 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Fri, 9 Oct 2020 14:06:53 -0400 Subject: [PATCH 13/30] enh(autodetect) java: relevance boost for `import java.*.` --- src/languages/java.js | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/languages/java.js b/src/languages/java.js index 1e9ab182dc..9b7c83fb7a 100644 --- a/src/languages/java.js +++ b/src/languages/java.js @@ -84,6 +84,12 @@ export default function(hljs) { ] } ), + // relevance boost + { + begin: /import java\.[a-z]+\./, + keywords: "import", + relevance: 2 + }, hljs.C_LINE_COMMENT_MODE, hljs.C_BLOCK_COMMENT_MODE, hljs.APOS_STRING_MODE, From 250fa6206dc2aa9e93ab704cc983d8fab804a731 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sat, 10 Oct 2020 09:25:56 -0400 Subject: [PATCH 14/30] enh(autodetect) python: self is super common convention --- src/languages/python.js | 1 + 1 file changed, 1 insertion(+) diff --git a/src/languages/python.js b/src/languages/python.js index 3840c57b62..3b7ef5ef98 100644 --- a/src/languages/python.js +++ b/src/languages/python.js @@ -254,6 +254,7 @@ export default function(hljs) { NUMBER, // eat "if" prior to string so that it won't accidentally be // labeled as an f-string as in: + { begin: /\bself\b/, }, // very common convention { beginKeywords: "if", relevance: 0 }, STRING, hljs.HASH_COMMENT_MODE, From cb66c9f89d7df24bacfb085b5570c0d956457be1 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sat, 10 Oct 2020 09:29:16 -0400 Subject: [PATCH 15/30] enh(autodetect) ruby improvements - add `__FILE__` to keywords - add `proc` and `lambda` Kernel methods to built_ins - stricter rule for identifying method definition - highlight variables - `|` style params now get no relevance (can be too many other things) - add SHEBANG rule - make Ruby REPL matching a little stricter --- src/languages/ruby.js | 44 +++++++++++++++++++++++++++++++------------ 1 file changed, 32 insertions(+), 12 deletions(-) diff --git a/src/languages/ruby.js
b/src/languages/ruby.js index 818063e955..f3cc12d142 100644 --- a/src/languages/ruby.js +++ b/src/languages/ruby.js @@ -7,13 +7,17 @@ Contributors: Peter Leonov , Vasily Polovnyov > + // ?> var SIMPLE_PROMPT = "[>?]>"; + // irb(main):001:0> var DEFAULT_PROMPT = "[\\w#]+\\(\\w+\\):\\d+:\\d+>"; var RVM_PROMPT = "(\\w+-)?\\d+\\.\\d+\\.\\d(p\\d+)?[^>]+>"; @@ -197,18 +210,25 @@ export default function(hljs) { }, { className: 'meta', - begin: '^('+SIMPLE_PROMPT+"|"+DEFAULT_PROMPT+'|'+RVM_PROMPT+')', + begin: '^('+SIMPLE_PROMPT+"|"+DEFAULT_PROMPT+'|'+RVM_PROMPT+')(?=[ ])', starts: { end: '$', contains: RUBY_DEFAULT_CONTAINS } } ]; + COMMENT_MODES.unshift(IRB_OBJECT); + return { name: 'Ruby', aliases: ['rb', 'gemspec', 'podspec', 'thor', 'irb'], keywords: RUBY_KEYWORDS, illegal: /\/\*/, - contains: COMMENT_MODES.concat(IRB_DEFAULT).concat(RUBY_DEFAULT_CONTAINS) + contains: [ + hljs.SHEBANG({binary:"ruby"}), + ] + .concat(IRB_DEFAULT) + .concat(COMMENT_MODES) + .concat(RUBY_DEFAULT_CONTAINS) }; } From e4319dbdb9df59d1def83b6452e78e8feeae7525 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sat, 10 Oct 2020 09:30:55 -0400 Subject: [PATCH 16/30] enh(autodetect) groovy: reduce @meta tags relevance --- src/languages/groovy.js | 4 +++- 1 file changed, 3 insertions(+), 1 deletion(-) diff --git a/src/languages/groovy.js b/src/languages/groovy.js index 97b80a9fa9..4e3c63a6ce 100644 --- a/src/languages/groovy.js +++ b/src/languages/groovy.js @@ -96,7 +96,9 @@ export default function(hljs) { ] }, { - className: 'meta', begin: '@[A-Za-z]+' + className: 'meta', + begin: '@[A-Za-z]+', + relevance: 0 }, { // highlight map keys and named parameters as attrs From 2421e24393b409c7d2c4cf15ce62800c3afc7c4e Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sat, 10 Oct 2020 09:56:01 -0400 Subject: [PATCH 17/30] enh(autodetect) vbscript: improve auto-detection - built-ins should only match if they are a call - fix function detection --- src/languages/vbscript.js | 46 
+++++++++++++++++++++++++++++---------- 1 file changed, 35 insertions(+), 11 deletions(-) diff --git a/src/languages/vbscript.js b/src/languages/vbscript.js index 8e5ab8ff92..9c7b84abfc 100644 --- a/src/languages/vbscript.js +++ b/src/languages/vbscript.js @@ -7,7 +7,40 @@ Website: https://en.wikipedia.org/wiki/VBScript Category: scripting */ +import * as regex from '../lib/regex.js'; + +/** @type LanguageFn */ export default function(hljs) { + const BUILT_IN_FUNCTIONS = ('lcase month vartype instrrev ubound setlocale getobject rgb getref string ' + + 'weekdayname rnd dateadd monthname now day minute isarray cbool round formatcurrency ' + + 'conversions csng timevalue second year space abs clng timeserial fixs len asc ' + + 'isempty maths dateserial atn timer isobject filter weekday datevalue ccur isdate ' + + 'instr datediff formatdatetime replace isnull right sgn array snumeric log cdbl hex ' + + 'chr lbound msgbox ucase getlocale cos cdate cbyte rtrim join hour oct typename trim ' + + 'strcomp int createobject loadpicture tan formatnumber mid ' + + 'split cint sin datepart ltrim sqr ' + + 'time derived eval date formatpercent exp inputbox left ascw ' + + 'chrw regexp cstr err').split(" "); + const BUILT_IN_OBJECTS = [ + "server", + "response", + "request", + // take no arguments so can be caleld without () + "scriptengine", + "scriptenginebuildversion", + "scriptengineminorversion", + "scriptenginemajorversion" + ]; + + const BUILT_IN_CALL = { + begin: regex.concat(regex.either(...BUILT_IN_FUNCTIONS), "\\s*\\("), + // relevance 0 because this is acting as a beginKeywords really + relevance:0, + keywords: { + built_in: BUILT_IN_FUNCTIONS.join(" ") + } + }; + return { name: 'VBScript', aliases: ['vbs'], @@ -18,22 +51,13 @@ export default function(hljs) { 'if then else on error option explicit new private property let get public randomize ' + 'redim rem select case set stop sub while wend with end to elseif is or xor and not ' + 'class_initialize class_terminate 
default preserve in me byval byref step resume goto', - built_in: - 'lcase month vartype instrrev ubound setlocale getobject rgb getref string ' + - 'weekdayname rnd dateadd monthname now day minute isarray cbool round formatcurrency ' + - 'conversions csng timevalue second year space abs clng timeserial fixs len asc ' + - 'isempty maths dateserial atn timer isobject filter weekday datevalue ccur isdate ' + - 'instr datediff formatdatetime replace isnull right sgn array snumeric log cdbl hex ' + - 'chr lbound msgbox ucase getlocale cos cdate cbyte rtrim join hour oct typename trim ' + - 'strcomp int createobject loadpicture tan formatnumber mid scriptenginebuildversion ' + - 'scriptengine split scriptengineminorversion cint sin datepart ltrim sqr ' + - 'scriptenginemajorversion time derived eval date formatpercent exp inputbox left ascw ' + - 'chrw regexp server response request cstr err', + built_in: BUILT_IN_OBJECTS.join(" "), literal: 'true false null nothing empty' }, illegal: '//', contains: [ + BUILT_IN_CALL, hljs.inherit(hljs.QUOTE_STRING_MODE, {contains: [{begin: '""'}]}), hljs.COMMENT( /'/, From 656402c2a72eacdbb1613ebb4a3ce365594c01a7 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sat, 10 Oct 2020 10:15:10 -0400 Subject: [PATCH 18/30] enh(autodetect) r: detect `<-`, illegal: `->` --- src/languages/r.js | 10 +++++++++- 1 file changed, 9 insertions(+), 1 deletion(-) diff --git a/src/languages/r.js b/src/languages/r.js index 82a9874882..1c22f62ba4 100644 --- a/src/languages/r.js +++ b/src/languages/r.js @@ -7,6 +7,8 @@ Website: https://www.r-project.org Category: scientific */ +import * as regex from '../lib/regex.js'; + export default function(hljs) { // Identifiers in R cannot start with `_`, but they can start with `.` if it // is not immediately followed by a digit. @@ -15,10 +17,13 @@ export default function(hljs) { // handled in a separate mode. See `test/markup/r/names.txt` for examples. // FIXME: Support Unicode identifiers. 
const IDENT_RE = /(?:(?:[a-zA-Z]|\.[._a-zA-Z])[._a-zA-Z0-9]*)|\.(?!\d)/; + const SIMPLE_IDENT = /[a-zA-Z][a-zA-Z_0-9]*/; return { name: 'R', + // only in Haskell, not R + illegal: /->/, keywords: { $pattern: IDENT_RE, keyword: @@ -168,7 +173,10 @@ export default function(hljs) { begin: '%', end: '%' }, - + // relevance boost for assignment + { + begin: regex.concat(SIMPLE_IDENT, "\\s+<-\\s+") + }, { // escaped identifier begin: '`', From de11ad1bfbc055b4e65494cc2a51208989a1f93f Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Mon, 12 Oct 2020 16:47:19 -0400 Subject: [PATCH 19/30] enh(autodetect) fewer false positives on variables For languages with $ident and @ident style variables this attempts to prevent positives for $ident$ and @ident@ type expressions, which are likely something else entirely. - bash - perl - php - ruby --- src/languages/bash.js | 4 +++- src/languages/perl.js | 10 +++++++++- src/languages/php.js | 2 +- src/languages/ruby.js | 4 +++- 4 files changed, 16 insertions(+), 4 deletions(-) diff --git a/src/languages/bash.js b/src/languages/bash.js index eb62efe04d..043de0f2aa 100644 --- a/src/languages/bash.js +++ b/src/languages/bash.js @@ -6,6 +6,8 @@ Website: https://www.gnu.org/software/bash/ Category: common */ +import * as regex from '../lib/regex.js'; + /** @type LanguageFn */ export default function(hljs) { const VAR = {}; @@ -23,7 +25,7 @@ export default function(hljs) { Object.assign(VAR,{ className: 'variable', variants: [ - {begin: /\$[\w\d#@][\w\d_]*/}, + {begin: regex.concat(/\$[\w\d#@][\w\d_]*/, `(?![\\w\\d])(?![$])`) }, BRACED_VAR ] }); diff --git a/src/languages/perl.js b/src/languages/perl.js index 9ea76bef87..881e5fbcd1 100644 --- a/src/languages/perl.js +++ b/src/languages/perl.js @@ -5,6 +5,9 @@ Website: https://www.perl.org Category: common */ +import * as regex from '../lib/regex.js'; + +/** @type LanguageFn */ export default function(hljs) { var PERL_KEYWORDS = { $pattern: /[\w.]+/, @@ -40,7 +43,12 @@ export default 
function(hljs) { var VAR = { variants: [ {begin: /\$\d/}, - {begin: /[$%@](\^\w\b|#\w+(::\w+)*|\{\w+\}|\w+(::\w*)*)/}, + {begin: regex.concat( + /[$%@](\^\w\b|#\w+(::\w+)*|\{\w+\}|\w+(::\w*)*)/, + // negative look-ahead tries to avoid matching patterns that are not + // Perl at all like $ident$, @ident@, etc. + `(?![A-Za-z])(?![@$%])` + )}, {begin: /[$%@][^\s\w{]/, relevance: 0} ] }; diff --git a/src/languages/php.js b/src/languages/php.js index c06651ad56..0faa8b38f1 100644 --- a/src/languages/php.js +++ b/src/languages/php.js @@ -13,7 +13,7 @@ Category: common export default function(hljs) { const VARIABLE = { className: 'variable', - begin: '\\$+[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*' + begin: '\\$+[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*' + `(?![A-Za-z0-9])(?![$])` }; const PREPROCESSOR = { className: 'meta', diff --git a/src/languages/ruby.js b/src/languages/ruby.js index f3cc12d142..47763bf9e3 100644 --- a/src/languages/ruby.js +++ b/src/languages/ruby.js @@ -160,8 +160,10 @@ export default function(hljs) { }, NUMBER, { + // negative-look forward attemps to prevent false matches like: + // @ident@ or $ident$ that might indicate this is not ruby at all className: "variable", - begin: '(\\$\\W)|((\\$|@@?)(\\w+))' // variables + begin: '(\\$\\W)|((\\$|@@?)(\\w+))(?=[^@$?])' + `(?![A-Za-z])(?![@$?'])` }, { className: 'params', From 49f430a8e979db5e5b2e7da1b8914c6459f69798 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Mon, 12 Oct 2020 18:06:14 -0400 Subject: [PATCH 20/30] fix(autodetect) yaml: simple numbers should not add relevancy --- src/languages/yaml.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/languages/yaml.js b/src/languages/yaml.js index f4257be176..35f9105670 100644 --- a/src/languages/yaml.js +++ b/src/languages/yaml.js @@ -153,7 +153,8 @@ export default function(hljs) { // sit isolated from other words { className: 'number', - begin: hljs.C_NUMBER_RE + '\\b' + begin: hljs.C_NUMBER_RE + '\\b', + relevance: 0 }, OBJECT, 
ARRAY, From de74ab80b40efe3df0a22734b67a2b4f998b2b33 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Tue, 13 Oct 2020 10:26:08 -0400 Subject: [PATCH 21/30] fix(autodetect) crystal: lower function relevance (5 -> 2) --- src/languages/crystal.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/languages/crystal.js b/src/languages/crystal.js index 3224886803..7f4c5b0834 100644 --- a/src/languages/crystal.js +++ b/src/languages/crystal.js @@ -158,7 +158,7 @@ export default function(hljs) { endsParent: true }) ], - relevance: 5 + relevance: 2 }, { className: 'symbol', From f92cc463b8a75966b983a08e299a6a8dffd3c8ba Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Tue, 13 Oct 2020 10:34:00 -0400 Subject: [PATCH 22/30] fix(autodetect) hy/scheme: bring in line with new `name` relevance from clojure --- src/languages/hy.js | 4 +++- src/languages/scheme.js | 1 + 2 files changed, 4 insertions(+), 1 deletion(-) diff --git a/src/languages/hy.js b/src/languages/hy.js index fe913e10df..d603b23d62 100644 --- a/src/languages/hy.js +++ b/src/languages/hy.js @@ -86,8 +86,10 @@ export default function(hljs) { relevance: 0 }; var NAME = { + className: 'name', + relevance: 0, keywords: keywords, - className: 'name', begin: SYMBOL_RE, + begin: SYMBOL_RE, starts: BODY }; var DEFAULT_CONTAINS = [LIST, STRING, HINT, HINT_COL, COMMENT, KEY, COLLECTION, NUMBER, LITERAL, SYMBOL]; diff --git a/src/languages/scheme.js b/src/languages/scheme.js index 715ac60608..f90c90f45b 100644 --- a/src/languages/scheme.js +++ b/src/languages/scheme.js @@ -116,6 +116,7 @@ export default function(hljs) { var NAME = { className: 'name', + relevance: 0, begin: SCHEME_IDENT_RE, keywords: KEYWORDS }; From c8c52e8ee585e8c64dc59972149f4f53bee79320 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Thu, 29 Oct 2020 16:50:10 -0400 Subject: [PATCH 23/30] fix(autodetect) protobuf: tighten enum item rule --- src/languages/protobuf.js | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git 
a/src/languages/protobuf.js b/src/languages/protobuf.js index d4bf4e1353..13c88426ee 100644 --- a/src/languages/protobuf.js +++ b/src/languages/protobuf.js @@ -36,9 +36,9 @@ export default function(hljs) { end: /[{;]/, excludeEnd: true, keywords: 'rpc returns' }, - { - begin: /^\s*[A-Z_]+/, - end: /\s*=/, excludeEnd: true + { // match enum items (relevance) + // BLAH = ...; + begin: /^\s*[A-Z_]+(?=\s*=[^\n]+;$)/ } ] }; From b60b1c4f6afa355920175f0931ff168858753e8b Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Thu, 29 Oct 2020 17:07:19 -0400 Subject: [PATCH 24/30] fix(ocaml) `=>` does not actually seem to be a part of language - I looked but couldn't find any reference to this. --- src/languages/ocaml.js | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/languages/ocaml.js b/src/languages/ocaml.js index ff4a918fac..04cc7e7852 100644 --- a/src/languages/ocaml.js +++ b/src/languages/ocaml.js @@ -73,7 +73,7 @@ export default function(hljs) { relevance: 0 }, { - begin: /[-=]>/ // relevance booster + begin: /->/ // relevance booster } ] } From d39c107bcc1bff9cc51089d4cd906693c7a7da8c Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Thu, 29 Oct 2020 23:27:59 -0400 Subject: [PATCH 25/30] fix(parser) add `value` to common keywords (0 relevance) --- src/lib/mode_compiler.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/lib/mode_compiler.js b/src/lib/mode_compiler.js index a048478088..b8202d664d 100644 --- a/src/lib/mode_compiler.js +++ b/src/lib/mode_compiler.js @@ -12,7 +12,8 @@ const COMMON_KEYWORDS = [ 'if', 'then', 'parent', // common variable name - 'list' // common variable name + 'list', // common variable name + 'value' // common variable name ]; // compilation From 7c60e5125c261968a7c5cd7c5020df1900f08da3 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sat, 31 Oct 2020 06:20:16 -0400 Subject: [PATCH 26/30] fix(n1ql) do not hobble relevancy of strings - There is no reason to do this every other language gets credit for 
simple strings. --- src/languages/n1ql.js | 6 ++---- 1 file changed, 2 insertions(+), 4 deletions(-) diff --git a/src/languages/n1ql.js b/src/languages/n1ql.js index 3e8bc716a5..d631ea901e 100644 --- a/src/languages/n1ql.js +++ b/src/languages/n1ql.js @@ -52,14 +52,12 @@ export default function(hljs) { { className: 'string', begin: '\'', end: '\'', - contains: [hljs.BACKSLASH_ESCAPE], - relevance: 0 + contains: [hljs.BACKSLASH_ESCAPE] }, { className: 'string', begin: '"', end: '"', - contains: [hljs.BACKSLASH_ESCAPE], - relevance: 0 + contains: [hljs.BACKSLASH_ESCAPE] }, { className: 'symbol', From 8e55681b396448a2446f716a4ac581f3eda0149e Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sat, 31 Oct 2020 12:43:04 -0400 Subject: [PATCH 27/30] fix(javascript) remove relevance of `ident =>` - This is found in other languages and isn't a strong signal. --- src/languages/javascript.js | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/src/languages/javascript.js b/src/languages/javascript.js index 2bccc2190c..3372b917bf 100644 --- a/src/languages/javascript.js +++ b/src/languages/javascript.js @@ -292,7 +292,8 @@ export default function(hljs) { className: 'params', variants: [ { - begin: hljs.UNDERSCORE_IDENT_RE + begin: hljs.UNDERSCORE_IDENT_RE, + relevance: 0 }, { className: null, From 50a0483d75ca1541a7aed7b7ffa4450668c9a1b9 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Tue, 3 Nov 2020 15:28:23 -0500 Subject: [PATCH 28/30] fix(angelscript/lsl) no relevance for simple numbers --- src/languages/angelscript.js | 1 + src/languages/lsl.js | 1 + 2 files changed, 2 insertions(+) diff --git a/src/languages/angelscript.js b/src/languages/angelscript.js index 5f88af397a..8472ccb646 100644 --- a/src/languages/angelscript.js +++ b/src/languages/angelscript.js @@ -113,6 +113,7 @@ export default function(hljs) { { // numbers className: 'number', + relevance: 0, begin: '(-?)(\\b0[xXbBoOdD][a-fA-F0-9]+|(\\b\\d+(\\.\\d*)?f?|\\.\\d+f?)([eE][-+]?\\d+f?)?)' } ] diff
--git a/src/languages/lsl.js b/src/languages/lsl.js index 08f57d782c..c998894611 100644 --- a/src/languages/lsl.js +++ b/src/languages/lsl.js @@ -24,6 +24,7 @@ export default function(hljs) { var LSL_NUMBERS = { className: 'number', + relevance:0, begin: hljs.C_NUMBER_RE }; From b0de56dcb183efcdc39bbfc56ef7f1589da207c9 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Tue, 3 Nov 2020 15:30:19 -0500 Subject: [PATCH 29/30] fix(properties) auto-detect: no points for ident[space]ident --- src/languages/properties.js | 9 +++++++-- 1 file changed, 7 insertions(+), 2 deletions(-) diff --git a/src/languages/properties.js b/src/languages/properties.js index 5fcc92d00e..64cd341f69 100644 --- a/src/languages/properties.js +++ b/src/languages/properties.js @@ -11,7 +11,9 @@ export default function(hljs) { var WS0 = '[ \\t\\f]*'; var WS1 = '[ \\t\\f]+'; // delimiter - var DELIM = '(' + WS0+'[:=]'+WS0+ '|' + WS1 + ')'; + var EQUAL_DELIM = WS0+'[:=]'+WS0; + var WS_DELIM = WS1; + var DELIM = '(' + EQUAL_DELIM + '|' + WS_DELIM + ')'; var KEY_ALPHANUM = '([^\\\\\\W:= \\t\\f\\n]|\\\\.)+'; var KEY_OTHER = '([^\\\\:= \\t\\f\\n]|\\\\.)+'; @@ -39,8 +41,11 @@ export default function(hljs) { // key: everything until whitespace or = or : (taking into account backslashes) // case of a "normal" key { - begin: KEY_ALPHANUM + DELIM, returnBegin: true, + variants: [ + { begin: KEY_ALPHANUM + EQUAL_DELIM, relevance: 1 }, + { begin: KEY_ALPHANUM + WS_DELIM, relevance: 0 } + ], contains: [ { className: 'attr', From 9d06fe9ed84fb7f069e6bc863cd96518bff88da3 Mon Sep 17 00:00:00 2001 From: Josh Goebel Date: Sat, 14 Nov 2020 21:51:20 -0500 Subject: [PATCH 30/30] add comment, fix typos - also fix pgsql markup test --- src/languages/bash.js | 5 ++++- src/languages/php.js | 5 ++++- src/languages/vbscript.js | 2 +- test/markup/pgsql/dollar_strings.expect.txt | 8 ++++---- 4 files changed, 13 insertions(+), 7 deletions(-) diff --git a/src/languages/bash.js b/src/languages/bash.js index 043de0f2aa..9dd90dd6bd 
100644 --- a/src/languages/bash.js +++ b/src/languages/bash.js @@ -25,7 +25,10 @@ export default function(hljs) { Object.assign(VAR,{ className: 'variable', variants: [ - {begin: regex.concat(/\$[\w\d#@][\w\d_]*/, `(?![\\w\\d])(?![$])`) }, + {begin: regex.concat(/\$[\w\d#@][\w\d_]*/, + // negative look-ahead tries to avoid matching patterns that are not + // Perl at all like $ident$, @ident@, etc. + `(?![\\w\\d])(?![$])`) }, BRACED_VAR ] }); diff --git a/src/languages/php.js b/src/languages/php.js index 0faa8b38f1..203f579c60 100644 --- a/src/languages/php.js +++ b/src/languages/php.js @@ -13,7 +13,10 @@ Category: common export default function(hljs) { const VARIABLE = { className: 'variable', - begin: '\\$+[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*' + `(?![A-Za-z0-9])(?![$])` + begin: '\\$+[a-zA-Z_\x7f-\xff][a-zA-Z0-9_\x7f-\xff]*' + + // negative look-ahead tries to avoid matching patterns that are not + // Perl at all like $ident$, @ident@, etc. + `(?![A-Za-z0-9])(?![$])` }; const PREPROCESSOR = { className: 'meta', diff --git a/src/languages/vbscript.js b/src/languages/vbscript.js index 9c7b84abfc..a43f4e2c89 100644 --- a/src/languages/vbscript.js +++ b/src/languages/vbscript.js @@ -25,7 +25,7 @@ export default function(hljs) { "server", "response", "request", - // take no arguments so can be caleld without () + // take no arguments so can be called without () "scriptengine", "scriptenginebuildversion", "scriptengineminorversion", diff --git a/test/markup/pgsql/dollar_strings.expect.txt b/test/markup/pgsql/dollar_strings.expect.txt index 52f584761a..978866a3a5 100644 --- a/test/markup/pgsql/dollar_strings.expect.txt +++ b/test/markup/pgsql/dollar_strings.expect.txt @@ -1,9 +1,9 @@ CREATE OR REPLACE FUNCTION hello_world(param_your_name text) RETURNS text AS -$$ -SELECT 'Hello world. My name is ' || param_your_name || '.'; +$$ +SELECT 'Hello world. 
My name is ' || param_your_name || '.'; $$ language sql STRICT; -SELECT sql_expression($sql$SELECT hello_world($phrase$Regina's elephant's dog$phrase$) - || $phrase$ I made a cat's meow today.$phrase$ $sql$); +SELECT sql_expression($sql$SELECT hello_world($phrase$Regina's elephant's dog$phrase$) + || $phrase$ I made a cat's meow today.$phrase$ $sql$);