forked from highlightjs/highlight.js
/
r.js
224 lines (213 loc) · 7.41 KB
/
r.js
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
128
129
130
131
132
133
134
135
136
137
138
139
140
141
142
143
144
145
146
147
148
149
150
151
152
153
154
155
156
157
158
159
160
161
162
163
164
165
166
167
168
169
170
171
172
173
174
175
176
177
178
179
180
181
182
183
184
185
186
187
188
189
190
191
192
193
194
195
196
197
198
199
200
201
202
203
204
205
206
207
208
209
210
211
212
213
214
215
216
217
218
219
220
221
222
223
224
/*
Language: R
Description: R is a free software environment for statistical computing and graphics.
Author: Joe Cheng <joe@rstudio.org>
Contributors: Konrad Rudolph <konrad.rudolph@gmail.com>
Website: https://www.r-project.org
Category: common,scientific
*/
import * as regex from '../lib/regex.js';
/**
 * Language definition for R.
 *
 * Receives the highlight.js instance (used here for `COMMENT`,
 * `HASH_COMMENT_MODE`, `BACKSLASH_ESCAPE` and `END_SAME_AS_BEGIN`) and returns
 * the language object: its name, keyword tables and the list of contained
 * modes (Roxygen comments, plain comments, strings, numbers, operators,
 * punctuation and backtick-quoted identifiers).
 *
 * @type LanguageFn
 */
export default function(hljs) {
  // Identifiers in R cannot start with `_`, but they can start with `.` if it
  // is not immediately followed by a digit.
  // R also supports quoted identifiers, which are near-arbitrary sequences
  // delimited by backticks (`…`), which may contain escape sequences. These are
  // handled in a separate mode. See `test/markup/r/names.txt` for examples.
  // FIXME: Support Unicode identifiers.
  const IDENT_RE = /(?:(?:[a-zA-Z]|\.[._a-zA-Z])[._a-zA-Z0-9]*)|\.(?!\d)/;

  // All numeric literal forms, combined into a single alternation.
  const NUMBER_TYPES = regex.either(
    // Special case: only hexadecimal binary powers can contain fractions
    /0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/,
    // Hexadecimal numbers without fraction and optional binary power
    /0[xX][0-9a-fA-F]+(?:[pP][+-]?\d+)?[Li]?/,
    // Decimal numbers
    /(?:\d+(?:\.\d*)?|\.\d+)(?:[eE][+-]?\d+)?[Li]?/
  );

  // Built-in operators; user-defined `%…%` operators are matched separately
  // by the `/%[^%]*%/` patterns further down.
  const OPERATORS = /[=!<>:]=|\|\||&&|:::?|<-|<<-|->>|->|\|>|[-+*\/?!$&|:<=>@^~]|\*\*/;

  // Brackets (including `[[`), braces, parentheses, backslash and comma.
  const PUNCTUATION = /\[\[|[(){}[\]\\,]/;

  return {
    name: 'R',

    keywords: {
      $pattern: IDENT_RE,
      keyword:
        'function if in break next repeat else for while',
      literal:
        'NULL NA TRUE FALSE Inf NaN NA_integer_|10 NA_real_|10 ' +
        'NA_character_|10 NA_complex_|10',
      built_in:
        // Builtin constants
        'LETTERS letters month.abb month.name pi T F ' +
        // Primitive functions
        // These are all the functions in `base` that are implemented as a
        // `.Primitive`, minus those functions that are also keywords.
        'abs acos acosh all any anyNA Arg as.call as.character ' +
        'as.complex as.double as.environment as.integer as.logical ' +
        'as.null.default as.numeric as.raw asin asinh atan atanh attr ' +
        'attributes baseenv browser c call ceiling class Conj cos cosh ' +
        'cospi cummax cummin cumprod cumsum digamma dim dimnames ' +
        'emptyenv exp expression floor forceAndCall gamma gc.time ' +
        'globalenv Im interactive invisible is.array is.atomic is.call ' +
        'is.character is.complex is.double is.environment is.expression ' +
        'is.finite is.function is.infinite is.integer is.language ' +
        'is.list is.logical is.matrix is.na is.name is.nan is.null ' +
        'is.numeric is.object is.pairlist is.raw is.recursive is.single ' +
        'is.symbol lazyLoadDBfetch length lgamma list log max min ' +
        'missing Mod names nargs nzchar oldClass on.exit pos.to.env ' +
        'proc.time prod quote range Re rep retracemem return round ' +
        'seq_along seq_len seq.int sign signif sin sinh sinpi sqrt ' +
        'standardGeneric substitute sum switch tan tanh tanpi tracemem ' +
        'trigamma trunc unclass untracemem UseMethod xtfrm',
    },

    contains: [
      // Roxygen comments
      hljs.COMMENT(
        /#'/,
        /$/,
        {
          contains: [
            {
              // Handle `@examples` separately to cause all subsequent code
              // until the next `@`-tag on its own line to be kept as-is,
              // preventing highlighting. This code is example R code, so nested
              // doctags shouldn’t be treated as such. See
              // `test/markup/r/roxygen.txt` for an example.
              scope: 'doctag',
              begin: '@examples',
              starts: {
                contains: [
                  { begin: /\n/ },
                  {
                    // A Roxygen line that introduces another `@tag` closes
                    // the examples section.
                    begin: /#'\s*(?=@[a-zA-Z]+)/,
                    endsParent: true,
                  },
                  {
                    // Any other Roxygen line inside the examples section.
                    begin: /#'/,
                    end: /$/,
                    excludeBegin: true,
                  }
                ]
              }
            },
            {
              // Handle `@param` to highlight the parameter name following
              // after.
              scope: 'doctag',
              begin: '@param',
              end: /$/,
              contains: [
                {
                  scope: 'variable',
                  variants: [
                    { begin: IDENT_RE },
                    // Backtick-quoted parameter name, allowing escapes.
                    { begin: /`(?:\\.|[^`\\])+`/ }
                  ],
                  endsParent: true
                }
              ]
            },
            {
              // Any other Roxygen tag.
              scope: 'doctag',
              begin: /@[a-zA-Z]+/
            },
            {
              // Backslash-introduced markup commands such as `\code`.
              scope: 'keyword',
              begin: /\\[a-zA-Z]+/,
            }
          ]
        }
      ),

      hljs.HASH_COMMENT_MODE,

      {
        scope: 'string',
        contains: [hljs.BACKSLASH_ESCAPE],
        variants: [
          // Raw strings: `r"(...)"`, `R'-[...]-'`, etc. The dash run captured
          // in the opening delimiter must be repeated in the closing one.
          hljs.END_SAME_AS_BEGIN({ begin: /[rR]"(-*)\(/, end: /\)(-*)"/ }),
          hljs.END_SAME_AS_BEGIN({ begin: /[rR]"(-*)\{/, end: /\}(-*)"/ }),
          hljs.END_SAME_AS_BEGIN({ begin: /[rR]"(-*)\[/, end: /\](-*)"/ }),
          hljs.END_SAME_AS_BEGIN({ begin: /[rR]'(-*)\(/, end: /\)(-*)'/ }),
          hljs.END_SAME_AS_BEGIN({ begin: /[rR]'(-*)\{/, end: /\}(-*)'/ }),
          hljs.END_SAME_AS_BEGIN({ begin: /[rR]'(-*)\[/, end: /\](-*)'/ }),
          // Ordinary double- and single-quoted strings.
          {begin: '"', end: '"', relevance: 0},
          {begin: "'", end: "'", relevance: 0}
        ],
      },

      // Matching numbers immediately following punctuation and operators is
      // tricky since we need to look at the character ahead of a number to
      // ensure the number is not part of an identifier, and we cannot use
      // negative look-behind assertions. So instead we explicitly handle all
      // possible combinations of (operator|punctuation), number.
      // TODO: replace with negative look-behind when available
      // { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+\.[0-9a-fA-F]*[pP][+-]?\d+i?/ },
      // { begin: /(?<![a-zA-Z0-9._])0[xX][0-9a-fA-F]+([pP][+-]?\d+)?[Li]?/ },
      // { begin: /(?<![a-zA-Z0-9._])(\d+(\.\d*)?|\.\d+)([eE][+-]?\d+)?[Li]?/ }
      {
        relevance: 0,
        variants: [
          {
            // Built-in operator directly followed by a number.
            scope: {
              1: 'operator',
              2: 'number'
            },
            match: [
              OPERATORS,
              NUMBER_TYPES
            ]
          },
          {
            // `%…%` operator directly followed by a number.
            scope: {
              1: 'operator',
              2: 'number'
            },
            match: [
              /%[^%]*%/,
              NUMBER_TYPES
            ]
          },
          {
            // Punctuation directly followed by a number. This must use the
            // `PUNCTUATION` and `NUMBER_TYPES` constants declared in this
            // function; any other identifier here is undeclared and throws a
            // ReferenceError as soon as the language is instantiated.
            scope: {
              1: 'punctuation',
              2: 'number'
            },
            match: [
              PUNCTUATION,
              NUMBER_TYPES
            ]
          },
          {
            // Number preceded by any other non-identifier character (or at
            // the start of a line); only the number itself gets a scope.
            scope: { 2: 'number' },
            relevance: 0,
            match: [
              /[^a-zA-Z0-9._]|^/, // not part of an identifier
              NUMBER_TYPES
            ]
          }
        ]
      },

      // Operators/punctuation when they're not directly followed by numbers
      {
        scope: 'operator',
        variants: [
          {
            relevance: 0,
            match: OPERATORS
          },
          { match: /%[^%]*%/ }
        ]
      },

      {
        scope: 'punctuation',
        relevance: 0,
        match: PUNCTUATION
      },

      {
        // Escaped identifier
        begin: '`',
        end: '`',
        contains: [
          { begin: /\\./ }
        ]
      }
    ]
  };
}