Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Improve detection of Praat interpolated variables #1277

Merged
merged 1 commit into from
Nov 26, 2019
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
66 changes: 37 additions & 29 deletions pygments/lexers/praat.py
Original file line number Diff line number Diff line change
Expand Up @@ -55,17 +55,17 @@ class PraatLexer(RegexLexer):
'exitScript', 'exp', 'extractNumber', 'fileReadable', 'fisherP', 'fisherQ',
'floor', 'gaussP', 'gaussQ', 'hertzToBark', 'hertzToErb', 'hertzToMel',
'hertzToSemitones', 'imax', 'imin', 'incompleteBeta', 'incompleteGammaP', 'index',
'index_regex', 'invBinomialP', 'invBinomialQ', 'invChiSquareQ', 'invFisherQ',
'index_regex', 'integer', 'invBinomialP', 'invBinomialQ', 'invChiSquareQ', 'invFisherQ',
'invGaussQ', 'invSigmoid', 'invStudentQ', 'length', 'ln', 'lnBeta', 'lnGamma',
'log10', 'log2', 'max', 'melToHertz', 'min', 'minusObject', 'natural', 'number',
'numberOfColumns', 'numberOfRows', 'numberOfSelected', 'objectsAreIdentical',
'option', 'optionMenu', 'pauseScript', 'phonToDifferenceLimens', 'plusObject',
'positive', 'randomBinomial', 'randomGauss', 'randomInteger', 'randomPoisson',
'randomUniform', 'real', 'readFile', 'removeObject', 'rindex', 'rindex_regex',
'round', 'runScript', 'runSystem', 'runSystem_nocheck', 'selectObject',
'selected', 'semitonesToHertz', 'sentencetext', 'sigmoid', 'sin', 'sinc',
'selected', 'semitonesToHertz', 'sentence', 'sentencetext', 'sigmoid', 'sin', 'sinc',
'sincpi', 'sinh', 'soundPressureToPhon', 'sqrt', 'startsWith', 'studentP',
'studentQ', 'tan', 'tanh', 'variableExists', 'word', 'writeFile', 'writeFileLine',
'studentQ', 'tan', 'tanh', 'text', 'variableExists', 'word', 'writeFile', 'writeFileLine',
'writeInfo', 'writeInfoLine',
)

Expand All @@ -90,9 +90,9 @@ class PraatLexer(RegexLexer):
'KNN', 'KlattGrid', 'KlattTable', 'LFCC', 'LPC', 'Label', 'LegendreSeries',
'LinearRegression', 'LogisticRegression', 'LongSound', 'Ltas', 'MFCC', 'MSpline',
'ManPages', 'Manipulation', 'Matrix', 'MelFilter', 'MelSpectrogram',
'MixingMatrix', 'Movie', 'Network', 'OTGrammar', 'OTHistory', 'OTMulti', 'PCA',
'PairDistribution', 'ParamCurve', 'Pattern', 'Permutation', 'Photo', 'Pitch',
'PitchModeler', 'PitchTier', 'PointProcess', 'Polygon', 'Polynomial',
'MixingMatrix', 'Movie', 'Network', 'Object', 'OTGrammar', 'OTHistory', 'OTMulti',
'PCA', 'PairDistribution', 'ParamCurve', 'Pattern', 'Permutation', 'Photo',
'Pitch', 'PitchModeler', 'PitchTier', 'PointProcess', 'Polygon', 'Polynomial',
'PowerCepstrogram', 'PowerCepstrum', 'Procrustes', 'RealPoint', 'RealTier',
'ResultsMFC', 'Roots', 'SPINET', 'SSCP', 'SVD', 'Salience', 'ScalarProduct',
'Similarity', 'SimpleString', 'SortedSetOfString', 'Sound', 'Speaker',
Expand All @@ -112,6 +112,10 @@ class PraatLexer(RegexLexer):
'defaultDirectory',
)

object_attributes = (
'ncol', 'nrow', 'xmin', 'ymin', 'xmax', 'ymax', 'nx', 'ny', 'dx', 'dy',
)

tokens = {
'root': [
(r'(\s+)(#.*?$)', bygroups(Text, Comment.Single)),
Expand Down Expand Up @@ -148,7 +152,9 @@ class PraatLexer(RegexLexer):
],
'command': [
(r'( ?[\w()-]+ ?)', Keyword),
(r"'(?=.*')", String.Interpol, 'string_interpolated'),

include('string_interpolated'),

(r'\.{3}', Keyword, ('#pop', 'old_arguments')),
(r':', Keyword, ('#pop', 'comma_list')),
(r'\s', Text, '#pop'),
Expand Down Expand Up @@ -207,62 +213,64 @@ class PraatLexer(RegexLexer):
(r'\n', Text, '#pop'),
(r'\b\d+(\.\d*)?([eE][-+]?\d+)?%?', Number),
],
'object_attributes': [
(r'\.?(n(col|row)|[xy]min|[xy]max|[nd][xy])\b', Name.Builtin, '#pop'),
(r'(\.?(?:col|row)\$)(\[)',
bygroups(Name.Builtin, Text), 'variable_name'),
(r'(\$?)(\[)',
bygroups(Name.Builtin, Text), ('#pop', 'comma_list')),
'object_reference': [
include('string_interpolated'),
(r'([a-z][a-zA-Z0-9_]*|\d+)', Name.Builtin),

(words(object_attributes, prefix=r'\.'), Name.Builtin, '#pop'),

(r'\$', Name.Builtin),
(r'\[', Text, '#pop'),
],
'variable_name': [
include('operator'),
include('number'),

(words(variables_string, suffix=r'\$'), Name.Variable.Global),
(words(variables_numeric, suffix=r'\b'), Name.Variable.Global),

(r'\bObject_\w+', Name.Builtin, 'object_attributes'),
(words(objects, prefix=r'\b', suffix=r'_\w+'),
Name.Builtin, 'object_attributes'),
(words(variables_numeric,
suffix=r'(?=[^a-zA-Z0-9\._"\'\$#\[:\(]|\s|^|$)'),
Name.Variable.Global),

(r"\b(Object_)(')",
bygroups(Name.Builtin, String.Interpol),
('object_attributes', 'string_interpolated')),
(words(objects, prefix=r'\b', suffix=r"(_)(')"),
bygroups(Name.Builtin, Name.Builtin, String.Interpol),
('object_attributes', 'string_interpolated')),
(words(objects, prefix=r'\b', suffix=r"(_)"),
bygroups(Name.Builtin, Name.Builtin),
'object_reference'),

(r'\.?_?[a-z][\w.]*(\$|#)?', Text),
(r'[\[\]]', Punctuation, 'comma_list'),
(r"'(?=.*')", String.Interpol, 'string_interpolated'),

include('string_interpolated'),
],
'operator': [
(r'([+\/*<>=!-]=?|[&*|][&*|]?|\^|<>)', Operator),
(r'(?<![\w.])(and|or|not|div|mod)(?![\w.])', Operator.Word),
],
'string_interpolated': [
(r'\.?[_a-z][\w.]*[$#]?(?:\[[a-zA-Z0-9,]+\])?(:[0-9]+)?',
(r'\'[_a-z][^\[\]\'":]*(\[([\d,]+|"[\w\d,]+")\])?(:[0-9]+)?\'',
String.Interpol),
(r"'", String.Interpol, '#pop'),
],
'string_unquoted': [
(r'(\n\s*)(\.{3})', bygroups(Text, Punctuation)),

(r'\n', Text, '#pop'),
(r'\s', Text),
(r"'(?=.*')", String.Interpol, 'string_interpolated'),

include('string_interpolated'),

(r"'", String),
(r"[^'\n]+", String),
],
'string': [
(r'(\n\s*)(\.{3})', bygroups(Text, Punctuation)),

(r'"', String, '#pop'),
(r"'(?=.*')", String.Interpol, 'string_interpolated'),

include('string_interpolated'),

(r"'", String),
(r'[^\'"\n]+', String),
],
'old_form': [
(r'(\s+)(#.*?$)', bygroups(Text, Comment.Single)),
(r'\s+', Text),

(r'(optionmenu|choice)([ \t]+\S+:[ \t]+)',
Expand Down
82 changes: 73 additions & 9 deletions tests/examplefiles/example.praat
Original file line number Diff line number Diff line change
@@ -1,4 +1,5 @@
form Highlighter test
# This is a regular comment
sentence Blank
sentence My_sentence This should all be a string
text My_text This should also all be a string
Expand All @@ -7,16 +8,37 @@ form Highlighter test
boolean Text no
boolean Quoted "yes"
comment This should be a string
optionmenu Choice: 1
optionmenu Drop-down: 1
option Foo
option 100
choice Radio: 1
option Foo
option Bar
option 100
real left_Range -123.6
positive right_Range_max 3.3
integer Int 4
natural Nat 4
endform

beginPause: "Highlighter test"
sentence: "Blank", ""
sentence: "My sentence", "This should all be a string"
text: "My text", "This should also all be a string"
word: "My word", "Only the first word is a string, the rest is discarded"
boolean: "Binary", 1
comment: "This should be a string"
optionMenu: "Drop-down", 1
option: "Foo"
option: "100"
choice: "Choice", 1
option: "Foo"
option: "100"
real: "left Range", -123.6
positive: "right Range max", 3.3
integer: "Int", 4
natural: "Nat", 4
button = endPause("Cancel", "OK", 1, 2)

# Periods do not establish boundaries for keywords
form.var = 10
# Or operators
Expand All @@ -30,8 +52,7 @@ execute /path/to/file

# Predefined variables
a = praatVersion
a = e
a = pi
a = e + pi * ( all+right) / left mod average + (mono - stereo)
a$ = homeDirectory$ + tab$ + newline$
a$ = temporaryDirectory$
a$ = praatVersion$
Expand All @@ -40,6 +61,9 @@ a$ = homeDirectory$
a$ = preferencesDirectory$
a$ = defaultDirectory$
nocheck selectObject: undefined
# Not predefined variables
a$ = e$
a$ = pi$

# Arrays are not comments
a# = zero# (5, 6)
Expand All @@ -59,9 +83,43 @@ else macintosh == 1
exit We are on Mac
endif

string$ = "Strings can be 'interpolated'"
# Interpolation with precision digits
echo unquoted 'a:3'
echo unquoted 'a.a:3'
echo unquoted 'a[1]:3'
echo unquoted 'a1:3'

appendInfoLine: "quoted 'a:3'"
appendInfoLine: "quoted 'a.a:3'"
appendInfoLine: "quoted 'a[1]:3'"
appendInfoLine: "quoted 'a1:3'"

# Interpolations are not recursive
echo unquoted 'a'1':3'
appendInfoLine: "quoted 'a'1':3'"

# Interpolation without precision digits
echo unquoted 'var' numeric
echo unquoted 'var$' string
echo unquoted 'var["a"]' numeric hash
echo unquoted 'var$["a"]' string hash
echo unquoted 'var[1]' numeric indexed variable
echo unquoted 'var$[1]' string indexed variable

appendInfoLine: "quoted 'var' numeric"
appendInfoLine: "quoted 'var$' string"
appendInfoLine: "quoted 'var["a"]' numeric hash"
appendInfoLine: "quoted 'var$["a"]' string hash"
appendInfoLine: "quoted 'var[1]' numeric indexed variable"
appendInfoLine: "quoted 'var$[1]' string indexed variable"

# Indices in interpolations must be literal
echo 'var[a]'
echo 'var[a$]'

string$ = "But don't interpolate everything!"
string$(10)
string$ = "interpolatin' " + "across" + " strings ain't cool either"
string$(10) ; This is a function

repeat
string$ = string$ - right$(string$)
Expand All @@ -77,6 +135,12 @@ value$ = Table_'table'$[25, "f0"]
fixed = Sound_10.xmin
fixed = Object_foo.xmin
fixed = Procrustes_foo.nx
var["vaa"] = 1 ; Hash

# Special two-word keyword
select all
# Keyword with a predefined variable
select all

# old-style procedure call
call oldStyle "quoted" 2 unquoted string
Expand All @@ -103,7 +167,7 @@ endfor

i = 1
while i < n
i++
i += 1
# Different styles of object selection
select sound'i'
sound = selected()
Expand Down Expand Up @@ -153,7 +217,7 @@ while i < n
..."duration response"

# Function call with trailing space
removeObject: pitch, table
removeObject: pitch, table

# Picture window commands
selectObject: sound
Expand Down Expand Up @@ -251,7 +315,7 @@ procedure newStyle (.str1$, .num, .str2$)
.local = Get total duration
.local = Get 'some' duration
.local = Get 'some[1]' value... hello 10 p[i]
.local = Get 'some[1,3]' value: "hello", 10, 'p[i]'
.local = Get 'some[1,3]' value: "hello", 10, p[i]
.local = Get 'some$' duration
.local = Get 'some$[1]' duration
endproc
Expand Down
76 changes: 73 additions & 3 deletions tests/test_praat.py
Original file line number Diff line number Diff line change
Expand Up @@ -95,9 +95,7 @@ def test_broken_unquoted_string(lexer):
(Token.Text, u'\n'),
(Token.Punctuation, u'...'),
(Token.Text, u' '),
(Token.Literal.String.Interpol, u"'"),
(Token.Literal.String.Interpol, u'interpolated'),
(Token.Literal.String.Interpol, u"'"),
(Token.Literal.String.Interpol, u"'interpolated'"),
(Token.Text, u' '),
(Token.Literal.String, u'string'),
(Token.Text, u'\n'),
Expand Down Expand Up @@ -133,3 +131,75 @@ def test_inline_if(lexer):
(Token.Text, u'\n'),
]
assert list(lexer.get_tokens(fragment)) == tokens

def test_interpolation_boundary(lexer):
    """Apostrophes in two separate quoted strings must not pair up.

    Each lone apostrophe is expected to come out as a plain String token
    rather than as the delimiter of an interpolated variable.
    """
    string = Token.Literal.String
    # A double-quoted string whose only content is a single apostrophe.
    quoted_apostrophe = [(string, u'"'), (string, u"'"), (string, u'"')]
    expected = (
        quoted_apostrophe
        + [(Token.Text, u' '), (Token.Operator, u'+'), (Token.Text, u' ')]
        + quoted_apostrophe
        + [(Token.Text, u'\n')]
    )
    source = u'"\'" + "\'"'
    assert list(lexer.get_tokens(source)) == expected

def test_interpolated_numeric_indexed(lexer):
    """A numeric variable with a numeric index lexes as one Interpol token."""
    source = u"'a[3]'"
    expected = [
        (Token.Literal.String.Interpol, u"'a[3]'"),
        (Token.Text, u'\n'),
    ]
    actual = list(lexer.get_tokens(source))
    assert actual == expected

def test_interpolated_numeric_hash(lexer):
    """A numeric variable with a quoted string key lexes as one Interpol token."""
    source = u"'a[\"b\"]'"
    expected = [
        (Token.Literal.String.Interpol, u"'a[\"b\"]'"),
        (Token.Text, u'\n'),
    ]
    actual = list(lexer.get_tokens(source))
    assert actual == expected

def test_interpolated_string_indexed(lexer):
    """A string variable (``$`` suffix) with a numeric index lexes as one Interpol token."""
    source = u"'a$[3]'"
    expected = [
        (Token.Literal.String.Interpol, u"'a$[3]'"),
        (Token.Text, u'\n'),
    ]
    actual = list(lexer.get_tokens(source))
    assert actual == expected

def test_interpolated_string_hash(lexer):
    """A string variable (``$`` suffix) with a quoted string key lexes as one Interpol token."""
    source = u"'a$[\"b\"]'"
    expected = [
        (Token.Literal.String.Interpol, u"'a$[\"b\"]'"),
        (Token.Text, u'\n'),
    ]
    actual = list(lexer.get_tokens(source))
    assert actual == expected

def test_interpolated_numeric_with_precision(lexer):
    """A variable followed by a ``:digits`` precision suffix lexes as one Interpol token."""
    source = u"'a:3'"
    expected = [
        (Token.Literal.String.Interpol, u"'a:3'"),
        (Token.Text, u'\n'),
    ]
    actual = list(lexer.get_tokens(source))
    assert actual == expected

def test_interpolated_indexed_numeric_with_precision(lexer):
    """An indexed variable with a ``:digits`` precision suffix lexes as one Interpol token."""
    source = u"'a[3]:3'"
    expected = [
        (Token.Literal.String.Interpol, u"'a[3]:3'"),
        (Token.Text, u'\n'),
    ]
    actual = list(lexer.get_tokens(source))
    assert actual == expected

def test_interpolated_local_numeric_with_precision(lexer):
    """A dotted variable name with a ``:digits`` precision suffix lexes as one Interpol token."""
    source = u"'a.a:3'"
    expected = [
        (Token.Literal.String.Interpol, u"'a.a:3'"),
        (Token.Text, u'\n'),
    ]
    actual = list(lexer.get_tokens(source))
    assert actual == expected