From e1a07d0b0603efcb43b991885d919ace8189e5c5 Mon Sep 17 00:00:00 2001 From: Werner Lemberg Date: Sat, 26 Nov 2022 18:13:21 +0100 Subject: [PATCH] LilyPond: slightly improve lexing * Handle `--` and `__` as `Token.Punctuation` only if preceded by whitespace: ``` <space>-- ==> hyphen (in lyrics mode) <space>__ ==> extender line (in lyrics mode) -- ==> tenuto, neutral attachment (in music mode) __ ==> portato, down attachment (in music mode) ``` * Handle `-` followed by a number as `Token.Number` only if preceded by whitespace. This is purely heuristic, derived from the coding style shown in the LilyPond manuals. ``` <space>-3 ==> integer (e.g., as a function argument) -3 ==> fingering instruction ``` * Add forgotten `\maxima` note duration. * Some legibility improvements by using verbose mode for regular expressions. * Some comment additions and fixes. * Update tests. --- pygments/lexers/lilypond.py | 48 ++++++++++++++----- tests/examplefiles/lilypond/example.ly | 5 +- tests/examplefiles/lilypond/example.ly.output | 18 +++++-- 3 files changed, 55 insertions(+), 16 deletions(-) diff --git a/pygments/lexers/lilypond.py b/pygments/lexers/lilypond.py index c39e0b03e7..52a007dea2 100644 --- a/pygments/lexers/lilypond.py +++ b/pygments/lexers/lilypond.py @@ -23,6 +23,14 @@ __all__ = ["LilyPondLexer"] +# In LilyPond, (unquoted) name tokens only contain letters, hyphens, +# and underscores, where hyphens and underscores must not start or end +# a name token. +# +# Note that many of the entities listed as LilyPond built-in keywords +# (in file `_lilypond_builtins.py`) are only valid if surrounded by +# double quotes, for example, 'hufnagel-fa1'. This means that +# `NAME_END_RE` doesn't apply to such entities in valid LilyPond code. NAME_END_RE = r"(?=\d|[^\w\-]|[\-_][\W\d])" def builtin_words(names, backslash, suffix=NAME_END_RE): @@ -80,10 +88,10 @@ def get_tokens_unprocessed(self, text): # Whitespace. (r"\s+", Token.Text.Whitespace), - # Multi-line comment. These are non-nestable. 
+ # Multi-line comments. These are non-nestable. (r"%\{.*?%\}", Token.Comment.Multiline), - # Simple comment. + # Simple comments. (r"%.*?$", Token.Comment.Single), # End of embedded LilyPond in Scheme. @@ -105,22 +113,37 @@ def get_tokens_unprocessed(self, text): # \override Stem.color = red, # - comma as alternative syntax for lists: \time 3,3,2 4/4, # - colon in tremolos: c:32, - # - double hyphen in lyrics: li -- ly -- pond, - (r"\\\\|--|[{}<>=.,:|]", Token.Punctuation), - - # Pitch, with optional octavation marks, octave check, + # - double hyphen and underscore in lyrics: li -- ly -- pond __ + # (which must be preceded by ASCII whitespace) + (r"""(?x) + \\\\ + | (?<= \s ) (?: -- | __ ) + | [{}<>=.,:|] + """, Token.Punctuation), + + # Pitches, with optional octavation marks, octave check, # and forced or cautionary accidental. (words(pitches, suffix=r"=?[',]*!?\??" + NAME_END_RE), Token.Pitch), - # String, optionally with direction specifier. + # Strings, optionally with direction specifier. (r'[\-_^]?"', Token.String, "string"), # Numbers. (r"-?\d+\.\d+", Token.Number.Float), # 5. and .5 are not allowed (r"-?\d+/\d+", Token.Number.Fraction), - # Integer, or duration with optional augmentation dots. We have no - # way to distinguish these, so we highlight them all as numbers. - (r"-?(\d+|\\longa|\\breve)\.*", Token.Number), + # Integers, or durations with optional augmentation dots. + # We have no way to distinguish these, so we highlight + # them all as numbers. + # + # Normally, there is a space before the integer (being an + # argument to a music function), which we check here. The + # case without a space is handled below (as a fingering + # number). + (r"""(?x) + (?<= \s ) -\d+ + | (?: (?: \d+ | \\breve | \\longa | \\maxima ) + \.* ) + """, Token.Number), # Separates duration and duration multiplier highlighted as fraction. (r"\*", Token.Number), @@ -168,7 +191,10 @@ def get_tokens_unprocessed(self, text): # Definition of a variable. 
Support assignments to alist keys # (myAlist.my-key.my-nested-key = \markup \spam \eggs). - (r"([^\W\d]|-)+(?=([^\W\d]|[\-.])*\s*=)", Token.Name.Lvalue), + (r"""(?x) + (?: [^\W\d] | - )+ + (?= (?: [^\W\d] | [\-.] )* \s* = ) + """, Token.Name.Lvalue), # Virtually everything can appear in markup mode, so we highlight # as text. Try to get a complete word, or we might wrongly lex diff --git a/tests/examplefiles/lilypond/example.ly b/tests/examplefiles/lilypond/example.ly index b2cbb81c36..8a2eb1d659 100644 --- a/tests/examplefiles/lilypond/example.ly +++ b/tests/examplefiles/lilypond/example.ly @@ -76,7 +76,8 @@ piuPiano = \markup \italic "più piano" \key d \major \cadenzaOn deses'!4.~(\tweak thickness 4\( deses^\p-\signumcongruentiae_1\4 - deses\longa) \myFunc { r } 4 des8 8[ 8]\) + deses\longa) \myFunc { r } 4 + des8-- 8[__ \ottava -1 8]\) \ottava 0 \bar "||" \cadenzaOff \pageBreak @@ -100,7 +101,7 @@ piuPiano = \markup \italic "più piano" } \addlyrics { \set Score.melismaBusyProperties = #'() - My Lily -- Song + My Lily -- Song __ } \chordmode { c cis:dim3+\dim des:maj7/+e\! 
diff --git a/tests/examplefiles/lilypond/example.ly.output b/tests/examplefiles/lilypond/example.ly.output index f5fbe53105..b54953d44e 100644 --- a/tests/examplefiles/lilypond/example.ly.output +++ b/tests/examplefiles/lilypond/example.ly.output @@ -399,21 +399,31 @@ '}' Punctuation ' ' Text.Whitespace '4' Literal.Number -' ' Text.Whitespace +'\n ' Text.Whitespace 'des' Pitch '8' Literal.Number +'--' Name.Builtin.Articulation ' ' Text.Whitespace '8' Literal.Number '[' Name.Builtin.Articulation +'__' Name.Builtin.Articulation +' ' Text.Whitespace +'\\ottava' Name.Builtin.MusicFunction +' ' Text.Whitespace +'-1' Literal.Number ' ' Text.Whitespace '<' Punctuation -'des' Pitch +'des,' Pitch ' ' Text.Whitespace 'ges' Pitch '>' Punctuation '8' Literal.Number ']' Name.Builtin.Articulation '\\)' Name.Builtin.Articulation +' ' Text.Whitespace +'\\ottava' Name.Builtin.MusicFunction +' ' Text.Whitespace +'0' Literal.Number '\n ' Text.Whitespace '\\bar' Name.Builtin.MusicFunction ' ' Text.Whitespace @@ -477,7 +487,7 @@ 'left' Name.Builtin.GrobProperty '.' Punctuation 'padding' Name.Builtin.GrobProperty -'-5' Literal.Number +'-5' Name.Builtin.Articulation '\\-' Name.Builtin.Articulation '^"' Literal.String 'Some music' Literal.String @@ -573,6 +583,8 @@ '--' Punctuation ' ' Text.Whitespace 'Song' Text +' ' Text.Whitespace +'__' Punctuation '\n ' Text.Whitespace '}' Punctuation '\n ' Text.Whitespace