From 12c34ff4b9dad0021199e0f183b05bd6a3634032 Mon Sep 17 00:00:00 2001 From: Werner Lemberg Date: Sat, 26 Nov 2022 18:13:21 +0100 Subject: [PATCH] LilyPond: slightly improve lexing * Handle `--` and `__` as `Token.Punctuation` only if preceded by whitespace: ``` -- ==> hyphen (in lyrics mode) __ ==> extender line (in lyrics mode) -- ==> tenuto, neutral attachment (in music mode) __ ==> portato, down attachment (in music mode) ``` * Add forgotten `\maxima` note duration. * Some legibility improvements by using verbose mode for regular expressions. * Some comment additions and fixes. * Update tests. --- pygments/lexers/lilypond.py | 43 ++++++++++++++----- tests/examplefiles/lilypond/example.ly | 4 +- tests/examplefiles/lilypond/example.ly.output | 4 ++ 3 files changed, 38 insertions(+), 13 deletions(-) diff --git a/pygments/lexers/lilypond.py b/pygments/lexers/lilypond.py index c39e0b03e7..be6a97f8c2 100644 --- a/pygments/lexers/lilypond.py +++ b/pygments/lexers/lilypond.py @@ -23,6 +23,14 @@ __all__ = ["LilyPondLexer"] +# In LilyPond, (unquoted) name tokens only contain letters, hyphens, +# and underscores, where hyphens and underscores must not start or end +# a name token. +# +# Note that many of the entities listed as LilyPond built-in keywords +# (in file `_lilypond_builtins.py`) are only valid if surrounded by +# double quotes, for example, 'hufnagel-fa1'. This means that +# `NAME_END_RE` doesn't apply to such entities in valid LilyPond code. NAME_END_RE = r"(?=\d|[^\w\-]|[\-_][\W\d])" def builtin_words(names, backslash, suffix=NAME_END_RE): @@ -80,10 +88,10 @@ def get_tokens_unprocessed(self, text): # Whitespace. (r"\s+", Token.Text.Whitespace), - # Multi-line comment. These are non-nestable. + # Multi-line comments. These are non-nestable. (r"%\{.*?%\}", Token.Comment.Multiline), - # Simple comment. + # Simple comments. (r"%.*?$", Token.Comment.Single), # End of embedded LilyPond in Scheme. @@ -105,22 +113,32 @@ def get_tokens_unprocessed(self, text): # \override Stem.color = red, # - comma as alternative syntax for lists: \time 3,3,2 4/4, # - colon in tremolos: c:32, - # - double hyphen in lyrics: li -- ly -- pond, - (r"\\\\|--|[{}<>=.,:|]", Token.Punctuation), - - # Pitch, with optional octavation marks, octave check, + # - double hyphen and underscore in lyrics: li -- ly -- pond __ + # (which must be preceded by ASCII whitespace) + (r"""(?x) + \\\\ + | (?<= \s ) (?: -- | __ ) + | [{}<>=.,:|] + """, Token.Punctuation), + + # Pitches, with optional octavation marks, octave check, # and forced or cautionary accidental. (words(pitches, suffix=r"=?[',]*!?\??" + NAME_END_RE), Token.Pitch), - # String, optionally with direction specifier. + # Strings, optionally with direction specifier. (r'[\-_^]?"', Token.String, "string"), # Numbers. (r"-?\d+\.\d+", Token.Number.Float), # 5. and .5 are not allowed (r"-?\d+/\d+", Token.Number.Fraction), - # Integer, or duration with optional augmentation dots. We have no - # way to distinguish these, so we highlight them all as numbers. - (r"-?(\d+|\\longa|\\breve)\.*", Token.Number), + # Integers, or durations with optional augmentation dots. + # We have no way to distinguish these, so we highlight + # them all as numbers. + (r"""(?x) + -\d+ + | (?: (?: \d+ | \\breve | \\longa | \\maxima ) + \.* ) + """, Token.Number), # Separates duration and duration multiplier highlighted as fraction. (r"\*", Token.Number), @@ -168,7 +186,10 @@ def get_tokens_unprocessed(self, text): # Definition of a variable. Support assignments to alist keys # (myAlist.my-key.my-nested-key = \markup \spam \eggs). - (r"([^\W\d]|-)+(?=([^\W\d]|[\-.])*\s*=)", Token.Name.Lvalue), + (r"""(?x) + (?: [^\W\d] | - )+ + (?= (?: [^\W\d] | [\-.] )* \s* = ) + """, Token.Name.Lvalue), # Virtually everything can appear in markup mode, so we highlight # as text. Try to get a complete word, or we might wrongly lex diff --git a/tests/examplefiles/lilypond/example.ly b/tests/examplefiles/lilypond/example.ly index b2cbb81c36..25b3a5465a 100644 --- a/tests/examplefiles/lilypond/example.ly +++ b/tests/examplefiles/lilypond/example.ly @@ -76,7 +76,7 @@ piuPiano = \markup \italic "più piano" \key d \major \cadenzaOn deses'!4.~(\tweak thickness 4\( deses^\p-\signumcongruentiae_1\4 - deses\longa) \myFunc { r } 4 des8 8[ 8]\) + deses\longa) \myFunc { r } 4 des8-- 8[__ 8]\) \bar "||" \cadenzaOff \pageBreak @@ -100,7 +100,7 @@ piuPiano = \markup \italic "più piano" } \addlyrics { \set Score.melismaBusyProperties = #'() - My Lily -- Song + My Lily -- Song __ } \chordmode { c cis:dim3+\dim des:maj7/+e\! diff --git a/tests/examplefiles/lilypond/example.ly.output b/tests/examplefiles/lilypond/example.ly.output index f5fbe53105..67bb1ef4de 100644 --- a/tests/examplefiles/lilypond/example.ly.output +++ b/tests/examplefiles/lilypond/example.ly.output @@ -402,9 +402,11 @@ ' ' Text.Whitespace 'des' Pitch '8' Literal.Number +'--' Name.Builtin.Articulation ' ' Text.Whitespace '8' Literal.Number '[' Name.Builtin.Articulation +'__' Name.Builtin.Articulation ' ' Text.Whitespace '<' Punctuation 'des' Pitch @@ -573,6 +575,8 @@ '--' Punctuation ' ' Text.Whitespace 'Song' Text +' ' Text.Whitespace +'__' Punctuation '\n ' Text.Whitespace '}' Punctuation '\n ' Text.Whitespace