Skip to content

Commit

Permalink
LilyPond: slightly improve lexing
Browse files Browse the repository at this point in the history
* Handle `--` and `__` as `Token.Punctuation` only if preceded by
  whitespace:

  ```
  <whitespace> --  ==>  hyphen (in lyrics mode)
  <whitespace> __  ==>  extender line (in lyrics mode)
  --               ==>  tenuto, neutral attachment (in music mode)
  __               ==>  portato, down attachment (in music mode)
  ```

* Handle `-` followed by a number as `Token.Number` only if preceded by
  whitespace.  This is purely heuristic, derived from the coding style shown
  in the LilyPond manuals.

  ```
  <whitespace> -3  ==>  integer (e.g., as a function argument)
  -3               ==>  fingering instruction
  ```

* Add forgotten `\maxima` note duration.

* Some legibility improvements by using verbose mode for regular
  expressions.

* Some comment additions and fixes.

* Update tests.
  • Loading branch information
lemzwerg committed Nov 27, 2022
1 parent 9084b86 commit 6ef8036
Show file tree
Hide file tree
Showing 3 changed files with 43 additions and 13 deletions.
48 changes: 37 additions & 11 deletions pygments/lexers/lilypond.py
Expand Up @@ -23,6 +23,14 @@

__all__ = ["LilyPondLexer"]

# In LilyPond, (unquoted) name tokens only contain letters, hyphens,
# and underscores, where hyphens and underscores must not start or end
# a name token.
#
# Note that many of the entities listed as LilyPond built-in keywords
# (in file `_lilypond_builtins.py`) are only valid if surrounded by
# double quotes, for example, 'hufnagel-fa1'. This means that
# `NAME_END_RE` doesn't apply to such entities in valid LilyPond code.
NAME_END_RE = r"(?=\d|[^\w\-]|[\-_][\W\d])"

def builtin_words(names, backslash, suffix=NAME_END_RE):
Expand Down Expand Up @@ -80,10 +88,10 @@ def get_tokens_unprocessed(self, text):
# Whitespace.
(r"\s+", Token.Text.Whitespace),

# Multi-line comment. These are non-nestable.
# Multi-line comments. These are non-nestable.
(r"%\{.*?%\}", Token.Comment.Multiline),

# Simple comment.
# Simple comments.
(r"%.*?$", Token.Comment.Single),

# End of embedded LilyPond in Scheme.
Expand All @@ -105,22 +113,37 @@ def get_tokens_unprocessed(self, text):
# \override Stem.color = red,
# - comma as alternative syntax for lists: \time 3,3,2 4/4,
# - colon in tremolos: c:32,
# - double hyphen in lyrics: li -- ly -- pond,
(r"\\\\|--|[{}<>=.,:|]", Token.Punctuation),

# Pitch, with optional octavation marks, octave check,
# - double hyphen and double underscore in lyrics: li -- ly -- pond __
# (which must be preceded by ASCII whitespace)
(r"""(?x)
\\\\
| (?<= \s ) (?: -- | __ )
| [{}<>=.,:|]
""", Token.Punctuation),

# Pitches, with optional octavation marks, octave check,
# and forced or cautionary accidental.
(words(pitches, suffix=r"=?[',]*!?\??" + NAME_END_RE), Token.Pitch),

# String, optionally with direction specifier.
# Strings, optionally with direction specifier.
(r'[\-_^]?"', Token.String, "string"),

# Numbers.
(r"-?\d+\.\d+", Token.Number.Float), # 5. and .5 are not allowed
(r"-?\d+/\d+", Token.Number.Fraction),
# Integer, or duration with optional augmentation dots. We have no
# way to distinguish these, so we highlight them all as numbers.
(r"-?(\d+|\\longa|\\breve)\.*", Token.Number),
# Integers, or durations with optional augmentation dots.
# We have no way to distinguish these, so we highlight
# them all as numbers.
#
# Normally, there is a space before the integer (being an
# argument to a music function), which we check here. The
# case without a space is handled below (as a fingering
# number).
(r"""(?x)
(?<= \s ) -\d+
| (?: (?: \d+ | \\breve | \\longa | \\maxima )
\.* )
""", Token.Number),
# Separator between a duration and a duration multiplier (the
# multiplier itself is highlighted as a fraction).
(r"\*", Token.Number),

Expand Down Expand Up @@ -168,7 +191,10 @@ def get_tokens_unprocessed(self, text):

# Definition of a variable. Support assignments to alist keys
# (myAlist.my-key.my-nested-key = \markup \spam \eggs).
(r"([^\W\d]|-)+(?=([^\W\d]|[\-.])*\s*=)", Token.Name.Lvalue),
(r"""(?x)
(?: [^\W\d] | - )+
(?= (?: [^\W\d] | [\-.] )* \s* = )
""", Token.Name.Lvalue),

# Virtually everything can appear in markup mode, so we highlight
# as text. Try to get a complete word, or we might wrongly lex
Expand Down
4 changes: 2 additions & 2 deletions tests/examplefiles/lilypond/example.ly
Expand Up @@ -76,7 +76,7 @@ piuPiano = \markup \italic "più piano"
\key d \major
\cadenzaOn
deses'!4.~(\tweak thickness 4\( deses^\p-\signumcongruentiae_1\4
deses\longa) \myFunc { r } 4 des8 8[ <des ges>8]\)
deses\longa) \myFunc { r } 4 des8-- 8[__ <des ges>8]\)
\bar "||"
\cadenzaOff
\pageBreak
Expand All @@ -100,7 +100,7 @@ piuPiano = \markup \italic "più piano"
}
\addlyrics {
\set Score.melismaBusyProperties = #'()
My Lily -- Song
My Lily -- Song __
}
\chordmode {
c cis:dim3+\dim des:maj7/+e\!
Expand Down
4 changes: 4 additions & 0 deletions tests/examplefiles/lilypond/example.ly.output

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

0 comments on commit 6ef8036

Please sign in to comment.