Skip to content

Commit

Permalink
LilyPond: slightly improve lexing
Browse files Browse the repository at this point in the history
* Handle `--` and `__` as `Token.Punctuation` only if preceded by
  whitespace:

  ```
  <whitespace> --  ==>  hyphen (in lyrics mode)
  <whitespace> __  ==>  extender line (in lyrics mode)
  --               ==>  tenuto, neutral attachment (in music mode)
  __               ==>  portato, down attachment (in music mode)
  ```

* Add forgotten `\maxima` note duration.

* Some legibility improvements by using verbose mode for regular
  expressions.

* Some comment additions and fixes.
  • Loading branch information
lemzwerg committed Nov 26, 2022
1 parent 9084b86 commit 1cd255e
Showing 1 changed file with 32 additions and 11 deletions.
43 changes: 32 additions & 11 deletions pygments/lexers/lilypond.py
Expand Up @@ -23,6 +23,14 @@

__all__ = ["LilyPondLexer"]

# In LilyPond, (unquoted) name tokens only contain letters, hyphens,
# and underscores, where hyphens and underscores must not start or end
# a name token.
#
# Note that many of the entities listed as LilyPond built-in keywords
# (in file `_lilypond_builtins.py`) are only valid if surrounded by
# double quotes, for example, 'hufnagel-fa1'. This means that
# `NAME_END_RE` doesn't apply to such entities in valid LilyPond code.
NAME_END_RE = r"(?=\d|[^\w\-]|[\-_][\W\d])"

def builtin_words(names, backslash, suffix=NAME_END_RE):
Expand Down Expand Up @@ -80,10 +88,10 @@ def get_tokens_unprocessed(self, text):
# Whitespace.
(r"\s+", Token.Text.Whitespace),

# Multi-line comment. These are non-nestable.
# Multi-line comments. These are non-nestable.
(r"%\{.*?%\}", Token.Comment.Multiline),

# Simple comment.
# Simple comments.
(r"%.*?$", Token.Comment.Single),

# End of embedded LilyPond in Scheme.
Expand All @@ -105,22 +113,32 @@ def get_tokens_unprocessed(self, text):
# \override Stem.color = red,
# - comma as alternative syntax for lists: \time 3,3,2 4/4,
# - colon in tremolos: c:32,
# - double hyphen in lyrics: li -- ly -- pond,
(r"\\\\|--|[{}<>=.,:|]", Token.Punctuation),

# Pitch, with optional octavation marks, octave check,
# - double hyphen and double underscore in lyrics: li -- ly -- pond __
# (each of which must be preceded by ASCII whitespace)
(r"""(?x)
\\\\
| (?<= \s ) (?: -- | __ )
| [{}<>=.,:|]
""", Token.Punctuation),

# Pitches, with optional octavation marks, octave check,
# and forced or cautionary accidental.
(words(pitches, suffix=r"=?[',]*!?\??" + NAME_END_RE), Token.Pitch),

# String, optionally with direction specifier.
# Strings, optionally with direction specifier.
(r'[\-_^]?"', Token.String, "string"),

# Numbers.
(r"-?\d+\.\d+", Token.Number.Float), # 5. and .5 are not allowed
(r"-?\d+/\d+", Token.Number.Fraction),
# Integer, or duration with optional augmentation dots. We have no
# way to distinguish these, so we highlight them all as numbers.
(r"-?(\d+|\\longa|\\breve)\.*", Token.Number),
# Integers, or durations with optional augmentation dots.
# We have no way to distinguish these, so we highlight
# them all as numbers.
(r"""(?x)
-\d+
| (?: (?: \d+ | \\breve | \\longa | \\maxima )
\.*)
""", Token.Number),
# Asterisk separating a duration from its duration multiplier (the multiplier is highlighted as a fraction).
(r"\*", Token.Number),

Expand Down Expand Up @@ -168,7 +186,10 @@ def get_tokens_unprocessed(self, text):

# Definition of a variable. Support assignments to alist keys
# (myAlist.my-key.my-nested-key = \markup \spam \eggs).
(r"([^\W\d]|-)+(?=([^\W\d]|[\-.])*\s*=)", Token.Name.Lvalue),
(r"""(?x)
(?: [^\W\d] | - )+
(?= (?: [^\W\d] | [\-.] )* \s* = )
""", Token.Name.Lvalue),

# Virtually everything can appear in markup mode, so we highlight
# as text. Try to get a complete word, or we might wrongly lex
Expand Down

0 comments on commit 1cd255e

Please sign in to comment.