Skip to content

Commit

Permalink
Add World of Warcraft TOC file lexer (#2244)
Browse files Browse the repository at this point in the history
Also fix a broken link and decode as UTF8 in count_token_references.py.
  • Loading branch information
t-mart committed Sep 21, 2022
1 parent f59a3ef commit d48686d
Show file tree
Hide file tree
Showing 12 changed files with 771 additions and 2 deletions.
2 changes: 1 addition & 1 deletion Contributing.md
Expand Up @@ -11,7 +11,7 @@ Goals & non-goals of Pygments
Python support
--------------

Pygments supports all supported Python versions as per the [Python Developer's Guide](https://devguide.python.org/#status-of-python-branches). Additionally, the default Python version of the latest stable version of RHEL, Ubuntu LTS, and Debian are supported, even if they're officially EOL. Supporting other end-of-life versions is a non-goal of Pygments.
Pygments supports all supported Python versions as per the [Python Developer's Guide](https://devguide.python.org/versions/). Additionally, the default Python version of the latest stable version of RHEL, Ubuntu LTS, and Debian are supported, even if they're officially EOL. Supporting other end-of-life versions is a non-goal of Pygments.

Validation
----------
Expand Down
1 change: 1 addition & 0 deletions pygments/lexers/_mapping.py
Expand Up @@ -523,6 +523,7 @@
'WatLexer': ('pygments.lexers.webassembly', 'WebAssembly', ('wast', 'wat'), ('*.wat', '*.wast'), ()),
'WebIDLLexer': ('pygments.lexers.webidl', 'Web IDL', ('webidl',), ('*.webidl',), ()),
'WhileyLexer': ('pygments.lexers.whiley', 'Whiley', ('whiley',), ('*.whiley',), ('text/x-whiley',)),
'WoWTocLexer': ('pygments.lexers.wowtoc', 'World of Warcraft TOC', ('wowtoc',), ('*.toc',), ()),
'X10Lexer': ('pygments.lexers.x10', 'X10', ('x10', 'xten'), ('*.x10',), ('text/x-x10',)),
'XMLUL4Lexer': ('pygments.lexers.ul4', 'XML+UL4', ('xml+ul4',), ('*.xmlul4',), ()),
'XQueryLexer': ('pygments.lexers.webmisc', 'XQuery', ('xquery', 'xqy', 'xq', 'xql', 'xqm'), ('*.xqy', '*.xquery', '*.xq', '*.xql', '*.xqm'), ('text/xquery', 'application/xquery')),
Expand Down
114 changes: 114 additions & 0 deletions pygments/lexers/wowtoc.py
@@ -0,0 +1,114 @@
"""
pygments.lexers.wowtoc
~~~~~~~~~~~~~~~~~~~~~~
Lexer for World of Warcraft TOC files, which describe game addon metadata.
:copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import re

from pygments.lexer import RegexLexer, bygroups
from pygments.token import Comment, Name, Text, Punctuation, String, Keyword

__all__ = ["WoWTocLexer"]

def _create_tag_line_pattern(inner_pattern, ignore_case=False):
    """Return the regex source for a single ``## Tag: value`` line.

    The resulting pattern has eight capturing groups:

    1. the leading ``##`` marker
    2. spaces between the marker and the tag name
    3. the tag name -- supplied by *inner_pattern*, which is expected to
       contribute exactly one capturing group
    4. spaces before the colon
    5. the colon
    6. spaces after the colon
    7. the value (trailing spaces excluded)
    8. trailing spaces

    The pattern is anchored with ``^`` and ``$``; to find a tag line inside
    a multi-line text it has to be compiled or searched in multiline mode.

    :param inner_pattern: regex source matching the tag name (group 3).
    :param ignore_case: when true, prefix the pattern with the inline
        ``(?i)`` flag so the whole line is matched case-insensitively.
    :return: the assembled regex source as a string.
    """
    pieces = []
    if ignore_case:
        # inline flag must sit at the very start of the expression
        pieces.append(r"(?i)")
    pieces.append(r"^(##)( *)")                # groups 1, 2
    pieces.append(inner_pattern)               # group 3
    pieces.append(r"( *)(:)( *)(.*?)( *)$")    # groups 4, 5, 6, 7, 8
    return "".join(pieces)


def _create_tag_line_token(inner_pattern, inner_token, ignore_case=False):
    """Build one lexer rule for a particular flavour of tag line.

    Every tag line has the same shape; the only things that vary between
    flavours are the regex that recognises the tag name and the token type
    that name is highlighted with. This helper fills in those two holes.

    :param inner_pattern: regex source (one capturing group) for the tag name.
    :param inner_token: token type assigned to the tag name.
    :param ignore_case: forwarded to ``_create_tag_line_pattern``.
    :return: a ``(pattern, callback)`` tuple suitable for a ``tokens`` state.
    """
    whitespace = Text.Whitespace
    # one token type per capturing group of the tag-line pattern, in order
    group_tokens = (
        Keyword.Declaration,  # "##"
        whitespace,
        inner_token,          # tag name
        whitespace,
        Punctuation,          # ":"
        whitespace,
        String,               # value
        whitespace,
    )
    line_regex = _create_tag_line_pattern(inner_pattern, ignore_case=ignore_case)
    return line_regex, bygroups(*group_tokens)


class WoWTocLexer(RegexLexer):
    """
    Lexer for World of Warcraft TOC files.

    A TOC file is line oriented. Each line is one of:

    * a tag line, ``## Name: value``
    * a comment, any other line starting with ``#``
    * an addon file path, any other non-empty line

    .. versionadded:: 2.13
    """

    name = "World of Warcraft TOC"
    aliases = ["wowtoc"]
    filenames = ["*.toc"]

    # Rule order matters: the more specific tag patterns must be tried before
    # the catch-all tag pattern, and all tag patterns before the generic
    # comment rule (a tag line also starts with "#").
    tokens = {
        "root": [
            # official localized tags, Notes and Title
            # (normal part is insensitive, locale part is sensitive)
            _create_tag_line_token(
                r"((?:[nN][oO][tT][eE][sS]|[tT][iI][tT][lL][eE])-(?:ptBR|zhCN|enCN|frFR|deDE|itIT|esMX|ptPT|koKR|ruRU|esES|zhTW|enTW|enGB|enUS))",
                Name.Builtin,
            ),
            # other official tags
            _create_tag_line_token(
                r"(Interface|Title|Notes|RequiredDeps|Dep[^: ]*|OptionalDeps|LoadOnDemand|LoadWith|LoadManagers|SavedVariablesPerCharacter|SavedVariables|DefaultState|Secure|Author|Version)",
                Name.Builtin,
                ignore_case=True,
            ),
            # user-defined tags
            _create_tag_line_token(
                r"(X-[^: ]*)",
                Name.Variable,
                ignore_case=True,
            ),
            # non-conforming tags, but still valid
            _create_tag_line_token(
                r"([^: ]*)",
                Name.Other,
            ),

            # Comments
            (r"^#.*$", Comment),

            # Addon Files
            (r"^.+$", Name),
        ]
    }

    def analyse_text(text):
        """Score how likely *text* is to be a WoW TOC file.

        The score is the sum of three independent heuristics:

        * 0.8 if there is an ``## Interface:`` tag line whose value starts
          with a standard interface version number
        * 0.1 if ``.lua`` occurs anywhere in the text (case-insensitive)
        * 0.05 if ``.xml`` occurs anywhere in the text (case-insensitive)

        so the result ranges from 0 to roughly 0.95.
        """
        # at time of writing, this file suffix conflicts with one of Tex's in
        # markup.py. Tex's analyse_text() appears to be definitive (binary) and
        # does not share any likeness to WoW TOCs, which means we won't have to
        # compete with it by arbitrary increments in score.

        result = 0

        # while not required, an almost certain marker of WoW TOCs is the
        # interface tag. if this tag is omitted, players will need to opt in to
        # loading the addon with an options change ("Load out of date addons").
        # the value is also standardized: `<major><minor><patch>`, with minor
        # and patch being two-digit zero-padded.
        interface_pattern = _create_tag_line_pattern(r"(Interface)", ignore_case=True)
        # The pattern is anchored with ^ and $, so it must be searched in
        # multiline mode; otherwise it could only match when the entire text
        # is a single Interface line. (RegexLexer compiles the ``tokens``
        # patterns with re.MULTILINE, so this mirrors how the lexer itself
        # applies the same pattern.)
        match = re.search(interface_pattern, text, re.MULTILINE)
        # group 7 of the tag-line pattern is the tag's value
        if match and re.match(r"(\d+)(\d{2})(\d{2})", match.group(7)):
            result += 0.8

        casefolded = text.casefold()
        # Lua file listing is a good marker too, but probably conflicts with
        # many other lexers
        if ".lua" in casefolded:
            result += 0.1
        # ditto for XML files, but they're less used in WoW TOCs
        if ".xml" in casefolded:
            result += 0.05

        return result
5 changes: 4 additions & 1 deletion scripts/count_token_references.py
Expand Up @@ -72,7 +72,10 @@ def fetch_lexer_sources():
to a list of lines.
"""
lexer_dir = (pathlib.Path(__file__).parent / "../pygments/lexers").resolve()
lexer_sources = {fn: fn.read_text().splitlines(keepends=False) for fn in lexer_dir.glob("*.py")}
lexer_sources = {
fn: fn.read_text(encoding='utf-8').splitlines(keepends=False)
for fn in lexer_dir.glob("*.py")
}
return lexer_sources


Expand Down
6 changes: 6 additions & 0 deletions tests/examplefiles/wowtoc/comments.toc
@@ -0,0 +1,6 @@
#
#a
# a comment
# a comment with a # in it
## no comma, thus a comment
## has space: and is thus, a comment
17 changes: 17 additions & 0 deletions tests/examplefiles/wowtoc/comments.toc.output

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions tests/examplefiles/wowtoc/files.toc
@@ -0,0 +1,3 @@
a
Foo.lua
Spaces allowed.lua
8 changes: 8 additions & 0 deletions tests/examplefiles/wowtoc/files.toc.output

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

63 changes: 63 additions & 0 deletions tests/examplefiles/wowtoc/official_tags.toc
@@ -0,0 +1,63 @@
## TiTlE-ptBR: value
## TiTlE-zhCN: value
## TiTlE-enCN: value
## TiTlE-frFR: value
## TiTlE-deDE: value
## TiTlE-itIT: value
## TiTlE-esMX: value
## TiTlE-ptPT: value
## TiTlE-koKR: value
## TiTlE-ruRU: value
## TiTlE-esES: value
## TiTlE-zhTW: value
## TiTlE-enTW: value
## TiTlE-enGB: value
## TiTlE-enUS: value
## NoTeS-ptBR: value
## NoTeS-zhCN: value
## NoTeS-enCN: value
## NoTeS-frFR: value
## NoTeS-deDE: value
## NoTeS-itIT: value
## NoTeS-esMX: value
## NoTeS-ptPT: value
## NoTeS-koKR: value
## NoTeS-ruRU: value
## NoTeS-esES: value
## NoTeS-zhTW: value
## NoTeS-enTW: value
## NoTeS-enGB: value
## NoTeS-enUS: value
## Interface: value
## interface: value
## Title: value
## title: value
## Notes: value
## notes: value
## RequiredDeps: value
## requireddeps: value
## Dependencies: value
## dependencies: value
## OptionalDeps: value
## optionaldeps: value
## LoadOnDemand: value
## loadondemand: value
## LoadWith: value
## loadwith: value
## LoadManagers: value
## loadmanagers: value
## SavedVariablesPerCharacter: value
## savedvariablespercharacter: value
## SavedVariables: value
## savedvariables: value
## DefaultState: value
## defaultstate: value
## Secure: value
## secure: value
## Author: value
## author: value
## Version: value
## version: value
## Dep: value
## dep: value
## DepSomething: value

0 comments on commit d48686d

Please sign in to comment.