Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add World of Warcraft TOC file lexer #2244

Merged
merged 11 commits into from Sep 21, 2022
2 changes: 1 addition & 1 deletion Contributing.md
Expand Up @@ -11,7 +11,7 @@ Goals & non-goals of Pygments
Python support
--------------

Pygments supports all supported Python versions as per the [Python Developer's Guide](https://devguide.python.org/#status-of-python-branches). Additionally, the default Python version of the latest stable version of RHEL, Ubuntu LTS, and Debian are supported, even if they're officially EOL. Supporting other end-of-life versions is a non-goal of Pygments.
Pygments supports all supported Python versions as per the [Python Developer's Guide](https://devguide.python.org/versions/). Additionally, the default Python version of the latest stable version of RHEL, Ubuntu LTS, and Debian are supported, even if they're officially EOL. Supporting other end-of-life versions is a non-goal of Pygments.

Validation
----------
Expand Down
1 change: 1 addition & 0 deletions pygments/lexers/_mapping.py
Expand Up @@ -522,6 +522,7 @@
'WatLexer': ('pygments.lexers.webassembly', 'WebAssembly', ('wast', 'wat'), ('*.wat', '*.wast'), ()),
'WebIDLLexer': ('pygments.lexers.webidl', 'Web IDL', ('webidl',), ('*.webidl',), ()),
'WhileyLexer': ('pygments.lexers.whiley', 'Whiley', ('whiley',), ('*.whiley',), ('text/x-whiley',)),
'WoWTocLexer': ('pygments.lexers.wowtoc', 'World of Warcraft TOC', ('wowtoc',), ('*.toc',), ()),
'X10Lexer': ('pygments.lexers.x10', 'X10', ('x10', 'xten'), ('*.x10',), ('text/x-x10',)),
'XMLUL4Lexer': ('pygments.lexers.ul4', 'XML+UL4', ('xml+ul4',), ('*.xmlul4',), ()),
'XQueryLexer': ('pygments.lexers.webmisc', 'XQuery', ('xquery', 'xqy', 'xq', 'xql', 'xqm'), ('*.xqy', '*.xquery', '*.xq', '*.xql', '*.xqm'), ('text/xquery', 'application/xquery')),
Expand Down
114 changes: 114 additions & 0 deletions pygments/lexers/wowtoc.py
@@ -0,0 +1,114 @@
"""
pygments.lexers.wowtoc
~~~~~~~~~~~~~~~~~~~~~~

Lexer for World of Warcraft TOC files, which describe game addon metadata.

:copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""
import re

from pygments.lexer import RegexLexer, bygroups
from pygments.token import Comment, Name, Text, Punctuation, String, Keyword

__all__ = ["WoWTocLexer"]

def _create_tag_line_pattern(inner_pattern, ignore_case=False):
return ((r"(?i)" if ignore_case else r"")
+ r"^(##)( *)" # groups 1, 2
+ inner_pattern # group 3
+ r"( *)(:)( *)(.*?)( *)$") # groups 4, 5, 6, 7, 8


def _create_tag_line_token(inner_pattern, inner_token, ignore_case=False):
    """
    Return a ``(pattern, callback)`` lexer rule for one family of tag lines.

    Every tag line has the same layout; only the regex for the tag name
    (`inner_pattern`) and the token type assigned to it (`inner_token`)
    differ between families, so this helper acts as a template for them.
    `ignore_case` is forwarded to `_create_tag_line_pattern`.
    """
    pattern = _create_tag_line_pattern(inner_pattern, ignore_case=ignore_case)
    whitespace = Text.Whitespace
    # one token type per capturing group of the tag line pattern, in order
    group_tokens = (
        Keyword.Declaration,  # group 1: "##"
        whitespace,           # group 2
        inner_token,          # group 3: tag name
        whitespace,           # group 4
        Punctuation,          # group 5: ":"
        whitespace,           # group 6
        String,               # group 7: value
        whitespace,           # group 8
    )
    return pattern, bygroups(*group_tokens)


class WoWTocLexer(RegexLexer):
    """
    Lexer for World of Warcraft TOC files.

    .. versionadded:: 2.13
    """

    name = "World of Warcraft TOC"
    aliases = ["wowtoc"]
    filenames = ["*.toc"]

    # Rule order matters: the more specific tag families are listed before the
    # catch-all tag rule, and tag lines are tried before plain comments.
    tokens = {
        "root": [
            # official localized tags, Notes and Title
            # (normal part is insensitive, locale part is sensitive)
            _create_tag_line_token(
                r"((?:[nN][oO][tT][eE][sS]|[tT][iI][tT][lL][eE])-(?:ptBR|zhCN|enCN|frFR|deDE|itIT|esMX|ptPT|koKR|ruRU|esES|zhTW|enTW|enGB|enUS))",
                Name.Builtin,
            ),
            # other official tags
            _create_tag_line_token(
                r"(Interface|Title|Notes|RequiredDeps|Dep[^: ]*|OptionalDeps|LoadOnDemand|LoadWith|LoadManagers|SavedVariablesPerCharacter|SavedVariables|DefaultState|Secure|Author|Version)",
                Name.Builtin,
                ignore_case=True,
            ),
            # user-defined tags
            _create_tag_line_token(
                r"(X-[^: ]*)",
                Name.Variable,
                ignore_case=True,
            ),
            # non-conforming tags, but still valid
            _create_tag_line_token(
                r"([^: ]*)",
                Name.Other,
            ),

            # Comments
            (r"^#.*$", Comment),

            # Addon Files
            (r"^.+$", Name),
        ]
    }

    # NOTE(review): declared without `self`, as in the original; presumably the
    # lexer base class machinery wraps this hook -- confirm against pygments.lexer.
    def analyse_text(text):
        """
        Score how likely `text` is a WoW TOC file.

        Adds 0.8 when an ``## Interface:`` tag line whose value starts with a
        version-like number is found anywhere in the text, 0.1 when ".lua" is
        mentioned and 0.05 when ".xml" is mentioned. Returns the sum, which is
        0 when nothing matched.
        """
        # at time of writing, this file suffix conflicts with one of TeX's in
        # markup.py. TeX's analyse_text() appears to be definitive (binary) and does
        # not share any likeness to WoW TOCs, which means we won't have to compete
        # with it by arbitrary increments in score.

        result = 0

        # while not required, an almost certain marker of WoW TOCs is the interface
        # tag. if this tag is omitted, players will need to opt-in to loading the
        # addon with an options change ("Load out of date addons"). the value is also
        # standardized: `<major><minor><patch>`, with minor and patch being two-digit
        # zero-padded.
        interface_pattern = _create_tag_line_pattern(r"(Interface)", ignore_case=True)
        # The tag line pattern is anchored with ^ and $, so re.MULTILINE is needed
        # for it to match a line in the middle of the file. Without the flag the
        # tag would only be found when the whole text is that single line.
        match = re.search(interface_pattern, text, re.MULTILINE)
        # group 7 is the tag's value
        if match and re.match(r"(\d+)(\d{2})(\d{2})", match.group(7)):
            result += 0.8

        casefolded = text.casefold()
        # Lua file listing is good marker too, but probably conflicts with many other
        # lexers
        if ".lua" in casefolded:
            result += 0.1
        # ditto for XML files, but they're less used in WoW TOCs
        if ".xml" in casefolded:
            result += 0.05

        return result
5 changes: 4 additions & 1 deletion scripts/count_token_references.py
Expand Up @@ -72,7 +72,10 @@ def fetch_lexer_sources():
to a list of lines.
"""
lexer_dir = (pathlib.Path(__file__).parent / "../pygments/lexers").resolve()
lexer_sources = {fn: fn.read_text().splitlines(keepends=False) for fn in lexer_dir.glob("*.py")}
lexer_sources = {
fn: fn.read_text(encoding='utf-8').splitlines(keepends=False)
jeanas marked this conversation as resolved.
Show resolved Hide resolved
for fn in lexer_dir.glob("*.py")
}
return lexer_sources


Expand Down
6 changes: 6 additions & 0 deletions tests/examplefiles/wowtoc/comments.toc
@@ -0,0 +1,6 @@
#
#a
# a comment
# a comment with a # in it
## no comma, thus a comment
## has space: and is thus, a comment
17 changes: 17 additions & 0 deletions tests/examplefiles/wowtoc/comments.toc.output

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

3 changes: 3 additions & 0 deletions tests/examplefiles/wowtoc/files.toc
@@ -0,0 +1,3 @@
a
Foo.lua
Spaces allowed.lua
8 changes: 8 additions & 0 deletions tests/examplefiles/wowtoc/files.toc.output

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

63 changes: 63 additions & 0 deletions tests/examplefiles/wowtoc/official_tags.toc
@@ -0,0 +1,63 @@
## TiTlE-ptBR: value
## TiTlE-zhCN: value
## TiTlE-enCN: value
## TiTlE-frFR: value
## TiTlE-deDE: value
## TiTlE-itIT: value
## TiTlE-esMX: value
## TiTlE-ptPT: value
## TiTlE-koKR: value
## TiTlE-ruRU: value
## TiTlE-esES: value
## TiTlE-zhTW: value
## TiTlE-enTW: value
## TiTlE-enGB: value
## TiTlE-enUS: value
## NoTeS-ptBR: value
## NoTeS-zhCN: value
## NoTeS-enCN: value
## NoTeS-frFR: value
## NoTeS-deDE: value
## NoTeS-itIT: value
## NoTeS-esMX: value
## NoTeS-ptPT: value
## NoTeS-koKR: value
## NoTeS-ruRU: value
## NoTeS-esES: value
## NoTeS-zhTW: value
## NoTeS-enTW: value
## NoTeS-enGB: value
## NoTeS-enUS: value
## Interface: value
## interface: value
## Title: value
## title: value
## Notes: value
## notes: value
## RequiredDeps: value
## requireddeps: value
## Dependencies: value
## dependencies: value
## OptionalDeps: value
## optionaldeps: value
## LoadOnDemand: value
## loadondemand: value
## LoadWith: value
## loadwith: value
## LoadManagers: value
## loadmanagers: value
## SavedVariablesPerCharacter: value
## savedvariablespercharacter: value
## SavedVariables: value
## savedvariables: value
## DefaultState: value
## defaultstate: value
## Secure: value
## secure: value
## Author: value
## author: value
## Version: value
## version: value
## Dep: value
## dep: value
## DepSomething: value