diff --git a/AUTHORS b/AUTHORS
index 5058c612d6..f209a8acba 100644
--- a/AUTHORS
+++ b/AUTHORS
@@ -90,6 +90,7 @@ Other contributors, listed alphabetically, are:
* Justin Hendrick -- ParaSail lexer
* Jordi Gutiérrez Hermoso -- Octave lexer
* David Hess, Fish Software, Inc. -- Objective-J lexer
+* Ken Hilton -- Typographic Number Theory and Arrow lexers
* Varun Hiremath -- Debian control lexer
* Rob Hoelz -- Perl 6 lexer
* Doug Hogan -- Mscgen lexer
diff --git a/pygments/lexers/tnt.py b/pygments/lexers/tnt.py
index f62f3ab9f0..1d966ac872 100644
--- a/pygments/lexers/tnt.py
+++ b/pygments/lexers/tnt.py
@@ -13,7 +13,7 @@
from pygments.lexer import Lexer
from pygments.token import Text, Comment, Operator, Keyword, Name, Number, \
- Punctuation, Error
+ Punctuation, Error
__all__ = ['TNTLexer']
@@ -55,6 +55,10 @@ class TNTLexer(Lexer):
LINENOS = re.compile(r'(?:[0-9]+)(?:(?:, ?|,? and )(?:[0-9]+))*')
COMMENT = re.compile(r'\[[^\n\]]+\]')
+ def __init__(self, *args, **kwargs):
+ Lexer.__init__(self, *args, **kwargs)
+ self.cur = []
+
def whitespace(self, start, text, required=False):
"""Tokenize whitespace."""
end = start
@@ -104,9 +108,6 @@ def term(self, start, text):
def formula(self, start, text):
"""Tokenize a formula."""
- if text[start] in '[]': # fantasy push or pop
- self.cur.append((start, Keyword, text[start]))
- return start+1
if text[start] in self.NEGATORS: # ~<...>
end = start+1
while text[end] in self.NEGATORS:
@@ -154,7 +155,7 @@ def rule(self, start, text):
return match.end()
def lineno(self, start, text):
- """Tokenize a line marker."""
+ """Tokenize a line referral."""
end = start
while text[end] not in self.NUMBERS:
end += 1
@@ -186,65 +187,77 @@ def get_tokens_unprocessed(self, text):
self.cur = []
start = end = self.whitespace(0, text)
while start <= end < len(text):
- # try line number
- while text[end] in self.NUMBERS:
- end += 1
- if end != start: # actual number present
- self.cur.append((start, Number.Integer, text[start:end]))
- # whitespace is required after a line number
+ try:
+ # try line number
+ while text[end] in self.NUMBERS:
+ end += 1
+ if end != start: # actual number present
+ self.cur.append((start, Number.Integer, text[start:end]))
+ # whitespace is required after a line number
+ orig = len(self.cur)
+ try:
+ start = end = self.whitespace(end, text, True)
+ except AssertionError:
+ del self.cur[orig:]
+ start = end = self.error_till_line_end(end, text)
+ continue
+ # at this point it could be a comment
+ match = self.COMMENT.match(text, start)
+ if match is not None:
+ self.cur.append((start, Comment, text[start:match.end()]))
+ start = end = match.end()
+ # anything after the closing bracket is invalid
+ start = end = self.error_till_line_end(start, text)
+ # do not attempt to process the rest
+ continue
+ del match
+ if text[start] in '[]': # fantasy push or pop
+ self.cur.append((start, Keyword, text[start]))
+ start += 1
+ end += 1
+ else:
+ # one formula, possibly containing subformulae
+ orig = len(self.cur)
+ try:
+ start = end = self.formula(start, text)
+ except AssertionError: # not well-formed
+ del self.cur[orig:]
+ while text[end] not in self.WHITESPACE:
+ end += 1
+ self.cur.append((start, Error, text[start:end]))
+ start = end
+ # skip whitespace after formula
orig = len(self.cur)
try:
start = end = self.whitespace(end, text, True)
except AssertionError:
del self.cur[orig:]
- start = end = self.error_till_line_end(end, text)
+ start = end = self.error_till_line_end(start, text)
continue
- # at this point it could be a comment
- match = self.COMMENT.match(text, start)
- if match is not None:
- self.cur.append((start, Comment, text[start:match.end()]))
- start = end = match.end()
- # anything after the closing bracket is invalid
- start = end = self.error_till_line_end(start, text)
- # do not attempt to process the rest
- continue
- del match
- # one formula, possibly containing subformulae
- orig = len(self.cur)
- try:
- start = end = self.formula(start, text)
- except AssertionError: # not well-formed
- del self.cur[orig:]
- while text[end] not in self.WHITESPACE:
- end += 1
- self.cur.append((start, Error, text[start:end]))
- start = end
- # skip whitespace after formula
- orig = len(self.cur)
- try:
- start = end = self.whitespace(end, text, True)
- except AssertionError:
- del self.cur[orig:]
- start = end = self.error_till_line_end(start, text)
- continue
- # rule proving this formula a theorem
- orig = len(self.cur)
- try:
- start = end = self.rule(start, text)
- except AssertionError:
- del self.cur[orig:]
- start = end = self.error_till_line_end(start, text)
- continue
- # skip whitespace after rule
- start = end = self.whitespace(end, text)
- # line marker
- if text[start] == '(':
+ # rule proving this formula a theorem
orig = len(self.cur)
try:
- start = end = self.lineno(start, text)
+ start = end = self.rule(start, text)
except AssertionError:
del self.cur[orig:]
start = end = self.error_till_line_end(start, text)
continue
- start = end = self.whitespace(start, text)
+ # skip whitespace after rule
+ start = end = self.whitespace(end, text)
+ # line marker
+ if text[start] == '(':
+ orig = len(self.cur)
+ try:
+ start = end = self.lineno(start, text)
+ except AssertionError:
+ del self.cur[orig:]
+ start = end = self.error_till_line_end(start, text)
+ continue
+ start = end = self.whitespace(start, text)
+ except IndexError:
+ try:
+ del self.cur[orig:]
+ except NameError:
+ pass # if orig was never defined, fine
+ self.error_till_line_end(start, text)
return self.cur
diff --git a/tests/test_tnt.py b/tests/test_tnt.py
new file mode 100644
index 0000000000..e14834acb1
--- /dev/null
+++ b/tests/test_tnt.py
@@ -0,0 +1,204 @@
+# -*- coding: utf-8 -*-
+"""
+    Typographic Number Theory tests
+    ~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~~
+
+ :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import pytest
+
+from pygments.lexers.tnt import TNTLexer
+from pygments.token import Text, Comment, Operator, Keyword, Name, Number, \
+ Punctuation, Error
+
+@pytest.fixture(autouse=True)
+def lexer():
+ yield TNTLexer()
+
+# whitespace
+
+@pytest.mark.parametrize('text', (' a', ' \t0', '\n\n 3'))
+def test_whitespace_positive_matches(lexer, text):
+ """Test fragments that should be tokenized as whitespace text."""
+ assert lexer.whitespace(0, text) == len(text) - 1
+ assert lexer.whitespace(0, text, True) == len(text) - 1
+ assert lexer.cur[-1] == (0, Text, text[:-1])
+
+@pytest.mark.parametrize('text', ('0 a=b premise', 'b=a symmetry'))
+def test_whitespace_negative_matches(lexer, text):
+ """Test statements that do not start with whitespace text."""
+ assert lexer.whitespace(0, text) == 0
+ with pytest.raises(AssertionError):
+ lexer.whitespace(0, text, True)
+ assert not lexer.cur
+
+# terms that can go on either side of an = sign
+
+@pytest.mark.parametrize('text', ('a ', "a' ", 'b ', "c' "))
+def test_variable_positive_matches(lexer, text):
+ """Test fragments that should be tokenized as variables."""
+ assert lexer.variable(0, text) == len(text) - 1
+ assert lexer.cur[-1] == (0, Name.Variable, text[:-1])
+
+@pytest.mark.parametrize('text', ("' ", 'f ', "f' "))
+def test_variable_negative_matches(lexer, text):
+ """Test fragments that should **not** be tokenized as variables."""
+ with pytest.raises(AssertionError):
+ lexer.variable(0, text)
+ assert not lexer.cur
+
+@pytest.mark.parametrize('text', ('0', 'S0', 'SSSSS0'))
+def test_numeral_positive_matches(lexer, text):
+ """Test fragments that should be tokenized as (unary) numerals."""
+ assert lexer.term(0, text) == len(text)
+ assert lexer.cur[-1] == (len(text) - 1, Number.Integer, text[-1])
+ if text != '0':
+ assert lexer.cur[-2] == (0, Number.Integer, text[:-1])
+
+@pytest.mark.parametrize('text', (
+ '(a+b)', '(b.a)', '(c+d)'
+))
+def test_multiterm_positive_matches(lexer, text):
+ """Test fragments that should be tokenized as a compound term."""
+ assert lexer.term(0, text) == len(text)
+ assert [t[1] for t in lexer.cur] == [
+ Punctuation, Name.Variable, Operator,
+ Name.Variable, Punctuation
+ ]
+
+@pytest.mark.parametrize('text', ('1', '=', 'A'))
+def test_term_negative_matches(lexer, text):
+ """Test fragments that should not be tokenized as terms at all."""
+ with pytest.raises(AssertionError):
+ lexer.term(0, text)
+ assert not lexer.cur
+
+# full statements, minus rule
+
+@pytest.mark.parametrize('text', ('~a=b ', '~~~~a=b '))
+def test_negator_positive_matches(lexer, text):
+ """Test statements that start with a negation."""
+ assert lexer.formula(0, text) == len(text) - 1
+ assert lexer.cur[0] == (0, Operator, text[:-4])
+
+@pytest.mark.parametrize('text', ('Aa:a=b ', 'Eb:a=b '))
+def test_quantifier_positive_matches(lexer, text):
+ """Test statements that start with a quantifier."""
+ assert lexer.formula(0, text) == len(text) - 1
+ assert lexer.cur[0][1] == Keyword.Declaration
+ assert lexer.cur[1][1] == Name.Variable
+ assert lexer.cur[2] == (2, Punctuation, ':')
+
+@pytest.mark.parametrize('text', ('Aaa=b', 'Eba=b'))
+def test_quantifier_negative_matches(lexer, text):
+ """Test quantifiers that are only partially valid."""
+ with pytest.raises(AssertionError):
+ lexer.formula(0, text)
+ # leftovers should still be valid
+ assert lexer.cur[0][1] == Keyword.Declaration
+ assert lexer.cur[1][1] == Name.Variable
+
+@pytest.mark.parametrize('text', ('<a=b&b=a>', '<a=b|b=a>', '<a=b]b=a>'))
+def test_compound_positive_matches(lexer, text):
+ """Test statements that consist of multiple formulas compounded."""
+ assert lexer.formula(0, text) == len(text)
+ assert lexer.cur[0] == (0, Punctuation, '<')
+ assert lexer.cur[4][1] == Operator
+ assert lexer.cur[-1] == (len(text)-1, Punctuation, '>')
+
+@pytest.mark.parametrize('text', ('', '