diff --git a/pygments/lexers/rdf.py b/pygments/lexers/rdf.py index 0f2cdc135f..917807aac3 100644 --- a/pygments/lexers/rdf.py +++ b/pygments/lexers/rdf.py @@ -187,19 +187,61 @@ class TurtleLexer(RegexLexer): filenames = ['*.ttl'] mimetypes = ['text/turtle', 'application/x-turtle'] - flags = re.IGNORECASE + # character group definitions :: + PN_CHARS_BASE_GRP = ('a-zA-Z' + '\u00c0-\u00d6' + '\u00d8-\u00f6' + '\u00f8-\u02ff' + '\u0370-\u037d' + '\u037f-\u1fff' + '\u200c-\u200d' + '\u2070-\u218f' + '\u2c00-\u2fef' + '\u3001-\ud7ff' + '\uf900-\ufdcf' + '\ufdf0-\ufffd') + + PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_') + + PN_CHARS_GRP = (PN_CHARS_U_GRP + + r'\-' + + r'0-9' + + '\u00b7' + + '\u0300-\u036f' + + '\u203f-\u2040') + + PN_CHARS = '[' + PN_CHARS_GRP + ']' + + PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']' + + PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?' + + HEX_GRP = '0-9A-Fa-f' + + HEX = '[' + HEX_GRP + ']' + + PERCENT = '%' + HEX + HEX + + PN_LOCAL_ESC_CHARS_GRP = r' _~.\-!$&"()*+,;=/?#@%' + + PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']' + + PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS + + PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')' + + PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' + + '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' + + PN_CHARS_GRP + ':]|' + PLX + '))?') patterns = { - 'PNAME_NS': r'((?:[a-z][\w-]*)?\:)', # Simplified character range + 'PNAME_NS': r'((?:[a-zA-Z][\w-]*)?\:)', # Simplified character range 'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)' } - # PNAME_NS PN_LOCAL (with simplified character range) - patterns['PrefixedName'] = r'%(PNAME_NS)s([a-z][\w-]*)' % patterns - tokens = { 'root': [ - (r'\s+', Whitespace), + (r'\s+', Text), # Base / prefix (r'(@base|BASE)(\s+)%(IRIREF)s(\s*)(\.?)' % patterns, @@ -216,8 +258,8 @@ class TurtleLexer(RegexLexer): (r'%(IRIREF)s' % patterns, Name.Variable), # PrefixedName - (r'%(PrefixedName)s' % patterns, - bygroups(Name.Namespace, 
Name.Tag)), + (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + r')?', + bygroups(Name.Namespace, Punctuation, Name.Tag)), # Comment (r'#[^\n]+', Comment), @@ -257,12 +299,10 @@ class TurtleLexer(RegexLexer): (r'.', String, '#pop'), ], 'end-of-string': [ - (r'(@)([a-z]+(:?-[a-z0-9]+)*)', + (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)', bygroups(Operator, Generic.Emph), '#pop:2'), (r'(\^\^)%(IRIREF)s' % patterns, bygroups(Operator, Generic.Emph), '#pop:2'), - (r'(\^\^)%(PrefixedName)s' % patterns, - bygroups(Operator, Generic.Emph, Generic.Emph), '#pop:2'), default('#pop:2'), diff --git a/tests/examplefiles/example.ttl b/tests/examplefiles/example.ttl index e524d86cf0..696f184a9b 100644 --- a/tests/examplefiles/example.ttl +++ b/tests/examplefiles/example.ttl @@ -2,14 +2,14 @@ @prefix dcterms: . @prefix xs: . @prefix mads: . @prefix skos: . -@PREFIX dc: # SPARQL-like syntax is OK +PREFIX dc: # SPARQL-like syntax is OK @prefix : . # empty prefix is OK . -<#doc1> a <#document> +<#doc1> a <#document>; dc:creator "Smith", "Jones"; - :knows + :knows ; dcterms:hasPart [ # A comment dc:title "Some title", "Some other title"; dc:creator "برشت، برتولد"@ar; @@ -23,8 +23,8 @@ a mads:Topic, skos:Concept ; - dcterms:created "2014-08-25"^^xsd:date ; - dcterms:modified "2014-11-12"^^xsd:date ; + dcterms:created "2014-08-25"^^xs:date ; + dcterms:modified "2014-11-12"^^xs:date ; dcterms:identifier "REAL006839" ; skos:prefLabel "Flerbørstemarker"@nb, "Polychaeta"@la ; @@ -33,7 +33,7 @@ "Mangebørsteormer"@nb, "Havbørsteormer"@nb, "Havbørstemarker"@nb, - "Polycheter"@nb. + "Polycheter"@nb ; skos:inScheme ; skos:narrower , , diff --git a/tests/test_rdf.py b/tests/test_rdf.py new file mode 100644 index 0000000000..ff8c9313c2 --- /dev/null +++ b/tests/test_rdf.py @@ -0,0 +1,42 @@ +# -*- coding: utf-8 -*- +""" + Basic RDF Lexer Test + ~~~~~~~~~~~~~~~~~~~~ + + :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS. + :license: BSD, see LICENSE for details. 
+""" + +import pytest + +from pygments.token import Name, Punctuation, Text +from pygments.lexers import TurtleLexer, ShExCLexer + + +@pytest.fixture(scope='module') +def turtle_lexer(): + yield TurtleLexer() + +@pytest.fixture(scope='module') +def shexc_lexer(): + yield ShExCLexer() + +def test_turtle_prefixed_name_starting_with_number(turtle_lexer): + fragment = 'alice:6f6e4241-75a2-4780-9b2a-40da53082e54\n' + tokens = [ + (Name.Namespace, 'alice'), + (Punctuation, ':'), + (Name.Tag, '6f6e4241-75a2-4780-9b2a-40da53082e54'), + (Text, '\n'), + ] + assert list(turtle_lexer.get_tokens(fragment)) == tokens + +def test_shexc_prefixed_name_starting_with_number(shexc_lexer): + fragment = 'alice:6f6e4241-75a2-4780-9b2a-40da53082e54\n' + tokens = [ + (Name.Namespace, 'alice'), + (Punctuation, ':'), + (Name.Tag, '6f6e4241-75a2-4780-9b2a-40da53082e54'), + (Text, '\n'), + ] + assert list(shexc_lexer.get_tokens(fragment)) == tokens