diff --git a/pygments/lexers/rdf.py b/pygments/lexers/rdf.py
index 0f2cdc135f..917807aac3 100644
--- a/pygments/lexers/rdf.py
+++ b/pygments/lexers/rdf.py
@@ -187,19 +187,61 @@ class TurtleLexer(RegexLexer):
filenames = ['*.ttl']
mimetypes = ['text/turtle', 'application/x-turtle']
- flags = re.IGNORECASE
+ # Character-class fragments for the Turtle grammar terminals; each *_GRP string is the inside of a [...] class.
+ PN_CHARS_BASE_GRP = ('a-zA-Z'
+ '\u00c0-\u00d6'
+ '\u00d8-\u00f6'
+ '\u00f8-\u02ff'
+ '\u0370-\u037d'
+ '\u037f-\u1fff'
+ '\u200c-\u200d'
+ '\u2070-\u218f'
+ '\u2c00-\u2fef'
+ '\u3001-\ud7ff'
+ '\uf900-\ufdcf'
+ '\ufdf0-\ufffd')
+
+ PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')
+
+ PN_CHARS_GRP = (PN_CHARS_U_GRP +
+ r'\-' +
+ r'0-9' +
+ '\u00b7' +
+ '\u0300-\u036f' +
+ '\u203f-\u2040')
+
+ PN_CHARS = '[' + PN_CHARS_GRP + ']'
+
+ PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'
+
+ PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'
+
+ HEX_GRP = '0-9A-Fa-f'
+
+ HEX = '[' + HEX_GRP + ']'
+
+ PERCENT = '%' + HEX + HEX
+
+ PN_LOCAL_ESC_CHARS_GRP = r"_~.\-!$&'()*+,;=/?#@%"  # Turtle PN_LOCAL_ESC set: has "'", no space and no '"'
+
+ PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'
+
+ PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS
+
+ PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'
+
+ PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
+ '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
+ PN_CHARS_GRP + ':]|' + PLX + '))?')
patterns = {
- 'PNAME_NS': r'((?:[a-z][\w-]*)?\:)', # Simplified character range
+ 'PNAME_NS': r'((?:[a-zA-Z][\w-]*)?\:)', # Simplified character range
'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)'
}
- # PNAME_NS PN_LOCAL (with simplified character range)
- patterns['PrefixedName'] = r'%(PNAME_NS)s([a-z][\w-]*)' % patterns
-
tokens = {
'root': [
- (r'\s+', Whitespace),
+ (r'\s+', Text),
# Base / prefix
(r'(@base|BASE)(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
@@ -216,8 +258,8 @@ class TurtleLexer(RegexLexer):
(r'%(IRIREF)s' % patterns, Name.Variable),
# PrefixedName
- (r'%(PrefixedName)s' % patterns,
- bygroups(Name.Namespace, Name.Tag)),
+ (r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + r')?',
+ bygroups(Name.Namespace, Punctuation, Name.Tag)),
# Comment
(r'#[^\n]+', Comment),
@@ -257,12 +299,10 @@ class TurtleLexer(RegexLexer):
(r'.', String, '#pop'),
],
'end-of-string': [
- (r'(@)([a-z]+(:?-[a-z0-9]+)*)',
+ (r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
bygroups(Operator, Generic.Emph), '#pop:2'),
(r'(\^\^)%(IRIREF)s' % patterns, bygroups(Operator, Generic.Emph), '#pop:2'),
- (r'(\^\^)%(PrefixedName)s' % patterns,
- bygroups(Operator, Generic.Emph, Generic.Emph), '#pop:2'),
default('#pop:2'),
diff --git a/tests/examplefiles/example.ttl b/tests/examplefiles/example.ttl
index e524d86cf0..696f184a9b 100644
--- a/tests/examplefiles/example.ttl
+++ b/tests/examplefiles/example.ttl
@@ -2,14 +2,14 @@
@prefix dcterms: . @prefix xs: .
@prefix mads: .
@prefix skos: .
-@PREFIX dc: # SPARQL-like syntax is OK
+PREFIX dc: # SPARQL-like syntax is OK
@prefix : . # empty prefix is OK
.
-<#doc1> a <#document>
+<#doc1> a <#document>;
dc:creator "Smith", "Jones";
- :knows
+ :knows ;
dcterms:hasPart [ # A comment
dc:title "Some title", "Some other title";
dc:creator "برشت، برتولد"@ar;
@@ -23,8 +23,8 @@
a mads:Topic,
skos:Concept ;
- dcterms:created "2014-08-25"^^xsd:date ;
- dcterms:modified "2014-11-12"^^xsd:date ;
+ dcterms:created "2014-08-25"^^xs:date ;
+ dcterms:modified "2014-11-12"^^xs:date ;
dcterms:identifier "REAL006839" ;
skos:prefLabel "Flerbørstemarker"@nb,
"Polychaeta"@la ;
@@ -33,7 +33,7 @@
"Mangebørsteormer"@nb,
"Havbørsteormer"@nb,
"Havbørstemarker"@nb,
- "Polycheter"@nb.
+ "Polycheter"@nb ;
skos:inScheme ;
skos:narrower ,
,
diff --git a/tests/test_rdf.py b/tests/test_rdf.py
new file mode 100644
index 0000000000..ff8c9313c2
--- /dev/null
+++ b/tests/test_rdf.py
@@ -0,0 +1,42 @@
+# -*- coding: utf-8 -*-
+"""
+ Basic RDF Lexer Tests
+ ~~~~~~~~~~~~~~~~~~~~~
+
+ :copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
+ :license: BSD, see LICENSE for details.
+"""
+
+import pytest
+
+from pygments.token import Name, Punctuation, Text
+from pygments.lexers import TurtleLexer, ShExCLexer
+
+
+@pytest.fixture(scope='module')
+def turtle_lexer():
+ yield TurtleLexer()
+
+@pytest.fixture(scope='module')
+def shexc_lexer():
+ yield ShExCLexer()
+
+def test_turtle_prefixed_name_starting_with_number(turtle_lexer):
+ fragment = 'alice:6f6e4241-75a2-4780-9b2a-40da53082e54\n'
+ tokens = [
+ (Name.Namespace, 'alice'),
+ (Punctuation, ':'),
+ (Name.Tag, '6f6e4241-75a2-4780-9b2a-40da53082e54'),
+ (Text, '\n'),
+ ]
+ assert list(turtle_lexer.get_tokens(fragment)) == tokens
+
+def test_shexc_prefixed_name_starting_with_number(shexc_lexer):
+ fragment = 'alice:6f6e4241-75a2-4780-9b2a-40da53082e54\n'
+ tokens = [
+ (Name.Namespace, 'alice'),
+ (Punctuation, ':'),
+ (Name.Tag, '6f6e4241-75a2-4780-9b2a-40da53082e54'),
+ (Text, '\n'),
+ ]
+ assert list(shexc_lexer.get_tokens(fragment)) == tokens