Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

testing turtle prefix names where reference starts with number #1590

Merged
merged 4 commits into from Dec 5, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
62 changes: 51 additions & 11 deletions pygments/lexers/rdf.py
Expand Up @@ -187,19 +187,61 @@ class TurtleLexer(RegexLexer):
filenames = ['*.ttl']
mimetypes = ['text/turtle', 'application/x-turtle']

flags = re.IGNORECASE
# Character group definitions, modelled on the terminal productions of the
# W3C Turtle grammar.  Names ending in ``_GRP`` hold the bare contents of a
# regex character class (no surrounding brackets) so that they can be
# concatenated into larger classes; the remaining names are complete regex
# fragments.

# PN_CHARS_BASE: letters allowed at the start of a prefix.  The final range
# covers the supplementary planes (#x10000-#xEFFFF) listed by the grammar.
PN_CHARS_BASE_GRP = ('a-zA-Z'
                     '\u00c0-\u00d6'
                     '\u00d8-\u00f6'
                     '\u00f8-\u02ff'
                     '\u0370-\u037d'
                     '\u037f-\u1fff'
                     '\u200c-\u200d'
                     '\u2070-\u218f'
                     '\u2c00-\u2fef'
                     '\u3001-\ud7ff'
                     '\uf900-\ufdcf'
                     '\ufdf0-\ufffd'
                     '\U00010000-\U000effff')

# PN_CHARS_U: PN_CHARS_BASE plus the underscore.
PN_CHARS_U_GRP = (PN_CHARS_BASE_GRP + '_')

# PN_CHARS: PN_CHARS_U plus hyphen, digits and combining/connector marks.
# The hyphen is escaped so it is never read as a range operator.
PN_CHARS_GRP = (PN_CHARS_U_GRP +
                r'\-' +
                r'0-9' +
                '\u00b7' +
                '\u0300-\u036f' +
                '\u203f-\u2040')

PN_CHARS = '[' + PN_CHARS_GRP + ']'

PN_CHARS_BASE = '[' + PN_CHARS_BASE_GRP + ']'

# PN_PREFIX: a base character, optionally followed by name characters or
# dots, where the last character must not be a dot.
PN_PREFIX = PN_CHARS_BASE + '(?:[' + PN_CHARS_GRP + '.]*' + PN_CHARS + ')?'

HEX_GRP = '0-9A-Fa-f'

HEX = '[' + HEX_GRP + ']'

# PERCENT: a percent sign followed by exactly two hex digits.
PERCENT = '%' + HEX + HEX

# Characters that may follow a backslash inside a local name.  The grammar
# lists the apostrophe here; it does not list the space or the double quote.
PN_LOCAL_ESC_CHARS_GRP = r"_~.\-!$&'()*+,;=/?#@%"

PN_LOCAL_ESC_CHARS = '[' + PN_LOCAL_ESC_CHARS_GRP + ']'

PN_LOCAL_ESC = r'\\' + PN_LOCAL_ESC_CHARS

# PLX is a bare alternation: it must only be spliced into an enclosing
# group, as PN_LOCAL does below.
PLX = '(?:' + PERCENT + ')|(?:' + PN_LOCAL_ESC + ')'

# PN_LOCAL: the first character may be a digit or a colon (or an escape);
# inner characters may also be dots, but the last character may not.
PN_LOCAL = ('(?:[' + PN_CHARS_U_GRP + ':0-9' + ']|' + PLX + ')' +
            '(?:(?:[' + PN_CHARS_GRP + '.:]|' + PLX + ')*(?:[' +
            PN_CHARS_GRP + ':]|' + PLX + '))?')

patterns = {
'PNAME_NS': r'((?:[a-z][\w-]*)?\:)', # Simplified character range
'PNAME_NS': r'((?:[a-zA-Z][\w-]*)?\:)', # Simplified character range
'IRIREF': r'(<[^<>"{}|^`\\\x00-\x20]*>)'
}

# PNAME_NS PN_LOCAL (with simplified character range)
patterns['PrefixedName'] = r'%(PNAME_NS)s([a-z][\w-]*)' % patterns

tokens = {
'root': [
(r'\s+', Whitespace),
(r'\s+', Text),

# Base / prefix
(r'(@base|BASE)(\s+)%(IRIREF)s(\s*)(\.?)' % patterns,
Expand All @@ -216,8 +258,8 @@ class TurtleLexer(RegexLexer):
(r'%(IRIREF)s' % patterns, Name.Variable),

# PrefixedName
(r'%(PrefixedName)s' % patterns,
bygroups(Name.Namespace, Name.Tag)),
(r'(' + PN_PREFIX + r')?(\:)(' + PN_LOCAL + r')?',
bygroups(Name.Namespace, Punctuation, Name.Tag)),

# Comment
(r'#[^\n]+', Comment),
Expand Down Expand Up @@ -257,12 +299,10 @@ class TurtleLexer(RegexLexer):
(r'.', String, '#pop'),
],
'end-of-string': [
(r'(@)([a-z]+(:?-[a-z0-9]+)*)',
(r'(@)([a-zA-Z]+(?:-[a-zA-Z0-9]+)*)',
bygroups(Operator, Generic.Emph), '#pop:2'),

(r'(\^\^)%(IRIREF)s' % patterns, bygroups(Operator, Generic.Emph), '#pop:2'),
(r'(\^\^)%(PrefixedName)s' % patterns,
bygroups(Operator, Generic.Emph, Generic.Emph), '#pop:2'),

default('#pop:2'),

Expand Down
12 changes: 6 additions & 6 deletions tests/examplefiles/example.ttl
Expand Up @@ -2,14 +2,14 @@
@prefix dcterms: <http://purl.org/dc/terms/>. @prefix xs: <http://www.w3.org/2001/XMLSchema> .
@prefix mads: <http://www.loc.gov/mads/rdf/v1#> .
@prefix skos: <http://www.w3.org/2004/02/skos/core#> .
@PREFIX dc: <http://purl.org/dc/elements/1.1/> # SPARQL-like syntax is OK
PREFIX dc: <http://purl.org/dc/elements/1.1/> # SPARQL-like syntax is OK
@prefix : <http://xmlns.com/foaf/0.1/> . # empty prefix is OK

<http://example.org/#spiderman> <http://www.perceive.net/schemas/relationship/enemyOf> <http://example.org/#green-goblin> .

<#doc1> a <#document>
<#doc1> a <#document>;
dc:creator "Smith", "Jones";
:knows <http://getopenid.com/jsmith>
:knows <http://getopenid.com/jsmith>;
dcterms:hasPart [ # A comment
dc:title "Some title", "Some other title";
dc:creator "برشت، برتولد"@ar;
Expand All @@ -23,8 +23,8 @@

<http://data.ub.uio.no/realfagstermer/006839> a mads:Topic,
skos:Concept ;
dcterms:created "2014-08-25"^^xsd:date ;
dcterms:modified "2014-11-12"^^xsd:date ;
dcterms:created "2014-08-25"^^xs:date ;
dcterms:modified "2014-11-12"^^xs:date ;
dcterms:identifier "REAL006839" ;
skos:prefLabel "Flerbørstemarker"@nb,
"Polychaeta"@la ;
Expand All @@ -33,7 +33,7 @@
"Mangebørsteormer"@nb,
"Havbørsteormer"@nb,
"Havbørstemarker"@nb,
"Polycheter"@nb.
"Polycheter"@nb ;
skos:inScheme <http://data.ub.uio.no/realfagstermer/> ;
skos:narrower <http://data.ub.uio.no/realfagstermer/018529>,
<http://data.ub.uio.no/realfagstermer/024538>,
Expand Down
42 changes: 42 additions & 0 deletions tests/test_rdf.py
@@ -0,0 +1,42 @@
# -*- coding: utf-8 -*-
"""
Basic RDF Lexer Test
~~~~~~~~~~~~~~~~~~~~

:copyright: Copyright 2006-2020 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

import pytest

from pygments.token import Name, Punctuation, Text
from pygments.lexers import TurtleLexer, ShExCLexer


@pytest.fixture(scope='module')
def turtle_lexer():
    """Provide a TurtleLexer instance, created once per test module."""
    lexer = TurtleLexer()
    yield lexer

@pytest.fixture(scope='module')
def shexc_lexer():
    """Provide a ShExCLexer instance, created once per test module."""
    lexer = ShExCLexer()
    yield lexer

def test_turtle_prefixed_name_starting_with_number(turtle_lexer):
    """Check the Turtle token stream for a local name that starts with a digit."""
    source = 'alice:6f6e4241-75a2-4780-9b2a-40da53082e54\n'
    expected = [
        (Name.Namespace, 'alice'),
        (Punctuation, ':'),
        (Name.Tag, '6f6e4241-75a2-4780-9b2a-40da53082e54'),
        (Text, '\n'),
    ]
    produced = list(turtle_lexer.get_tokens(source))
    assert produced == expected

def test_shexc_prefixed_name_starting_with_number(shexc_lexer):
    """Check the ShExC token stream for a local name that starts with a digit."""
    source = 'alice:6f6e4241-75a2-4780-9b2a-40da53082e54\n'
    expected = [
        (Name.Namespace, 'alice'),
        (Punctuation, ':'),
        (Name.Tag, '6f6e4241-75a2-4780-9b2a-40da53082e54'),
        (Text, '\n'),
    ]
    produced = list(shexc_lexer.get_tokens(source))
    assert produced == expected