Skip to content

Commit

Permalink
Import: Lexer for SPARQL (#49)
Browse files Browse the repository at this point in the history
  • Loading branch information
pyrmont committed May 13, 2019
1 parent 8ac43fa commit a6816d2
Show file tree
Hide file tree
Showing 4 changed files with 125 additions and 0 deletions.
6 changes: 6 additions & 0 deletions lib/rouge/demos/sparql
@@ -0,0 +1,6 @@
# Demo query: selects ?item and ?itemLabel for every ?item matched by the
# `wdt:P31 wd:Q146` triple pattern. The SERVICE block hands the language
# list "[AUTO_LANGUAGE],en" to wikibase:label.
SELECT ?item ?itemLabel
WHERE
{
?item wdt:P31 wd:Q146.
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en". }
}
60 changes: 60 additions & 0 deletions lib/rouge/lexers/sparql.rb
@@ -0,0 +1,60 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

module Rouge
  module Lexers
    # Lexer for SPARQL, the query language for RDF data.
    #
    # Keywords and builtin function names are matched case-insensitively as
    # whole words; the only case-sensitive keyword is the lone `a`.
    class SPARQL < RegexLexer
      title "SPARQL"
      desc "Semantic Query Language, for RDF data"
      tag 'sparql'
      filenames '*.rq'
      mimetypes 'application/sparql-query'

      # Builtin functions and aggregates, spelled in their conventional case.
      BUILTINS = Set.new(%w[
        ABS AVG BNODE BOUND CEIL COALESCE CONCAT CONTAINS COUNT DATATYPE DAY
        ENCODE_FOR_URI FLOOR GROUP_CONCAT HOURS IF IRI isBLANK isIRI isLITERAL
        isNUMERIC isURI LANG LANGMATCHES LCASE MAX MD5 MIN MINUTES MONTH NOW
        RAND REGEX REPLACE ROUND sameTerm SAMPLE SECONDS SEPARATOR SHA1
        SHA256 SHA384 SHA512 STR STRAFTER STRBEFORE STRDT STRENDS STRLANG
        STRLEN STRSTARTS STRUUID SUBSTR SUM TIMEZONE TZ UCASE URI UUID YEAR
      ]).freeze

      # Reserved words. Multi-word forms such as GROUP BY or NOT EXISTS are
      # listed as individual words so that any amount of whitespace (or a
      # line break) between them still highlights correctly.
      KEYWORDS = Set.new(%w[
        ADD ALL AS ASC ASK BASE BIND BY CLEAR CONSTRUCT COPY CREATE DATA
        DEFAULT DELETE DESC DESCRIBE DISTINCT DROP EXISTS FILTER FROM GRAPH
        GROUP HAVING IN INSERT INTO LIMIT LOAD MINUS MOVE NAMED NOT OFFSET
        OPTIONAL ORDER PREFIX REDUCED SELECT SERVICE SILENT TO UNDEF UNION
        USING VALUES WHERE WITH
      ]).freeze

      # Upper-cased copies used for the case-insensitive word lookup below.
      BUILTIN_LOOKUP = Set.new(BUILTINS.map(&:upcase)).freeze
      KEYWORD_LOOKUP = Set.new(KEYWORDS.map(&:upcase)).freeze

      state :root do
        rule %r(\s+), Text::Whitespace
        rule %r(#.*), Comment::Single

        # The triple-quoted forms must be tried before the one-quote forms.
        rule %r("""), Str, :string_double_long
        rule %r('''), Str, :string_single_long
        rule %r("), Str, :string_double
        rule %r('), Str, :string_single

        rule %r([$?]\w+), Name::Variable

        # Prefixed names. Both halves may be empty so that `wd:` (as written
        # in a PREFIX declaration) and the default prefix `:` are accepted;
        # an empty half simply produces no token.
        rule %r(([\w-]*:)([\w-]*)) do |m|
          token Name::Namespace, m[1]
          token Str::Symbol, m[2]
        end

        # IRI references cannot contain whitespace or `<`, which keeps a
        # comparison such as `?a < ?b && ?c > 1` from being read as one IRI.
        rule %r(<[^<>"{}|^`\\\s]*>), Name::Namespace

        # Language tag following a string literal, e.g. @en or @en-GB.
        rule %r(@[a-zA-Z]+(?:-[a-zA-Z0-9]+)*), Name::Attribute

        # Bare words are consumed whole and then classified, so a keyword
        # can never match in the middle of a longer word.
        rule %r([a-zA-Z_]\w*) do |m|
          word = m[0]
          upper = word.upcase

          if KEYWORD_LOOKUP.include?(upper)
            token Keyword
          elsif BUILTIN_LOOKUP.include?(upper)
            token Name::Builtin
          elsif word == 'a'
            # lower-case `a` is the shorthand predicate keyword
            token Keyword
          elsif upper == 'TRUE' || upper == 'FALSE'
            token Keyword::Constant
          else
            token Name
          end
        end

        # Numbers are tried before operators so `-.123` stays one literal.
        rule %r(-?([0-9]+\.[0-9]+|\.[0-9]+|[0-9]+)([eE][+-]?[0-9]+)?), Num
        rule %r([\]\[(){}.,;=]), Punctuation
        rule %r(\^\^|&&|\|\||[-/?*+!<>^|]), Operator
      end

      # Escape sequences shared by every string state.
      state :string_escapes do
        rule %r(\\[tbnrf"'\\]), Str::Escape
        rule %r(\\u\h{4}|\\U\h{8}), Str::Escape
      end

      state :string_double do
        mixin :string_escapes
        rule %r("), Str, :pop!
        # Emit runs of ordinary characters as a single token.
        rule %r([^"\\\n]+), Str
        rule %r(.), Str
      end

      state :string_single do
        mixin :string_escapes
        rule %r('), Str, :pop!
        rule %r([^'\\\n]+), Str
        rule %r(.), Str
      end

      # Triple-quoted strings may span lines and contain lone quotes.
      state :string_double_long do
        mixin :string_escapes
        rule %r("""), Str, :pop!
        rule %r([^"\\]+), Str
        rule %r(["\\]), Str
      end

      state :string_single_long do
        mixin :string_escapes
        rule %r('''), Str, :pop!
        rule %r([^'\\]+), Str
        rule %r(['\\]), Str
      end
    end
  end
end
18 changes: 18 additions & 0 deletions spec/lexers/sparql_spec.rb
@@ -0,0 +1,18 @@
# -*- coding: utf-8 -*- #
# frozen_string_literal: true

# Specs covering how the SPARQL lexer is selected by Rouge's guessers.
describe(Rouge::Lexers::SPARQL) do
  # A fresh lexer instance is built lazily for each example.
  let(:subject) do
    Rouge::Lexers::SPARQL.new
  end

  describe('guessing') do
    # NOTE(review): assert_guess is presumably provided by this support
    # module; it is not defined in this file.
    include Support::Guessing

    it('guesses by filename') { assert_guess(filename: 'foo.rq') }

    it('guesses by mimetype') { assert_guess(mimetype: 'application/sparql-query') }
  end
end
41 changes: 41 additions & 0 deletions spec/visual/samples/sparql
@@ -0,0 +1,41 @@
# Sample 1 exercises: GROUP_CONCAT with DISTINCT and a SEPARATOR string,
# AS aliases, VALUES lists of prefixed names, `;`-chained predicates,
# FILTER with a nested builtin call, a SERVICE block, trailing comments,
# and the two-word GROUP BY / ORDER BY clauses.
#Artists entering in public domain by countries
#added before 2017-02

SELECT ?item ?itemLabel ?genderLabel (GROUP_CONCAT(DISTINCT ?occupationLabel; SEPARATOR=", ") AS ?occupations) (GROUP_CONCAT(DISTINCT ?countryLabel; SEPARATOR=", ") AS ?countries) ?death {
VALUES ?target_country { wd:Q16 wd:Q31 wd:Q39 wd:Q142 } . #countries: Canada, France, Switzerland, Belgium. Remove this line to get worldwide.
VALUES ?occupation { wd:Q483501 wd:Q36834 wd:Q639669 wd:Q753110 wd:Q488205 wd:Q49757 wd:Q4964182 wd:Q1281618 wd:Q36180 wd:Q482980 wd:Q1028181 wd:Q6625963 wd:Q28389 wd:Q1930187 wd:Q33999 wd:Q3282637 wd:Q81096 wd:Q201788 wd:Q18939491 wd:Q486748 wd:Q3658608 wd:Q214917 wd:Q11774202 wd:Q205375 } . #occupation: composer, poet, sculptor, writer, artist, painter, etc.
?item wdt:P31 wd:Q5;
wdt:P21 ?gender;
wdt:P570 ?death;
wdt:P27 ?target_country;
wdt:P27 ?country;
wdt:P106 ?occupation .
FILTER( YEAR( ?death ) = 1966 ) .
SERVICE wikibase:label {
bd:serviceParam wikibase:language "fr,en,ru,el,es,fa" .
?item rdfs:label ?itemLabel .
?gender rdfs:label ?genderLabel .
?occupation rdfs:label ?occupationLabel .
?country rdfs:label ?countryLabel .
} .
} GROUP BY ?item ?itemLabel ?genderLabel ?death ORDER BY ?itemLabel


# Sample 2 exercises: a `$`-prefixed variable, a lower-case builtin and
# keyword (`concat`, `where`), escaped quotes and `\n` inside double- and
# single-quoted strings, a `#` inside a string, a negative decimal with no
# leading digit, the `/` path operator, and a `[ ... ]` bracketed group.
SELECT ?material $VAR (concat("test#\"foo", 'foo\'bar\n') AS ?x_2) (-.123 AS ?num)
where {
?painting wdt:P31/wdt:P279 wd:Q3305213; # end of line comment
p:P186 [ ps:P186 ?material; pq:P518 wd:Q861259 ].
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE]". }
}
ORDER BY DESC(?count)


# Sample 3 exercises: a negative literal with fraction and signed exponent,
# an IRI reference in angle brackets, and a LIMIT value written with `+`.
#illustrates sitelink selection, ";" notation
SELECT ?item ?itemLabel ?article (-200.23e+3 AS ?exp)
WHERE
{
?article schema:about ?item ;
schema:isPartOf <https://species.wikimedia.org/> .
SERVICE wikibase:label { bd:serviceParam wikibase:language "[AUTO_LANGUAGE],en" }
}
LIMIT +200

0 comments on commit a6816d2

Please sign in to comment.