From 1cc1126c254e194fda635a1981345c3d59bdfe7b Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Tue, 11 Feb 2020 04:20:49 +0900 Subject: [PATCH 1/3] Clean up Varnish files --- lib/rouge/lexers/varnish.rb | 342 +++++++++++++----------------------- spec/lexers/varnish_spec.rb | 1 - 2 files changed, 124 insertions(+), 219 deletions(-) diff --git a/lib/rouge/lexers/varnish.rb b/lib/rouge/lexers/varnish.rb index 15a7921703..5f50e3c884 100644 --- a/lib/rouge/lexers/varnish.rb +++ b/lib/rouge/lexers/varnish.rb @@ -1,224 +1,130 @@ # -*- coding: utf-8 -*- # +# frozen_string_literal: true module Rouge - module Lexers - class Varnish < RegexLexer - aliases 'varnishconf', 'VCL' - title 'Varnish' - desc 'The Varnish (high-performance web accelerator) configuration language' - tag 'varnish' - filenames '*.vcl' - mimetypes 'text/x-varnish' - - LNUM = '[0-9]+' - DNUM = '([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)' - SPACE = '[ \f\n\r\t\v]+' -# IDENT1 = [a-zA-Z] -# IDENT = IDENT1 | [0-9_-] -# VAR = IDENT | '.' - - # backend acl - KEYWORDS = Set.new %w[vcl set unset include import if else elseif elif elsif] - - BUILTIN_FUNCTIONS = Set.new %w[ - ban - call - hash_data - new - regsub - regsuball - return - rollback - std.cache_req_body - std.collect - std.duration - std.fileread - std.healthy - std.integer - std.ip - std.log - std.port - std.querysort - std.random - std.real - std.real2time - std.rollback - std.set_ip_tos - std.strstr - std.syslog - std.time - std.time2integer - std.time2real - std.timestamp - std.tolower - std.toupper - synth - synthetic - ] - - BUILTIN_VARIABLES = Set.new %w[ - bereq - bereq.backend - bereq.between_bytes_timeout - bereq.connect_timeout - bereq.first_byte_timeout - bereq.method - bereq.proto - bereq.retries - bereq.uncacheable - bereq.url - bereq.xid - beresp - beresp.age - beresp.backend - beresp.backend.ip - beresp.backend.name - beresp.do_esi - beresp.do_gunzip - beresp.do_gzip - beresp.do_stream - beresp.grace - beresp.keep - beresp.proto - beresp.reason - beresp.status - beresp.storage_hint - beresp.ttl - beresp.uncacheable - beresp.was_304 - client.identity - client.ip - local.ip - now - obj.age - obj.grace - obj.hits - obj.keep - obj.proto - obj.reason - obj.status - obj.ttl - obj.uncacheable - remote.ip - req - req.backend_hint - req.can_gzip - req.esi - req.esi_level - req.hash_always_miss - req.hash_ignore_busy - req.method - req.proto - req.restarts - req.ttl - req.url - req.xid - resp - resp.proto - resp.reason - resp.status - server.hostname - server.identity - server.ip - ] - - BUILTIN_ROUTINES = Set.new %w[ - backend_error - backend_fetch - backend_response - purge - deliver - fini - hash - hit - init - miss - pass - pipe - recv - synth - ] - - STATES_MAP = { - :root => Text, - :string => Str::Double, - } - - state :default do - rule(/\r\n?|\n/) do - token STATES_MAP[state.name.to_sym] - end - rule(/./) do - token STATES_MAP[state.name.to_sym] - end - end - - state :root do - # long strings ({" ... "}) - rule(%r'\{".*?"}'m, Str::Single) - - # comments - rule(%r'/\*.*?\*/'m, Comment::Multiline) - rule(%r'(?://|#).*', Comment::Single) - - rule(/true|false/, Keyword::Constant) - - # "wildcard variables" - rule(/(?:(?:be)?re(?:sp|q)|obj)\.http\.[a-zA-Z0-9_.-]+/) do - token Name::Variable - end - - rule(/(sub)(#{SPACE})([a-zA-Z0-9_-]+)/) do - groups Keyword, Text, Name::Function - end - - # inline C (C{ ... }C) - rule(/C\{/) do - token Comment::Preproc - push :inline_c - end - - rule(/[a-zA-Z_.-]+/) do |m| - next token Keyword if KEYWORDS.include? m[0] - next token Name::Function if BUILTIN_FUNCTIONS.include? m[0] - next token Name::Variable if BUILTIN_VARIABLES.include? m[0] - token Text - end - - # duration - rule(/(?:#{LNUM}|#{DNUM})(?:ms|[smhdwy])/, Literal::Number::Other) - # size in bytes - rule(/#{LNUM}[KMGT]?B/, Literal::Number::Other) - # literal numeric values (integer/float) - rule(/#{LNUM}/, Num::Integer) - rule(/#{DNUM}/, Num::Float) - - # standard strings - rule(/"/) do |m| - token Str::Double - push :string - end - - rule(%r'[&|+-]{2}|[<=>!*/+-]=|<<|>>|!~|[-+*/%><=!&|~]', Operator) - - rule(/[{}();.,]/, Punctuation) - - mixin :default - end - - state :string do - rule(/"/, Str::Double, :pop!) - rule(/\\[\\"nt]/, Str::Escape) - - mixin :default - end - - state :inline_c do - rule(/}C/, Comment::Preproc, :pop!) - rule(/.*?(?=}C)/m) do - delegate C - end - end + module Lexers + class Varnish < RegexLexer + title 'Varnish' + desc 'The Varnish (high-performance web accelerator) configuration language' + + tag 'varnish' + aliases 'varnishconf', 'VCL' + filenames '*.vcl' + mimetypes 'text/x-varnish' + + LNUM = '[0-9]+' + DNUM = '([0-9]*"."[0-9]+)|([0-9]+"."[0-9]*)' + SPACE = '[ \f\n\r\t\v]+' + + # backend acl + def self.keywords + @keywords ||= Set.new %w[ + vcl set unset include import if else elseif elif elsif + ] + end + + def self.functions + @functions ||= Set.new %w[ + ban call hash_data new regsub regsuball return rollback + std.cache_req_body std.collect std.duration std.fileread std.healthy + std.integer std.ip std.log std.port std.querysort std.random std.real + std.real2time std.rollback std.set_ip_tos std.strstr std.syslog + std.time std.time2integer std.time2real std.timestamp std.tolower + std.toupper synth synthetic + ] + end + + def self.variables + @variables ||= Set.new %w[ + bereq bereq.backend bereq.between_bytes_timeout bereq.connect_timeout + bereq.first_byte_timeout bereq.method bereq.proto bereq.retries + bereq.uncacheable bereq.url bereq.xid beresp beresp.age + beresp.backend beresp.backend.ip beresp.backend.name beresp.do_esi + beresp.do_gunzip beresp.do_gzip beresp.do_stream beresp.grace + beresp.keep beresp.proto beresp.reason beresp.status + beresp.storage_hint beresp.ttl beresp.uncacheable beresp.was_304 + client.identity client.ip local.ip now obj.age obj.grace obj.hits + obj.keep obj.proto obj.reason obj.status obj.ttl obj.uncacheable + remote.ip req req.backend_hint req.can_gzip req.esi req.esi_level + req.hash_always_miss req.hash_ignore_busy req.method req.proto + req.restarts req.ttl req.url req.xid resp resp.proto resp.reason + resp.status server.hostname server.identity server.ip + ] + end + + # This is never used + # def self.routines + # @routines ||= Set.new %w[ + # backend_error backend_fetch backend_response purge deliver fini hash + # hit init miss pass pipe recv synth + # ] + # end + + state :root do + # long strings ({" ... "}) + rule %r/\{".*?"}/m, Str::Single + + # comments + rule %r'/\*.*?\*/'m, Comment::Multiline + rule %r'(?:/|#).*', Comment::Single + + rule %r/true|false/, Keyword::Constant + + # "wildcard variables" + rule %r/(?:(?:be)?re(?:sp|q)|obj)\.http\.[\w.-]+/ do + token Name::Variable end + + rule %r/(sub)(#{SPACE})([\w-]+)/ do + groups Keyword, Text, Name::Function + end + + # inline C (C{ ... }C) + rule %r/C\{/ do + token Comment::Preproc + push :inline_c + end + + rule %r/[a-z_.-]+/i do |m| + next token Keyword if self.class.keywords.include? m[0] + next token Name::Function if self.class.functions.include? m[0] + next token Name::Variable if self.class.variables.include? m[0] + token Text + end + + # duration + rule %r/(?:#{LNUM}|#{DNUM})(?:ms|[smhdwy])/, Num::Other + # size in bytes + rule %r/#{LNUM}[KMGT]?B/, Num::Other + # literal numeric values (integer/float) + rule %r/#{LNUM}/, Num::Integer + rule %r/#{DNUM}/, Num::Float + + # standard strings + rule %r/"/, Str::Double, :string + + rule %r'[&|+-]{2}|[<=>!*/+-]=|<<|>>|!~|[-+*/%><=!&|~]', Operator + + rule %r/[{}();.,]/, Punctuation + + rule %r/\r\n?|\n/, Text + rule %r/./, Text + end + + state :string do + rule %r/"/, Str::Double, :pop! + rule %r/\\[\\"nt]/, Str::Escape + + rule %r/\r\n?|\n/, Str::Double + rule %r/./, Str::Double + end + + state :inline_c do + rule %r/}C/, Comment::Preproc, :pop! + rule %r/.*?(?=}C)/m do + delegate C + end + end end + end end diff --git a/spec/lexers/varnish_spec.rb b/spec/lexers/varnish_spec.rb index 61c20a9ef3..12f527e1fe 100644 --- a/spec/lexers/varnish_spec.rb +++ b/spec/lexers/varnish_spec.rb @@ -6,7 +6,6 @@ it 'guesses by filename' do assert_guess :filename => 'varnish.vcl' - assert_guess :filename => 'builtin.vcl' end it 'guesses by mimetype' do From 55277acd1efb8030e21c9e8bda30064062441a90 Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Tue, 11 Feb 2020 09:25:48 +0900 Subject: [PATCH 2/3] Add missing keywords --- lib/rouge/lexers/varnish.rb | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/lib/rouge/lexers/varnish.rb b/lib/rouge/lexers/varnish.rb index 5f50e3c884..ff45a5a6ac 100644 --- a/lib/rouge/lexers/varnish.rb +++ b/lib/rouge/lexers/varnish.rb @@ -19,7 +19,8 @@ class Varnish < RegexLexer # backend acl def self.keywords @keywords ||= Set.new %w[ - vcl set unset include import if else elseif elif elsif + vcl set unset include import if else elseif elif elsif director probe + backend acl ] end From b7f14dfcfe1ced775d58de919f04c6504532711f Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Wed, 12 Feb 2020 03:46:29 +0900 Subject: [PATCH 3/3] Fix rule for single-line comments --- lib/rouge/lexers/varnish.rb | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/lib/rouge/lexers/varnish.rb b/lib/rouge/lexers/varnish.rb index ff45a5a6ac..98e66fc9dd 100644 --- a/lib/rouge/lexers/varnish.rb +++ b/lib/rouge/lexers/varnish.rb @@ -67,7 +67,7 @@ def self.variables # comments rule %r'/\*.*?\*/'m, Comment::Multiline - rule %r'(?:/|#).*', Comment::Single + rule %r'(?://|#).*', Comment::Single rule %r/true|false/, Keyword::Constant