From 90e417358603256927257c96cf2a50dc7cb4a4f0 Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Sat, 18 Apr 2020 17:03:51 +0900 Subject: [PATCH 1/5] Add example to visual sample --- spec/visual/samples/llvm | 2 ++ 1 file changed, 2 insertions(+) diff --git a/spec/visual/samples/llvm b/spec/visual/samples/llvm index f2bde2a93e..d7aad85ff2 100644 --- a/spec/visual/samples/llvm +++ b/spec/visual/samples/llvm @@ -84,3 +84,5 @@ attributes #1 = { "no-sse" } define void @f() #0 #1 { ... } %1 = addrspacecast i32* %a to i32 addrspace(1)* + +source_filename = "/path/to/source.c" From 9832aa3dfbe3cdc352b1857a6246f231ca349642 Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Sat, 18 Apr 2020 17:04:16 +0900 Subject: [PATCH 2/5] Add keywords --- lib/rouge/lexers/llvm.rb | 76 ++++++++++++++++++++++++++-------------- 1 file changed, 49 insertions(+), 27 deletions(-) diff --git a/lib/rouge/lexers/llvm.rb b/lib/rouge/lexers/llvm.rb index 797244ff1b..8037e1bd70 100644 --- a/lib/rouge/lexers/llvm.rb +++ b/lib/rouge/lexers/llvm.rb @@ -16,43 +16,65 @@ class LLVM < RegexLexer def self.keywords @keywords ||= Set.new %w( - addrspace addrspacecast alias align alignstack allocsize alwaysinline - appending arcp argmemonly arm_aapcs_vfpcc arm_aapcscc arm_apcscc asm - attributes available_externally begin builtin byval c cc ccc cold - coldcc common constant convergent datalayout dbg declare default - define dllexport dllimport end eq exact extern_weak external false - fast fastcc gc global hidden inaccessiblemem_or_argmemonly - inaccessiblememonly inbounds inlinehint inreg internal jumptable - landingpad linker_private linkonce linkonce_odr minsize module naked - ne nest ninf nnan no-jump-tables noalias nobuiltin nocapture - nocf_check noduplicate noimplicitfloat noinline nonlazybind norecurse - noredzone noredzone noreturn nounwind nsw nsz null nuw oeq oge ogt - ole olt one opaque optforfuzzing optnone optsize ord personality - private protected ptx_device ptx_kernel readnone readonly - returns_twice safestack sanitize_address sanitize_hwaddress - sanitize_memory sanitize_thread section sge sgt shadowcallstack - sideeffect signext sle slt speculatable speculative_load_hardening - sret ssp sspreq sspstrong strictfp tail target thread_local to triple - true type ueq uge ugt ule ult undef une unnamed_addr uno uwtable - volatile weak weak_odr writeonly x x86_fastcallcc x86_stdcallcc - zeroext zeroinitializer + acq_rel acquire addrspace addrspacecast afn alias aliasee align + alignLog2 alignstack allOnes allocsize alwaysinline amdgpu_cs + amdgpu_es amdgpu_gs amdgpu_hs amdgpu_kernel amdgpu_ls amdgpu_ps + amdgpu_vs any anyregcc appending arcp argmemonly args arm_aapcs_vfpcc + arm_aapcscc arm_apcscc asm atomic attributes available_externally + begin bit bitMask blockaddress branchFunnel builtin byArg byte + byteArray byval c callee caller calls cc ccc cold coldcc comdat + common constant contract convergent critical cxx_fast_tlscc + datalayout dbg declare default define dereferenceable + dereferenceable_or_null distinct dllexport dllimport dsoLocal + dso_local dso_preemptable end eq exact exactmatch extern_weak + external externally_initialized false fast fastcc filter flags from + funcFlags gc global guid gv hash hidden hot hotness ifunc immarg + inaccessiblemem_or_argmemonly inaccessiblememonly inalloca inbounds + indir info initialexec inlineBits inlinehint inrange inreg insts + inteldialect internal jumptable kind landingpad largest linkage + linker_private linkonce linkonce_odr live local_unnamed_addr + localdynamic localexec max min minsize module monotonic musttail + naked name nand ne nest ninf nnan no-jump-tables noRecurse noalias + nobuiltin nocapture nocf_check noduplicate noduplicates + noimplicitfloat noinline nonlazybind nonnull norecurse noredzone + noredzone noreturn notEligibleToImport notail nounwind nsw nsz null + nuw oeq offset oge ogt ole olt one opaque optforfuzzing optnone + optsize ord path personality prefix preserve_allcc preserve_mostcc + private prologue protected ptx_device ptx_kernel readNone readOnly + readnone readonly reassoc refs relbf release resByArg + returnDoesNotAlias returned returns_twice safestack samesize + sanitize_address sanitize_hwaddress sanitize_memory sanitize_thread + section seq_cst sge sgt shadowcallstack sideeffect signext single + singleImpl singleImplName sizeM1 sizeM1BitWidth sle slt + source_filename speculatable speculative_load_hardening sret ssp + sspreq sspstrong strictfp summaries summary swiftcc swifterror + swiftself syncscope tail target thread_local to triple true type + typeCheckedLoadConstVCalls typeCheckedLoadVCalls typeIdInfo + typeTestAssumeConstVCalls typeTestAssumeVCalls typeTestRes typeTests + typeid ueq uge ugt ule ult umax umin undef une uniformRetVal + uniqueRetVal unnamed_addr uno unordered unsat uselistorder + uselistorder_bb uwtable vFuncId virtualConstProp volatile weak + weak_odr webkit_jscc willreturn within wpdRes wpdResolutions + writeonly x x86_fastcallcc x86_stdcallcc xchg zeroext zeroinitializer ) end def self.instructions @instructions ||= Set.new %w( - add alloca and ashr bitcast br call catch cleanup extractelement - extractvalue fadd fcmp fdiv fmul fpext fptosi fptoui fptrunc free - frem fsub getelementptr getresult icmp insertelement insertvalue - inttoptr invoke load lshr malloc mul or phi ptrtoint resume ret sdiv - select sext shl shufflevector sitofp srem store sub switch trunc udiv - uitofp unreachable unwind urem va_arg xor zext + add alloca and ashr atomicrmw bitcast br call catch catchpad catchret + catchswitch cleanup cleanuppad cleanupret cmpxchg extractelement + extractvalue fadd fcmp fdiv fence fmul fpext fptosi fptoui fptrunc + free frem fsub getelementptr getresult icmp indirectbr insertelement + insertvalue inttoptr invoke load lshr malloc mul or phi ptrtoint + resume ret sdiv select sext shl shufflevector sitofp srem store sub + switch trunc udiv uitofp unreachable unwind urem va_arg xor zext ) end def self.types @types ||= Set.new %w( - double float fp128 half label metadata ppc_fp128 void x86_fp80 x86mmx + double float fp128 half label metadata ppc_fp128 token void x86_fp80 + x86_mmx x86mmx ) end From d6b04424c8d2edac6b08977f4b820a5f97fa8852 Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Sun, 19 Apr 2020 11:40:30 +0900 Subject: [PATCH 3/5] Automate keyword generation with Rake task --- lib/rouge/lexers/llvm/keywords.rb | 25 +++++++++++ tasks/builtins/llvm.rake | 74 +++++++++++++++++++++++++++++++ 2 files changed, 99 insertions(+) create mode 100644 lib/rouge/lexers/llvm/keywords.rb create mode 100644 tasks/builtins/llvm.rake diff --git a/lib/rouge/lexers/llvm/keywords.rb b/lib/rouge/lexers/llvm/keywords.rb new file mode 100644 index 0000000000..0c0db22ea3 --- /dev/null +++ b/lib/rouge/lexers/llvm/keywords.rb @@ -0,0 +1,25 @@ +# encoding: utf-8 +# frozen_string_literal: true + +# DO NOT EDIT +# This file is automatically generated by `rake builtins:llvm`. +# See tasks/builtins/llvm.rake for more info. + +module Rouge + module Lexers + class LLVM + def self.keywords + @keywords ||= Set.new ["aarch64_sve_vector_pcs", "aarch64_vector_pcs", "acq_rel", "acquire", "addrspace", "afn", "alias", "aliasee", "align", "alignLog2", "alignstack", "allOnes", "allocsize", "alwaysInline", "alwaysinline", "amdgpu_cs", "amdgpu_es", "amdgpu_gs", "amdgpu_hs", "amdgpu_kernel", "amdgpu_ls", "amdgpu_ps", "amdgpu_vs", "any", "anyregcc", "appending", "arcp", "argmemonly", "args", "arm_aapcs_vfpcc", "arm_aapcscc", "arm_apcscc", "asm", "atomic", "attributes", "available_externally", "avr_intrcc", "avr_signalcc", "bit", "bitMask", "blockaddress", "branchFunnel", "builtin", "byArg", "byte", "byteArray", "byval", "c", "callee", "caller", "calls", "canAutoHide", "catch", "cc", "ccc", "cfguard_checkcc", "cleanup", "cold", "coldcc", "comdat", "common", "constant", "contract", "convergent", "critical", "cxx_fast_tlscc", "datalayout", "declare", "default", "define", "deplibs", "dereferenceable", "dereferenceable_or_null", "distinct", "dllexport", "dllimport", "dsoLocal", "dso_local", "dso_preemptable", "eq", "exact", "exactmatch", "extern_weak", "external", "externally_initialized", "false", "fast", "fastcc", "filter", "flags", "from", "funcFlags", "function", "gc", "ghccc", "global", "guid", "gv", "hash", "hhvm_ccc", "hhvmcc", "hidden", "hot", "hotness", "ifunc", "immarg", "inaccessiblemem_or_argmemonly", "inaccessiblememonly", "inalloca", "inbounds", "indir", "info", "initialexec", "inline", "inlineBits", "inlinehint", "inrange", "inreg", "insts", "intel_ocl_bicc", "inteldialect", "internal", "jumptable", "kind", "largest", "linkage", "linkonce", "linkonce_odr", "live", "local_unnamed_addr", "localdynamic", "localexec", "max", "min", "minsize", "module", "monotonic", "msp430_intrcc", "musttail", "naked", "name", "nand", "ne", "nest", "ninf", "nnan", "noInline", "noRecurse", "noalias", "nobuiltin", "nocapture", "nocf_check", "noduplicate", "noduplicates", "nofree", "noimplicitfloat", "noinline", "none", "nonlazybind", "nonnull", "norecurse", "noredzone", "noreturn", "nosync", "notEligibleToImport", "notail", "nounwind", "nsw", "nsz", "null", "nuw", "oeq", "offset", "oge", "ogt", "ole", "olt", "one", "opaque", "optforfuzzing", "optnone", "optsize", "ord", "partition", "path", "personality", "prefix", "preserve_allcc", "preserve_mostcc", "private", "prologue", "protected", "ptx_device", "ptx_kernel", "readNone", "readOnly", "readnone", "readonly", "reassoc", "refs", "relbf", "release", "resByArg", "returnDoesNotAlias", "returned", "returns_twice", "safestack", "samesize", "sanitize_address", "sanitize_hwaddress", "sanitize_memory", "sanitize_memtag", "sanitize_thread", "section", "seq_cst", "sge", "sgt", "shadowcallstack", "sideeffect", "signext", "single", "singleImpl", "singleImplName", "sizeM1", "sizeM1BitWidth", "sle", "slt", "source_filename", "speculatable", "speculative_load_hardening", "spir_func", "spir_kernel", "sret", "ssp", "sspreq", "sspstrong", "strictfp", "summaries", "summary", "swiftcc", "swifterror", "swiftself", "syncscope", "tail", "tailcc", "target", "thread_local", "to", "triple", "true", "type", "typeCheckedLoadConstVCalls", "typeCheckedLoadVCalls", "typeIdInfo", "typeTestAssumeConstVCalls", "typeTestAssumeVCalls", "typeTestRes", "typeTests", "typeid", "typeidCompatibleVTable", "ueq", "uge", "ugt", "ule", "ult", "umax", "umin", "undef", "une", "uniformRetVal", "uniqueRetVal", "unknown", "unnamed_addr", "uno", "unordered", "unsat", "unwind", "uselistorder", "uselistorder_bb", "uwtable", "vFuncId", "vTableFuncs", "varFlags", "variable", "vcall_visibility", "virtFunc", "virtualConstProp", "volatile", "vscale", "weak", "weak_odr", "webkit_jscc", "willreturn", "win64cc", "within", "wpdRes", "wpdResolutions", "writeonly", "x", "x86_64_sysvcc", "x86_fastcallcc", "x86_intrcc", "x86_regcallcc", "x86_stdcallcc", "x86_thiscallcc", "x86_vectorcallcc", "xchg", "zeroext", "zeroinitializer"] + end + + def self.types + @types ||= Set.new ["double", "float", "fp128", "half", "label", "metadata", "ppc_fp128", "token", "void", "x86_fp80", "x86_mmx"] + end + + def self.instructions + @instructions ||= Set.new ["add", "addrspacecast", "alloca", "and", "ashr", "atomicrmw", "bitcast", "br", "call", "callbr", "catchpad", "catchret", "catchswitch", "cleanuppad", "cleanupret", "cmpxchg", "extractelement", "extractvalue", "fadd", "fcmp", "fdiv", "fence", "fmul", "fneg", "fpext", "fptosi", "fptoui", "fptrunc", "freeze", "frem", "fsub", "getelementptr", "icmp", "indirectbr", "insertelement", "insertvalue", "inttoptr", "invoke", "landingpad", "load", "lshr", "mul", "or", "phi", "ptrtoint", "resume", "ret", "sdiv", "select", "sext", "shl", "shufflevector", "sitofp", "srem", "store", "sub", "switch", "trunc", "udiv", "uitofp", "unreachable", "urem", "va_arg", "xor", "zext"] + end + + end + end +end \ No newline at end of file diff --git a/tasks/builtins/llvm.rake b/tasks/builtins/llvm.rake new file mode 100644 index 0000000000..701c9b1075 --- /dev/null +++ b/tasks/builtins/llvm.rake @@ -0,0 +1,74 @@ +# encoding: utf-8 +# frozen_string_literal: true + +require 'open-uri' + +LLVM_SYNTAX_URI = "https://raw.githubusercontent.com/llvm/llvm-project/master/llvm/lib/AsmParser/LLLexer.cpp" +LLVM_KEYWORDS_FILE = "./lib/rouge/lexers/llvm/keywords.rb" + +module Rouge::Tasks::Builtins + class LLVM + def extract_keywords(input) + keywords = Hash.new { |h,k| h[k] = Array.new } + kind = nil + + input.each_line(";") do |line| + if line =~ /#define (.*?)\(/ + case $1 + when "KEYWORD" + kind = "keywords" + when "TYPEKEYWORD" + kind = "types" + when "INSTKEYWORD" + kind = "instructions" + else + kind = nil + end + + next + end + + next unless kind && line =~ /KEYWORD\("?([^)",]+)/ + + keywords[kind].push $1 + end + + keywords.transform_values! { |v| v.sort } + end + + def render_output(keywords, &b) + return enum_for(:render_output, keywords).to_a.join("\n") unless b + + yield "# encoding: utf-8" + yield "# frozen_string_literal: true" + yield "" + yield "# DO NOT EDIT" + yield "# This file is automatically generated by `rake builtins:llvm`." + yield "# See tasks/builtins/llvm.rake for more info." + yield "" + yield "module Rouge" + yield " module Lexers" + yield " class LLVM" + keywords.each do |k,v| + yield " def self.#{k}" + yield " @#{k} ||= Set.new #{v.inspect}" + yield " end" + yield "" + end + yield " end" + yield " end" + yield "end" + end + end +end + +namespace :builtins do + task :llvm do + input = URI.open(LLVM_SYNTAX_URI) { |f| f.read } + generator = Rouge::Tasks::Builtins::LLVM.new + keywords = generator.extract_keywords(input) + output = generator.render_output(keywords) + + File.write(LLVM_KEYWORDS_FILE, output) + end +end From 8a7d2dbb25b7b7bbf55b2e5b138adb15f76f3093 Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Sun, 19 Apr 2020 11:41:00 +0900 Subject: [PATCH 4/5] Update lexer to use automatically generated keywords --- lib/rouge/lexers/llvm.rb | 61 ++++------------------------------------ 1 file changed, 6 insertions(+), 55 deletions(-) diff --git a/lib/rouge/lexers/llvm.rb b/lib/rouge/lexers/llvm.rb index 8037e1bd70..b5ffa4459c 100644 --- a/lib/rouge/lexers/llvm.rb +++ b/lib/rouge/lexers/llvm.rb @@ -15,67 +15,18 @@ class LLVM < RegexLexer identifier = /([-a-zA-Z$._][-a-zA-Z$._0-9]*|#{string})/ def self.keywords - @keywords ||= Set.new %w( - acq_rel acquire addrspace addrspacecast afn alias aliasee align - alignLog2 alignstack allOnes allocsize alwaysinline amdgpu_cs - amdgpu_es amdgpu_gs amdgpu_hs amdgpu_kernel amdgpu_ls amdgpu_ps - amdgpu_vs any anyregcc appending arcp argmemonly args arm_aapcs_vfpcc - arm_aapcscc arm_apcscc asm atomic attributes available_externally - begin bit bitMask blockaddress branchFunnel builtin byArg byte - byteArray byval c callee caller calls cc ccc cold coldcc comdat - common constant contract convergent critical cxx_fast_tlscc - datalayout dbg declare default define dereferenceable - dereferenceable_or_null distinct dllexport dllimport dsoLocal - dso_local dso_preemptable end eq exact exactmatch extern_weak - external externally_initialized false fast fastcc filter flags from - funcFlags gc global guid gv hash hidden hot hotness ifunc immarg - inaccessiblemem_or_argmemonly inaccessiblememonly inalloca inbounds - indir info initialexec inlineBits inlinehint inrange inreg insts - inteldialect internal jumptable kind landingpad largest linkage - linker_private linkonce linkonce_odr live local_unnamed_addr - localdynamic localexec max min minsize module monotonic musttail - naked name nand ne nest ninf nnan no-jump-tables noRecurse noalias - nobuiltin nocapture nocf_check noduplicate noduplicates - noimplicitfloat noinline nonlazybind nonnull norecurse noredzone - noredzone noreturn notEligibleToImport notail nounwind nsw nsz null - nuw oeq offset oge ogt ole olt one opaque optforfuzzing optnone - optsize ord path personality prefix preserve_allcc preserve_mostcc - private prologue protected ptx_device ptx_kernel readNone readOnly - readnone readonly reassoc refs relbf release resByArg - returnDoesNotAlias returned returns_twice safestack samesize - sanitize_address sanitize_hwaddress sanitize_memory sanitize_thread - section seq_cst sge sgt shadowcallstack sideeffect signext single - singleImpl singleImplName sizeM1 sizeM1BitWidth sle slt - source_filename speculatable speculative_load_hardening sret ssp - sspreq sspstrong strictfp summaries summary swiftcc swifterror - swiftself syncscope tail target thread_local to triple true type - typeCheckedLoadConstVCalls typeCheckedLoadVCalls typeIdInfo - typeTestAssumeConstVCalls typeTestAssumeVCalls typeTestRes typeTests - typeid ueq uge ugt ule ult umax umin undef une uniformRetVal - uniqueRetVal unnamed_addr uno unordered unsat uselistorder - uselistorder_bb uwtable vFuncId virtualConstProp volatile weak - weak_odr webkit_jscc willreturn within wpdRes wpdResolutions - writeonly x x86_fastcallcc x86_stdcallcc xchg zeroext zeroinitializer - ) + Kernel::load File.join(Lexers::BASE_DIR, "llvm/keywords.rb") + keywords end def self.instructions - @instructions ||= Set.new %w( - add alloca and ashr atomicrmw bitcast br call catch catchpad catchret - catchswitch cleanup cleanuppad cleanupret cmpxchg extractelement - extractvalue fadd fcmp fdiv fence fmul fpext fptosi fptoui fptrunc - free frem fsub getelementptr getresult icmp indirectbr insertelement - insertvalue inttoptr invoke load lshr malloc mul or phi ptrtoint - resume ret sdiv select sext shl shufflevector sitofp srem store sub - switch trunc udiv uitofp unreachable unwind urem va_arg xor zext - ) + Kernel::load File.join(Lexers::BASE_DIR, "llvm/keywords.rb") + instructions end def self.types - @types ||= Set.new %w( - double float fp128 half label metadata ppc_fp128 token void x86_fp80 - x86_mmx x86mmx - ) + Kernel::load File.join(Lexers::BASE_DIR, "llvm/keywords.rb") + types end state :basic do From 1f10951f71cd0f6b5d7ba3ab5921fc72a20ed62f Mon Sep 17 00:00:00 2001 From: Michael Camilleri Date: Sun, 19 Apr 2020 11:52:47 +0900 Subject: [PATCH 5/5] Use nested modules in Rake file --- tasks/builtins/llvm.rake | 114 ++++++++++++++++++++------------------- 1 file changed, 59 insertions(+), 55 deletions(-) diff --git a/tasks/builtins/llvm.rake b/tasks/builtins/llvm.rake index 701c9b1075..08b297050d 100644 --- a/tasks/builtins/llvm.rake +++ b/tasks/builtins/llvm.rake @@ -6,69 +6,73 @@ require 'open-uri' LLVM_SYNTAX_URI = "https://raw.githubusercontent.com/llvm/llvm-project/master/llvm/lib/AsmParser/LLLexer.cpp" LLVM_KEYWORDS_FILE = "./lib/rouge/lexers/llvm/keywords.rb" -module Rouge::Tasks::Builtins - class LLVM - def extract_keywords(input) - keywords = Hash.new { |h,k| h[k] = Array.new } - kind = nil +namespace :builtins do + task :llvm do + input = URI.open(LLVM_SYNTAX_URI) { |f| f.read } + generator = Rouge::Tasks::Builtins::LLVM.new + keywords = generator.extract_keywords(input) + output = generator.render_output(keywords) - input.each_line(";") do |line| - if line =~ /#define (.*?)\(/ - case $1 - when "KEYWORD" - kind = "keywords" - when "TYPEKEYWORD" - kind = "types" - when "INSTKEYWORD" - kind = "instructions" - else - kind = nil - end + File.write(LLVM_KEYWORDS_FILE, output) + end +end - next - end +module Rouge + module Tasks + module Builtins + class LLVM + def extract_keywords(input) + keywords = Hash.new { |h,k| h[k] = Array.new } + kind = nil - next unless kind && line =~ /KEYWORD\("?([^)",]+)/ + input.each_line(";") do |line| + if line =~ /#define (.*?)\(/ + case $1 + when "KEYWORD" + kind = "keywords" + when "TYPEKEYWORD" + kind = "types" + when "INSTKEYWORD" + kind = "instructions" + else + kind = nil + end - keywords[kind].push $1 - end + next + end - keywords.transform_values! { |v| v.sort } - end + next unless kind && line =~ /KEYWORD\("?([^)",]+)/ - def render_output(keywords, &b) - return enum_for(:render_output, keywords).to_a.join("\n") unless b + keywords[kind].push $1 + end - yield "# encoding: utf-8" - yield "# frozen_string_literal: true" - yield "" - yield "# DO NOT EDIT" - yield "# This file is automatically generated by `rake builtins:llvm`." - yield "# See tasks/builtins/llvm.rake for more info." - yield "" - yield "module Rouge" - yield " module Lexers" - yield " class LLVM" - keywords.each do |k,v| - yield " def self.#{k}" - yield " @#{k} ||= Set.new #{v.inspect}" - yield " end" - yield "" - end - yield " end" - yield " end" - yield "end" - end - end -end + keywords.transform_values! { |v| v.sort } + end -namespace :builtins do - task :llvm do - input = URI.open(LLVM_SYNTAX_URI) { |f| f.read } - generator = Rouge::Tasks::Builtins::LLVM.new - keywords = generator.extract_keywords(input) - output = generator.render_output(keywords) + def render_output(keywords, &b) + return enum_for(:render_output, keywords).to_a.join("\n") unless b - File.write(LLVM_KEYWORDS_FILE, output) + yield "# encoding: utf-8" + yield "# frozen_string_literal: true" + yield "" + yield "# DO NOT EDIT" + yield "# This file is automatically generated by `rake builtins:llvm`." + yield "# See tasks/builtins/llvm.rake for more info." + yield "" + yield "module Rouge" + yield " module Lexers" + yield " class LLVM" + keywords.each do |k,v| + yield " def self.#{k}" + yield " @#{k} ||= Set.new #{v.inspect}" + yield " end" + yield "" + end + yield " end" + yield " end" + yield "end" + end + end + end end end