diff --git a/lib/rouge/lexers/llvm.rb b/lib/rouge/lexers/llvm.rb index 797244ff1b..b5ffa4459c 100644 --- a/lib/rouge/lexers/llvm.rb +++ b/lib/rouge/lexers/llvm.rb @@ -15,45 +15,18 @@ class LLVM < RegexLexer identifier = /([-a-zA-Z$._][-a-zA-Z$._0-9]*|#{string})/ def self.keywords - @keywords ||= Set.new %w( - addrspace addrspacecast alias align alignstack allocsize alwaysinline - appending arcp argmemonly arm_aapcs_vfpcc arm_aapcscc arm_apcscc asm - attributes available_externally begin builtin byval c cc ccc cold - coldcc common constant convergent datalayout dbg declare default - define dllexport dllimport end eq exact extern_weak external false - fast fastcc gc global hidden inaccessiblemem_or_argmemonly - inaccessiblememonly inbounds inlinehint inreg internal jumptable - landingpad linker_private linkonce linkonce_odr minsize module naked - ne nest ninf nnan no-jump-tables noalias nobuiltin nocapture - nocf_check noduplicate noimplicitfloat noinline nonlazybind norecurse - noredzone noredzone noreturn nounwind nsw nsz null nuw oeq oge ogt - ole olt one opaque optforfuzzing optnone optsize ord personality - private protected ptx_device ptx_kernel readnone readonly - returns_twice safestack sanitize_address sanitize_hwaddress - sanitize_memory sanitize_thread section sge sgt shadowcallstack - sideeffect signext sle slt speculatable speculative_load_hardening - sret ssp sspreq sspstrong strictfp tail target thread_local to triple - true type ueq uge ugt ule ult undef une unnamed_addr uno uwtable - volatile weak weak_odr writeonly x x86_fastcallcc x86_stdcallcc - zeroext zeroinitializer - ) + Kernel::load File.join(Lexers::BASE_DIR, "llvm/keywords.rb") + keywords end def self.instructions - @instructions ||= Set.new %w( - add alloca and ashr bitcast br call catch cleanup extractelement - extractvalue fadd fcmp fdiv fmul fpext fptosi fptoui fptrunc free - frem fsub getelementptr getresult icmp insertelement insertvalue - inttoptr invoke load lshr malloc mul or phi 
ptrtoint resume ret sdiv - select sext shl shufflevector sitofp srem store sub switch trunc udiv - uitofp unreachable unwind urem va_arg xor zext - ) + Kernel::load File.join(Lexers::BASE_DIR, "llvm/keywords.rb") + instructions end def self.types - @types ||= Set.new %w( - double float fp128 half label metadata ppc_fp128 void x86_fp80 x86mmx - ) + Kernel::load File.join(Lexers::BASE_DIR, "llvm/keywords.rb") + types end state :basic do diff --git a/lib/rouge/lexers/llvm/keywords.rb b/lib/rouge/lexers/llvm/keywords.rb new file mode 100644 index 0000000000..0c0db22ea3 --- /dev/null +++ b/lib/rouge/lexers/llvm/keywords.rb @@ -0,0 +1,25 @@ +# encoding: utf-8 +# frozen_string_literal: true + +# DO NOT EDIT +# This file is automatically generated by `rake builtins:llvm`. +# See tasks/builtins/llvm.rake for more info. + +module Rouge + module Lexers + class LLVM + def self.keywords + @keywords ||= Set.new ["aarch64_sve_vector_pcs", "aarch64_vector_pcs", "acq_rel", "acquire", "addrspace", "afn", "alias", "aliasee", "align", "alignLog2", "alignstack", "allOnes", "allocsize", "alwaysInline", "alwaysinline", "amdgpu_cs", "amdgpu_es", "amdgpu_gs", "amdgpu_hs", "amdgpu_kernel", "amdgpu_ls", "amdgpu_ps", "amdgpu_vs", "any", "anyregcc", "appending", "arcp", "argmemonly", "args", "arm_aapcs_vfpcc", "arm_aapcscc", "arm_apcscc", "asm", "atomic", "attributes", "available_externally", "avr_intrcc", "avr_signalcc", "bit", "bitMask", "blockaddress", "branchFunnel", "builtin", "byArg", "byte", "byteArray", "byval", "c", "callee", "caller", "calls", "canAutoHide", "catch", "cc", "ccc", "cfguard_checkcc", "cleanup", "cold", "coldcc", "comdat", "common", "constant", "contract", "convergent", "critical", "cxx_fast_tlscc", "datalayout", "declare", "default", "define", "deplibs", "dereferenceable", "dereferenceable_or_null", "distinct", "dllexport", "dllimport", "dsoLocal", "dso_local", "dso_preemptable", "eq", "exact", "exactmatch", "extern_weak", "external", "externally_initialized", "false", 
"fast", "fastcc", "filter", "flags", "from", "funcFlags", "function", "gc", "ghccc", "global", "guid", "gv", "hash", "hhvm_ccc", "hhvmcc", "hidden", "hot", "hotness", "ifunc", "immarg", "inaccessiblemem_or_argmemonly", "inaccessiblememonly", "inalloca", "inbounds", "indir", "info", "initialexec", "inline", "inlineBits", "inlinehint", "inrange", "inreg", "insts", "intel_ocl_bicc", "inteldialect", "internal", "jumptable", "kind", "largest", "linkage", "linkonce", "linkonce_odr", "live", "local_unnamed_addr", "localdynamic", "localexec", "max", "min", "minsize", "module", "monotonic", "msp430_intrcc", "musttail", "naked", "name", "nand", "ne", "nest", "ninf", "nnan", "noInline", "noRecurse", "noalias", "nobuiltin", "nocapture", "nocf_check", "noduplicate", "noduplicates", "nofree", "noimplicitfloat", "noinline", "none", "nonlazybind", "nonnull", "norecurse", "noredzone", "noreturn", "nosync", "notEligibleToImport", "notail", "nounwind", "nsw", "nsz", "null", "nuw", "oeq", "offset", "oge", "ogt", "ole", "olt", "one", "opaque", "optforfuzzing", "optnone", "optsize", "ord", "partition", "path", "personality", "prefix", "preserve_allcc", "preserve_mostcc", "private", "prologue", "protected", "ptx_device", "ptx_kernel", "readNone", "readOnly", "readnone", "readonly", "reassoc", "refs", "relbf", "release", "resByArg", "returnDoesNotAlias", "returned", "returns_twice", "safestack", "samesize", "sanitize_address", "sanitize_hwaddress", "sanitize_memory", "sanitize_memtag", "sanitize_thread", "section", "seq_cst", "sge", "sgt", "shadowcallstack", "sideeffect", "signext", "single", "singleImpl", "singleImplName", "sizeM1", "sizeM1BitWidth", "sle", "slt", "source_filename", "speculatable", "speculative_load_hardening", "spir_func", "spir_kernel", "sret", "ssp", "sspreq", "sspstrong", "strictfp", "summaries", "summary", "swiftcc", "swifterror", "swiftself", "syncscope", "tail", "tailcc", "target", "thread_local", "to", "triple", "true", "type", "typeCheckedLoadConstVCalls", 
"typeCheckedLoadVCalls", "typeIdInfo", "typeTestAssumeConstVCalls", "typeTestAssumeVCalls", "typeTestRes", "typeTests", "typeid", "typeidCompatibleVTable", "ueq", "uge", "ugt", "ule", "ult", "umax", "umin", "undef", "une", "uniformRetVal", "uniqueRetVal", "unknown", "unnamed_addr", "uno", "unordered", "unsat", "unwind", "uselistorder", "uselistorder_bb", "uwtable", "vFuncId", "vTableFuncs", "varFlags", "variable", "vcall_visibility", "virtFunc", "virtualConstProp", "volatile", "vscale", "weak", "weak_odr", "webkit_jscc", "willreturn", "win64cc", "within", "wpdRes", "wpdResolutions", "writeonly", "x", "x86_64_sysvcc", "x86_fastcallcc", "x86_intrcc", "x86_regcallcc", "x86_stdcallcc", "x86_thiscallcc", "x86_vectorcallcc", "xchg", "zeroext", "zeroinitializer"] + end + + def self.types + @types ||= Set.new ["double", "float", "fp128", "half", "label", "metadata", "ppc_fp128", "token", "void", "x86_fp80", "x86_mmx"] + end + + def self.instructions + @instructions ||= Set.new ["add", "addrspacecast", "alloca", "and", "ashr", "atomicrmw", "bitcast", "br", "call", "callbr", "catchpad", "catchret", "catchswitch", "cleanuppad", "cleanupret", "cmpxchg", "extractelement", "extractvalue", "fadd", "fcmp", "fdiv", "fence", "fmul", "fneg", "fpext", "fptosi", "fptoui", "fptrunc", "freeze", "frem", "fsub", "getelementptr", "icmp", "indirectbr", "insertelement", "insertvalue", "inttoptr", "invoke", "landingpad", "load", "lshr", "mul", "or", "phi", "ptrtoint", "resume", "ret", "sdiv", "select", "sext", "shl", "shufflevector", "sitofp", "srem", "store", "sub", "switch", "trunc", "udiv", "uitofp", "unreachable", "urem", "va_arg", "xor", "zext"] + end + + end + end +end \ No newline at end of file diff --git a/spec/visual/samples/llvm b/spec/visual/samples/llvm index f2bde2a93e..d7aad85ff2 100644 --- a/spec/visual/samples/llvm +++ b/spec/visual/samples/llvm @@ -84,3 +84,5 @@ attributes #1 = { "no-sse" } define void @f() #0 #1 { ... 
# encoding: utf-8
# frozen_string_literal: true

require 'open-uri'

# Upstream C++ lexer source that the keyword tables are scraped from.
# NOTE(review): llvm-project's default branch is believed to be `main` now;
# confirm that this `master` URL still resolves before relying on the task.
LLVM_SYNTAX_URI = "https://raw.githubusercontent.com/llvm/llvm-project/master/llvm/lib/AsmParser/LLLexer.cpp"
# Destination of the generated Ruby file (relative to the working directory).
LLVM_KEYWORDS_FILE = "./lib/rouge/lexers/llvm/keywords.rb"

namespace :builtins do
  # Downloads LLLexer.cpp, extracts its keyword tables and regenerates
  # lib/rouge/lexers/llvm/keywords.rb.
  task :llvm do
    input = URI.open(LLVM_SYNTAX_URI, &:read)
    generator = Rouge::Tasks::Builtins::LLVM.new
    keywords = generator.extract_keywords(input)

    # Refuse to overwrite the keywords file with an incomplete table: the
    # lexer loads that file and then calls the method it expects the file to
    # define, so a category missing from the output would make that call
    # recurse endlessly.
    missing = Rouge::Tasks::Builtins::LLVM::KINDS.values - keywords.keys
    unless missing.empty?
      raise "no #{missing.join(', ')} found in #{LLVM_SYNTAX_URI}; not writing #{LLVM_KEYWORDS_FILE}"
    end

    File.write(LLVM_KEYWORDS_FILE, generator.render_output(keywords))
  end
end

module Rouge
  module Tasks
    module Builtins
      # Scrapes keyword tables out of LLVM's LLLexer.cpp and renders them as
      # the Ruby source of lib/rouge/lexers/llvm/keywords.rb.
      class LLVM
        # Maps the C++ macro whose `#define` opens a table to the name of the
        # generated class method that will hold the words of that table.
        KINDS = {
          "KEYWORD"     => "keywords",
          "TYPEKEYWORD" => "types",
          "INSTKEYWORD" => "instructions",
        }.freeze

        # Collects the words passed to the KEYWORD / TYPEKEYWORD / INSTKEYWORD
        # macros.
        #
        # @param input [String] text of LLLexer.cpp
        # @return [Hash{String => Array<String>}] category name ("keywords",
        #   "types", "instructions") mapped to its sorted, duplicate-free
        #   words; categories that were never seen are absent
        def extract_keywords(input)
          found = {}
          kind = nil

          # The input is split on ";" rather than on newlines, so each chunk
          # ends at a statement terminator of the C++ source.
          input.each_line(";") do |chunk|
            definition = chunk.match(/#define (.*?)\(/)
            if definition
              # Any other macro definition (lookup yields nil) ends the
              # current table; following chunks are ignored until one of the
              # known macros is defined again.
              kind = KINDS[definition[1]]
              next
            end

            next unless kind

            # Matches all three macro names (they share the KEYWORD suffix)
            # and captures the first argument, skipping an opening quote.
            use = chunk.match(/KEYWORD\("?([^)",]+)/)
            (found[kind] ||= []) << use[1] if use
          end

          found.transform_values { |words| words.uniq.sort }
        end

        # Renders the Ruby source for the generated keywords file.
        #
        # With a block, every output line (without line terminator) is
        # yielded in order. Without a block, the complete file content is
        # returned with every line, including the last, newline-terminated.
        #
        # @param keywords [Hash{String => Array<String>}] as returned by
        #   {#extract_keywords}
        # @yieldparam line [String] one line of generated source
        # @return [String] the rendered file when no block is given
        def render_output(keywords)
          unless block_given?
            return enum_for(:render_output, keywords).map { |line| "#{line}\n" }.join
          end

          yield "# encoding: utf-8"
          yield "# frozen_string_literal: true"
          yield ""
          yield "# DO NOT EDIT"
          yield "# This file is automatically generated by `rake builtins:llvm`."
          yield "# See tasks/builtins/llvm.rake for more info."
          yield ""
          yield "module Rouge"
          yield "  module Lexers"
          yield "    class LLVM"
          keywords.each_with_index do |(kind, words), index|
            # Blank separator between methods only, not before the class `end`.
            yield "" unless index.zero?
            yield "      def self.#{kind}"
            yield "        @#{kind} ||= Set.new #{words.inspect}"
            yield "      end"
          end
          yield "    end"
          yield "  end"
          yield "end"
        end
      end
    end
  end
end