Skip to content
This repository has been archived by the owner on Sep 8, 2023. It is now read-only.

Commit

Permalink
Add Rake task to generate keywords for LLVM lexer (rouge-ruby#1505)
Browse files Browse the repository at this point in the history
The list of keywords that is recognised in the current LLVM lexer is
not as complete as in some other syntax highlighting libraries (such as 
Pygments). This commit adds a Rake task that checks a source file in
the public LLVM repo and generates a keyword file that is loaded on
request when the LLVM lexer is used.
  • Loading branch information
pyrmont authored and mattt committed May 21, 2020
1 parent b1c85bb commit 723a265
Show file tree
Hide file tree
Showing 4 changed files with 111 additions and 33 deletions.
39 changes: 6 additions & 33 deletions lib/rouge/lexers/llvm.rb
Expand Up @@ -15,45 +15,18 @@ class LLVM < RegexLexer
identifier = /([-a-zA-Z$._][-a-zA-Z$._0-9]*|#{string})/

# Returns the keyword names for the LLVM lexer.
# NOTE(review): this span is a rendered diff whose +/- markers are not visible.
# Presumably the inline %w(...) list is the removed side and the Kernel::load
# line plus the trailing `keywords` call are the added side -- confirm against
# the commit before treating this as a single method body.
def self.keywords
@keywords ||= Set.new %w(
addrspace addrspacecast alias align alignstack allocsize alwaysinline
appending arcp argmemonly arm_aapcs_vfpcc arm_aapcscc arm_apcscc asm
attributes available_externally begin builtin byval c cc ccc cold
coldcc common constant convergent datalayout dbg declare default
define dllexport dllimport end eq exact extern_weak external false
fast fastcc gc global hidden inaccessiblemem_or_argmemonly
inaccessiblememonly inbounds inlinehint inreg internal jumptable
landingpad linker_private linkonce linkonce_odr minsize module naked
ne nest ninf nnan no-jump-tables noalias nobuiltin nocapture
nocf_check noduplicate noimplicitfloat noinline nonlazybind norecurse
noredzone noredzone noreturn nounwind nsw nsz null nuw oeq oge ogt
ole olt one opaque optforfuzzing optnone optsize ord personality
private protected ptx_device ptx_kernel readnone readonly
returns_twice safestack sanitize_address sanitize_hwaddress
sanitize_memory sanitize_thread section sge sgt shadowcallstack
sideeffect signext sle slt speculatable speculative_load_hardening
sret ssp sspreq sspstrong strictfp tail target thread_local to triple
true type ueq uge ugt ule ult undef une unnamed_addr uno uwtable
volatile weak weak_odr writeonly x x86_fastcallcc x86_stdcallcc
zeroext zeroinitializer
)
# Evaluates llvm/keywords.rb, resolved under Lexers::BASE_DIR. The generated
# file added by this commit defines self.keywords again, so the call on the
# next line is dispatched to that freshly loaded definition.
Kernel::load File.join(Lexers::BASE_DIR, "llvm/keywords.rb")
keywords
end

# Returns the instruction names for the LLVM lexer.
# NOTE(review): rendered diff without +/- markers. Presumably the inline
# %w(...) list is the removed side and the Kernel::load line plus the trailing
# `instructions` call are the added side -- confirm against the commit.
def self.instructions
@instructions ||= Set.new %w(
add alloca and ashr bitcast br call catch cleanup extractelement
extractvalue fadd fcmp fdiv fmul fpext fptosi fptoui fptrunc free
frem fsub getelementptr getresult icmp insertelement insertvalue
inttoptr invoke load lshr malloc mul or phi ptrtoint resume ret sdiv
select sext shl shufflevector sitofp srem store sub switch trunc udiv
uitofp unreachable unwind urem va_arg xor zext
)
# Evaluates llvm/keywords.rb, which defines self.instructions again; the call
# on the next line is dispatched to that freshly loaded definition.
Kernel::load File.join(Lexers::BASE_DIR, "llvm/keywords.rb")
instructions
end

# Returns the type names for the LLVM lexer.
# NOTE(review): rendered diff without +/- markers. Presumably the inline
# %w(...) list is the removed side and the Kernel::load line plus the trailing
# `types` call are the added side -- confirm against the commit.
def self.types
@types ||= Set.new %w(
double float fp128 half label metadata ppc_fp128 void x86_fp80 x86mmx
)
# Evaluates llvm/keywords.rb, which defines self.types again; the call on the
# next line is dispatched to that freshly loaded definition.
Kernel::load File.join(Lexers::BASE_DIR, "llvm/keywords.rb")
types
end

state :basic do
Expand Down
25 changes: 25 additions & 0 deletions lib/rouge/lexers/llvm/keywords.rb
@@ -0,0 +1,25 @@
# encoding: utf-8
# frozen_string_literal: true

# DO NOT EDIT
# This file is automatically generated by `rake builtins:llvm`.
# See tasks/builtins/llvm.rake for more info.

module Rouge
module Lexers
# Generated name tables for the LLVM lexer. Each class method below builds its
# Set on first call and memoizes it in a class-level instance variable.
class LLVM
# Memoized Set of keyword names.
def self.keywords
@keywords ||= Set.new ["aarch64_sve_vector_pcs", "aarch64_vector_pcs", "acq_rel", "acquire", "addrspace", "afn", "alias", "aliasee", "align", "alignLog2", "alignstack", "allOnes", "allocsize", "alwaysInline", "alwaysinline", "amdgpu_cs", "amdgpu_es", "amdgpu_gs", "amdgpu_hs", "amdgpu_kernel", "amdgpu_ls", "amdgpu_ps", "amdgpu_vs", "any", "anyregcc", "appending", "arcp", "argmemonly", "args", "arm_aapcs_vfpcc", "arm_aapcscc", "arm_apcscc", "asm", "atomic", "attributes", "available_externally", "avr_intrcc", "avr_signalcc", "bit", "bitMask", "blockaddress", "branchFunnel", "builtin", "byArg", "byte", "byteArray", "byval", "c", "callee", "caller", "calls", "canAutoHide", "catch", "cc", "ccc", "cfguard_checkcc", "cleanup", "cold", "coldcc", "comdat", "common", "constant", "contract", "convergent", "critical", "cxx_fast_tlscc", "datalayout", "declare", "default", "define", "deplibs", "dereferenceable", "dereferenceable_or_null", "distinct", "dllexport", "dllimport", "dsoLocal", "dso_local", "dso_preemptable", "eq", "exact", "exactmatch", "extern_weak", "external", "externally_initialized", "false", "fast", "fastcc", "filter", "flags", "from", "funcFlags", "function", "gc", "ghccc", "global", "guid", "gv", "hash", "hhvm_ccc", "hhvmcc", "hidden", "hot", "hotness", "ifunc", "immarg", "inaccessiblemem_or_argmemonly", "inaccessiblememonly", "inalloca", "inbounds", "indir", "info", "initialexec", "inline", "inlineBits", "inlinehint", "inrange", "inreg", "insts", "intel_ocl_bicc", "inteldialect", "internal", "jumptable", "kind", "largest", "linkage", "linkonce", "linkonce_odr", "live", "local_unnamed_addr", "localdynamic", "localexec", "max", "min", "minsize", "module", "monotonic", "msp430_intrcc", "musttail", "naked", "name", "nand", "ne", "nest", "ninf", "nnan", "noInline", "noRecurse", "noalias", "nobuiltin", "nocapture", "nocf_check", "noduplicate", "noduplicates", "nofree", "noimplicitfloat", "noinline", "none", "nonlazybind", "nonnull", "norecurse", "noredzone", 
"noreturn", "nosync", "notEligibleToImport", "notail", "nounwind", "nsw", "nsz", "null", "nuw", "oeq", "offset", "oge", "ogt", "ole", "olt", "one", "opaque", "optforfuzzing", "optnone", "optsize", "ord", "partition", "path", "personality", "prefix", "preserve_allcc", "preserve_mostcc", "private", "prologue", "protected", "ptx_device", "ptx_kernel", "readNone", "readOnly", "readnone", "readonly", "reassoc", "refs", "relbf", "release", "resByArg", "returnDoesNotAlias", "returned", "returns_twice", "safestack", "samesize", "sanitize_address", "sanitize_hwaddress", "sanitize_memory", "sanitize_memtag", "sanitize_thread", "section", "seq_cst", "sge", "sgt", "shadowcallstack", "sideeffect", "signext", "single", "singleImpl", "singleImplName", "sizeM1", "sizeM1BitWidth", "sle", "slt", "source_filename", "speculatable", "speculative_load_hardening", "spir_func", "spir_kernel", "sret", "ssp", "sspreq", "sspstrong", "strictfp", "summaries", "summary", "swiftcc", "swifterror", "swiftself", "syncscope", "tail", "tailcc", "target", "thread_local", "to", "triple", "true", "type", "typeCheckedLoadConstVCalls", "typeCheckedLoadVCalls", "typeIdInfo", "typeTestAssumeConstVCalls", "typeTestAssumeVCalls", "typeTestRes", "typeTests", "typeid", "typeidCompatibleVTable", "ueq", "uge", "ugt", "ule", "ult", "umax", "umin", "undef", "une", "uniformRetVal", "uniqueRetVal", "unknown", "unnamed_addr", "uno", "unordered", "unsat", "unwind", "uselistorder", "uselistorder_bb", "uwtable", "vFuncId", "vTableFuncs", "varFlags", "variable", "vcall_visibility", "virtFunc", "virtualConstProp", "volatile", "vscale", "weak", "weak_odr", "webkit_jscc", "willreturn", "win64cc", "within", "wpdRes", "wpdResolutions", "writeonly", "x", "x86_64_sysvcc", "x86_fastcallcc", "x86_intrcc", "x86_regcallcc", "x86_stdcallcc", "x86_thiscallcc", "x86_vectorcallcc", "xchg", "zeroext", "zeroinitializer"]
end

# Memoized Set of type names.
def self.types
@types ||= Set.new ["double", "float", "fp128", "half", "label", "metadata", "ppc_fp128", "token", "void", "x86_fp80", "x86_mmx"]
end

# Memoized Set of instruction names.
def self.instructions
@instructions ||= Set.new ["add", "addrspacecast", "alloca", "and", "ashr", "atomicrmw", "bitcast", "br", "call", "callbr", "catchpad", "catchret", "catchswitch", "cleanuppad", "cleanupret", "cmpxchg", "extractelement", "extractvalue", "fadd", "fcmp", "fdiv", "fence", "fmul", "fneg", "fpext", "fptosi", "fptoui", "fptrunc", "freeze", "frem", "fsub", "getelementptr", "icmp", "indirectbr", "insertelement", "insertvalue", "inttoptr", "invoke", "landingpad", "load", "lshr", "mul", "or", "phi", "ptrtoint", "resume", "ret", "sdiv", "select", "sext", "shl", "shufflevector", "sitofp", "srem", "store", "sub", "switch", "trunc", "udiv", "uitofp", "unreachable", "urem", "va_arg", "xor", "zext"]
end

end
end
end
2 changes: 2 additions & 0 deletions spec/visual/samples/llvm
Expand Up @@ -84,3 +84,5 @@ attributes #1 = { "no-sse" }
define void @f() #0 #1 { ... }

%1 = addrspacecast i32* %a to i32 addrspace(1)*

source_filename = "/path/to/source.c"
78 changes: 78 additions & 0 deletions tasks/builtins/llvm.rake
@@ -0,0 +1,78 @@
# encoding: utf-8
# frozen_string_literal: true

require 'open-uri'

# Raw GitHub URL of LLLexer.cpp on the llvm-project master branch; the
# builtins:llvm task downloads it as the input for keyword extraction.
LLVM_SYNTAX_URI = "https://raw.githubusercontent.com/llvm/llvm-project/master/llvm/lib/AsmParser/LLLexer.cpp"
# Relative path of the keyword file that the builtins:llvm task overwrites
# with the rendered output.
LLVM_KEYWORDS_FILE = "./lib/rouge/lexers/llvm/keywords.rb"

namespace :builtins do
  # Regenerates the LLVM lexer's keyword file: downloads the lexer source named
  # by LLVM_SYNTAX_URI, extracts the keyword names from it and overwrites
  # LLVM_KEYWORDS_FILE with the rendered Ruby source.
  task :llvm do
    lexer_source = URI.open(LLVM_SYNTAX_URI, &:read)
    generator = Rouge::Tasks::Builtins::LLVM.new
    rendered = generator.render_output(generator.extract_keywords(lexer_source))

    File.write(LLVM_KEYWORDS_FILE, rendered)
  end
end

module Rouge
  module Tasks
    module Builtins
      # Turns the text of LLVM's LLLexer.cpp into the Ruby source of the
      # generated keyword file used by the LLVM lexer.
      class LLVM
        # Maps the name of a `#define`d macro in the input to the kind of name
        # that invocations of that macro declare. Any other macro name turns
        # collection off until the next recognised `#define`.
        MACRO_KINDS = {
          "KEYWORD"     => "keywords",
          "TYPEKEYWORD" => "types",
          "INSTKEYWORD" => "instructions"
        }.freeze

        # Collects the names declared through the KEYWORD, TYPEKEYWORD and
        # INSTKEYWORD macros.
        #
        # The input is scanned in chunks terminated by ";". A chunk containing
        # `#define NAME(` switches the current kind according to MACRO_KINDS
        # and is otherwise ignored. While a kind is active, the first argument
        # of a `...KEYWORD(` invocation in a chunk (with an optional leading
        # double quote dropped) is recorded under that kind.
        #
        # @param input [String] the text of the lexer source
        # @return [Hash{String => Array<String>}] kind => sorted names, with
        #   repeated names collapsed so the generated array literals stay free
        #   of duplicates; kinds that never collected a name are absent
        def extract_keywords(input)
          keywords = Hash.new { |h, k| h[k] = [] }
          kind = nil

          input.each_line(";") do |line|
            if (macro = line[/#define (.*?)\(/, 1])
              kind = MACRO_KINDS[macro]
              next
            end

            next unless kind

            name = line[/KEYWORD\("?([^)",]+)/, 1]
            keywords[kind] << name if name
          end

          keywords.transform_values! { |names| names.uniq.sort }
        end

        # Renders the Ruby source of the generated keyword file.
        #
        # With a block, every output line is yielded in order. Without one, the
        # lines are joined with "\n" (no trailing newline) and returned.
        #
        # @param keywords [Hash{String => Array<String>}] kind => names; each
        #   pair becomes a `def self.<kind>` method memoizing a Set
        # @yieldparam line [String] one line of the generated file
        # @return [String] the whole file when no block is given
        def render_output(keywords, &b)
          return enum_for(:render_output, keywords).to_a.join("\n") unless b

          yield "# encoding: utf-8"
          yield "# frozen_string_literal: true"
          yield ""
          yield "# DO NOT EDIT"
          yield "# This file is automatically generated by `rake builtins:llvm`."
          yield "# See tasks/builtins/llvm.rake for more info."
          yield ""
          yield "module Rouge"
          yield " module Lexers"
          yield " class LLVM"
          keywords.each do |kind, names|
            yield " def self.#{kind}"
            yield " @#{kind} ||= Set.new #{names.inspect}"
            yield " end"
            yield ""
          end
          yield " end"
          yield " end"
          yield "end"
        end
      end
    end
  end
end

0 comments on commit 723a265

Please sign in to comment.