Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Add missing keywords to LLVM lexer #1505

Merged
merged 5 commits into from Apr 20, 2020
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Jump to
Jump to file
Failed to load files.
Diff view
Diff view
39 changes: 6 additions & 33 deletions lib/rouge/lexers/llvm.rb
Expand Up @@ -15,45 +15,18 @@ class LLVM < RegexLexer
identifier = /([-a-zA-Z$._][-a-zA-Z$._0-9]*|#{string})/

# Keyword names recognised by the lexer.
#
# The list itself lives in the generated file llvm/keywords.rb, which is
# loaded on first use. That file redefines this method with a memoized Set,
# so the trailing call dispatches to the generated definition.
#
# Nothing may assign @keywords before the load: the generated definition
# uses `||=` and would otherwise keep returning the pre-assigned value
# instead of the generated list.
#
# @return [Set<String>]
def self.keywords
  Kernel::load File.join(Lexers::BASE_DIR, "llvm/keywords.rb")
  keywords
end

# Instruction names recognised by the lexer.
#
# The list itself lives in the generated file llvm/keywords.rb, which is
# loaded on first use. That file redefines this method with a memoized Set,
# so the trailing call dispatches to the generated definition.
#
# Nothing may assign @instructions before the load: the generated
# definition uses `||=` and would otherwise keep returning the pre-assigned
# value instead of the generated list.
#
# @return [Set<String>]
def self.instructions
  Kernel::load File.join(Lexers::BASE_DIR, "llvm/keywords.rb")
  instructions
end

# Type names recognised by the lexer.
#
# The list itself lives in the generated file llvm/keywords.rb, which is
# loaded on first use. That file redefines this method with a memoized Set,
# so the trailing call dispatches to the generated definition.
#
# Nothing may assign @types before the load: the generated definition uses
# `||=` and would otherwise keep returning the pre-assigned value instead of
# the generated list.
#
# @return [Set<String>]
def self.types
  Kernel::load File.join(Lexers::BASE_DIR, "llvm/keywords.rb")
  types
end

state :basic do
Expand Down
25 changes: 25 additions & 0 deletions lib/rouge/lexers/llvm/keywords.rb
@@ -0,0 +1,25 @@
# encoding: utf-8
# frozen_string_literal: true

# DO NOT EDIT
# This file is automatically generated by `rake builtins:llvm`.
# See tasks/builtins/llvm.rake for more info.

# Generated word lists for the LLVM lexer. Each class method below builds its
# Set on the first call and caches it in a class-level instance variable.
# NOTE(review): the header says this file is produced by `rake builtins:llvm`,
# so hand edits (including these comments) are presumably overwritten on
# regeneration — confirm before relying on them.
module Rouge
module Lexers
class LLVM
# Keyword names, as a memoized Set of Strings.
def self.keywords
@keywords ||= Set.new ["aarch64_sve_vector_pcs", "aarch64_vector_pcs", "acq_rel", "acquire", "addrspace", "afn", "alias", "aliasee", "align", "alignLog2", "alignstack", "allOnes", "allocsize", "alwaysInline", "alwaysinline", "amdgpu_cs", "amdgpu_es", "amdgpu_gs", "amdgpu_hs", "amdgpu_kernel", "amdgpu_ls", "amdgpu_ps", "amdgpu_vs", "any", "anyregcc", "appending", "arcp", "argmemonly", "args", "arm_aapcs_vfpcc", "arm_aapcscc", "arm_apcscc", "asm", "atomic", "attributes", "available_externally", "avr_intrcc", "avr_signalcc", "bit", "bitMask", "blockaddress", "branchFunnel", "builtin", "byArg", "byte", "byteArray", "byval", "c", "callee", "caller", "calls", "canAutoHide", "catch", "cc", "ccc", "cfguard_checkcc", "cleanup", "cold", "coldcc", "comdat", "common", "constant", "contract", "convergent", "critical", "cxx_fast_tlscc", "datalayout", "declare", "default", "define", "deplibs", "dereferenceable", "dereferenceable_or_null", "distinct", "dllexport", "dllimport", "dsoLocal", "dso_local", "dso_preemptable", "eq", "exact", "exactmatch", "extern_weak", "external", "externally_initialized", "false", "fast", "fastcc", "filter", "flags", "from", "funcFlags", "function", "gc", "ghccc", "global", "guid", "gv", "hash", "hhvm_ccc", "hhvmcc", "hidden", "hot", "hotness", "ifunc", "immarg", "inaccessiblemem_or_argmemonly", "inaccessiblememonly", "inalloca", "inbounds", "indir", "info", "initialexec", "inline", "inlineBits", "inlinehint", "inrange", "inreg", "insts", "intel_ocl_bicc", "inteldialect", "internal", "jumptable", "kind", "largest", "linkage", "linkonce", "linkonce_odr", "live", "local_unnamed_addr", "localdynamic", "localexec", "max", "min", "minsize", "module", "monotonic", "msp430_intrcc", "musttail", "naked", "name", "nand", "ne", "nest", "ninf", "nnan", "noInline", "noRecurse", "noalias", "nobuiltin", "nocapture", "nocf_check", "noduplicate", "noduplicates", "nofree", "noimplicitfloat", "noinline", "none", "nonlazybind", "nonnull", "norecurse", "noredzone", 
"noreturn", "nosync", "notEligibleToImport", "notail", "nounwind", "nsw", "nsz", "null", "nuw", "oeq", "offset", "oge", "ogt", "ole", "olt", "one", "opaque", "optforfuzzing", "optnone", "optsize", "ord", "partition", "path", "personality", "prefix", "preserve_allcc", "preserve_mostcc", "private", "prologue", "protected", "ptx_device", "ptx_kernel", "readNone", "readOnly", "readnone", "readonly", "reassoc", "refs", "relbf", "release", "resByArg", "returnDoesNotAlias", "returned", "returns_twice", "safestack", "samesize", "sanitize_address", "sanitize_hwaddress", "sanitize_memory", "sanitize_memtag", "sanitize_thread", "section", "seq_cst", "sge", "sgt", "shadowcallstack", "sideeffect", "signext", "single", "singleImpl", "singleImplName", "sizeM1", "sizeM1BitWidth", "sle", "slt", "source_filename", "speculatable", "speculative_load_hardening", "spir_func", "spir_kernel", "sret", "ssp", "sspreq", "sspstrong", "strictfp", "summaries", "summary", "swiftcc", "swifterror", "swiftself", "syncscope", "tail", "tailcc", "target", "thread_local", "to", "triple", "true", "type", "typeCheckedLoadConstVCalls", "typeCheckedLoadVCalls", "typeIdInfo", "typeTestAssumeConstVCalls", "typeTestAssumeVCalls", "typeTestRes", "typeTests", "typeid", "typeidCompatibleVTable", "ueq", "uge", "ugt", "ule", "ult", "umax", "umin", "undef", "une", "uniformRetVal", "uniqueRetVal", "unknown", "unnamed_addr", "uno", "unordered", "unsat", "unwind", "uselistorder", "uselistorder_bb", "uwtable", "vFuncId", "vTableFuncs", "varFlags", "variable", "vcall_visibility", "virtFunc", "virtualConstProp", "volatile", "vscale", "weak", "weak_odr", "webkit_jscc", "willreturn", "win64cc", "within", "wpdRes", "wpdResolutions", "writeonly", "x", "x86_64_sysvcc", "x86_fastcallcc", "x86_intrcc", "x86_regcallcc", "x86_stdcallcc", "x86_thiscallcc", "x86_vectorcallcc", "xchg", "zeroext", "zeroinitializer"]
end

# Type names, as a memoized Set of Strings.
def self.types
@types ||= Set.new ["double", "float", "fp128", "half", "label", "metadata", "ppc_fp128", "token", "void", "x86_fp80", "x86_mmx"]
end

# Instruction names, as a memoized Set of Strings.
def self.instructions
@instructions ||= Set.new ["add", "addrspacecast", "alloca", "and", "ashr", "atomicrmw", "bitcast", "br", "call", "callbr", "catchpad", "catchret", "catchswitch", "cleanuppad", "cleanupret", "cmpxchg", "extractelement", "extractvalue", "fadd", "fcmp", "fdiv", "fence", "fmul", "fneg", "fpext", "fptosi", "fptoui", "fptrunc", "freeze", "frem", "fsub", "getelementptr", "icmp", "indirectbr", "insertelement", "insertvalue", "inttoptr", "invoke", "landingpad", "load", "lshr", "mul", "or", "phi", "ptrtoint", "resume", "ret", "sdiv", "select", "sext", "shl", "shufflevector", "sitofp", "srem", "store", "sub", "switch", "trunc", "udiv", "uitofp", "unreachable", "urem", "va_arg", "xor", "zext"]
end

end
end
end
2 changes: 2 additions & 0 deletions spec/visual/samples/llvm
Expand Up @@ -84,3 +84,5 @@ attributes #1 = { "no-sse" }
define void @f() #0 #1 { ... }

%1 = addrspacecast i32* %a to i32 addrspace(1)*

source_filename = "/path/to/source.c"
78 changes: 78 additions & 0 deletions tasks/builtins/llvm.rake
@@ -0,0 +1,78 @@
# encoding: utf-8
# frozen_string_literal: true

require 'open-uri'

# Upstream C++ source whose keyword macros are scraped.
LLVM_SYNTAX_URI = "https://raw.githubusercontent.com/llvm/llvm-project/master/llvm/lib/AsmParser/LLLexer.cpp"
# Destination of the generated Ruby file, relative to the working directory.
LLVM_KEYWORDS_FILE = "./lib/rouge/lexers/llvm/keywords.rb"

namespace :builtins do
  # Downloads LLLexer.cpp, extracts its keyword lists and overwrites
  # LLVM_KEYWORDS_FILE with the rendered Ruby source.
  task :llvm do
    source = URI.open(LLVM_SYNTAX_URI, &:read)
    builder = Rouge::Tasks::Builtins::LLVM.new
    rendered = builder.render_output(builder.extract_keywords(source))

    File.write(LLVM_KEYWORDS_FILE, rendered)
  end
end

module Rouge
  module Tasks
    module Builtins
      # Turns the text of a C++ lexer source that declares its words through
      # KEYWORD(...), TYPEKEYWORD(...) and INSTKEYWORD(...) macro calls into
      # the Ruby source of a keywords file for the LLVM lexer.
      class LLVM
        # Bucket that invocations of each `#define`d macro are collected
        # under. Macros not listed here switch collection off.
        MACRO_KINDS = {
          "KEYWORD"     => "keywords",
          "TYPEKEYWORD" => "types",
          "INSTKEYWORD" => "instructions",
        }.freeze

        # Collects the words declared through the known macros.
        #
        # @param input [String] full text of the C++ source
        # @return [Hash{String => Array<String>}] bucket name => sorted,
        #   de-duplicated words, in the order the buckets were first seen
        def extract_keywords(input)
          keywords = Hash.new { |h, k| h[k] = [] }
          kind = nil

          # Split on ";" rather than on newlines: several macro calls may
          # share one physical line, and a `#define` body spans many lines.
          input.each_line(";") do |statement|
            # A `#define NAME(` starts a new section; its own chunk holds
            # only the macro body, so nothing is collected from it.
            if (macro = statement[/#define (.*?)\(/, 1])
              kind = MACRO_KINDS[macro]
              next
            end

            next unless kind

            # Captures the first macro argument, with or without quotes.
            word = statement[/KEYWORD\("?([^)",]+)/, 1]
            keywords[kind] << word if word
          end

          # A word declared twice upstream must not be emitted twice.
          keywords.transform_values! { |words| words.uniq.sort }
        end

        # Renders the Ruby source of the generated keywords file.
        #
        # With a block, yields the file one line at a time (no line
        # terminators). Without a block, returns all lines joined by "\n",
        # with no trailing newline.
        #
        # @param keywords [Hash{String => Array<String>}] method name => words
        # @return [String] the rendered source when no block is given
        def render_output(keywords, &b)
          return enum_for(:render_output, keywords).to_a.join("\n") unless b

          yield "# encoding: utf-8"
          yield "# frozen_string_literal: true"
          yield ""
          yield "# DO NOT EDIT"
          yield "# This file is automatically generated by `rake builtins:llvm`."
          yield "# See tasks/builtins/llvm.rake for more info."
          yield ""
          yield "module Rouge"
          yield " module Lexers"
          yield " class LLVM"
          # One memoized class method per bucket, named after the bucket.
          keywords.each do |name, words|
            yield " def self.#{name}"
            yield " @#{name} ||= Set.new #{words.inspect}"
            yield " end"
            yield ""
          end
          yield " end"
          yield " end"
          yield "end"
        end
      end
    end
  end
end