Skip to content

Commit

Permalink
Add Smali lexer (#765)
Browse files Browse the repository at this point in the history
Converted from the pygments Smali lexer. Add a basic test case and
update the 'Supported languages' table in the README.
  • Loading branch information
kenjenkins committed Mar 23, 2023
1 parent b9e3758 commit 2672d3c
Show file tree
Hide file tree
Showing 4 changed files with 383 additions and 1 deletion.
2 changes: 1 addition & 1 deletion README.md
Expand Up @@ -55,7 +55,7 @@ O | Objective-C, OCaml, Octave, OnesEnterprise, OpenEdge ABL, OpenSCAD, Org Mode
P | PacmanConf, Perl, PHP, PHTML, Pig, PkgConfig, PL/pgSQL, plaintext, Pony, PostgreSQL SQL dialect, PostScript, POVRay, PowerShell, Prolog, PromQL, Properties, Protocol Buffer, PSL, Puppet, Python 2, Python
Q | QBasic
R | R, Racket, Ragel, Raku, react, ReasonML, reg, reStructuredText, Rexx, Ruby, Rust
S | SAS, Sass, Scala, Scheme, Scilab, SCSS, Sed, Smalltalk, Smarty, Snobol, Solidity, SPARQL, SQL, SquidConf, Standard ML, stas, Stylus, Svelte, Swift, SYSTEMD, systemverilog
S | SAS, Sass, Scala, Scheme, Scilab, SCSS, Sed, Smali, Smalltalk, Smarty, Snobol, Solidity, SPARQL, SQL, SquidConf, Standard ML, stas, Stylus, Svelte, Swift, SYSTEMD, systemverilog
T | TableGen, TASM, Tcl, Tcsh, Termcap, Terminfo, Terraform, TeX, Thrift, TOML, TradingView, Transact-SQL, Turing, Turtle, Twig, TypeScript, TypoScript, TypoScriptCssData, TypoScriptHtmlData
V | VB.net, verilog, VHDL, VHS, VimL, vue
W | WDTE
Expand Down
73 changes: 73 additions & 0 deletions lexers/embedded/smali.xml
@@ -0,0 +1,73 @@
<!--
Generated from https://github.com/pygments/pygments/blob/15f222adefd2bf7835bfd74a12d720028ae68d29/pygments/lexers/dalvik.py.
-->
<lexer>
  <!--
    Lexer definition for Smali, the textual assembly format for Dalvik bytecode.
    Rules are grouped into small single-purpose states that "root" includes in
    priority order; no state pushes or pops, so lexing always stays in "root".
  -->
  <config>
    <name>Smali</name>
    <alias>smali</alias>
    <filename>*.smali</filename>
    <mime_type>text/smali</mime_type>
    <ensure_nl>true</ensure_nl>
  </config>
  <rules>
    <!--
      Entry state. The include order is significant: the first matching rule
      wins, so the more specific states (comment, label, field, method, class)
      are tried before the generic ones (instruction, literal, punctuation, type).
      ensure_nl is enabled in the config because the comment and instruction
      rules both require trailing whitespace; without it the last line of an
      input lacking a final newline would not match them.
    -->
    <state name="root">
      <rule><include state="comment"/></rule>
      <rule><include state="label"/></rule>
      <rule><include state="field"/></rule>
      <rule><include state="method"/></rule>
      <rule><include state="class"/></rule>
      <rule><include state="directive"/></rule>
      <rule><include state="access-modifier"/></rule>
      <rule><include state="instruction"/></rule>
      <rule><include state="literal"/></rule>
      <rule><include state="punctuation"/></rule>
      <rule><include state="type"/></rule>
      <rule><include state="whitespace"/></rule>
    </state>

    <!--
      Dot-prefixed directives at the start of a line (optionally indented with
      spaces or tabs). The second and third rules cover the two-word forms
      ".end X" and ".restart local", emitting both words as keywords.
    -->
    <state name="directive">
      <rule pattern="^([ \t]*)(\.(?:class|super|implements|field|subannotation|annotation|enum|method|registers|locals|array-data|packed-switch|sparse-switch|catchall|catch|line|parameter|local|prologue|epilogue|source))">
        <bygroups>
          <token type="TextWhitespace"/>
          <token type="Keyword"/>
        </bygroups>
      </rule>
      <rule pattern="^([ \t]*)(\.end)( )(field|subannotation|annotation|method|array-data|packed-switch|sparse-switch|parameter|local)">
        <bygroups>
          <token type="TextWhitespace"/>
          <token type="Keyword"/>
          <token type="TextWhitespace"/>
          <token type="Keyword"/>
        </bygroups>
      </rule>
      <rule pattern="^([ \t]*)(\.restart)( )(local)">
        <bygroups>
          <token type="TextWhitespace"/>
          <token type="Keyword"/>
          <token type="TextWhitespace"/>
          <token type="Keyword"/>
        </bygroups>
      </rule>
    </state>

    <!-- Access flags and similar modifier words, emitted as keywords. -->
    <state name="access-modifier">
      <rule pattern="(public|private|protected|static|final|synchronized|bridge|varargs|native|abstract|strictfp|synthetic|constructor|declared-synchronized|interface|enum|annotation|volatile|transient)">
        <token type="Keyword"/>
      </rule>
    </state>

    <!-- Newlines and any other run of whitespace. -->
    <state name="whitespace">
      <rule pattern="\n"><token type="TextWhitespace"/></rule>
      <rule pattern="\s+"><token type="TextWhitespace"/></rule>
    </state>

    <!--
      First rule: register names, a "v" or "p" followed by digits (v0, p1).
      Second rule: an opcode-like word starting with a lowercase letter and
      followed by whitespace (invoke-direct, return-void).
    -->
    <state name="instruction">
      <rule pattern="\b[vp]\d+\b"><token type="NameBuiltin"/></rule>
      <rule pattern="(\b[a-z][A-Za-z0-9/-]+)(\s+)">
        <bygroups>
          <token type="Text"/>
          <token type="TextWhitespace"/>
        </bygroups>
      </rule>
    </state>

    <!--
      String and numeric literals.
      The string rule matches exactly one double-quoted literal: any run of
      characters other than a quote, backslash or newline, or a backslash
      followed by any character (so an escaped quote stays inside the string).
      A greedy "any characters up to the last quote on the line" pattern would
      merge two literals on the same line into a single token together with
      everything between them.
    -->
    <state name="literal">
      <rule pattern="&quot;(?:[^&quot;\\\n]|\\.)*&quot;"><token type="LiteralString"/></rule>
      <rule pattern="0x[0-9A-Fa-f]+t?"><token type="LiteralNumberHex"/></rule>
      <rule pattern="[0-9]*\.[0-9]+([eE][0-9]+)?[fd]?"><token type="LiteralNumberFloat"/></rule>
      <rule pattern="[0-9]+L?"><token type="LiteralNumberInteger"/></rule>
    </state>

    <!-- A field name: an identifier (word characters or "$") followed by ":". -->
    <state name="field">
      <rule pattern="(\$?\b)([\w$]*)(:)">
        <bygroups>
          <token type="Punctuation"/>
          <token type="NameVariable"/>
          <token type="Punctuation"/>
        </bygroups>
      </rule>
    </state>

    <!--
      Method names: the special <init> / <clinit> names, or an identifier
      (word characters or "$") followed by an opening parenthesis.
    -->
    <state name="method">
      <rule pattern="&lt;(?:cl)?init&gt;"><token type="NameFunction"/></rule>
      <rule pattern="(\$?\b)([\w$]*)(\()">
        <bygroups>
          <token type="Punctuation"/>
          <token type="NameFunction"/>
          <token type="Punctuation"/>
        </bygroups>
      </rule>
    </state>

    <!-- A label: ":" followed by word characters. -->
    <state name="label">
      <rule pattern=":\w+"><token type="NameLabel"/></rule>
    </state>

    <!--
      A class descriptor such as Ljava/lang/Object; split into the leading "L",
      the slash-separated package path, the class name and the closing ";".
    -->
    <state name="class">
      <rule pattern="(L)((?:[\w$]+/)*)([\w$]+)(;)">
        <bygroups>
          <token type="KeywordType"/>
          <token type="Text"/>
          <token type="NameClass"/>
          <token type="Text"/>
        </bygroups>
      </rule>
    </state>

    <!-- The member-reference arrow and single-character punctuation. -->
    <state name="punctuation">
      <rule pattern="-&gt;"><token type="Punctuation"/></rule>
      <rule pattern="[{},():=.-]"><token type="Punctuation"/></rule>
    </state>

    <!-- Runs of primitive type descriptor letters and array "[" markers. -->
    <state name="type">
      <rule pattern="[ZBSCIJFDV\[]+"><token type="KeywordType"/></rule>
    </state>

    <!-- A "#" comment running to the end of the line, newline included. -->
    <state name="comment">
      <rule pattern="#.*?\n"><token type="Comment"/></rule>
    </state>
  </rules>
</lexer>

55 changes: 55 additions & 0 deletions lexers/testdata/smali.actual
@@ -0,0 +1,55 @@
.class LExample;
.super Ljava/lang/Object;
.source "Example.java"


# instance fields
.field protected count:I

.field private label:Ljava/lang/String;


# direct methods
.method constructor <init>()V
.registers 1

.line 1
invoke-direct {p0}, Ljava/lang/Object;-><init>()V

return-void
.end method

.method public static main([Ljava/lang/String;)V
.registers 2

.line 16
sget-object p0, Ljava/lang/System;->out:Ljava/io/PrintStream;

const-string v0, "Hello world!"

invoke-virtual {p0, v0}, Ljava/io/PrintStream;->println(Ljava/lang/String;)V

.line 17
return-void
.end method


# virtual methods
.method public getLabel()Ljava/lang/String;
.registers 2

.line 8
iget-object v0, p0, LExample;->label:Ljava/lang/String;

return-object v0
.end method

.method public setLabel(Ljava/lang/String;)V
.registers 2

.line 12
iput-object p1, p0, LExample;->label:Ljava/lang/String;

.line 13
return-void
.end method
254 changes: 254 additions & 0 deletions lexers/testdata/smali.expected
@@ -0,0 +1,254 @@
[
{"type":"Keyword","value":".class"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordType","value":"L"},
{"type":"NameClass","value":"Example"},
{"type":"Text","value":";"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Keyword","value":".super"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordType","value":"L"},
{"type":"Text","value":"java/lang/"},
{"type":"NameClass","value":"Object"},
{"type":"Text","value":";"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Keyword","value":".source"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralString","value":"\"Example.java\""},
{"type":"TextWhitespace","value":"\n\n\n"},
{"type":"Comment","value":"# instance fields\n"},
{"type":"Keyword","value":".field"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"protected"},
{"type":"TextWhitespace","value":" "},
{"type":"NameVariable","value":"count"},
{"type":"Punctuation","value":":"},
{"type":"KeywordType","value":"I"},
{"type":"TextWhitespace","value":"\n\n"},
{"type":"Keyword","value":".field"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"private"},
{"type":"TextWhitespace","value":" "},
{"type":"NameVariable","value":"label"},
{"type":"Punctuation","value":":"},
{"type":"KeywordType","value":"L"},
{"type":"Text","value":"java/lang/"},
{"type":"NameClass","value":"String"},
{"type":"Text","value":";"},
{"type":"TextWhitespace","value":"\n\n\n"},
{"type":"Comment","value":"# direct methods\n"},
{"type":"Keyword","value":".method"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"constructor"},
{"type":"TextWhitespace","value":" "},
{"type":"NameFunction","value":"\u003cinit\u003e"},
{"type":"Punctuation","value":"()"},
{"type":"KeywordType","value":"V"},
{"type":"TextWhitespace","value":"\n "},
{"type":"Keyword","value":".registers"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralNumberInteger","value":"1"},
{"type":"TextWhitespace","value":"\n\n "},
{"type":"Keyword","value":".line"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralNumberInteger","value":"1"},
{"type":"TextWhitespace","value":"\n "},
{"type":"Text","value":"invoke-direct"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"{"},
{"type":"NameBuiltin","value":"p0"},
{"type":"Punctuation","value":"},"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordType","value":"L"},
{"type":"Text","value":"java/lang/"},
{"type":"NameClass","value":"Object"},
{"type":"Text","value":";"},
{"type":"Punctuation","value":"-\u003e"},
{"type":"NameFunction","value":"\u003cinit\u003e"},
{"type":"Punctuation","value":"()"},
{"type":"KeywordType","value":"V"},
{"type":"TextWhitespace","value":"\n\n "},
{"type":"Text","value":"return-void"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Keyword","value":".end"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"method"},
{"type":"TextWhitespace","value":"\n\n"},
{"type":"Keyword","value":".method"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"public"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"static"},
{"type":"TextWhitespace","value":" "},
{"type":"NameFunction","value":"main"},
{"type":"Punctuation","value":"("},
{"type":"KeywordType","value":"[L"},
{"type":"Text","value":"java/lang/"},
{"type":"NameClass","value":"String"},
{"type":"Text","value":";"},
{"type":"Punctuation","value":")"},
{"type":"KeywordType","value":"V"},
{"type":"TextWhitespace","value":"\n "},
{"type":"Keyword","value":".registers"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralNumberInteger","value":"2"},
{"type":"TextWhitespace","value":"\n\n "},
{"type":"Keyword","value":".line"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralNumberInteger","value":"16"},
{"type":"TextWhitespace","value":"\n "},
{"type":"Text","value":"sget-object"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"p0"},
{"type":"Punctuation","value":","},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordType","value":"L"},
{"type":"Text","value":"java/lang/"},
{"type":"NameClass","value":"System"},
{"type":"Text","value":";"},
{"type":"Punctuation","value":"-\u003e"},
{"type":"NameVariable","value":"out"},
{"type":"Punctuation","value":":"},
{"type":"KeywordType","value":"L"},
{"type":"Text","value":"java/io/"},
{"type":"NameClass","value":"PrintStream"},
{"type":"Text","value":";"},
{"type":"TextWhitespace","value":"\n\n "},
{"type":"Text","value":"const-string"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"v0"},
{"type":"Punctuation","value":","},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralString","value":"\"Hello world!\""},
{"type":"TextWhitespace","value":"\n\n "},
{"type":"Text","value":"invoke-virtual"},
{"type":"TextWhitespace","value":" "},
{"type":"Punctuation","value":"{"},
{"type":"NameBuiltin","value":"p0"},
{"type":"Punctuation","value":","},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"v0"},
{"type":"Punctuation","value":"},"},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordType","value":"L"},
{"type":"Text","value":"java/io/"},
{"type":"NameClass","value":"PrintStream"},
{"type":"Text","value":";"},
{"type":"Punctuation","value":"-\u003e"},
{"type":"NameFunction","value":"println"},
{"type":"Punctuation","value":"("},
{"type":"KeywordType","value":"L"},
{"type":"Text","value":"java/lang/"},
{"type":"NameClass","value":"String"},
{"type":"Text","value":";"},
{"type":"Punctuation","value":")"},
{"type":"KeywordType","value":"V"},
{"type":"TextWhitespace","value":"\n\n "},
{"type":"Keyword","value":".line"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralNumberInteger","value":"17"},
{"type":"TextWhitespace","value":"\n "},
{"type":"Text","value":"return-void"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Keyword","value":".end"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"method"},
{"type":"TextWhitespace","value":"\n\n\n"},
{"type":"Comment","value":"# virtual methods\n"},
{"type":"Keyword","value":".method"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"public"},
{"type":"TextWhitespace","value":" "},
{"type":"NameFunction","value":"getLabel"},
{"type":"Punctuation","value":"()"},
{"type":"KeywordType","value":"L"},
{"type":"Text","value":"java/lang/"},
{"type":"NameClass","value":"String"},
{"type":"Text","value":";"},
{"type":"TextWhitespace","value":"\n "},
{"type":"Keyword","value":".registers"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralNumberInteger","value":"2"},
{"type":"TextWhitespace","value":"\n\n "},
{"type":"Keyword","value":".line"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralNumberInteger","value":"8"},
{"type":"TextWhitespace","value":"\n "},
{"type":"Text","value":"iget-object"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"v0"},
{"type":"Punctuation","value":","},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"p0"},
{"type":"Punctuation","value":","},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordType","value":"L"},
{"type":"NameClass","value":"Example"},
{"type":"Text","value":";"},
{"type":"Punctuation","value":"-\u003e"},
{"type":"NameVariable","value":"label"},
{"type":"Punctuation","value":":"},
{"type":"KeywordType","value":"L"},
{"type":"Text","value":"java/lang/"},
{"type":"NameClass","value":"String"},
{"type":"Text","value":";"},
{"type":"TextWhitespace","value":"\n\n "},
{"type":"Text","value":"return-object"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"v0"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Keyword","value":".end"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"method"},
{"type":"TextWhitespace","value":"\n\n"},
{"type":"Keyword","value":".method"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"public"},
{"type":"TextWhitespace","value":" "},
{"type":"NameFunction","value":"setLabel"},
{"type":"Punctuation","value":"("},
{"type":"KeywordType","value":"L"},
{"type":"Text","value":"java/lang/"},
{"type":"NameClass","value":"String"},
{"type":"Text","value":";"},
{"type":"Punctuation","value":")"},
{"type":"KeywordType","value":"V"},
{"type":"TextWhitespace","value":"\n "},
{"type":"Keyword","value":".registers"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralNumberInteger","value":"2"},
{"type":"TextWhitespace","value":"\n\n "},
{"type":"Keyword","value":".line"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralNumberInteger","value":"12"},
{"type":"TextWhitespace","value":"\n "},
{"type":"Text","value":"iput-object"},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"p1"},
{"type":"Punctuation","value":","},
{"type":"TextWhitespace","value":" "},
{"type":"NameBuiltin","value":"p0"},
{"type":"Punctuation","value":","},
{"type":"TextWhitespace","value":" "},
{"type":"KeywordType","value":"L"},
{"type":"NameClass","value":"Example"},
{"type":"Text","value":";"},
{"type":"Punctuation","value":"-\u003e"},
{"type":"NameVariable","value":"label"},
{"type":"Punctuation","value":":"},
{"type":"KeywordType","value":"L"},
{"type":"Text","value":"java/lang/"},
{"type":"NameClass","value":"String"},
{"type":"Text","value":";"},
{"type":"TextWhitespace","value":"\n\n "},
{"type":"Keyword","value":".line"},
{"type":"TextWhitespace","value":" "},
{"type":"LiteralNumberInteger","value":"13"},
{"type":"TextWhitespace","value":"\n "},
{"type":"Text","value":"return-void"},
{"type":"TextWhitespace","value":"\n"},
{"type":"Keyword","value":".end"},
{"type":"TextWhitespace","value":" "},
{"type":"Keyword","value":"method"},
{"type":"TextWhitespace","value":"\n"}
]

0 comments on commit 2672d3c

Please sign in to comment.