Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

Adding MIPS Lexer #2228

Merged
merged 12 commits into from Sep 21, 2022
1 change: 1 addition & 0 deletions pygments/lexers/_mapping.py
Expand Up @@ -273,6 +273,7 @@
'LuaLexer': ('pygments.lexers.scripting', 'Lua', ('lua',), ('*.lua', '*.wlua'), ('text/x-lua', 'application/x-lua')),
'MCFunctionLexer': ('pygments.lexers.mcfunction', 'MCFunction', ('mcfunction', 'mcf'), ('*.mcfunction',), ('text/mcfunction',)),
'MIMELexer': ('pygments.lexers.mime', 'MIME', ('mime',), (), ('multipart/mixed', 'multipart/related', 'multipart/alternative')),
'MIPSLexer': ('pygments.lexers.mips', 'MIPS', ('mips',), ('*.s', '*.asm', '*.mips', '*.S', '*.ASM', '*.MIPS'), ()),
'MOOCodeLexer': ('pygments.lexers.scripting', 'MOOCode', ('moocode', 'moo'), ('*.moo',), ('text/x-moocode',)),
'MSDOSSessionLexer': ('pygments.lexers.shell', 'MSDOS Session', ('doscon',), (), ()),
'Macaulay2Lexer': ('pygments.lexers.macaulay2', 'Macaulay2', ('macaulay2',), ('*.m2',), ()),
Expand Down
126 changes: 126 additions & 0 deletions pygments/lexers/mips.py
@@ -0,0 +1,126 @@
"""
pygments.lexers.mips
~~~~~~~~~~~~~~~~~~~~
Lexers for MIPS assembly.
:copyright: Copyright 2006-2022 by the Pygments team, see AUTHORS.
:license: BSD, see LICENSE for details.
"""

import re
from pygments.lexer import RegexLexer, words
from pygments.token import Whitespace, Comment, String, Keyword, Name, Text

__all__ = ["MIPSLexer"]


class MIPSLexer(RegexLexer):
    """
    A MIPS Assembly Lexer.

    Based on the Emacs major mode by hlissner:
    https://github.com/hlissner/emacs-mips-mode

    Token mapping used by the ``root`` state:

    * ``#`` comments -> ``Comment``
    * double-quoted strings -> ``String`` / ``String.Escape``
    * decimal integers -> ``Keyword.Constant``
    * ``label:`` -> ``Name.Function``
    * deprecated branch-likely instructions -> ``Keyword.Pseudo``
    * hardware instructions -> ``Keyword``
    * pseudo-instructions -> ``Name.Variable``
    * ``$``-registers -> ``Keyword.Type``
    * assembler directives -> ``Name.Entity``
    * punctuation -> ``Name.Builtin``
    """

    name = 'MIPS'
    aliases = ['mips']
    # TODO: add '*.s' and '*.asm', which will require designing an analyse_text
    # method for this lexer and refactoring those from Gas and Nasm in order to
    # have relatively reliable detection
    filenames = ['*.mips', '*.MIPS']

    # NOTE(review): "bop" and "movnzd" are kept from the original list but do
    # not look like real MIPS mnemonics -- confirm against the upstream Emacs
    # mode before removing them.
    keywords = [
        # Arithmetic instructions
        "add", "sub", "subu", "addi", "subi", "addu", "addiu",
        # Multiplication/division
        "mul", "mult", "multu", "mulu", "madd", "maddu", "msub", "msubu", "div", "divu",
        # Bitwise operations
        "and", "or", "nor", "xor", "andi", "ori", "xori", "clo", "clz",
        # Shifts
        "sll", "srl", "sllv", "srlv", "sra", "srav",
        # Comparisons
        "slt", "sltu", "slti", "sltiu",
        # Move data
        "mfhi", "mthi", "mflo", "mtlo", "movn", "movz", "movf", "movt",
        # Jump
        "j", "jal", "jalr", "jr",
        # branch
        "bc1f", "bc1t", "beq", "bgez", "bgezal", "bgtz", "blez", "bltzal", "bltz", "bne",
        # Load
        "lui", "lb", "lbu", "lh", "lhu", "lw", "lwc1", "lwl", "lwr",
        # Store
        "sb", "sh", "sw", "swl", "swr",  # coproc: swc1 sdc1
        # Concurrent load/store
        "ll", "sc",
        # Trap handling
        "teq", "teqi", "tne", "tnei", "tge", "tgeu", "tgei", "tgeiu", "tlt", "tltu", "tlti",
        "tltiu",
        # Exception / Interrupt
        "eret", "break", "bop", "syscall",
        # --- Floats -----------------------------------------------------
        # Arithmetic
        "add.s", "add.d", "sub.s", "sub.d", "mul.s", "mul.d", "div.s", "div.d", "neg.d",
        "neg.s",
        # Comparison
        "c.eq.d", "c.eq.s", "c.le.d", "c.le.s", "c.lt.s", "c.lt.d",  # "c.gt.s", "c.gt.d",
        "madd.s", "madd.d", "msub.s", "msub.d",
        # Move Floats
        "mov.d", "mov.s", "movf.d", "movf.s", "movt.d", "movt.s", "movn.d", "movn.s",
        "movnzd", "movz.s", "movz.d",
        # Conversion
        "cvt.d.s", "cvt.d.w", "cvt.s.d", "cvt.s.w", "cvt.w.d", "cvt.w.s", "trunc.w.d",
        "trunc.w.s",
        # Math
        "abs.s", "abs.d", "sqrt.s", "sqrt.d", "ceil.w.d", "ceil.w.s", "floor.w.d",
        "floor.w.s", "round.w.d", "round.w.s",
    ]

    pseudoinstructions = [
        # Arithmetic & logical
        "rem", "remu", "mulo", "mulou", "abs", "neg", "negu", "not", "rol", "ror",
        # branches
        "b", "beqz", "bge", "bgeu", "bgt", "bgtu", "ble", "bleu", "blt", "bltu", "bnez",
        # loads
        "la", "li", "ld", "ulh", "ulhu", "ulw",
        # Store
        "sd", "ush", "usw",
        # move
        "move",  # coproc: "mfc1.d",
        # comparisons
        "sgt", "sgtu", "sge", "sgeu", "sle", "sleu", "sne", "seq",
        # --- Floats -----------------------------------------------------
        # load-store
        "l.d", "l.s", "s.d", "s.s",
    ]

    directives = [
        ".align", ".ascii", ".asciiz", ".byte", ".data", ".double", ".extern", ".float",
        ".globl", ".half", ".kdata", ".ktext", ".space", ".text", ".word",
    ]

    deprecated = [
        "beql", "bnel", "bgtzl", "bgezl", "bltzl", "blezl", "bltzall", "bgezall",
    ]

    tokens = {
        'root': [
            (r'\s+', Whitespace),
            (r'#.*', Comment),
            (r'"', String, 'string'),
            # Greedy on purpose: a lazy `+?` would stop after the first digit
            # and emit every multi-digit literal as one token per digit.
            (r'-?[0-9]+', Keyword.Constant),
            (r'\w*:', Name.Function),
            (words(deprecated, suffix=r'\b'), Keyword.Pseudo),  # need warning face
            # Keywords are tried before pseudo-instructions: `\b` also matches
            # in front of '.', so the pseudo-instructions "abs" and "neg"
            # would otherwise consume the start of "abs.s", "neg.d", etc.
            # No word appears in both lists, so nothing else is affected.
            (words(keywords, suffix=r'\b'), Keyword),
            (words(pseudoinstructions, suffix=r'\b'), Name.Variable),
            # Coprocessor load/store/move forms such as lwc1, swc1, mfc1, mtc1.
            (r'[slm][ftwd]c[0-9]([.]d)?', Keyword),
            # Registers: numbered ($0-$31, $f0-$f31) and named ($t0, $sp, ...).
            (r'\$(f?[0-2][0-9]|f?3[01]|[ft]?[0-9]|[vk][01]|a[0-3]|s[0-7]'
             r'|[gsf]p|ra|at|zero)', Keyword.Type),
            (words(directives, suffix=r'\b'), Name.Entity),  # Preprocessor?
            (r':|,|;|\{|\}|=>|@|\$|=', Name.Builtin),
            (r'\w+', Text),
            # Catch-all so that no input character produces an error token.
            (r'.', Text),
        ],
        'string': [
            (r'\\.', String.Escape),
            (r'"', String, '#pop'),
            (r'[^\\"]+', String),
        ],
    }
84 changes: 84 additions & 0 deletions tests/examplefiles/mips/function_with_stack.mips
@@ -0,0 +1,84 @@
# Simple routine to demo functions
# USING a stack in this example to preserve
# values of calling function

# ------------------------------------------------------------------

.text

# Program entry point.
# Loads x and y from memory, calls fun(x), keeps the result in $s1,
# then prints msg1, the result and a newline before exiting.
.globl main
main:
# Register assignments
# $s0 = x
# $s1 = y

# Initialize registers
lw $s0, x # Reg $s0 = x
lw $s1, y # Reg $s1 = y

# Call function
move $a0, $s0 # Argument 1: x ($s0)
jal fun # Save return address in $ra, and jump to fun
move $s1,$v0 # Return value is in $v0. Copy it to y ($s1)

# Print msg1
li $v0, 4 # print_string syscall code = 4
la $a0, msg1 # Address of the string to print in $a0
syscall

# Print result (y)
li $v0,1 # print_int syscall code = 1
move $a0, $s1 # Load integer to print in $a0
syscall

# Print newline
li $v0,4 # print_string syscall code = 4
la $a0, lf # Address of the newline string in $a0
syscall

# Exit
li $v0,10 # exit syscall code = 10
syscall

# ------------------------------------------------------------------

# FUNCTION: int fun(int a)
# Computes 3*a + 5.
# Arguments are stored in $a0
# Return value is stored in $v0
# Return address is stored in $ra (put there by jal instruction)
# Typical function operation is:
#   1. push the registers the function overwrites onto the stack
#   2. do the computation
#   3. put the result in $v0
#   4. pop the saved registers in the opposite order
#   5. return with jr $ra

fun: # This function overwrites $s0 and $s1
# We should save those on the stack
# This is PUSH'ing onto the stack
addi $sp,$sp,-4 # Make room for one word on the stack
sw $s0,0($sp) # Save $s0
addi $sp,$sp,-4 # Make room for one more word
sw $s1,0($sp) # Save $s1

# Do the function math
li $s0, 3 # $s0 = 3 (the multiplier)
mul $s1,$s0,$a0 # s1 = 3*$a0 (i.e. 3*a)
addi $s1,$s1,5 # s1 = 3*a+5

# Save the return value in $v0
move $v0,$s1

# Restore saved register values from stack in opposite order
# This is POP'ing from the stack
lw $s1,0($sp) # Restore $s1
addi $sp,$sp,4 # Release its stack word
lw $s0,0($sp) # Restore $s0
addi $sp,$sp,4 # Release its stack word

# Return from function
jr $ra # Jump to addr stored in $ra

# ------------------------------------------------------------------

# Start .data segment (data!)
.data
x: .word 5 # Input value; main passes it to fun
y: .word 0 # Loaded into $s1 by main; the result is never stored back here
msg1: .asciiz "y=" # Text printed before the result
lf: .asciiz "\n" # Newline printed after the result