gipu/assembler/assembler.py

466 lines
14 KiB
Python
Raw Normal View History

2017-05-15 11:49:11 +01:00
import sys
import re
import struct
2017-05-16 16:39:49 +01:00
import IPython
2017-05-17 10:01:47 +01:00
import copy
2017-05-16 16:39:49 +01:00
2017-05-17 10:01:47 +01:00
class AssemblerException(Exception):
    """Base class of every error defined by the assembler."""


class InvalidRegister(AssemblerException):
    """Error whose message names an invalid register."""

    def __init__(self, register):
        message = "Invalid register: {}".format(register)
        super().__init__(message)


class InvalidOperation(AssemblerException):
    """Error whose message names an invalid operation (opcode mnemonic)."""

    def __init__(self, operation):
        message = "Invalid operation: {}".format(operation)
        super().__init__(message)


class ExpectedImmediate(AssemblerException):
    """Error raised when an operand had to be an immediate but was not."""

    def __init__(self, value):
        message = "Expected immediate, got {}".format(value)
        super().__init__(message)


class ExpectedRegister(AssemblerException):
    """Error raised when an operand had to be a register but was not."""

    def __init__(self, value):
        message = "Expected register, got {}".format(value)
        super().__init__(message)


class IPOverwrite(AssemblerException):
    """Error raised when an instruction uses the IP register as an operand."""

    def __init__(self, instruction=None):
        # The offending instruction is appended only when one was supplied
        # (and is truthy).
        message = "IP can't be overwritten."
        if instruction:
            message = "IP can't be overwritten. Instruction: {}".format(
                instruction)
        super().__init__(message)


class InvalidValue(AssemblerException):
    """Error raised when a part of an instruction cannot be encoded."""

    def __init__(self, instruction):
        message = "Invalid value while assembling: {}".format(instruction)
        super().__init__(message)
2017-05-18 19:53:28 +01:00
2017-05-16 16:39:49 +01:00
class VMAssembler:
    """
    Encodes parsed VM instructions into bytecode.

    The encoded bytes are accumulated in ``assembled_code``. On construction
    the opcode values of the module-level ``ops`` table are shuffled with the
    given key (see ``encrypt_ops``).
    """

    def __init__(self, key):
        self.assembled_code = bytearray()
        self.encrypt_ops(key)

    def parse(self, instruction):
        """Dispatch ``instruction`` to the encoder named by its opcode's ``method``."""
        action = getattr(self, "{}".format(instruction.opcode.method))
        action(instruction)

    def process_code_line(self, line):
        """
        Parse one line of assembly, echo it to stdout and encode it.

        Raises AssemblerException when the line holds no word characters.
        """
        components = [x for x in re.split(r'\W', line) if x]
        if not components:
            raise AssemblerException(
                "Empty instruction line: {!r}".format(line))
        instruction = VMInstruction(components[0], components[1:])
        sys.stdout.write(str(instruction) + "\n")
        self.parse(instruction)

    @staticmethod
    def _operands(instruction, count):
        """Return the first ``count`` operands, or raise if there are too few."""
        args = instruction.args
        if len(args) < count:
            raise AssemblerException(
                "{} expects {} operand(s), got {}".format(
                    instruction.opcode.name, count, len(args)))
        return args[:count]

    @staticmethod
    def _forbid_ip(reg, instruction):
        """Raise when ``reg`` is missing or is the IP register."""
        if reg is None:
            raise ExpectedRegister(reg)
        if reg.name == "ip":
            raise IPOverwrite(instruction)

    @staticmethod
    def _require_immediate(imm):
        """Raise ExpectedImmediate unless ``imm`` is an immediate component."""
        if imm is None or not imm.isimm():
            raise ExpectedImmediate(imm)

    @staticmethod
    def _require_register(reg):
        """Raise ExpectedRegister unless ``reg`` is a register component."""
        if reg is None or not reg.isreg():
            raise ExpectedRegister(reg)

    @staticmethod
    def _encode(instruction, *encoders):
        """
        Call every encoder and return the produced byte strings.

        Raises InvalidValue when an encoder yields nothing or when the value
        does not fit the requested width / cannot be converted.
        """
        try:
            chunks = [encode() for encode in encoders]
        except (struct.error, ValueError) as err:
            raise InvalidValue(instruction) from err
        if not all(chunks):
            raise InvalidValue(instruction)
        return chunks

    def _append(self, instruction, *encoders):
        """Encode all parts of ``instruction`` and append them to the output."""
        self.assembled_code += b"".join(self._encode(instruction, *encoders))

    def imm2reg(self, instruction):
        """
        Intel syntax -> REG, IMM

        Emits: opcode (1 byte), register (1 byte), immediate (2 bytes, LE).
        """
        reg, imm = self._operands(instruction, 2)
        self._forbid_ip(reg, instruction)
        self._require_immediate(imm)
        self._require_register(reg)
        self._append(instruction, instruction.opcode.uint8, reg.uint8,
                     imm.uint16)

    def reg2reg(self, instruction):
        """
        Intel syntax -> DST_REG, SRC_REG

        Emits: opcode (1 byte) followed by one byte holding the destination
        register in the high nibble and the source register in the low one.
        """
        dst_reg, src_reg = self._operands(instruction, 2)
        self._forbid_ip(dst_reg, instruction)
        self._forbid_ip(src_reg, instruction)
        self._require_register(dst_reg)
        self._require_register(src_reg)
        opcode_bytes, dst_bytes, src_bytes = self._encode(
            instruction, instruction.opcode.uint8, dst_reg.uint8,
            src_reg.uint8)
        try:
            byte_with_nibbles = struct.pack(
                "<B", (dst_bytes[0] << 4) | (src_bytes[0] & 0b00001111))
        except struct.error as err:
            # destination register number does not fit in a nibble
            raise InvalidValue(instruction) from err
        self.assembled_code += opcode_bytes + byte_with_nibbles

    def reg2imm(self, instruction):
        """
        Intel syntax -> IMM, REG

        Emits: opcode (1 byte), immediate (2 bytes, LE), register (1 byte).
        """
        imm, reg = self._operands(instruction, 2)
        self._forbid_ip(reg, instruction)
        self._require_immediate(imm)
        self._require_register(reg)
        self._append(instruction, instruction.opcode.uint8, imm.uint16,
                     reg.uint8)

    def byt2reg(self, instruction):
        """
        Intel syntax -> REG, [BYTE]IMM

        Emits: opcode (1 byte), register (1 byte), immediate (1 byte).
        """
        reg, imm = self._operands(instruction, 2)
        self._forbid_ip(reg, instruction)
        self._require_immediate(imm)
        self._require_register(reg)
        self._append(instruction, instruction.opcode.uint8, reg.uint8,
                     imm.uint8)

    def regonly(self, instruction):
        """
        Instruction with only an argument: a register

        Emits: opcode (1 byte), register (1 byte).
        """
        (reg,) = self._operands(instruction, 1)
        self._forbid_ip(reg, instruction)
        self._require_register(reg)
        self._append(instruction, instruction.opcode.uint8, reg.uint8)

    def immonly(self, instruction):
        """
        Instruction with only an argument: an immediate

        Emits: opcode (1 byte), immediate (2 bytes, LE).
        """
        (imm,) = self._operands(instruction, 1)
        self._require_immediate(imm)
        self._append(instruction, instruction.opcode.uint8, imm.uint16)

    def jump(self, instruction):
        """
        Encode a jump; the last letter of the mnemonic selects the form.

        ``...R`` jumps take a register and are encoded like ``regonly``.
        ``...I`` jumps take either a function name, which is replaced by that
        function's offset, or a literal immediate; both are encoded like
        ``immonly``.
        """
        mnemonic = str(instruction.opcode.name)
        form = mnemonic[-1:].casefold()
        if form == "r":
            # Register operands are never function names: encode directly.
            self.regonly(instruction)
            return
        if form != "i":
            raise AssemblerException(
                "Unknown jump form: {}".format(mnemonic))

        (target,) = self._operands(instruction, 1)
        # Component names are casefolded, so compare case-insensitively.
        wanted = str(getattr(target, "name", "")).casefold()
        section = next(
            (x for x in functions if str(x.name).casefold() == wanted), None)
        if section is not None:
            # TODO this is due the VMComponent structure: isimm() inspects the
            # name, so the name has to look like an immediate as well. It is
            # kept a string so the instruction can still be printed.
            target.name = str(section.offset)
            target.value = section.offset
        self.immonly(instruction)

    def single(self, instruction):
        """
        Instruction with no arguments

        Emits: opcode (1 byte).
        """
        self._append(instruction, instruction.opcode.uint8)

    def encrypt_ops(self, key):
        """
        Reassign every opcode value according to a permutation derived from
        ``key`` and print the old -> new mapping.

        The permutation is the RC4 key-scheduling algorithm run over the
        UTF-8 bytes of ``key``; opcode number ``i`` receives the ``i``-th
        entry of the permuted table.

        Raises AssemblerException when ``key`` is empty.
        """
        key_ba = bytearray(key, 'utf-8')
        if not key_ba:
            raise AssemblerException("The opcodes key must not be empty")
        old_values = [o.value for o in ops]

        # RC4 KSA! :-P
        arr = list(range(256))
        j = 0
        for i in range(len(arr)):
            # Index by the byte length: len(key) counts characters and would
            # ignore trailing bytes of a non-ASCII key.
            j = (j + arr[i] + key_ba[i % len(key_ba)]) % len(arr)
            arr[i], arr[j] = arr[j], arr[i]

        for i, o in enumerate(ops):
            o.set_value(arr[i])

        for old, n in zip(old_values, ops):
            print("{} : {}->{}".format(n.name, hex(old), hex(n.value)))
2017-05-16 16:39:49 +01:00
class VMComponent:
    """
    Represents a register, operation or an immediate the VM recognizes

    Attributes:
        name: casefolded textual name of the component.
        value: an int, or a string holding a decimal or ``0x``-prefixed
            hexadecimal number.
        method: for operations, the name of the VMAssembler method that
            encodes the instruction; None otherwise.
    """

    # Strings made only of decimal digits.
    _DECIMAL_RE = re.compile("^[0-9]+$")

    def __init__(self, name, value, method=None):
        self.name = name.casefold()
        self.value = value
        self.method = method

    def __repr__(self):
        return "{}".format(self.name)

    def set_name(self, name):
        self.name = name

    def set_value(self, value):
        self.value = value

    def _as_int(self):
        """Return ``value`` as an int, or None when it is not a number."""
        value = self.value
        if isinstance(value, int):
            return value
        if not isinstance(value, str):
            return None
        try:
            if value.startswith("0x"):
                return int(value, 16)
            if self._DECIMAL_RE.match(value):  # only numbers
                return int(value)
        except ValueError:
            # e.g. "0xzz": looks like hex but is not a number
            return None
        return None

    def uint8(self):
        """
        Return ``value`` packed as one unsigned byte, or None when ``value``
        is not a number. Raises struct.error when it does not fit.
        """
        number = self._as_int()
        if number is None:
            return None
        return struct.pack("<B", number)

    def uint16(self):
        """
        Return ``value`` packed as a little-endian unsigned 16 bit integer,
        or None when ``value`` is not a number. Raises struct.error when it
        does not fit.
        """
        number = self._as_int()
        if number is None:
            return None
        return struct.pack("<H", number)

    def isreg(self):
        """Tell whether ``name`` is one of the known register names."""
        return self.name in [x.casefold() for x in reg_names]

    def isop(self):
        """Tell whether ``name`` is one of the known operation mnemonics."""
        return self.name in [x[0].casefold() for x in op_names]

    def isimm(self):
        """Tell whether ``name`` looks like an immediate value."""
        return bool(immediate_re.match(str(self.name)))
2017-05-16 16:39:49 +01:00
class VMInstruction:
    """
    Represents an instruction the VM recognizes.
    e.g: MOVI [R0, 2]
          ^      ^
        opcode  args

    Attributes:
        opcode: the matching entry of the module-level ``ops`` table.
        args: one component per operand (immediate, register or function).
        size: number of bytes the instruction occupies once assembled.

    Raises:
        InvalidOperation: the mnemonic is not in ``ops``.
        InvalidRegister: an operand looks like a register but is not known.
        AssemblerException: an operand is neither an immediate, a register
            nor the name of a known function.
    """

    # Encoders whose output is shorter than "1 + 1 per register + 2 per
    # immediate": reg2reg packs both registers into a single byte and
    # byt2reg emits a one-byte immediate (see VMAssembler).
    _SIZE_BY_METHOD = {"reg2reg": 2, "byt2reg": 3}

    def __init__(self, opcode, instr_list):
        self.opcode = None
        self.args = []
        self.size = 1

        # Names in `ops` and `regs` are casefolded, so look them up the same
        # way; this lets sources use either MOVI or movi.
        mnemonic = opcode.casefold()
        self.opcode = next((x for x in ops if x.name == mnemonic), None)
        if self.opcode is None:
            raise InvalidOperation(opcode)

        for el in instr_list:
            if immediate_re.match(el):
                # directly append the immediate
                self.args.append(VMComponent(el, el))
                self.size += 2
            elif register_re.match(el):
                # reuse the VM component of the register
                reg_name = el.casefold()
                reg_comp = next((x for x in regs if x.name == reg_name), None)
                if reg_comp is None:
                    raise InvalidRegister(el)
                self.args.append(reg_comp)
                self.size += 1
            else:
                # section
                print(el)
                sec_comp = next((x for x in functions if x.name == el), None)
                if sec_comp is None:
                    raise AssemblerException(
                        "Unknown operand: {}".format(el))
                self.args.append(VMComponent(
                    sec_comp.name, sec_comp.offset))
                self.size += 2

        # Function offsets are sums of these sizes, so they have to match
        # what the assembler really emits.
        self.size = self._SIZE_BY_METHOD.get(self.opcode.method, self.size)

    def __repr__(self):
        # str(): a jump rewrites the name of its operand with an offset
        return "{} {}".format(
            self.opcode.name, ", ".join([str(x.name) for x in self.args]))
2017-05-15 11:49:11 +01:00
2017-05-18 19:53:28 +01:00
class VMFunction:
    """
    A code section (or "label", such as "main:") of the assembled program.

    Keeps the section name, the first and last line index of its body, its
    size and its offset. ``labels`` starts out as an empty list.
    """

    def __init__(self, name, line_start):
        self.name = name
        self.line_start = line_start
        self.line_end = 0
        self.size = 0
        self.offset = 0
        self.labels = []

    def set_size(self, size):
        self.size = size

    def set_offset(self, offset):
        self.offset = offset

    def set_line_start(self, start):
        self.line_start = start

    def set_line_end(self, end):
        self.line_end = end

    def __repr__(self):
        # every numeric field is shown in hexadecimal
        numbers = (self.line_start, self.line_end, self.size, self.offset)
        return "{} | ls: {}, le: {}, s: {}, o: {}".format(
            self.name, *[hex(number) for number in numbers])
2017-05-18 19:53:28 +01:00
2017-05-18 14:41:05 +01:00
# One [mnemonic, encoder] entry per operation: the second item is the name
# of the VMAssembler method that encodes the instruction. The order matters,
# the position of an entry is used as the initial opcode value.
op_names = [list(entry) for entry in (
    ("MOVI", "imm2reg"),
    ("MOVR", "reg2reg"),
    ("LOAD", "imm2reg"),
    ("STOR", "reg2imm"),
    ("ADDI", "imm2reg"),
    ("ADDR", "reg2reg"),
    ("SUBI", "imm2reg"),
    ("SUBR", "reg2reg"),
    ("ANDB", "byt2reg"),
    ("ANDW", "imm2reg"),
    ("ANDR", "reg2reg"),
    ("YORB", "byt2reg"),
    ("YORW", "imm2reg"),
    ("YORR", "reg2reg"),
    ("XORB", "byt2reg"),
    ("XORW", "imm2reg"),
    ("XORR", "reg2reg"),
    ("NOTR", "regonly"),
    ("MULI", "imm2reg"),
    ("MULR", "reg2reg"),
    ("DIVI", "imm2reg"),
    ("DIVR", "reg2reg"),
    ("PUSH", "regonly"),
    ("POOP", "regonly"),
    ("CMPI", "imm2reg"),
    ("CMPR", "reg2reg"),
    ("JMPI", "jump"),
    ("JMPR", "jump"),
    ("JPAI", "jump"),
    ("JPAR", "jump"),
    ("JPBI", "jump"),
    ("JPBR", "jump"),
    ("JPEI", "jump"),
    ("JPER", "jump"),
    ("JPNI", "jump"),
    ("JPNR", "jump"),
    ("RETN", "single"),
    ("SHIT", "single"),
    ("NOPE", "single"),
    ("GRMN", "single"),
)]

# Register names; the position of a name is used as the register number.
reg_names = "R0 R1 R2 R3 S0 S1 S2 S3 IP BP SP".split()
2017-05-18 14:41:05 +01:00
# Operation components: the initial value is the position in op_names.
ops = [
    VMComponent(mnemonic, number, method)
    for number, (mnemonic, method) in enumerate(op_names)
]
# Register components: the value is the position in reg_names.
regs = [
    VMComponent(reg_name.casefold(), number)
    for number, reg_name in enumerate(reg_names)
]
# Filled by parse_functions() with the functions found in the source.
functions = []
# All patterns are raw strings so the backslashes reach the regex engine
# without relying on invalid string escapes.

# 1: opcode 2+: args
instruction_re = re.compile(
    r"([\w]{4})(?:\ +(?:([\w]+)\ *(?:,[\ ]*([\w]+))*))?")
# "def name:" -> group 1 is the function name
function_re = re.compile(r"(?:def\ )([a-zA-Z]*)\:")
# decimal literal, or 0x-prefixed hexadecimal literal (digits a-f included)
immediate_re = re.compile(r"(?:0x[0-9a-fA-F]+|[0-9]+)$")
# exactly the names listed in reg_names: r0-r3, s0-s3, ip, bp, sp
register_re = re.compile(r"(^[rRsS]{1}[0-3]{1}$)|(^[iIbBsS]{1}[pP]{1}$)")
labeldef_re = re.compile(r"([a-zA-Z]*)\:")
labelcall_re = re.compile(
    r"(?:[jJ]{1}[pPmM]{1}[pPaAbBeEnN]{1}[iIrR]{1}\ *)([\w]*)")
def parse_functions(lines):
    """
    Register every function declared in ``lines`` and compute its layout.

    A first pass appends one VMFunction per "def name:" line to the
    module-level ``functions`` list and records the line range of its body.
    A second pass sums the size of the instructions of each function. It has
    to run after the first one because an instruction may refer to a function
    declared further down. Finally "main" is moved to the first position and
    the offsets are computed.

    Nothing is registered when ``lines`` declares no function.

    Raises AssemblerException when a line is not a valid instruction.
    """
    declared = []
    current = None
    # first parsing to get functions' names
    for index, line in enumerate(lines):
        match = function_re.match(line)
        if not match:
            continue
        if current is not None:
            current.set_line_end(index - 1)
        current = VMFunction(match.group(1), index + 1)
        declared.append(current)
        functions.append(current)
    if current is None:
        # no function at all: the caller reports the missing main
        return
    current.set_line_end(len(lines) - 1)

    # calculating sizes and offsets
    pending = iter(declared)
    current = None
    current_size = 0
    for line in lines:
        if function_re.match(line):
            if current is not None:
                current.set_size(current_size)
            # declarations are met in the same order as in the first pass
            current = next(pending)
            current_size = 0
            continue
        match = instruction_re.match(line)
        if match is None:
            raise AssemblerException(
                "Cannot parse instruction: {}".format(line))
        components = [x for x in match.groups() if x is not None]
        current_size += VMInstruction(components[0], components[1:]).size
    current.set_size(current_size)

    # main has to be the first entry: swap it in if it is not
    for index, function in enumerate(functions):
        if function.name == "main":
            if index != 0:
                functions[0], functions[index] = \
                    functions[index], functions[0]
            break
    calc_fun_offsets()
def calc_fun_offsets():
    """
    Give every function but the first one its offset: the summed size of
    all the functions placed before it in ``functions``.

    The offset of the first function is left untouched.
    """
    running_total = 0
    for previous, following in zip(functions, functions[1:]):
        running_total += previous.size
        following.set_offset(running_total)
2017-05-15 11:49:11 +01:00
2017-05-16 17:47:51 +01:00
2017-05-15 11:49:11 +01:00
def main():
    """
    Command line entry point: ``opcodes_key file_to_assemble output``.

    Builds the assembler with the key, reads the source file dropping blank
    lines, lays out its functions, assembles them in the order of the
    ``functions`` list and writes the resulting bytes to the output file.
    Prints the usage and returns when arguments are missing; writes a message
    to stderr and returns when the source has no "main" function.
    """
    if len(sys.argv) < 4:
        usage = "Usage: {} opcodes_key file_to_assemble output".format(
            sys.argv[0])
        print(usage)
        return

    key, source_path, output_path = sys.argv[1:4]
    vma = VMAssembler(key)

    with open(source_path, 'r') as f:
        stripped = [raw.strip() for raw in f.readlines()]
    filedata = [text for text in stripped if text]

    # let's parse the whole file for labels
    parse_functions(filedata)
    if not any(section.name == "main" for section in functions):
        sys.stderr.write("No main specified!")
        return

    for section in functions:
        first, last = section.line_start, section.line_end
        for line in filedata[first:last + 1]:
            vma.process_code_line(line)

    with open(output_path, 'wb') as f:
        f.write(vma.assembled_code)
2017-05-15 11:49:11 +01:00
# Run the assembler only when this file is executed as a script.
if __name__ == '__main__':
    main()