Assembler OO

This commit is contained in:
Giulio De Pasquale 2017-05-16 17:39:49 +02:00
parent 8ad7634be1
commit 9d888723b7

View File

@ -1,6 +1,160 @@
import sys import sys
import re import re
import struct import struct
import IPython
class VMAssembler:
    """
    Turns parsed VM instructions into bytecode, accumulated in
    ``assembled_code``.

    Opcode handlers are methods named after the lowercase opcode so that
    ``parse`` can look them up by name. Every handler returns True when
    bytes were emitted and False (after reporting on stderr) otherwise.
    """

    def __init__(self):
        # One buffer per assembler: as a class attribute the bytearray was
        # mutated in place by "+=" and therefore shared by all instances.
        self.assembled_code = bytearray()

    def parse(self, instruction):
        """
        Runs the handler named after the instruction's opcode.

        Returns the handler's result, or False when this class has no
        handler for that opcode.
        """
        action = getattr(self, "{}".format(instruction.opcode.name), None)
        if not callable(action):
            sys.stderr.write(
                "ERROR WHILE ASSEMBLING UNKNOWN OPERATION: {}\n".format(
                    instruction.opcode.name))
            return False
        return action(instruction)

    def process_code_line(self, line):
        """
        Splits a source line into opcode and operands and assembles it.

        Returns the handler's result, or False for a line without tokens.
        """
        components = [x for x in re.split(r'\W', line) if x]
        if not components:
            sys.stderr.write("ERROR WHILE ASSEMBLING EMPTY LINE\n")
            return False
        sys.stdout.write("CODE: ")
        instruction = VMInstruction(components[0], components[1:])
        return self.parse(instruction)

    def _operands(self, instruction):
        """
        Returns the first two operands of the instruction, or None (after
        reporting on stderr) when fewer than two were given.
        """
        if len(instruction.args) < 2:
            sys.stderr.write("ERROR WHILE ASSEMBLING MISSING OPERANDS\n")
            return None
        return instruction.args[0], instruction.args[1]

    def _emit(self, *chunks):
        """
        Appends the encoded chunks to the output, unless one of them could
        not be encoded (is None), in which case nothing is appended.
        """
        if any(chunk is None for chunk in chunks):
            sys.stderr.write(
                "ERROR WHILE ASSEMBLING UNKNOWN VALUES\n")
            return False
        for chunk in chunks:
            self.assembled_code += chunk
        return True

    def imm2reg(self, instruction):
        """
        Intel syntax -> REG, IMM

        Emits opcode.uint8() + reg.uint8() + imm.uint16().
        """
        print(instruction)
        operands = self._operands(instruction)
        if operands is None:
            return False
        reg, imm = operands
        if reg.name == "ip":
            sys.stderr.write("CAN'T MOVI TO IP!\n")
            return False
        return self._emit(instruction.opcode.uint8(), reg.uint8(), imm.uint16())

    def reg2reg(self, instruction):
        """
        Intel syntax -> REG, REG

        Emits opcode.uint8() followed by one byte holding the destination
        register index in the high nibble and the source register index in
        the low nibble.
        """
        print(instruction)
        operands = self._operands(instruction)
        if operands is None:
            return False
        dst, src = operands
        if dst.name == "ip" or src.name == "ip":
            sys.stderr.write("CAN'T MOVR IP!\n")
            return False
        dst_byte = dst.uint8()
        src_byte = src.uint8()
        if dst_byte is None or src_byte is None:
            return self._emit(None)
        if dst_byte[0] > 0xF or src_byte[0] > 0xF:
            # Each index must fit in a nibble to share a single byte.
            return self._emit(None)
        # A packed value of 0 (e.g. "r0, r0") is a valid encoding, so the
        # result is never tested for truthiness.
        packed = struct.pack("<B", (dst_byte[0] << 4) ^ src_byte[0])
        return self._emit(instruction.opcode.uint8(), packed)

    def reg2imm(self, instruction):
        """
        Intel syntax -> IMM, REG

        Emits opcode.uint8() + imm.uint16() + reg.uint8().
        """
        print(instruction)
        operands = self._operands(instruction)
        if operands is None:
            return False
        imm, reg = operands
        if reg.name == "ip":
            sys.stderr.write("CAN'T MOVI TO IP!\n")
            return False
        return self._emit(instruction.opcode.uint8(), imm.uint16(), reg.uint8())

    def imm(self, instruction):
        """
        Placeholder for the immediate-only encoding; not implemented yet,
        so it emits nothing and returns None.
        """
        return

    def movi(self, instruction):
        """Assembles a MOVI instruction through imm2reg."""
        if not self.imm2reg(instruction):
            print("WAT")
            return False
        return True

    def movr(self, instruction):
        """Assembles a MOVR instruction through reg2reg."""
        if not self.reg2reg(instruction):
            print("WAT")
            return False
        return True

    def getm(self, instruction):
        """Assembles a GETM instruction through imm2reg."""
        if not self.imm2reg(instruction):
            print("WAT")
            return False
        return True

    def putm(self, instruction):
        """Assembles a PUTM instruction through reg2imm."""
        if not self.reg2imm(instruction):
            print("WAT")
            return False
        return True

    def addi(self, instruction):
        """Assembles an ADDI instruction through imm2reg."""
        if not self.imm2reg(instruction):
            print("WAT")
            return False
        return True
class VMComponent:
    """
    Represents a register or a operation the VM recognizes

    ``name`` is the textual form and ``value`` is either an int or a
    string holding a decimal or "0x"-prefixed hexadecimal literal.
    """
    name = ""
    value = ""
    # Decimal literals only; hexadecimal ones are detected by their prefix.
    _DECIMAL = re.compile(r"^[0-9]+$")

    def __init__(self, name, value):
        self.name = name
        self.value = value

    def __repr__(self):
        return "{}".format(self.name)

    def _pack(self, fmt):
        """
        Packs ``value`` with the given struct format.

        Returns the packed bytes, or None when the value is not a number
        literal or does not fit the format, so that callers can report it
        instead of crashing.
        """
        value = self.value
        try:
            if isinstance(value, int):
                number = value
            elif not isinstance(value, str):
                return None
            elif value.startswith("0x"):
                number = int(value, 16)
            elif self._DECIMAL.match(value):  # only numbers
                number = int(value)
            else:
                return None
            return struct.pack(fmt, number)
        except (ValueError, struct.error):
            # Malformed hex digits or a value outside the format's range.
            return None

    def uint8(self):
        """Returns the value as one unsigned byte, or None if impossible."""
        return self._pack("<B")

    def uint16(self):
        """
        Returns the value as an unsigned little-endian 16-bit integer, or
        None if impossible.
        """
        return self._pack("<H")
class VMInstruction:
    """
    Represents an instruction the VM recognizes.
    e.g: MOVI [R0, 2]
          ^      ^
       opcode   args

    ``opcode`` is a VMComponent and ``args`` a list of VMComponents, one
    per operand, in source order.
    """

    def __init__(self, opcode, instr_list):
        """
        Builds the instruction from an opcode name and its operand tokens.

        Raises ValueError when the opcode or a register operand is unknown.
        """
        # TODO: raise an exception for invalid immediates as well; for now
        # they are reported later, when they are encoded.
        # Any token starting with a digit is an immediate: register names
        # never do, and malformed numbers are rejected at encoding time.
        immediate_regexp = re.compile(r"^[0-9]")
        found_opcode = value_from_list(ops, opcode)
        if found_opcode is None:
            raise ValueError("unknown opcode: {}".format(opcode))
        opc_name, opc_value = found_opcode
        self.opcode = VMComponent(opc_name, opc_value)
        self.args = []
        for el in instr_list:
            if immediate_regexp.match(el):
                # directly append the immediate
                self.args.append(VMComponent(el, el))
                continue
            # create a VM component for a register
            found_reg = value_from_list(regs, el)
            if found_reg is None:
                raise ValueError("unknown register: {}".format(el))
            reg_name, reg_value = found_reg
            self.args.append(VMComponent(reg_name, reg_value))

    def __repr__(self):
        return "{} {}".format(self.opcode.name, ", ".join([x.name for x in self.args]))
op_names = ["MOVI", op_names = ["MOVI",
"MOVR", "MOVR",
@ -23,215 +177,57 @@ op_names = ["MOVI",
"CALL", "CALL",
"HALT", "HALT",
"NOPE"] "NOPE"]
reg_names = ["R0", "R1", "R2", "R3", "S0", "S1", "S2", "S3", "IP", "BP", "SP"] reg_names = ["R0", "R1", "R2", "R3", "S0", "S1", "S2", "S3", "IP", "BP", "SP"]
section_names = ["DATA:", "CODE:", "STACK:"] section_names = ["DATA:", "CODE:", "STACK:"]
section_flags = {s.casefold(): i + 1 for i, s in enumerate(section_names)} section_flags = {s.casefold(): i + 1 for i, s in enumerate(section_names)}
ops = {s.casefold(): i for i, s in enumerate(op_names)} ops = [VMComponent(s.casefold(), i) for i, s in enumerate(op_names)]
regs = {s.casefold(): i for i, s in enumerate(reg_names)} regs = [VMComponent(s.casefold(), i) for i, s in enumerate(reg_names)]
assembled = bytearray() def value_from_list(fromlist, name):
"""
returns a tuple (name, value) from a list of VMComponents
def to_uint8(data): """
alphanum = re.compile("^[0-9]+$") for el in fromlist:
if isinstance(data, int): if el.name == name:
return struct.pack("<B", data) return (el.name, el.value)
elif data.startswith("0x"):
return struct.pack("<B", int(data, 16))
elif alphanum.match(data): # only numbers
return struct.pack("<B", int(data))
return None return None
def to_uint16(data): def name_from_list(fromlist, value):
alphanum = re.compile("^[0-9]+$") """
if isinstance(data, int): returns a tuple (name, value) from a list of VMComponents
return struct.pack("<H", data) """
elif data.startswith("0x"): for el in fromlist:
return struct.pack("<H", int(data, 16)) if el.value == value:
elif alphanum.match(data): # only numbers return (el.name, el.value)
return struct.pack("<H", int(data))
return None return None
def is_reg(data):
    """Tells whether ``data`` equals the casefolded name of a register."""
    return any(data == name.casefold() for name in reg_names)
def movi(op_str, dst_str, src_str):
    """
    Assembles "op dst, imm": appends the opcode byte, the destination
    register byte and the 16-bit immediate to ``assembled``.

    Returns True on success; on failure reports on stderr and returns
    False without appending anything.
    """
    global assembled
    # Guard clauses: reject unknown registers and IP before encoding.
    if not is_reg(dst_str):
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN REGISTER: {}\n".format(dst_str))
        return False
    if dst_str == "ip":
        sys.stderr.write("CAN'T MOVI TO IP!\n")
        return False

    encoded = (
        to_uint8(ops[op_str]),
        to_uint8(regs[dst_str]),
        to_uint16(src_str),
    )
    if not all(encoded):
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN VALUES\n")
        return False

    assembled += b"".join(encoded)
    return True
def movr(op_str, dst_str, src_str):
    """
    Assembles "op dst, src" for two registers: appends the opcode byte
    and one byte holding the destination index in the high nibble and the
    source index in the low nibble.

    Returns True on success; on failure reports on stderr and returns
    False without appending anything.
    """
    global assembled
    # Report the first operand that is not a known register.
    for reg_str in (dst_str, src_str):
        if not is_reg(reg_str):
            sys.stderr.write(
                "ERROR WHILE ASSEMBLING UNKNOWN REGISTER: {}\n".format(reg_str))
            return False
    if dst_str == "ip" or src_str == "ip":
        sys.stderr.write("CAN'T MOVR IP!\n")
        return False

    op_val = to_uint8(ops[op_str])
    dst_val = to_uint8(regs[dst_str])
    src_val = to_uint8(regs[src_str])
    if op_val is None or dst_val is None or src_val is None:
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN VALUES\n")
        return False

    # The packed byte may legitimately be 0 (register 0 to register 0), so
    # it must not be validated by truthiness.
    dstsrc_val = to_uint8((dst_val[0] << 4) ^ src_val[0])
    if dstsrc_val is None:
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN VALUES\n")
        return False
    assembled += op_val + dstsrc_val
    return True
def getm(op_str, dst_str, src_str):
    """
    Assembles "op dst, imm": appends the opcode byte, the destination
    register byte and the 16-bit immediate to ``assembled``.

    Returns True on success; on failure reports on stderr and returns
    False without appending anything.
    """
    global assembled
    failure = None
    if not is_reg(dst_str):
        failure = "ERROR WHILE ASSEMBLING UNKNOWN REGISTER: {}\n".format(dst_str)
    elif dst_str == "ip":
        failure = "CAN'T MOVI TO IP!\n"
    else:
        opcode_byte = to_uint8(ops[op_str])
        register_byte = to_uint8(regs[dst_str])
        immediate_word = to_uint16(src_str)
        if opcode_byte and register_byte and immediate_word:
            assembled += opcode_byte + register_byte + immediate_word
        else:
            failure = "ERROR WHILE ASSEMBLING UNKNOWN VALUES\n"

    if failure is not None:
        sys.stderr.write(failure)
        return False
    return True
def putm(op_str, dst_str, src_str):
    """
    Assembles "op imm, src": appends the opcode byte, the 16-bit
    destination immediate and the source register byte to ``assembled``.

    Returns True on success; on failure reports on stderr and returns
    False without appending anything.
    """
    global assembled
    if not is_reg(src_str):
        # The register operand of this instruction is the source, so that
        # is the name to report (the message used to show dst_str).
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN REGISTER: {}\n".format(src_str))
        return False
    if src_str == "ip":
        sys.stderr.write("CAN'T MOVI TO IP!\n")
        return False

    op_val = to_uint8(ops[op_str])
    src_val = to_uint8(regs[src_str])
    dst_val = to_uint16(dst_str)
    if not (op_val and dst_val and src_val):
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN VALUES\n")
        return False
    assembled += op_val + dst_val + src_val
    return True
def addi(op_str, dst_str, src_str):
    """
    Assembles "op dst, imm": appends the opcode byte, the destination
    register byte and the 16-bit immediate to ``assembled``.

    Returns True on success; on failure reports on stderr and returns
    False without appending anything.
    """
    global assembled
    if not is_reg(dst_str):
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN REGISTER: {}\n".format(dst_str))
        return False
    # The guard must look at the destination register: src_str is the
    # immediate, so testing it let "addi ip, N" through.
    if dst_str == "ip":
        sys.stderr.write("CAN'T MOVI TO IP!\n")
        return False

    op_val = to_uint8(ops[op_str])
    src_val = to_uint16(src_str)
    dst_val = to_uint8(regs[dst_str])
    if not (op_val and dst_val and src_val):
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN VALUES\n")
        return False
    assembled += op_val + dst_val + src_val
    return True
def assemble_code(line):
    """
    Assembles one line of the code section.

    The line is split on non-word characters into an operation name and
    its operands, then handed to the handler for that operation.

    Returns False when the line is empty, the operation is unknown, the
    operands are missing or the handler fails; True otherwise (including
    known operations that have no handler yet).
    """
    sys.stdout.write("CODE: ")
    instruction = [x for x in re.split(r'\W', line) if x]
    if not instruction:
        sys.stderr.write("ERROR WHILE ASSEMBLING EMPTY LINE\n")
        return False

    op_name = instruction[0].casefold()
    if op_name not in [on.casefold() for on in op_names]:
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN OPERATION: {}\n".format(op_name))
        return False
    sys.stdout.write("{} {}\n".format(op_name, ", ".join(instruction[1::])))

    # Built on every call so the handlers are resolved at call time.
    handlers = {
        "movi": movi,
        "movr": movr,
        "getm": getm,
        "putm": putm,
        "addi": addi,
    }
    handler = handlers.get(op_name)
    if handler is None:
        # Known operation without an assembler yet: nothing is emitted.
        return True
    if len(instruction) < 3:
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING MISSING OPERANDS: {}\n".format(op_name))
        return False
    # Propagate the handler's verdict instead of always reporting success.
    return handler(op_name, instruction[1], instruction[2])
def assemble_data(line): def assemble_data(line):
sys.stdout.write("DATA:\t") sys.stdout.write("DATA:\t")
sys.stdout.write(line.strip(",") + "\n") sys.stdout.write(line.strip(",") + "\n")
def main(): def main():
global assembled
if len(sys.argv) < 3: if len(sys.argv) < 3:
print("Usage: {} file_to_assemble output".format(sys.argv[0])) print("Usage: {} file_to_assemble output".format(sys.argv[0]))
return return
vma = VMAssembler()
with open(sys.argv[1], 'r') as f: with open(sys.argv[1], 'r') as f:
gen = (line.casefold().strip("\n") for line in f if line != "\n") gen = (line.casefold().strip("\n") for line in f if line != "\n")
flag = None flag = None
for line in gen: for line in gen:
if line.startswith(tuple([sn.casefold() for sn in section_names])): if line in section_flags:
flag = section_flags[line] flag = section_flags[line]
continue continue
if flag == section_flags["data:"]: if flag == section_flags["data:"]:
assemble_data(line) vma.process_code_line(line)
elif flag == section_flags["code:"]: elif flag == section_flags["code:"]:
assemble_code(line) vma.process_code_line(line)
if not flag: if not flag:
sys.stderr.write( sys.stderr.write(
"Nothing was assembled! Did you use the section delimiters?\n") "Nothing was assembled! Did you use the section delimiters?\n")
with open(sys.argv[2], 'wb') as f: with open(sys.argv[2], 'wb') as f:
f.write(assembled) f.write(vma.assembled_code)
if __name__ == '__main__': if __name__ == '__main__':
main() main()