Assembler OO
This commit is contained in:
parent
8ad7634be1
commit
9d888723b7
@ -1,6 +1,160 @@
|
|||||||
import sys
|
import sys
|
||||||
import re
|
import re
|
||||||
import struct
|
import struct
|
||||||
|
import IPython
|
||||||
|
|
||||||
|
|
||||||
|
class VMAssembler:
    """
    Turns parsed VM instructions into bytecode.

    Every supported opcode is a method named after the (casefolded) opcode;
    parse() dispatches to it by name. Encoded bytes are appended to
    ``assembled_code``.
    """

    def __init__(self):
        # Per-instance buffer. A class-level bytearray would be one shared
        # object that ``+=`` mutates in place for every assembler instance.
        self.assembled_code = bytearray()

    def parse(self, instruction):
        """
        Dispatch instruction to the method named after its opcode.

        Returns what the opcode method returns, or False when the assembler
        has no method for that opcode.
        """
        action = getattr(self, "{}".format(instruction.opcode.name), None)
        if action is None:
            sys.stderr.write(
                "ERROR WHILE ASSEMBLING UNSUPPORTED OPERATION: {}\n".format(
                    instruction.opcode.name))
            return False
        return action(instruction)

    def process_code_line(self, line):
        """
        Tokenize one line of the code section and assemble it.

        The first token is the opcode, the remaining ones its operands.
        Returns the result of parse(), or False for a line without tokens.
        """
        components = [x for x in re.split(r'\W+', line) if x]
        if not components:
            # Nothing to assemble (e.g. a line made only of punctuation).
            return False
        sys.stdout.write("CODE: ")

        instruction = VMInstruction(components[0], components[1:])
        return self.parse(instruction)

    def _operands(self, instruction):
        """Echo the instruction and return its first two operands, or None."""
        # The echo completes the "CODE: " prefix written by process_code_line.
        print(instruction)
        if len(instruction.args) < 2:
            sys.stderr.write("ERROR WHILE ASSEMBLING MISSING OPERANDS\n")
            return None
        return instruction.args[0], instruction.args[1]

    def _emit_reg_imm(self, opcode, reg, imm, reg_first):
        """Append opcode plus a register byte and a 16-bit immediate."""
        if reg.name == "ip":
            sys.stderr.write("CAN'T MOVI TO IP!\n")
            return False
        # Pack each part once; the packers return None for unknown values.
        op_bytes, reg_bytes, imm_bytes = opcode.uint8(), reg.uint8(), imm.uint16()
        if op_bytes is None or reg_bytes is None or imm_bytes is None:
            sys.stderr.write(
                "ERROR WHILE ASSEMBLING UNKNOWN VALUES\n")
            return False
        if reg_first:
            self.assembled_code += op_bytes + reg_bytes + imm_bytes
        else:
            self.assembled_code += op_bytes + imm_bytes + reg_bytes
        return True

    def imm2reg(self, instruction):
        """
        Intel syntax -> REG, IMM

        Emits opcode.uint8() + reg.uint8() + imm.uint16().
        Returns True on success, False (after a message on stderr) otherwise.
        """
        operands = self._operands(instruction)
        if operands is None:
            return False
        reg, imm = operands
        return self._emit_reg_imm(instruction.opcode, reg, imm, reg_first=True)

    def reg2reg(self, instruction):
        """
        Intel syntax -> REG, REG

        Emits the opcode byte followed by one byte holding the destination
        register in the high nibble and the source register in the low one.
        Returns True on success, False (after a message on stderr) otherwise.
        """
        operands = self._operands(instruction)
        if operands is None:
            return False
        dst, src = operands
        if dst.name == "ip" or src.name == "ip":
            sys.stderr.write("CAN'T MOVR IP!\n")
            return False
        op_bytes, dst_bytes, src_bytes = (
            instruction.opcode.uint8(), dst.uint8(), src.uint8())
        if op_bytes is None or dst_bytes is None or src_bytes is None:
            sys.stderr.write(
                "ERROR WHILE ASSEMBLING UNKNOWN VALUES\n")
            return False
        if dst_bytes[0] > 0xF or src_bytes[0] > 0xF:
            # Each register must fit in a nibble to share a single byte.
            sys.stderr.write(
                "ERROR WHILE ASSEMBLING UNKNOWN VALUES\n")
            return False
        self.assembled_code += op_bytes + bytes(
            [(dst_bytes[0] << 4) ^ src_bytes[0]])
        return True

    def reg2imm(self, instruction):
        """
        Intel syntax -> IMM, REG

        Emits opcode.uint8() + imm.uint16() + reg.uint8().
        Returns True on success, False (after a message on stderr) otherwise.
        """
        operands = self._operands(instruction)
        if operands is None:
            return False
        imm, reg = operands
        return self._emit_reg_imm(instruction.opcode, reg, imm, reg_first=False)

    def imm(self, instruction):
        """Placeholder for immediate-only instructions; emits nothing yet."""
        return

    def _run(self, encoder, instruction):
        """Run encoder on instruction and normalize its result to a bool."""
        if not encoder(instruction):
            print("WAT")
            return False
        return True

    def movi(self, instruction):
        """Assemble MOVI (REG, IMM)."""
        return self._run(self.imm2reg, instruction)

    def movr(self, instruction):
        """Assemble MOVR (REG, REG)."""
        return self._run(self.reg2reg, instruction)

    def getm(self, instruction):
        """Assemble GETM (REG, IMM)."""
        return self._run(self.imm2reg, instruction)

    def putm(self, instruction):
        """Assemble PUTM (IMM, REG)."""
        return self._run(self.reg2imm, instruction)

    def addi(self, instruction):
        """Assemble ADDI (REG, IMM)."""
        return self._run(self.imm2reg, instruction)
|
||||||
|
|
||||||
|
|
||||||
|
class VMComponent:
    """
    Represents a register or an operation the VM recognizes.

    ``name`` is the textual token, ``value`` either an int or a string
    holding a decimal ("42") or hexadecimal ("0x2a") literal.
    """
    # Class-level defaults; __init__ always overrides them per instance.
    name = ""
    value = ""

    # Unsigned decimal literal. Compiled once instead of on every call.
    _DECIMAL = re.compile("^[0-9]+$")

    def __init__(self, name, value):
        self.name = name
        self.value = value

    def __repr__(self):
        return "{}".format(self.name)

    def _pack(self, fmt):
        """
        Pack ``value`` with the struct format fmt.

        Returns the packed bytes, or None when ``value`` is a string that is
        neither a valid "0x" hexadecimal nor a decimal literal. A number that
        does not fit the format still raises struct.error.
        """
        number = self.value
        if not isinstance(number, int):
            if number.startswith("0x"):
                try:
                    number = int(number, 16)
                except ValueError:
                    # "0x" followed by non-hex digits is not a usable value.
                    return None
            elif self._DECIMAL.match(number):  # only numbers
                number = int(number)
            else:
                return None
        return struct.pack(fmt, number)

    def uint8(self):
        """Return ``value`` as one unsigned byte, or None if unparseable."""
        return self._pack("<B")

    def uint16(self):
        """Return ``value`` as a little-endian unsigned 16-bit integer, or None."""
        return self._pack("<H")
|
||||||
|
|
||||||
|
|
||||||
|
class VMInstruction:
    """
    Represents an instruction the VM recognizes.
    e.g: MOVI [R0, 2]
          ^      ^
        opcode  args

    ``opcode`` is a VMComponent looked up in ``ops``; ``args`` is a list of
    VMComponents, one per operand, in source order.
    """

    # A token starting with a digit can never be a register name, so it is
    # taken as an immediate; malformed ones are rejected when they are packed.
    _IMMEDIATE = re.compile("^[0-9]")

    def __init__(self, opcode, instr_list):
        """
        Build the instruction from its opcode name and operand tokens.

        Raises ValueError when the opcode or a register operand is unknown.
        """
        opc_name, opc_value = self._lookup(ops, opcode, "OPERATION")
        self.opcode = VMComponent(opc_name, opc_value)
        self.args = []
        for el in instr_list:
            if self._IMMEDIATE.match(el):
                # directly append the immediate
                self.args.append(VMComponent(el, el))
            else:
                # create a VM component for a register
                reg_name, reg_value = self._lookup(regs, el, "REGISTER")
                self.args.append(VMComponent(reg_name, reg_value))

    @staticmethod
    def _lookup(table, name, kind):
        """Return the (name, value) pair for name in table or raise ValueError."""
        found = value_from_list(table, name)
        if found is None:
            # value_from_list signals "not found" with None; unpacking that
            # would otherwise fail with an unrelated-looking TypeError.
            raise ValueError(
                "ERROR WHILE ASSEMBLING UNKNOWN {}: {}".format(kind, name))
        return found

    def __repr__(self):
        return "{} {}".format(self.opcode.name, ", ".join([x.name for x in self.args]))
|
||||||
|
|
||||||
op_names = ["MOVI",
|
op_names = ["MOVI",
|
||||||
"MOVR",
|
"MOVR",
|
||||||
@ -23,215 +177,57 @@ op_names = ["MOVI",
|
|||||||
"CALL",
|
"CALL",
|
||||||
"HALT",
|
"HALT",
|
||||||
"NOPE"]
|
"NOPE"]
|
||||||
|
|
||||||
reg_names = ["R0", "R1", "R2", "R3", "S0", "S1", "S2", "S3", "IP", "BP", "SP"]
|
reg_names = ["R0", "R1", "R2", "R3", "S0", "S1", "S2", "S3", "IP", "BP", "SP"]
|
||||||
section_names = ["DATA:", "CODE:", "STACK:"]
|
section_names = ["DATA:", "CODE:", "STACK:"]
|
||||||
section_flags = {s.casefold(): i + 1 for i, s in enumerate(section_names)}
|
section_flags = {s.casefold(): i + 1 for i, s in enumerate(section_names)}
|
||||||
ops = {s.casefold(): i for i, s in enumerate(op_names)}
|
ops = [VMComponent(s.casefold(), i) for i, s in enumerate(op_names)]
|
||||||
regs = {s.casefold(): i for i, s in enumerate(reg_names)}
|
regs = [VMComponent(s.casefold(), i) for i, s in enumerate(reg_names)]
|
||||||
|
|
||||||
assembled = bytearray()
|
def value_from_list(fromlist, name):
    """
    Look a component up by name.

    Returns the (name, value) tuple of the first element of fromlist whose
    ``name`` equals name, or None when no element matches.
    """
    matches = ((el.name, el.value) for el in fromlist if el.name == name)
    return next(matches, None)


def to_uint8(data):
    """
    Pack data as a single unsigned byte.

    data may be an int, a "0x"-prefixed hexadecimal string or a string of
    decimal digits; any other string yields None.
    """
    if isinstance(data, int):
        number = data
    elif data.startswith("0x"):
        number = int(data, 16)
    elif re.match("^[0-9]+$", data):  # only numbers
        number = int(data)
    else:
        return None
    return struct.pack("<B", number)
|
||||||
|
|
||||||
|
|
||||||
def to_uint16(data):
    """
    Pack data as a little-endian unsigned 16-bit integer.

    data may be an int, a "0x"-prefixed hexadecimal string or a string of
    decimal digits; any other string yields None.
    """
    if isinstance(data, int):
        number = data
    elif data.startswith("0x"):
        number = int(data, 16)
    elif re.match("^[0-9]+$", data):  # only numbers
        number = int(data)
    else:
        return None
    return struct.pack("<H", number)


def name_from_list(fromlist, value):
    """
    Look a component up by value.

    Returns the (name, value) tuple of the first element of fromlist whose
    ``value`` equals value, or None when no element matches.
    """
    matches = ((el.name, el.value) for el in fromlist if el.value == value)
    return next(matches, None)
|
||||||
|
|
||||||
|
|
||||||
def is_reg(data):
    """Return True when data equals the casefolded form of a name in reg_names."""
    return any(data == rn.casefold() for rn in reg_names)
|
|
||||||
|
|
||||||
|
|
||||||
def movi(op_str, dst_str, src_str):
    """
    Assemble ``op dst, imm`` into the global ``assembled`` buffer.

    Appends the opcode byte, the destination register byte and the immediate
    packed by to_uint16(). Returns True on success; on failure writes a
    message to stderr and returns False.
    """
    global assembled
    # Guard clauses first: unknown register, then the forbidden IP target.
    if not is_reg(dst_str):
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN REGISTER: {}\n".format(dst_str))
        return False
    if dst_str == "ip":
        sys.stderr.write("CAN'T MOVI TO IP!\n")
        return False

    opcode_bytes = to_uint8(ops[op_str])
    register_bytes = to_uint8(regs[dst_str])
    immediate_bytes = to_uint16(src_str)
    if not (opcode_bytes and register_bytes and immediate_bytes):
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN VALUES\n")
        return False

    assembled += opcode_bytes + register_bytes + immediate_bytes
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def movr(op_str, dst_str, src_str):
    """
    Assemble ``op dst, src`` (register to register) into ``assembled``.

    Appends the opcode byte followed by one byte holding the destination
    register in the high nibble and the source register in the low nibble.
    Returns True on success; on failure writes a message to stderr and
    returns False.
    """
    global assembled

    # Report the first operand that is not a register (destination first).
    for reg_str in (dst_str, src_str):
        if not is_reg(reg_str):
            sys.stderr.write(
                "ERROR WHILE ASSEMBLING UNKNOWN REGISTER: {}\n".format(reg_str))
            return False
    if dst_str == "ip" or src_str == "ip":
        sys.stderr.write("CAN'T MOVR IP!\n")
        return False

    op_val = to_uint8(ops[op_str])
    dstsrc_val = (to_uint8(regs[dst_str])[0]
                  << 4) ^ to_uint8(regs[src_str])[0]
    # Only the opcode can be "unknown" here. The combined register byte is
    # legitimately 0 for register 0 -> register 0, so it must not be tested
    # for truthiness.
    if op_val is None:
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN VALUES\n")
        return False
    assembled += op_val + to_uint8(dstsrc_val)
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def getm(op_str, dst_str, src_str):
    """
    Assemble ``op dst, imm`` into the global ``assembled`` buffer.

    Appends the opcode byte, the destination register byte and the immediate
    packed by to_uint16(). Returns True on success; on failure writes a
    message to stderr and returns False.
    """
    global assembled
    if not is_reg(dst_str):
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN REGISTER: {}\n".format(dst_str))
        return False
    if dst_str == "ip":
        # Name the actual operation instead of the copy-pasted "MOVI".
        sys.stderr.write("CAN'T GETM TO IP!\n")
        return False

    op_val = to_uint8(ops[op_str])
    dst_val = to_uint8(regs[dst_str])
    src_val = to_uint16(src_str)
    if op_val is None or dst_val is None or src_val is None:
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN VALUES\n")
        return False
    assembled += op_val + dst_val + src_val
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def putm(op_str, dst_str, src_str):
    """
    Assemble ``op imm, src`` into the global ``assembled`` buffer.

    Appends the opcode byte, the destination immediate packed by to_uint16()
    and the source register byte. Returns True on success; on failure writes
    a message to stderr and returns False.
    """
    global assembled
    if not is_reg(src_str):
        # The register operand of this instruction is the source, so that is
        # the token to report.
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN REGISTER: {}\n".format(src_str))
        return False
    if src_str == "ip":
        sys.stderr.write("CAN'T PUTM FROM IP!\n")
        return False

    op_val = to_uint8(ops[op_str])
    src_val = to_uint8(regs[src_str])
    dst_val = to_uint16(dst_str)
    if op_val is None or dst_val is None or src_val is None:
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN VALUES\n")
        return False
    assembled += op_val + dst_val + src_val
    return True
|
|
||||||
|
|
||||||
def addi(op_str, dst_str, src_str):
    """
    Assemble ``op dst, imm`` into the global ``assembled`` buffer.

    Appends the opcode byte, the destination register byte and the immediate
    packed by to_uint16(). Returns True on success; on failure writes a
    message to stderr and returns False.
    """
    global assembled
    if not is_reg(dst_str):
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN REGISTER: {}\n".format(dst_str))
        return False
    # The register operand is the destination; guarding the immediate
    # (src_str) instead would let "addi ip, N" through.
    if dst_str == "ip":
        sys.stderr.write("CAN'T ADDI TO IP!\n")
        return False

    op_val = to_uint8(ops[op_str])
    src_val = to_uint16(src_str)
    dst_val = to_uint8(regs[dst_str])
    if op_val is None or dst_val is None or src_val is None:
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN VALUES\n")
        return False
    assembled += op_val + dst_val + src_val
    return True
|
|
||||||
|
|
||||||
|
|
||||||
def assemble_code(line):
    """
    Assemble one line of the code section.

    The line is split on non-word characters; the first token is the
    operation, the next two its operands. Returns False when the line is
    empty, the operation is unknown, operands are missing or the operation's
    assembler reports a failure; True otherwise (including operations that
    are known but have no assembler yet, for which nothing is emitted).
    """
    sys.stdout.write("CODE: ")
    instruction = [x for x in re.split(r'\W+', line) if x]
    if not instruction:
        sys.stderr.write("ERROR WHILE ASSEMBLING EMPTY INSTRUCTION\n")
        return False
    op_name = instruction[0].casefold()
    operands = instruction[1:]

    if op_name not in [on.casefold() for on in op_names]:
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING UNKNOWN OPERATION: {}\n".format(op_name))
        return False
    sys.stdout.write("{} {}\n".format(op_name, ", ".join(operands)))

    handlers = {
        "movi": movi,
        "movr": movr,
        "getm": getm,
        "putm": putm,
        "addi": addi,
    }
    handler = handlers.get(op_name)
    if handler is None:
        # Known operation without an assembler yet: nothing to emit.
        return True
    if len(operands) < 2:
        sys.stderr.write(
            "ERROR WHILE ASSEMBLING MISSING OPERANDS FOR: {}\n".format(op_name))
        return False
    # Propagate the handler's verdict instead of always claiming success.
    return handler(op_name, operands[0], operands[1])
|
|
||||||
|
|
||||||
|
|
||||||
def assemble_data(line):
    """
    Echo one line of the data section to stdout.

    The line is printed after a "DATA:" prefix and a tab, with leading and
    trailing commas removed. Nothing is added to the assembled output.
    """
    payload = line.strip(",")
    sys.stdout.write("DATA:\t{}\n".format(payload))
|
||||||
|
|
||||||
|
|
||||||
def main():
    """
    Command-line entry point: assemble sys.argv[1] into sys.argv[2].

    The input is read line by line, casefolded; a line equal to a section
    name selects how the following lines are handled. Lines of the data
    section go to assemble_data(), lines of the code section to the
    VMAssembler. The assembled bytes are written to the output file.
    """
    if len(sys.argv) < 3:
        print("Usage: {} file_to_assemble output".format(sys.argv[0]))
        return

    vma = VMAssembler()
    flag = None
    with open(sys.argv[1], 'r') as f:
        for raw_line in f:
            # Surrounding whitespace would keep a section header from matching
            # and whitespace-only lines carry nothing to assemble.
            line = raw_line.casefold().strip()
            if not line:
                continue
            if line in section_flags:
                flag = section_flags[line]
                continue
            if flag == section_flags["data:"]:
                # Data lines are not instructions; feeding them to the code
                # assembler would try to parse them as opcodes.
                assemble_data(line)
            elif flag == section_flags["code:"]:
                vma.process_code_line(line)

    if not flag:
        sys.stderr.write(
            "Nothing was assembled! Did you use the section delimiters?\n")
    with open(sys.argv[2], 'wb') as f:
        f.write(vma.assembled_code)
|
||||||
|
|
||||||
if __name__ == '__main__':
|
if __name__ == '__main__':
|
||||||
main()
|
main()
|
||||||
|
Loading…
Reference in New Issue
Block a user