LABEL E FUNZIONI ROTTE. RISCRITTO TUTTO
This commit is contained in:
parent
536773bc9e
commit
adf7ca002d
@ -49,20 +49,54 @@ class InvalidValue(AssemblerException):
|
|||||||
|
|
||||||
|
|
||||||
class VMAssembler:
|
class VMAssembler:
|
||||||
|
def __init__(self, key, data):
|
||||||
def __init__(self, key):
|
self.data = data
|
||||||
self.assembled_code = bytearray()
|
self.assembled_code = bytearray()
|
||||||
self.encrypt_ops(key)
|
self.functions = []
|
||||||
|
self.decrypt_ops(key)
|
||||||
|
self.parse_functions()
|
||||||
|
print(self.functions)
|
||||||
|
main = next((x for x in self.functions if x.name == "main"), None)
|
||||||
|
if main == None:
|
||||||
|
print("Main has to be defined")
|
||||||
|
return
|
||||||
|
|
||||||
def parse(self, instruction):
|
def parse_functions(self):
|
||||||
action = getattr(self, "{}".format(instruction.opcode.method))
|
cur_fun_size = 0
|
||||||
action(instruction)
|
cur_fun_name = None
|
||||||
|
fun_start = 0
|
||||||
|
|
||||||
def process_code_line(self, line):
|
# first parse to get every function name
|
||||||
components = [x for x in re.split('\W', line) if x]
|
for i, line in enumerate(self.data):
|
||||||
instruction = VMInstruction(components[0], components[1:])
|
match = function_re.match(line)
|
||||||
sys.stdout.write(str(instruction) + "\n")
|
if match:
|
||||||
self.parse(instruction)
|
if cur_fun_name:
|
||||||
|
f = VMFunction(cur_fun_name, self.data[fun_start:i])
|
||||||
|
self.functions.append(f)
|
||||||
|
cur_fun_name = match.group(1)
|
||||||
|
fun_start = i + 1
|
||||||
|
f = VMFunction(cur_fun_name, self.data[fun_start:i + 1])
|
||||||
|
self.functions.append(f)
|
||||||
|
|
||||||
|
# putting main in first position in order to assemble it first
|
||||||
|
for i, f in enumerate(self.functions):
|
||||||
|
if f.name == "main" and i is not 0:
|
||||||
|
self.functions[0], self.functions[i] = self.functions[i], self.functions[0]
|
||||||
|
break
|
||||||
|
|
||||||
|
# calculating functions offsets
|
||||||
|
for i in range(1, len(self.functions)):
|
||||||
|
prev_fun_tot_size = self.functions[i-1].size + self.functions[i-1].offset
|
||||||
|
cur_fun_size = self.functions[i].size
|
||||||
|
self.functions[i].set_offset(prev_fun_tot_size)
|
||||||
|
|
||||||
|
return
|
||||||
|
|
||||||
|
def parse(self):
|
||||||
|
for f in self.functions:
|
||||||
|
for i in f.instructions:
|
||||||
|
action = getattr(self, "{}".format(i.opcode.method))
|
||||||
|
action(i)
|
||||||
|
|
||||||
def imm2reg(self, instruction):
|
def imm2reg(self, instruction):
|
||||||
"""
|
"""
|
||||||
@ -169,11 +203,26 @@ class VMAssembler:
|
|||||||
def jump(self, instruction):
|
def jump(self, instruction):
|
||||||
imm_op_re = re.compile(".*[iI]$")
|
imm_op_re = re.compile(".*[iI]$")
|
||||||
reg_op_re = re.compile(".*[rR]$")
|
reg_op_re = re.compile(".*[rR]$")
|
||||||
arg = instruction.args[0]
|
symcall = symcall_re.match(str(instruction))
|
||||||
section = next((x for x in functions if x.name == arg.name), None)
|
|
||||||
# TODO this is due the VMComponent structure
|
dst = instruction.args[0]
|
||||||
instruction.args[0].name = section.offset
|
# let's check if the jump is to a label or a function
|
||||||
instruction.args[0].value = section.offset
|
if symcall:
|
||||||
|
# the symbal has not been resolved
|
||||||
|
if dst.name == dst.value:
|
||||||
|
# check whether it is a function
|
||||||
|
val = next((x.offset for x in self.functions if x.name == dst.name), None)
|
||||||
|
# check whether it is a label
|
||||||
|
if val == None:
|
||||||
|
for f in self.functions:
|
||||||
|
for i in f.instructions:
|
||||||
|
if i.label == dst.name:
|
||||||
|
val = f.offset_of_label(dst) + f.offset
|
||||||
|
if val == None:
|
||||||
|
raise AssemblerException()
|
||||||
|
# resolving the symbol
|
||||||
|
instruction.args[0].set_value(val)
|
||||||
|
# define the kind of jump: to immediate or to register
|
||||||
if imm_op_re.match(instruction.opcode.name):
|
if imm_op_re.match(instruction.opcode.name):
|
||||||
self.immonly(instruction)
|
self.immonly(instruction)
|
||||||
elif reg_op_re.match(instruction.opcode.name):
|
elif reg_op_re.match(instruction.opcode.name):
|
||||||
@ -189,7 +238,7 @@ class VMAssembler:
|
|||||||
self.assembled_code += opcode.uint8()
|
self.assembled_code += opcode.uint8()
|
||||||
return
|
return
|
||||||
|
|
||||||
def encrypt_ops(self, key):
|
def decrypt_ops(self, key):
|
||||||
key_ba = bytearray(key, 'utf-8')
|
key_ba = bytearray(key, 'utf-8')
|
||||||
olds = copy.deepcopy(ops)
|
olds = copy.deepcopy(ops)
|
||||||
|
|
||||||
@ -206,13 +255,95 @@ class VMAssembler:
|
|||||||
for o, n in zip(olds, ops):
|
for o, n in zip(olds, ops):
|
||||||
print("{} : {}->{}".format(o.name, hex(o.value), hex(n.value)))
|
print("{} : {}->{}".format(o.name, hex(o.value), hex(n.value)))
|
||||||
|
|
||||||
|
class VMFunction:
|
||||||
|
def __init__(self, name, code):
|
||||||
|
self.name = name
|
||||||
|
self.size = 0
|
||||||
|
self.offset = 0
|
||||||
|
self.instructions = []
|
||||||
|
|
||||||
|
# populating instructions
|
||||||
|
i = 0
|
||||||
|
while i < len(code):
|
||||||
|
line = code[i]
|
||||||
|
ins = instruction_re.match(line)
|
||||||
|
label = label_re.match(line)
|
||||||
|
if label:
|
||||||
|
label_name = label.group(1)
|
||||||
|
self.instructions.append(VMInstruction(code[i+1], label_name))
|
||||||
|
i += 2
|
||||||
|
elif ins:
|
||||||
|
self.instructions.append(VMInstruction(line))
|
||||||
|
i+=1
|
||||||
|
|
||||||
|
self.calc_size()
|
||||||
|
|
||||||
|
def calc_size(self):
|
||||||
|
for i in self.instructions:
|
||||||
|
self.size += i.size
|
||||||
|
|
||||||
|
def set_offset(self, offset):
|
||||||
|
self.offset = offset
|
||||||
|
|
||||||
|
def offset_of_label(self, label):
|
||||||
|
offset = 0
|
||||||
|
for i in self.instructions:
|
||||||
|
offset += i.size
|
||||||
|
if i.label == label:
|
||||||
|
break
|
||||||
|
return offset
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return "{}: size {}, offset {}".format(self.name, hex(self.size), hex(self.offset))
|
||||||
|
|
||||||
|
class VMInstruction:
|
||||||
|
"""
|
||||||
|
Represents an instruction the VM recognizes.
|
||||||
|
e.g: MOVI [R0, 2]
|
||||||
|
^ ^
|
||||||
|
opcode args
|
||||||
|
"""
|
||||||
|
|
||||||
|
def __init__(self, line, label = None):
|
||||||
|
self.opcode = None
|
||||||
|
self.args = []
|
||||||
|
self.size = 1
|
||||||
|
self.label = label
|
||||||
|
|
||||||
|
ins = instruction_re.match(line)
|
||||||
|
symcall = symcall_re.match(line)
|
||||||
|
|
||||||
|
opcode = ins.group(1)
|
||||||
|
self.opcode = next((x for x in ops if x.name == opcode), None)
|
||||||
|
if self.opcode == None:
|
||||||
|
raise InvalidOperation(opcode)
|
||||||
|
|
||||||
|
args = [x for x in ins.groups()[1:] if x is not None]
|
||||||
|
for a in args:
|
||||||
|
if immediate_re.match(a) or symcall:
|
||||||
|
# directly append the immediate
|
||||||
|
self.args.append(VMComponent(a, a))
|
||||||
|
self.size += 2
|
||||||
|
continue
|
||||||
|
elif register_re.match(a):
|
||||||
|
# create a VM component for a register
|
||||||
|
reg = next((x for x in regs if x.name == a), None)
|
||||||
|
if reg == None:
|
||||||
|
raise InvalidRegister(a)
|
||||||
|
self.args.append(reg)
|
||||||
|
self.size += 1
|
||||||
|
continue
|
||||||
|
|
||||||
|
def __repr__(self):
|
||||||
|
return "{} {}".format(self.opcode.name, ", ".join([x.name for x in self.args]))
|
||||||
|
|
||||||
|
|
||||||
class VMComponent:
|
class VMComponent:
|
||||||
"""
|
"""
|
||||||
Represents a register, operation or an immediate the VM recognizes
|
Represents a register, operation or an immediate the VM recognizes
|
||||||
"""
|
"""
|
||||||
|
|
||||||
def __init__(self, name, value, method=None):
|
def __init__(self, name, value, method = None):
|
||||||
self.name = name.casefold()
|
self.name = name.casefold()
|
||||||
self.value = value
|
self.value = value
|
||||||
self.method = method
|
self.method = method
|
||||||
@ -257,84 +388,15 @@ class VMComponent:
|
|||||||
return True
|
return True
|
||||||
|
|
||||||
def isimm(self):
|
def isimm(self):
|
||||||
if not immediate_re.match(str(self.name)):
|
name_alpha = alpha_re.match(str(self.name))
|
||||||
|
value_alpha = alpha_re.match(str(self.value))
|
||||||
|
name_imm = immediate_re.match(str(self.name))
|
||||||
|
value_imm = immediate_re.match(str(self.value))
|
||||||
|
|
||||||
|
if name_alpha and value_alpha and not name_imm and not value_imm:
|
||||||
return False
|
return False
|
||||||
return True
|
return True
|
||||||
|
|
||||||
|
|
||||||
class VMInstruction:
|
|
||||||
"""
|
|
||||||
Represents an instruction the VM recognizes.
|
|
||||||
e.g: MOVI [R0, 2]
|
|
||||||
^ ^
|
|
||||||
opcode args
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, opcode, instr_list):
|
|
||||||
self.opcode = None
|
|
||||||
self.args = None
|
|
||||||
self.size = 1
|
|
||||||
|
|
||||||
self.opcode = next((x for x in ops if x.name == opcode), None)
|
|
||||||
if self.opcode == None:
|
|
||||||
raise InvalidOperation(opcode)
|
|
||||||
self.args = []
|
|
||||||
for el in instr_list:
|
|
||||||
if immediate_re.match(el):
|
|
||||||
# directly append the immediate
|
|
||||||
self.args.append(VMComponent(el, el))
|
|
||||||
self.size += 2
|
|
||||||
continue
|
|
||||||
elif register_re.match(el):
|
|
||||||
# create a VM component for a register
|
|
||||||
reg_comp = next((x for x in regs if x.name == el), None)
|
|
||||||
self.args.append(reg_comp)
|
|
||||||
self.size += 1
|
|
||||||
continue
|
|
||||||
else:
|
|
||||||
# section
|
|
||||||
print(el)
|
|
||||||
sec_comp = next((x for x in functions if x.name == el), None)
|
|
||||||
if sec_comp:
|
|
||||||
self.args.append(VMComponent(
|
|
||||||
sec_comp.name, sec_comp.offset))
|
|
||||||
self.size += 2
|
|
||||||
continue
|
|
||||||
raise AssemblerException()
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return "{} {}".format(self.opcode.name, ", ".join([x.name for x in self.args]))
|
|
||||||
|
|
||||||
|
|
||||||
class VMFunction:
|
|
||||||
"""
|
|
||||||
Represents a code section or "label" such as "main:"
|
|
||||||
"""
|
|
||||||
|
|
||||||
def __init__(self, name, line_start):
|
|
||||||
self.name = name
|
|
||||||
self.size = 0
|
|
||||||
self.offset = 0
|
|
||||||
self.line_start = line_start
|
|
||||||
self.line_end = 0
|
|
||||||
self.labels = []
|
|
||||||
|
|
||||||
def set_size(self, size):
|
|
||||||
self.size = size
|
|
||||||
|
|
||||||
def set_offset(self, offset):
|
|
||||||
self.offset = offset
|
|
||||||
|
|
||||||
def set_line_start(self, start):
|
|
||||||
self.line_start = start
|
|
||||||
|
|
||||||
def set_line_end(self, end):
|
|
||||||
self.line_end = end
|
|
||||||
|
|
||||||
|
|
||||||
def __repr__(self):
|
|
||||||
return "{} | ls: {}, le: {}, s: {}, o: {}".format(self.name, hex(self.line_start), hex(self.line_end), hex(self.size), hex(self.offset))
|
|
||||||
|
|
||||||
op_names = [["MOVI", "imm2reg"],
|
op_names = [["MOVI", "imm2reg"],
|
||||||
["MOVR", "reg2reg"],
|
["MOVR", "reg2reg"],
|
||||||
["LOAD", "imm2reg"],
|
["LOAD", "imm2reg"],
|
||||||
@ -379,61 +441,13 @@ op_names = [["MOVI", "imm2reg"],
|
|||||||
reg_names = ["R0", "R1", "R2", "R3", "S0", "S1", "S2", "S3", "IP", "BP", "SP"]
|
reg_names = ["R0", "R1", "R2", "R3", "S0", "S1", "S2", "S3", "IP", "BP", "SP"]
|
||||||
ops = [VMComponent(le[0], i, le[1]) for i, le in enumerate(op_names)]
|
ops = [VMComponent(le[0], i, le[1]) for i, le in enumerate(op_names)]
|
||||||
regs = [VMComponent(s.casefold(), i) for i, s in enumerate(reg_names)]
|
regs = [VMComponent(s.casefold(), i) for i, s in enumerate(reg_names)]
|
||||||
functions = []
|
instruction_re = re.compile("^([\w]{4})(?:\ +(?:([\w]+)\ *(?:,[\ ]*([\w]+))*))?$") # 1: opcode 2+: args
|
||||||
instruction_re = re.compile("([\w]{4})(?:\ +(?:([\w]+)\ *(?:,[\ ]*([\w]+))*))?") # 1: opcode 2+: args
|
|
||||||
function_re = re.compile("(?:def\ )([a-zA-Z]*)\:")
|
function_re = re.compile("(?:def\ )([a-zA-Z]*)\:")
|
||||||
immediate_re = re.compile("(?:0x)?[0-9]*[0-9]$")
|
immediate_re = re.compile("(?:0x)?[0-9]*[0-9]$")
|
||||||
register_re = re.compile("(^[rRsS]{1}[0-4]{1}$)|([iIrRsS]{1}[pP]{1}$)")
|
alpha_re = re.compile("^[a-zA-Z]*$")
|
||||||
labeldef_re = re.compile("([a-zA-Z]*)\:")
|
register_re = re.compile("(^[rRsS][0-4]$)|([iIrRsS][pP]$)")
|
||||||
labelcall_re = re.compile("(?:[jJ]{1}[pPmM]{1}[pPaAbBeEnN]{1}[iIrR]{1}\ *)([\w]*)")
|
label_re = re.compile("^([a-zA-Z]+)\:$")
|
||||||
|
symcall_re = re.compile("^([jJ][pPmM][pPaAbBeEnN][iIrR])\ +([\w]*)$")
|
||||||
def parse_functions(lines):
|
|
||||||
current_size = 0
|
|
||||||
cur_func = None
|
|
||||||
|
|
||||||
# first parsing to get functions' names
|
|
||||||
for i, line in enumerate(lines):
|
|
||||||
match = function_re.match(line)
|
|
||||||
if match:
|
|
||||||
if cur_func:
|
|
||||||
tmp = next(x for x in functions if x.name == cur_func)
|
|
||||||
tmp.set_line_end(i-1)
|
|
||||||
cur_func = match.group(2)
|
|
||||||
functions.append(VMFunction(cur_func, i + 1))
|
|
||||||
continue
|
|
||||||
tmp = next(x for x in functions if x.name == cur_func)
|
|
||||||
tmp.set_line_end(i)
|
|
||||||
|
|
||||||
# calculating sizes and offsets
|
|
||||||
for line in lines:
|
|
||||||
match = function_re.match(line)
|
|
||||||
if match:
|
|
||||||
if cur_func:
|
|
||||||
tmp = next(x for x in functions if x.name == cur_func)
|
|
||||||
tmp.set_size(current_size)
|
|
||||||
cur_func = match.group(2)
|
|
||||||
current_size = 0
|
|
||||||
continue
|
|
||||||
components = [x for x in instruction_re.match(line).groups() if x is not None]
|
|
||||||
current_size += VMInstruction(components[0], components[1:]).size
|
|
||||||
tmp = next(x for x in functions if x.name == cur_func)
|
|
||||||
tmp.set_size(current_size)
|
|
||||||
|
|
||||||
# if not, main as to be the first entry
|
|
||||||
for i in range(len(functions)):
|
|
||||||
if functions[i].name == "main" and i is not 0:
|
|
||||||
functions[0], functions[i] = functions[i], functions[0]
|
|
||||||
break
|
|
||||||
|
|
||||||
calc_fun_offsets()
|
|
||||||
|
|
||||||
def calc_fun_offsets():
|
|
||||||
current_offset = 0
|
|
||||||
for i in range(1, len(functions)):
|
|
||||||
prev_size = functions[i - 1].size
|
|
||||||
current_offset += prev_size
|
|
||||||
functions[i].set_offset(current_offset)
|
|
||||||
|
|
||||||
|
|
||||||
def main():
|
def main():
|
||||||
if len(sys.argv) < 4:
|
if len(sys.argv) < 4:
|
||||||
@ -441,22 +455,12 @@ def main():
|
|||||||
sys.argv[0]))
|
sys.argv[0]))
|
||||||
return
|
return
|
||||||
|
|
||||||
vma = VMAssembler(sys.argv[1])
|
|
||||||
with open(sys.argv[2], 'r') as f:
|
with open(sys.argv[2], 'r') as f:
|
||||||
filedata = f.readlines()
|
filedata = f.readlines()
|
||||||
filedata = [x.strip() for x in filedata if x.strip()]
|
filedata = [x.strip() for x in filedata if x.strip()]
|
||||||
|
|
||||||
# let's parse the whole file for labels
|
vma = VMAssembler(sys.argv[1], filedata)
|
||||||
parse_functions(filedata)
|
vma.parse()
|
||||||
|
|
||||||
if "main" not in [x.name for x in functions]:
|
|
||||||
sys.stderr.write("No main specified!")
|
|
||||||
return
|
|
||||||
|
|
||||||
for s in functions:
|
|
||||||
section_code = filedata[s.line_start:s.line_end+1]
|
|
||||||
for line in section_code:
|
|
||||||
vma.process_code_line(line)
|
|
||||||
|
|
||||||
with open(sys.argv[3], 'wb') as f:
|
with open(sys.argv[3], 'wb') as f:
|
||||||
f.write(vma.assembled_code)
|
f.write(vma.assembled_code)
|
||||||
|
Loading…
Reference in New Issue
Block a user