diff --git a/assembler/assembler.py b/assembler/assembler.py index 543301f..10e6b7a 100644 --- a/assembler/assembler.py +++ b/assembler/assembler.py @@ -49,20 +49,54 @@ class InvalidValue(AssemblerException): class VMAssembler: - - def __init__(self, key): + def __init__(self, key, data): + self.data = data self.assembled_code = bytearray() - self.encrypt_ops(key) + self.functions = [] + self.decrypt_ops(key) + self.parse_functions() + print(self.functions) + main = next((x for x in self.functions if x.name == "main"), None) + if main == None: + print("Main has to be defined") + return - def parse(self, instruction): - action = getattr(self, "{}".format(instruction.opcode.method)) - action(instruction) + def parse_functions(self): + cur_fun_size = 0 + cur_fun_name = None + fun_start = 0 - def process_code_line(self, line): - components = [x for x in re.split('\W', line) if x] - instruction = VMInstruction(components[0], components[1:]) - sys.stdout.write(str(instruction) + "\n") - self.parse(instruction) + # first parse to get every function name + for i, line in enumerate(self.data): + match = function_re.match(line) + if match: + if cur_fun_name: + f = VMFunction(cur_fun_name, self.data[fun_start:i]) + self.functions.append(f) + cur_fun_name = match.group(1) + fun_start = i + 1 + f = VMFunction(cur_fun_name, self.data[fun_start:i + 1]) + self.functions.append(f) + + # putting main in first position in order to assemble it first + for i, f in enumerate(self.functions): + if f.name == "main" and i is not 0: + self.functions[0], self.functions[i] = self.functions[i], self.functions[0] + break + + # calculating functions offsets + for i in range(1, len(self.functions)): + prev_fun_tot_size = self.functions[i-1].size + self.functions[i-1].offset + cur_fun_size = self.functions[i].size + self.functions[i].set_offset(prev_fun_tot_size) + + return + + def parse(self): + for f in self.functions: + for i in f.instructions: + action = getattr(self, "{}".format(i.opcode.method)) + action(i) def imm2reg(self, instruction): """ @@ -169,11 +203,26 @@ class VMAssembler: def jump(self, instruction): imm_op_re = re.compile(".*[iI]$") reg_op_re = re.compile(".*[rR]$") - arg = instruction.args[0] - section = next((x for x in functions if x.name == arg.name), None) - # TODO this is due the VMComponent structure - instruction.args[0].name = section.offset - instruction.args[0].value = section.offset + symcall = symcall_re.match(str(instruction)) + + dst = instruction.args[0] + # let's check if the jump is to a label or a function + if symcall: + # the symbal has not been resolved + if dst.name == dst.value: + # check whether it is a function + val = next((x.offset for x in self.functions if x.name == dst.name), None) + # check whether it is a label + if val == None: + for f in self.functions: + for i in f.instructions: + if i.label == dst.name: + val = f.offset_of_label(dst) + f.offset + if val == None: + raise AssemblerException() + # resolving the symbol + instruction.args[0].set_value(val) + # define the kind of jump: to immediate or to register if imm_op_re.match(instruction.opcode.name): self.immonly(instruction) elif reg_op_re.match(instruction.opcode.name): @@ -189,7 +238,7 @@ class VMAssembler: self.assembled_code += opcode.uint8() return - def encrypt_ops(self, key): + def decrypt_ops(self, key): key_ba = bytearray(key, 'utf-8') olds = copy.deepcopy(ops) @@ -206,13 +255,95 @@ class VMAssembler: for o, n in zip(olds, ops): print("{} : {}->{}".format(o.name, hex(o.value), hex(n.value))) +class VMFunction: + def __init__(self, name, code): + self.name = name + self.size = 0 + self.offset = 0 + self.instructions = [] + + # populating instructions + i = 0 + while i < len(code): + line = code[i] + ins = instruction_re.match(line) + label = label_re.match(line) + if label: + label_name = label.group(1) + self.instructions.append(VMInstruction(code[i+1], label_name)) + i += 2 + elif ins: + self.instructions.append(VMInstruction(line)) + i+=1 + + self.calc_size() + + def calc_size(self): + for i in self.instructions: + self.size += i.size + + def set_offset(self, offset): + self.offset = offset + + def offset_of_label(self, label): + offset = 0 + for i in self.instructions: + offset += i.size + if i.label == label: + break + return offset + + def __repr__(self): + return "{}: size {}, offset {}".format(self.name, hex(self.size), hex(self.offset)) + +class VMInstruction: + """ + Represents an instruction the VM recognizes. + e.g: MOVI [R0, 2] + ^ ^ + opcode args + """ + + def __init__(self, line, label = None): + self.opcode = None + self.args = [] + self.size = 1 + self.label = label + + ins = instruction_re.match(line) + symcall = symcall_re.match(line) + + opcode = ins.group(1) + self.opcode = next((x for x in ops if x.name == opcode), None) + if self.opcode == None: + raise InvalidOperation(opcode) + + args = [x for x in ins.groups()[1:] if x is not None] + for a in args: + if immediate_re.match(a) or symcall: + # directly append the immediate + self.args.append(VMComponent(a, a)) + self.size += 2 + continue + elif register_re.match(a): + # create a VM component for a register + reg = next((x for x in regs if x.name == a), None) + if reg == None: + raise InvalidRegister(a) + self.args.append(reg) + self.size += 1 + continue + + def __repr__(self): + return "{} {}".format(self.opcode.name, ", ".join([x.name for x in self.args])) + class VMComponent: """ Represents a register, operation or an immediate the VM recognizes """ - def __init__(self, name, value, method=None): + def __init__(self, name, value, method = None): self.name = name.casefold() self.value = value self.method = method @@ -257,84 +388,15 @@ class VMComponent: return True def isimm(self): - if not immediate_re.match(str(self.name)): + name_alpha = alpha_re.match(str(self.name)) + value_alpha = alpha_re.match(str(self.value)) + name_imm = immediate_re.match(str(self.name)) + value_imm = immediate_re.match(str(self.value)) + + if name_alpha and value_alpha and not name_imm and not value_imm: return False return True - -class VMInstruction: - """ - Represents an instruction the VM recognizes. - e.g: MOVI [R0, 2] - ^ ^ - opcode args - """ - - def __init__(self, opcode, instr_list): - self.opcode = None - self.args = None - self.size = 1 - - self.opcode = next((x for x in ops if x.name == opcode), None) - if self.opcode == None: - raise InvalidOperation(opcode) - self.args = [] - for el in instr_list: - if immediate_re.match(el): - # directly append the immediate - self.args.append(VMComponent(el, el)) - self.size += 2 - continue - elif register_re.match(el): - # create a VM component for a register - reg_comp = next((x for x in regs if x.name == el), None) - self.args.append(reg_comp) - self.size += 1 - continue - else: - # section - print(el) - sec_comp = next((x for x in functions if x.name == el), None) - if sec_comp: - self.args.append(VMComponent( - sec_comp.name, sec_comp.offset)) - self.size += 2 - continue - raise AssemblerException() - - def __repr__(self): - return "{} {}".format(self.opcode.name, ", ".join([x.name for x in self.args])) - - -class VMFunction: - """ - Represents a code section or "label" such as "main:" - """ - - def __init__(self, name, line_start): - self.name = name - self.size = 0 - self.offset = 0 - self.line_start = line_start - self.line_end = 0 - self.labels = [] - - def set_size(self, size): - self.size = size - - def set_offset(self, offset): - self.offset = offset - - def set_line_start(self, start): - self.line_start = start - - def set_line_end(self, end): - self.line_end = end - - - def __repr__(self): - return "{} | ls: {}, le: {}, s: {}, o: {}".format(self.name, hex(self.line_start), hex(self.line_end), hex(self.size), hex(self.offset)) - op_names = [["MOVI", "imm2reg"], ["MOVR", "reg2reg"], ["LOAD", "imm2reg"], @@ -379,61 +441,13 @@ op_names = [["MOVI", "imm2reg"], reg_names = ["R0", "R1", "R2", "R3", "S0", "S1", "S2", "S3", "IP", "BP", "SP"] ops = [VMComponent(le[0], i, le[1]) for i, le in enumerate(op_names)] regs = [VMComponent(s.casefold(), i) for i, s in enumerate(reg_names)] -functions = [] -instruction_re = re.compile("([\w]{4})(?:\ +(?:([\w]+)\ *(?:,[\ ]*([\w]+))*))?") # 1: opcode 2+: args +instruction_re = re.compile("^([\w]{4})(?:\ +(?:([\w]+)\ *(?:,[\ ]*([\w]+))*))?$") # 1: opcode 2+: args function_re = re.compile("(?:def\ )([a-zA-Z]*)\:") immediate_re = re.compile("(?:0x)?[0-9]*[0-9]$") -register_re = re.compile("(^[rRsS]{1}[0-4]{1}$)|([iIrRsS]{1}[pP]{1}$)") -labeldef_re = re.compile("([a-zA-Z]*)\:") -labelcall_re = re.compile("(?:[jJ]{1}[pPmM]{1}[pPaAbBeEnN]{1}[iIrR]{1}\ *)([\w]*)") - -def parse_functions(lines): - current_size = 0 - cur_func = None - - # first parsing to get functions' names - for i, line in enumerate(lines): - match = function_re.match(line) - if match: - if cur_func: - tmp = next(x for x in functions if x.name == cur_func) - tmp.set_line_end(i-1) - cur_func = match.group(2) - functions.append(VMFunction(cur_func, i + 1)) - continue - tmp = next(x for x in functions if x.name == cur_func) - tmp.set_line_end(i) - - # calculating sizes and offsets - for line in lines: - match = function_re.match(line) - if match: - if cur_func: - tmp = next(x for x in functions if x.name == cur_func) - tmp.set_size(current_size) - cur_func = match.group(2) - current_size = 0 - continue - components = [x for x in instruction_re.match(line).groups() if x is not None] - current_size += VMInstruction(components[0], components[1:]).size - tmp = next(x for x in functions if x.name == cur_func) - tmp.set_size(current_size) - - # if not, main as to be the first entry - for i in range(len(functions)): - if functions[i].name == "main" and i is not 0: - functions[0], functions[i] = functions[i], functions[0] - break - - calc_fun_offsets() - -def calc_fun_offsets(): - current_offset = 0 - for i in range(1, len(functions)): - prev_size = functions[i - 1].size - current_offset += prev_size - functions[i].set_offset(current_offset) - +alpha_re = re.compile("^[a-zA-Z]*$") +register_re = re.compile("(^[rRsS][0-4]$)|([iIrRsS][pP]$)") +label_re = re.compile("^([a-zA-Z]+)\:$") +symcall_re = re.compile("^([jJ][pPmM][pPaAbBeEnN][iIrR])\ +([\w]*)$") def main(): if len(sys.argv) < 4: @@ -441,22 +455,12 @@ def main(): sys.argv[0])) return - vma = VMAssembler(sys.argv[1]) with open(sys.argv[2], 'r') as f: filedata = f.readlines() filedata = [x.strip() for x in filedata if x.strip()] - # let's parse the whole file for labels - parse_functions(filedata) - - if "main" not in [x.name for x in functions]: - sys.stderr.write("No main specified!") - return - - for s in functions: - section_code = filedata[s.line_start:s.line_end+1] - for line in section_code: - vma.process_code_line(line) + vma = VMAssembler(sys.argv[1], filedata) + vma.parse() with open(sys.argv[3], 'wb') as f: f.write(vma.assembled_code)