diff --git a/core/__pycache__/obfuscator.cpython-311.pyc b/core/__pycache__/obfuscator.cpython-311.pyc index 64053a7..b54a4f0 100644 Binary files a/core/__pycache__/obfuscator.cpython-311.pyc and b/core/__pycache__/obfuscator.cpython-311.pyc differ diff --git a/core/__pycache__/parser.cpython-311.pyc b/core/__pycache__/parser.cpython-311.pyc index 9b9da3c..c444df7 100644 Binary files a/core/__pycache__/parser.cpython-311.pyc and b/core/__pycache__/parser.cpython-311.pyc differ diff --git a/core/obfuscator.py b/core/obfuscator.py index 54e5aa5..ad98441 100644 --- a/core/obfuscator.py +++ b/core/obfuscator.py @@ -16,44 +16,27 @@ class LuauVMObfuscator: "FORLOOP", "FORPREP", "TFORLOOP", "SETLIST", "CLOSE", "CLOSURE", "VARARG" ] - self.k1 = random.randint(1000, 5000) - self.k2 = random.randint(1000, 5000) - - self.op_to_id = {name: (self.opcodes.index(name) + self.k1) ^ self.k2 for name in self.opcodes} - - self.var_map = {} - self.used_vars = set() + self.k1, self.k2, self.k3 = random.randint(500, 2000), random.randint(500, 2000), random.randint(500, 2000) + self.op_to_id = {name: ((self.opcodes.index(name) + self.k1) ^ self.k2) + self.k3 for name in self.opcodes} + self.var_map, self.used_vars = {}, set() def get_var(self, hint="var"): if hint in self.var_map: return self.var_map[hint] - chars = "lI1" - length = random.randint(32, 48) - new_var = "_" + "".join(random.choice(chars) for _ in range(length)) - while new_var in self.used_vars: - new_var = "_" + "".join(random.choice(chars) for _ in range(length)) - self.used_vars.add(new_var) - self.var_map[hint] = new_var - return new_var + c = "Il1"; l = random.randint(24, 32) + v = "_" + "".join(random.choice(c) for _ in range(l)) + while v in self.used_vars: v = "_" + "".join(random.choice(c) for _ in range(l)) + self.used_vars.add(v); self.var_map[hint] = v + return v - def to_expr(self, n, bit32_var="bit32", depth=0): - if depth > 2 or (depth > 0 and random.random() < 0.2): - return str(n) - - r = random.randint(1, 100000) - choice = random.choice(['add', 'sub', 'xor']) - - if choice == 'add': - return f"({self.to_expr(n - r, bit32_var, depth + 1)} + {self.to_expr(r, bit32_var, depth + 1)})" - elif choice == 'sub': - return f"({self.to_expr(n + r, bit32_var, depth + 1)} - {self.to_expr(r, bit32_var, depth + 1)})" - elif choice == 'xor': - return f"{bit32_var}.bxor({self.to_expr(n ^ r, bit32_var, depth + 1)}, {self.to_expr(r, bit32_var, depth + 1)})" - - return str(n) + def to_expr(self, n): + r = random.randint(1, 1000) + ch = random.choice(['add', 'sub', 'xor']) + if ch == 'add': return f"({n-r}+{r})" + if ch == 'sub': return f"({n+r}-{r})" + return f"bit32.bxor({n^r},{r})" def encrypt_string(self, s, key): - res = [] - last = key % 256 + res, last = [], key % 256 for i, c in enumerate(s): k = (key + i + last) % 256 last = ord(c) @@ -68,200 +51,168 @@ class LuauVMObfuscator: return code.strip() def generate_vm_source(self, bytecode): - raw_instructions = bytecode['instructions'] - indices = list(range(len(raw_instructions))) - random.shuffle(indices) - - pos_map = {orig: shuffled_idx for shuffled_idx, orig in enumerate(indices)} - - final_insts = [None] * len(raw_instructions) - for i, orig_idx in enumerate(indices): - inst = raw_instructions[orig_idx] - next_shuffled = pos_map[orig_idx + 1] if orig_idx + 1 < len(raw_instructions) else 0 - - packed = [ - inst[0] & 0xFF, (inst[0] >> 8) & 0xFF, - inst[1] & 0xFF, inst[2] & 0xFF, inst[3] & 0xFF, - next_shuffled & 0xFF, (next_shuffled >> 8) & 0xFF - ] - final_insts[i] = packed - + raw_ins = bytecode['instructions'] + indices = list(range(len(raw_ins))) + random.shuffle(indices); pos_map = {orig: i for i, orig in enumerate(indices)} + final_insts = [] + for orig_idx in indices: + inst = raw_ins[orig_idx] + next_sh = pos_map[orig_idx + 1] if orig_idx + 1 < len(raw_ins) else 0 + final_insts.append([inst[0]&0xFF,(inst[0]>>8)&0xFF,inst[1]&0xFF,inst[2]&0xFF,inst[3]&0xFF,next_sh&0xFF,(next_sh>>8)&0xFF]) inst_str = "".join(chr(i) for inst in final_insts for i in inst) inst_b64 = base64.b64encode(inst_str.encode('latin-1')).decode() - encrypted_consts = [] + consts = [] salt = random.randint(100000, 999999) for i, c in enumerate(bytecode['constants']): if c['type'] == 'string': - key = (i * 149 + salt) % 256 - enc_val = self.encrypt_string(c['value'], key) - encrypted_consts.append({"t": 1, "v": base64.b64encode(enc_val.encode('latin-1')).decode()}) - else: - encrypted_consts.append({"t": 2, "v": c['value']}) + k = (i * 149 + salt) % 256; enc = self.encrypt_string(c['value'], k) + consts.append({"t": 1, "v": base64.b64encode(enc.encode('latin-1')).decode()}) + elif c['type'] == 'number': consts.append({"t": 2, "v": c['value']}) + else: consts.append({"t": 3, "v": c['value']}) - consts_json = json.dumps(encrypted_consts) - start_idx = pos_map[0] + v = { + "BIT": "bit32", "ENV": self.get_var("env"), "DEC": self.get_var("dec"), + "INST": self.get_var("inst"), "CONSTS": self.get_var("consts"), "SALT": self.to_expr(salt), + "EXEC": self.get_var("exec"), "REGS": self.get_var("regs"), "CURR": self.get_var("curr"), + "PTR": self.get_var("ptr"), "OP": self.get_var("op"), "A": self.get_var("a"), + "B": self.get_var("b"), "C": self.get_var("c"), "UNP": "unpack or table.unpack", + "SPW": "task and task.spawn or spawn", "JSON": "game:GetService('HttpService')", + "CHARS": self.get_var("chars"), "LOOKUP": self.get_var("lookup"), "GETC": self.get_var("getc") + } - V_ENV = self.get_var("env") - V_BIT = self.get_var("bit") - V_B64 = self.get_var("b64") - V_DEC = self.get_var("dec") - V_INST = self.get_var("inst") - V_CONSTS = self.get_var("consts") - V_SALT = self.get_var("salt") - V_EXEC = self.get_var("exec") - V_REGS = self.get_var("regs") - V_CURR = self.get_var("curr") - V_RUN = self.get_var("run") - V_GETC = self.get_var("getc") - V_PTR = self.get_var("ptr") - V_OP = self.get_var("op") - V_A = self.get_var("a") - V_B = self.get_var("b") - V_C = self.get_var("c") - V_UNP = self.get_var("unp") - V_SPW = self.get_var("spw") - V_RAW = self.get_var("raw") - V_KEY = self.get_var("key") - V_RES = self.get_var("res") - V_LST = self.get_var("lst") - - vm_lua = f"""local {V_BIT}=bit32 -local {V_ENV}=setmetatable({{}},{{__index=getfenv()}}) -local {V_UNP}=unpack or table.unpack -local {V_SPW}=task and task.spawn or spawn -local {V_B64}='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' -local {V_DEC}=function(data) - data=data:gsub('[^'..{V_B64}..'=]','') - local res,bits={{}},'' - for i=1,#data do - local c=data:sub(i,i) - if c=='=' then break end - local f={V_B64}:find(c)-1 - for j=5,0,-1 do bits=bits..({V_BIT}.extract(f,j,1)) end - end - for i=1,#bits-7,8 do - local b=0 - for j=1,8 do b=b+(bits:sub(i+j-1,i+j-1)=='1' and 2^(8-j) or 0) end - res[#res+1]=string.char(b) + vm_lua = f"""local {v['ENV']}=setmetatable({{}},{{__index=getfenv()}}) +local {v['CHARS']}='ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/' +local {v['LOOKUP']}={{}} +for i=1,64 do {v['LOOKUP']}[{v['CHARS']}:sub(i,i)]=i-1 end +local function {v['DEC']}(data) + data=data:gsub('[^%a%d%+/]','') + local res={{}} + for i=1,#data,4 do + local a,b,c,d={v['LOOKUP']}[data:sub(i,i)],{v['LOOKUP']}[data:sub(i+1,i+1)],{v['LOOKUP']}[data:sub(i+2,i+2)],{v['LOOKUP']}[data:sub(i+3,i+3)] + local n={v['BIT']}.lshift(a or 0,18)+{v['BIT']}.lshift(b or 0,12)+{v['BIT']}.lshift(c or 0,6)+(d or 0) + res[#res+1]=string.char({v['BIT']}.extract(n,16,8)) + if c then res[#res+1]=string.char({v['BIT']}.extract(n,8,8)) end + if d then res[#res+1]=string.char({v['BIT']}.extract(n,0,8)) end end return table.concat(res) end -local {V_INST}={V_DEC}('{inst_b64}') -local {V_CONSTS}=game:GetService("HttpService"):JSONDecode([=[{consts_json}]=]) -local {V_SALT}={self.to_expr(salt, V_BIT)} -local function {V_EXEC}() - local {V_REGS}={{}} - local {V_CURR}={self.to_expr(start_idx, V_BIT)} - local {V_RUN}=true - local function {V_GETC}(idx) - if not idx then return nil end - local c={V_CONSTS}[idx+1] - if not c then return nil end - if c.t=={self.to_expr(1, V_BIT)} then - local {V_RAW}={V_DEC}(c.v) - local {V_KEY}=(idx*{self.to_expr(149, V_BIT)}+{V_SALT})%256 - local {V_RES}={{}} - local {V_LST}={V_KEY}%256 - for i=1,#{V_RAW} do - local k=({V_KEY}+i+{V_LST}-1)%256 - local b=string.byte({V_RAW},i) - local char={V_BIT}.bxor(b,k) - {V_RES}[i]=string.char(char) - {V_LST}=char +local {v['INST']}={v['DEC']}('{inst_b64}') +local {v['CONSTS']}={v['JSON']}:JSONDecode([=[{json.dumps(consts)}]=]) +local {v['EXEC']}=function() + local {v['REGS']}={{}} + local {v['CURR']}={self.to_expr(pos_map[0])} + local function {v['GETC']}(idx) + local c={v['CONSTS']}[idx+1] + if not c then return end + if c.t==1 then + local r,k={v['DEC']}(c.v),(idx*149+{v['SALT']})%256 + local res,lst={{}},k%256 + for i=1,#r do + local char={v['BIT']}.bxor(string.byte(r,i),(k+i+lst-1)%256) + res[i]=string.char(char) + lst=char end - return table.concat({V_RES}) - end + return table.concat(res) + elseif c.t==3 then return c.v=='true' and true or (c.v=='false' and false or nil) end return c.v end - while {V_RUN} do - local {V_PTR}={V_CURR}*7+1 - local op_l=string.byte({V_INST},{V_PTR}) or 0 - local op_h=string.byte({V_INST},{V_PTR}+1) or 0 - local {V_A}=string.byte({V_INST},{V_PTR}+2) or 0 - local {V_B}=string.byte({V_INST},{V_PTR}+3) or 0 - local {V_C}=string.byte({V_INST},{V_PTR}+4) or 0 - local n_l=string.byte({V_INST},{V_PTR}+5) or 0 - local n_h=string.byte({V_INST},{V_PTR}+6) or 0 - {V_CURR}=n_l+({V_BIT}.lshift(n_h,8)) - local op_raw=op_l+({V_BIT}.lshift(op_h,8)) - local {V_OP}={V_BIT}.bxor(op_raw,{self.to_expr(self.k2, V_BIT)})-{self.to_expr(self.k1, V_BIT)} - if {V_OP}=={self.opcodes.index('MOVE')} then - {V_REGS}[{V_A}]={V_REGS}[{V_B}] - elseif {V_OP}=={self.opcodes.index('LOADK')} then - {V_REGS}[{V_A}]={V_GETC}({V_B}) - elseif {V_OP}=={self.opcodes.index('GETGLOBAL')} then - local n={V_GETC}({V_B}) - if n then {V_REGS}[{V_A}]={V_ENV}[n] end - elseif {V_OP}=={self.opcodes.index('SETGLOBAL')} then - local n={V_GETC}({V_B}) - if n then {V_ENV}[n]={V_REGS}[{V_A}] end - elseif {V_OP}=={self.opcodes.index('CALL')} then - local f={V_REGS}[{V_A}] + while true do + local {v['PTR']}={v['CURR']}*7+1 + local b1,b2,b3,b4,b5,b6,b7=string.byte({v['INST']},{v['PTR']},{v['PTR']}+6) + if not b1 then break end + {v['CURR']}=b6+({v['BIT']}.lshift(b7,8)) + local {v['OP']}={v['BIT']}.bxor(b1+{v['BIT']}.lshift(b2,8),{self.to_expr(self.k2)})-{self.to_expr(self.k1)} + local {v['A']},{v['B']},{v['C']}=b3,b4,b5 + if {v['OP']}=={self.opcodes.index('MOVE')} then {v['REGS']}[{v['A']}]={v['REGS']}[{v['B']}] + elseif {v['OP']}=={self.opcodes.index('LOADK')} then {v['REGS']}[{v['A']}]={v['GETC']}({v['B']}) + elseif {v['OP']}=={self.opcodes.index('GETGLOBAL')} then {v['REGS']}[{v['A']}]={v['ENV']}[{v['GETC']}({v['B']})] + elseif {v['OP']}=={self.opcodes.index('SETGLOBAL')} then {v['ENV']}[{v['GETC']}({v['B']})]={v['REGS']}[{v['A']}] + elseif {v['OP']}=={self.opcodes.index('GETTABLE')} then + local b={v['REGS']}[{v['B']}] + if b then {v['REGS']}[{v['A']}]=b[{v['REGS']}[{v['C']}] or {v['GETC']}({v['C']})] end + elseif {v['OP']}=={self.opcodes.index('SETTABLE')} then + local a={v['REGS']}[{v['A']}] + if a then a[{v['REGS']}[{v['B']}] or {v['GETC']}({v['B']})]={v['REGS']}[{v['C']}] end + elseif {v['OP']}=={self.opcodes.index('ADD')} then {v['REGS']}[{v['A']}]=({v['REGS']}[{v['B']}] or 0)+({v['REGS']}[{v['C']}] or 0) + elseif {v['OP']}=={self.opcodes.index('SUB')} then {v['REGS']}[{v['A']}]=({v['REGS']}[{v['B']}] or 0)-({v['REGS']}[{v['C']}] or 0) + elseif {v['OP']}=={self.opcodes.index('MUL')} then {v['REGS']}[{v['A']}]=({v['REGS']}[{v['B']}] or 0)*({v['REGS']}[{v['C']}] or 0) + elseif {v['OP']}=={self.opcodes.index('DIV')} then {v['REGS']}[{v['A']}]=({v['REGS']}[{v['B']}] or 0)/({v['REGS']}[{v['C']}] or 1) + elseif {v['OP']}=={self.opcodes.index('UNM')} then {v['REGS']}[{v['A']}]=-({v['REGS']}[{v['B']}] or 0) + elseif {v['OP']}=={self.opcodes.index('NOT')} then {v['REGS']}[{v['A']}]=not {v['REGS']}[{v['B']}] + elseif {v['OP']}=={self.opcodes.index('LEN')} then {v['REGS']}[{v['A']}]=#{v['REGS']}[{v['B']}] or 0 + elseif {v['OP']}=={self.opcodes.index('CALL')} then + local f={v['REGS']}[{v['A']}] if f then local args={{}} - if {V_B}>1 then for i=1,{V_B}-1 do args[i]={V_REGS}[{V_A}+i] end end - local res={{f({V_UNP}(args))}} - if {V_C}>1 then for i=1,{V_C}-1 do {V_REGS}[{V_A}+i-1]=res[i] end end + if {v['B']}>1 then for i=1,{v['B']}-1 do args[i]={v['REGS']}[{v['A']}+i] end end + local res={{f(({v['UNP']})(args))}} + if {v['C']}>1 then for i=1,{v['C']}-1 do {v['REGS']}[{v['A']}+i-1]=res[i] end end end - elseif {V_OP}=={self.opcodes.index('RETURN')} then - {V_RUN}=false - end + elseif {v['OP']}=={self.opcodes.index('RETURN')} then break end end end -{V_SPW}({V_EXEC})""" +{v['SPW']}({v['EXEC']})""" return self.minify(vm_lua) def compile_to_bytecode(self, ast): - constants = [] - instructions = [] - locals_map = {} - next_reg = 0 - + constants, instructions, locals_map = [], [], {} + self.next_reg = 0 def add_const(val): - if isinstance(val, str): - s_q = chr(39); d_q = chr(34) - if (val.startswith(s_q) and val.endswith(s_q)) or (val.startswith(d_q) and val.endswith(d_q)): - val = val[1:-1] + if isinstance(val, str) and ((val.startswith("'") and val.endswith("'")) or (val.startswith('"') and val.endswith('"'))): val = val[1:-1] for i, c in enumerate(constants): if c['value'] == val: return i t = 'string' if isinstance(val, str) else 'number' + if val in ['true', 'false', 'nil']: t = 'keyword' constants.append({'type': t, 'value': val}) return len(constants) - 1 - - def load_expr_to_reg(expr, reg): - if expr['type'] == 'IDENT': - if expr['value'] in locals_map: - instructions.append([self.op_to_id["MOVE"], reg, locals_map[expr['value']], 0]) - else: - instructions.append([self.op_to_id["GETGLOBAL"], reg, add_const(expr['value']), 0]) - elif expr['type'] in ['STRING', 'NUMBER']: - val = expr['value'] - if expr['type'] == 'NUMBER': - try: val = float(val) - except: pass - instructions.append([self.op_to_id["LOADK"], reg, add_const(val), 0]) - + def emit(op, a=0, b=0, c=0): instructions.append([self.op_to_id[op], a, b, c]) + def gen_expr(expr, target): + if not expr: return + if expr['type'] == 'NUMBER': emit("LOADK", target, add_const(float(expr['value']))) + elif expr['type'] == 'STRING': emit("LOADK", target, add_const(expr['value'])) + elif expr['type'] == 'KEYWORD': emit("LOADK", target, add_const(expr['value'])) + elif expr['type'] == 'variable': + if expr['name'] in locals_map: emit("MOVE", target, locals_map[expr['name']]) + else: emit("GETGLOBAL", target, add_const(expr['name'])) + elif expr['type'] == 'index': + br = self.next_reg; self.next_reg += 1; gen_expr(expr['base'], br) + kr = self.next_reg; self.next_reg += 1; gen_expr(expr['key'], kr) + emit("GETTABLE", target, br, kr); self.next_reg -= 2 + elif expr['type'] == 'binary': + lr = self.next_reg; self.next_reg += 1; gen_expr(expr['left'], lr) + rr = self.next_reg; self.next_reg += 1; gen_expr(expr['right'], rr) + op_m = {'+': 'ADD', '-': 'SUB', '*': 'MUL', '/': 'DIV'} + emit(op_m.get(expr['op'], 'ADD'), target, lr, rr); self.next_reg -= 2 + elif expr['type'] == 'unary': + or_reg = self.next_reg; self.next_reg += 1; gen_expr(expr['operand'], or_reg) + op_m = {'-': 'UNM', '#': 'LEN', 'not': 'NOT'} + emit(op_m.get(expr['op'], 'UNM'), target, or_reg); self.next_reg -= 1 + elif expr['type'] == 'call': gen_call(expr, target) + def gen_call(node, target): + fr = self.next_reg; self.next_reg += 1 + if node['func']['type'] == 'variable': + if node['func']['name'] in locals_map: emit("MOVE", fr, locals_map[node['func']['name']]) + else: emit("GETGLOBAL", fr, add_const(node['func']['name'])) + else: gen_expr(node['func'], fr) + for i, arg in enumerate(node['args']): + ar = self.next_reg; self.next_reg += 1; gen_expr(arg, ar) + emit("CALL", fr, len(node['args']) + 1, 2) + emit("MOVE", target, fr) + self.next_reg = fr for node in ast: - if node['type'] == 'call': - func_reg = next_reg - if node['name'] in locals_map: - instructions.append([self.op_to_id["MOVE"], func_reg, locals_map[node['name']], 0]) - else: - instructions.append([self.op_to_id["GETGLOBAL"], func_reg, add_const(node['name']), 0]) - for i, arg_expr in enumerate(node['args']): - load_expr_to_reg(arg_expr, func_reg + 1 + i) - instructions.append([self.op_to_id["CALL"], func_reg, len(node['args']) + 1, 1]) - elif node['type'] == 'assign': - val_reg = next_reg - load_expr_to_reg(node['value'], val_reg) - if node.get('local'): - locals_map[node['name']] = val_reg - next_reg += 1 - else: - instructions.append([self.op_to_id["SETGLOBAL"], val_reg, add_const(node['name']), 0]) - - instructions.append([self.op_to_id["RETURN"], 0, 0, 0]) + if node['type'] == 'assign': + vr = self.next_reg; self.next_reg += 1; gen_expr(node['value'], vr) + if node.get('local'): locals_map[node['name']] = vr + elif 'target' in node: + t = node['target'] + if t['type'] == 'index': + br = self.next_reg; self.next_reg += 1; gen_expr(t['base'], br) + kr = self.next_reg; self.next_reg += 1; gen_expr(t['key'], kr) + emit("SETTABLE", br, kr, vr); self.next_reg -= 2 + else: emit("SETGLOBAL", vr, add_const(node['name'])) + elif node['type'] == 'call': gen_call(node, self.next_reg) + emit("RETURN") return {"instructions": instructions, "constants": constants} def obfuscate(self, code): diff --git a/core/parser.py b/core/parser.py index 3676e12..ee0fcda 100644 --- a/core/parser.py +++ b/core/parser.py @@ -5,102 +5,88 @@ class Lexer: self.code = code self.tokens = [] self.pos = 0 - - # Using chr() to build regex parts and avoid tool-induced corruption. - LB = chr(91) - RB = chr(93) - DQ = chr(34) - SQ = chr(39) - BS = chr(92) - - # List every operator individually to avoid character set issues. - OP_LIST = '==|~=|<=|>=|\.\.\.|\.\.|>>|<<|\+|\-|\*|/|%|\^|#|=|\<|\>|\(|\)|\{|\}|' + BS + LB + '|' + BS + RB + '|;|:|,|\.' - + LB, RB, DQ, SQ, BS = chr(91), chr(93), chr(34), chr(39), chr(92) + OP_LIST = r'==|~=|<=|>=|\.\.\.|\.\.|>>|<<|\+|\-|\*|/|%|\^|#|=|\<|\>|\(|\)|\{|\}|' + BS + LB + '|' + BS + RB + '|;|:|,|\.' self.rules = [ ('COMMENT', re.compile('--' + LB + LB + '.*?' + RB + RB + '|--.*', re.DOTALL)), - ('STRING', re.compile(DQ + '(?:' + BS + BS + '.|[^' + DQ + BS + BS + '])*' + DQ + '|' + SQ + '(?:' + BS + BS + '.|[^' + SQ + BS + BS + '])*' + SQ + '|' + LB + LB + '.*?' + RB + RB, re.DOTALL)), - ('NUMBER', re.compile(r'\d+\.?\d*')), + ('STRING', re.compile(DQ + r'(?:' + BS + BS + r'.|[^' + DQ + BS + BS + r'])*' + DQ + '|' + SQ + r'(?:' + BS + BS + r'.|[^' + SQ + BS + BS + r'])*' + SQ + '|' + LB + LB + '.*?' + RB + RB, re.DOTALL)), + ('NUMBER', re.compile(r'\b\d+\.?\d*(?:[eE][+-]?\d+)?\b|\b0x[0-9a-fA-F]+\b')), ('KEYWORD', re.compile(r'\b(and|break|do|else|elseif|end|false|for|function|if|in|local|nil|not|or|repeat|return|then|true|until|while)\b')), ('IDENT', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*')), ('OP', re.compile(OP_LIST)), ('SPACE', re.compile(r'\s+')) ] - def tokenize(self): while self.pos < len(self.code): match = None for name, regex in self.rules: match = regex.match(self.code, self.pos) if match: - if name != 'SPACE' and name != 'COMMENT': - self.tokens.append((name, match.group(0))) - self.pos = match.end() - break - if not match: - self.pos += 1 + if name != 'SPACE' and name != 'COMMENT': self.tokens.append((name, match.group(0))) + self.pos = match.end(); break + if not match: self.pos += 1 return self.tokens class Parser: - def __init__(self, tokens): - self.tokens = tokens - self.pos = 0 - + def __init__(self, tokens): self.tokens = tokens; self.pos = 0 def peek(self, offset=0): - index = self.pos + offset - return self.tokens[index] if index < len(self.tokens) else (None, None) - - def consume(self, expected_type=None, expected_value=None): - token = self.peek() - if not token or not token[0]: return None - if expected_type and token[0] != expected_type: return None - if expected_value and token[1] != expected_value: return None - self.pos += 1 - return token - + idx = self.pos + offset + return self.tokens[idx] if idx < len(self.tokens) else (None, None) + def consume(self, t=None, v=None): + tk = self.peek() + if not tk or not tk[0]: return None + if t and tk[0] != t: return None + if v and tk[1] != v: return None + self.pos += 1; return tk def parse(self): nodes = [] while self.pos < len(self.tokens): node = self.parse_statement() - if node: - nodes.append(node) - else: - self.pos += 1 + if node: nodes.append(node) + else: self.pos += 1 return nodes - def parse_statement(self): - token = self.peek() - if not token or not token[0]: return None - - if token[1] == 'local': - self.consume() - ident = self.consume('IDENT') + tk = self.peek() + if not tk or not tk[0]: return None + if tk[1] == 'local': + self.consume(); ident = self.consume('IDENT') if ident: - if self.peek()[1] == '=': - self.consume() - val = self.parse_expression() - return {'type': 'assign', 'name': ident[1], 'value': val, 'local': True} + if self.peek()[1] == '=': self.consume(); return {'type': 'assign', 'name': ident[1], 'value': self.parse_expression(), 'local': True} return None - - if token[0] == 'IDENT': - ident = self.consume()[1] - next_token = self.peek() - if next_token[1] == '(': - self.consume() - args = [] - while self.peek()[1] and self.peek()[1] != ')': + start = self.pos; expr = self.parse_prefix_expression() + if expr: + if self.peek()[1] == '=': self.consume(); return {'type': 'assign', 'target': expr, 'value': self.parse_expression()} + elif expr['type'] == 'call': return expr + self.pos = start; return None + def parse_prefix_expression(self): + tk = self.peek() + if not tk or tk[0] != 'IDENT': return None + expr = {'type': 'variable', 'name': self.consume()[1]} + while True: + nt = self.peek() + if nt[1] == '.': + self.consume(); m = self.consume('IDENT'); expr = {'type': 'index', 'base': expr, 'key': {'type': 'STRING', 'value': '"'+m[1]+'"'}} + elif nt[1] == '[': self.consume(); key = self.parse_expression(); self.consume('OP', ']'); expr = {'type': 'index', 'base': expr, 'key': key} + elif nt[1] == '(': + self.consume(); args = [] + if self.peek()[1] != ')': args.append(self.parse_expression()) - if self.peek()[1] == ',': - self.consume() - self.consume('OP', ')') - return {'type': 'call', 'name': ident, 'args': args} - elif next_token[1] == '=': - self.consume() - val = self.parse_expression() - return {'type': 'assign', 'name': ident, 'value': val, 'local': False} - - return None - + while self.peek()[1] == ',': self.consume(); args.append(self.parse_expression()) + self.consume('OP', ')'); expr = {'type': 'call', 'func': expr, 'args': args} + else: break + return expr def parse_expression(self): - token = self.consume() - if not token: return None - return {'type': token[0], 'value': token[1]} + left = self.parse_primary_expression() + while self.peek()[1] in ['+', '-', '*', '/', '..', '==', '<', '>', '<=', '>=', '~=']: + op = self.consume()[1]; right = self.parse_primary_expression() + left = {'type': 'binary', 'op': op, 'left': left, 'right': right} + return left + def parse_primary_expression(self): + tk = self.peek() + if not tk: return None + if tk[1] in ['-', '#', 'not']: + op = self.consume()[1]; return {'type': 'unary', 'op': op, 'operand': self.parse_primary_expression()} + if tk[0] in ['STRING', 'NUMBER']: v = tk[1]; self.consume(); return {'type': tk[0], 'value': v} + if tk[1] in ['true', 'false', 'nil']: return {'type': 'KEYWORD', 'value': self.consume()[1]} + if tk[1] == '(': self.consume(); e = self.parse_expression(); self.consume('OP', ')'); return e + return self.parse_prefix_expression() \ No newline at end of file