import re


class Lexer:
    """Regex-driven tokenizer for a subset of Lua source text.

    ``tokenize`` produces a list of ``(kind, text)`` tuples where ``kind`` is
    one of ``STRING``, ``NUMBER``, ``KEYWORD``, ``IDENT`` or ``OP``.
    Whitespace and comments are recognised but not emitted.
    """

    # Quoted strings allow backslash escapes; compiled with DOTALL so an
    # escaped or embedded newline stays inside the literal.
    _DQ_STRING = r'"(?:\\.|[^"\\])*"'
    _SQ_STRING = r"'(?:\\.|[^'\\])*'"
    # Lua long bracket: [[ ... ]], [=[ ... ]=], [==[ ... ]==], ...  The
    # brackets must be escaped, otherwise "[[.*?]" is read as a character
    # class and the pattern never matches a real long bracket.
    _LONG_BRACKET = r'\[(=*)\[.*?\]\1\]'
    # Longer operators come first so that "==" is not split into "=", "=".
    _OPERATORS = (
        r'==|~=|<=|>=|\.\.\.|\.\.|>>|<<|\+|-|\*|/|%|\^|#|=|<|>'
        r'|\(|\)|\{|\}|\[|\]|;|:|,|\.'
    )

    # Order matters: the first rule that matches at the current position wins.
    _RULES = (
        # The line-comment alternative uses [^\n]* rather than .* because the
        # pattern is compiled with DOTALL (needed for block comments); with
        # ".*" a single "--" would swallow the rest of the input.
        ('COMMENT', re.compile('--' + _LONG_BRACKET + r'|--[^\n]*', re.DOTALL)),
        ('STRING', re.compile(
            _DQ_STRING + '|' + _SQ_STRING + '|' + _LONG_BRACKET, re.DOTALL)),
        ('NUMBER', re.compile(
            r'\b0[xX][0-9a-fA-F]+\b|\b\d+\.?\d*(?:[eE][+-]?\d+)?\b')),
        ('KEYWORD', re.compile(
            r'\b(and|break|do|else|elseif|end|false|for|function|if|in|local'
            r'|nil|not|or|repeat|return|then|true|until|while)\b')),
        ('IDENT', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*')),
        ('OP', re.compile(_OPERATORS)),
        ('SPACE', re.compile(r'\s+')),
    )

    def __init__(self, code):
        """Store *code* (the source string) and reset the scan state."""
        self.code = code
        self.tokens = []
        self.pos = 0
        self.rules = list(self._RULES)

    def tokenize(self):
        """Scan ``self.code`` from ``self.pos`` and return the token list.

        Tokens are appended to ``self.tokens``, which is also the return
        value. A character that no rule matches is skipped silently, so
        lexing is best-effort and never raises on unknown input.
        """
        code = self.code
        length = len(code)
        while self.pos < length:
            for name, regex in self.rules:
                match = regex.match(code, self.pos)
                if match:
                    if name not in ('SPACE', 'COMMENT'):
                        self.tokens.append((name, match.group(0)))
                    self.pos = match.end()
                    break
            else:
                # No rule matched here: drop one character and carry on.
                self.pos += 1
        return self.tokens


class Parser:
    """Best-effort recursive-descent parser over ``Lexer`` tokens.

    Produces plain-dict AST nodes for local and plain assignments,
    ``return`` statements and call statements. Tokens that do not start a
    recognised statement are skipped instead of raising.
    """

    _UNARY_OPS = ('-', '#', 'not')
    # Operand priority for unary operators: tighter than every binary
    # operator except '^'.
    _UNARY_PRIORITY = 12
    # (left, right) binding priorities following the Lua reference manual;
    # right < left makes an operator right-associative ('..' and '^').
    _BINARY_PRIORITY = {
        'or': (1, 1),
        'and': (2, 2),
        '<': (3, 3), '>': (3, 3), '<=': (3, 3), '>=': (3, 3),
        '~=': (3, 3), '==': (3, 3),
        '..': (9, 8),
        '+': (10, 10), '-': (10, 10),
        '*': (11, 11), '/': (11, 11), '%': (11, 11),
        '^': (14, 13),
    }

    def __init__(self, tokens):
        """Store *tokens*, a sequence of ``(kind, text)`` tuples."""
        self.tokens = tokens
        self.pos = 0

    def peek(self, offset=0):
        """Return the token *offset* places ahead without consuming it.

        Returns ``(None, None)`` when that position is past the end.
        """
        idx = self.pos + offset
        return self.tokens[idx] if idx < len(self.tokens) else (None, None)

    def consume(self, t=None, v=None):
        """Consume and return the current token if it matches.

        *t* optionally constrains the token kind and *v* the token text.
        Returns ``None`` and leaves the position untouched at end of input
        or on a mismatch.
        """
        tk = self.peek()
        if not tk or not tk[0]:
            return None
        if t and tk[0] != t:
            return None
        if v and tk[1] != v:
            return None
        self.pos += 1
        return tk

    def parse(self):
        """Parse every token and return the list of statement nodes."""
        nodes = []
        while self.pos < len(self.tokens):
            start = self.pos
            node = self.parse_statement()
            if node:
                nodes.append(node)
            elif self.pos == start:
                # Skip a token only when the statement parser made no
                # progress. It may already have consumed input (a ';' or a
                # stray 'local'); skipping again would drop the first token
                # of the next statement.
                self.pos += 1
        return nodes

    def parse_statement(self):
        """Parse one statement at the current position.

        Returns an ``assign``, ``return``, ``call`` or ``method_call`` node,
        or ``None`` when no supported statement starts here. On ``None`` the
        position is either unchanged or moved past a consumed ``;`` /
        ``local`` token.
        """
        tk = self.peek()
        if not tk or not tk[0]:
            return None
        if tk[1] == 'local':
            self.consume()
            ident = self.consume('IDENT')
            if not ident:
                return None
            if self.peek()[1] == '=':
                self.consume()
                value = self.parse_expression()
            else:
                # "local x" without an initialiser is recorded as nil.
                value = {'type': 'KEYWORD', 'value': 'nil'}
            return {'type': 'assign', 'name': ident[1], 'value': value,
                    'local': True}
        if tk[1] == 'return':
            self.consume()
            return {'type': 'return', 'value': self.parse_expression()}
        if tk[1] == ';':
            self.consume()
            return None
        start = self.pos
        expr = self.parse_prefix_expression()
        if expr:
            if self.peek()[1] == '=':
                self.consume()
                return {'type': 'assign', 'target': expr,
                        'value': self.parse_expression()}
            if expr['type'] in ('call', 'method_call'):
                return expr
        # A bare expression that is not a call is not a statement: rewind.
        self.pos = start
        return None

    def _parse_call_arguments(self):
        """Parse comma-separated arguments after an already-consumed '('.

        Consumes the closing ')' when present. An argument that fails to
        parse is stored as ``None``.
        """
        args = []
        if self.peek()[1] != ')':
            args.append(self.parse_expression())
            while self.peek()[1] == ',':
                self.consume()
                args.append(self.parse_expression())
        self.consume('OP', ')')
        return args

    def parse_prefix_expression(self):
        """Parse an identifier followed by any chain of suffixes.

        Supported suffixes are ``.name``, ``:name(args)``, ``[expr]`` and
        ``(args)``. Returns ``None`` when the current token is not an
        identifier.
        """
        tk = self.peek()
        if not tk or tk[0] != 'IDENT':
            return None
        expr = {'type': 'variable', 'name': self.consume()[1]}
        while True:
            nt = self.peek()
            if nt[1] == '.':
                self.consume()
                m = self.consume('IDENT')
                if not m:
                    break
                expr = {'type': 'index', 'base': expr,
                        'key': {'type': 'STRING', 'value': '"' + m[1] + '"'}}
            elif nt[1] == ':':
                self.consume()
                m = self.consume('IDENT')
                if not m:
                    break
                if self.peek()[1] == '(':
                    self.consume()
                    args = self._parse_call_arguments()
                    expr = {'type': 'method_call', 'base': expr,
                            'method': m[1], 'args': args}
                else:
                    # "a:b" with no call is kept as a plain field index.
                    expr = {'type': 'index', 'base': expr,
                            'key': {'type': 'STRING',
                                    'value': '"' + m[1] + '"'}}
            elif nt[1] == '[':
                self.consume()
                key = self.parse_expression()
                self.consume('OP', ']')
                expr = {'type': 'index', 'base': expr, 'key': key}
            elif nt[1] == '(':
                self.consume()
                args = self._parse_call_arguments()
                expr = {'type': 'call', 'func': expr, 'args': args}
            else:
                break
        return expr

    def parse_expression(self):
        """Parse a full expression, honouring Lua operator precedence.

        Returns a node dict, or ``None`` when no expression starts here.
        """
        return self._parse_subexpression(0)

    def _parse_subexpression(self, limit):
        """Precedence climbing: parse operators binding tighter than *limit*."""
        if self.peek()[1] in self._UNARY_OPS:
            op = self.consume()[1]
            operand = self._parse_subexpression(self._UNARY_PRIORITY)
            if not operand:
                return None
            left = {'type': 'unary', 'op': op, 'operand': operand}
        else:
            left = self._parse_atom()
            if not left:
                return None
        while True:
            priority = self._BINARY_PRIORITY.get(self.peek()[1])
            if priority is None or priority[0] <= limit:
                break
            op = self.consume()[1]
            right = self._parse_subexpression(priority[1])
            if not right:
                # Dangling operator: keep what was parsed so far.
                break
            left = {'type': 'binary', 'op': op, 'left': left, 'right': right}
        return left

    def _parse_atom(self):
        """Parse a literal, a parenthesised expression or a prefix expression."""
        tk = self.peek()
        if tk[0] in ('STRING', 'NUMBER'):
            self.consume()
            return {'type': tk[0], 'value': tk[1]}
        if tk[1] in ('true', 'false', 'nil'):
            return {'type': 'KEYWORD', 'value': self.consume()[1]}
        if tk[1] == '(':
            self.consume()
            inner = self.parse_expression()
            self.consume('OP', ')')
            return inner
        return self.parse_prefix_expression()

    def parse_primary_expression(self):
        """Parse one operand: optional unary operators applied to an atom.

        Binary operators are not consumed here. Returns ``None`` when no
        operand is found.
        """
        tk = self.peek()
        if not tk:
            return None
        if tk[1] in self._UNARY_OPS:
            op = self.consume()[1]
            sub = self.parse_primary_expression()
            return {'type': 'unary', 'op': op, 'operand': sub} if sub else None
        return self._parse_atom()