import re

class Lexer:
    def __init__(self, code):
        self.code = code
        self.tokens = []
        self.pos = 0
        # Rules are tried in order, so keywords are matched before
        # identifiers and multi-character operators before single ones.
        self.rules = [
            # [^\n] keeps line comments single-line even under re.DOTALL.
            ('COMMENT', r'--\[\[.*?\]\]|--[^\n]*'),
            ('STRING', r'"(?:\\.|[^"\\])*"|\'(?:\\.|[^\'\\])*\'|\[\[.*?\]\]'),
            ('NUMBER', r'\d+\.?\d*'),
            ('KEYWORD', r'\b(and|break|do|else|elseif|end|false|for|function|if|in|local|nil|not|or|repeat|return|then|true|until|while)\b'),
            ('IDENT', r'[a-zA-Z_][a-zA-Z0-9_]*'),
            ('OP', r'==|~=|<=|>=|\.\.\.|\.\.|>>|<<|[+\-*/%^#=<>(){}\[\];:,.]'),
            ('SPACE', r'\s+'),
        ]

    def tokenize(self):
        while self.pos < len(self.code):
            match = None
            for name, pattern in self.rules:
                regex = re.compile(pattern, re.DOTALL)
                match = regex.match(self.code, self.pos)
                if match:
                    # Whitespace and comments carry no meaning downstream.
                    if name != 'SPACE' and name != 'COMMENT':
                        self.tokens.append((name, match.group(0)))
                    self.pos = match.end()
                    break
            if not match:
                self.pos += 1  # Skip unknown characters
        return self.tokens


class Parser:
    # A very basic parser that handles function calls and variable
    # assignments to demonstrate the VM compilation.
    def __init__(self, tokens):
        self.tokens = tokens
        self.pos = 0

    def peek(self):
        return self.tokens[self.pos] if self.pos < len(self.tokens) else (None, None)

    def consume(self, expected_type=None):
        token = self.peek()
        if expected_type and token[0] != expected_type:
            return None
        self.pos += 1
        return token

    def parse(self):
        nodes = []
        while self.pos < len(self.tokens):
            node = self.parse_statement()
            if node:
                nodes.append(node)
            else:
                self.pos += 1  # Skip tokens we cannot parse
        return nodes

    def parse_statement(self):
        token = self.peek()
        if token[0] == 'IDENT':
            ident = self.consume()[1]
            next_token = self.peek()
            if next_token[1] == '(':  # Function call
                self.consume()  # (
                args = []
                # Guard against (None, None) so an unterminated call
                # cannot loop forever past the end of the token stream.
                while self.peek()[1] not in (')', None):
                    args.append(self.peek()[1])  # Simplified: only strings/numbers/idents
                    self.consume()
                    if self.peek()[1] == ',':
                        self.consume()
                self.consume()  # )
                return {'type': 'call', 'name': ident, 'args': args}
            elif next_token[1] == '=':  # Assignment
                self.consume()  # =
                value = self.consume()[1]
                return {'type': 'assign', 'name': ident, 'value': value}
        return None
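
# A minimal usage sketch, not part of the classes above: it drives the
# Lexer and Parser on a tiny illustrative Lua snippet and prints the
# token stream and the statement nodes. The sample source and the
# __main__ guard are assumptions added for demonstration only.
if __name__ == '__main__':
    sample = 'x = 42\nprint("hello")'
    tokens = Lexer(sample).tokenize()
    # e.g. [('IDENT', 'x'), ('OP', '='), ('NUMBER', '42'), ...]
    print(tokens)
    nodes = Parser(tokens).parse()
    # e.g. [{'type': 'assign', 'name': 'x', 'value': '42'},
    #       {'type': 'call', 'name': 'print', 'args': ['"hello"']}]
    print(nodes)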