38029-vm/core/parser.py
Flatlogic Bot d2c45727f6 19:32
2026-01-31 18:32:21 +00:00

117 lines
5.9 KiB
Python

import re
class Lexer:
    """Regex-driven tokenizer for a subset of Lua source text.

    ``tokenize()`` yields ``(kind, text)`` tuples where ``kind`` is one of
    ``STRING``, ``NUMBER``, ``KEYWORD``, ``IDENT`` or ``OP``.  Whitespace and
    comments are recognised but not emitted; characters that match no rule
    are skipped silently.
    """

    # Longest operators first so that e.g. '...' is not split into '..' + '.'.
    _OPERATORS = (
        r'==|~=|<=|>=|\.\.\.|\.\.|>>|<<|\+|\-|\*|/|%|\^|#|=|\<|\>'
        r'|\(|\)|\{|\}|\[|\]|;|:|,|\.'
    )

    # Rules are tried in order at every position; the first match wins.
    _RULES = [
        # Long comments may carry a level ('--[==[ ... ]==]'); an unterminated
        # one runs to end of input.  A plain '--' comment stops at the end of
        # its line: '[^\n]*' is used instead of '.*' because DOTALL would let
        # '.' run across newlines and swallow the rest of the source.
        ('COMMENT', re.compile(
            r'--\[(=*)\[.*?(?:\]\1\]|\Z)|--[^\n]*', re.DOTALL)),
        # Double-quoted, single-quoted (both with backslash escapes) and
        # level-0 long-bracket strings.
        ('STRING', re.compile(
            r'"(?:\\.|[^"\\])*"' r"|'(?:\\.|[^'\\])*'" r'|\[\[.*?\]\]',
            re.DOTALL)),
        ('NUMBER', re.compile(
            r'\b\d+\.?\d*(?:[eE][+-]?\d+)?\b|\b0x[0-9a-fA-F]+\b')),
        # Keywords must precede IDENT so reserved words are not identifiers.
        ('KEYWORD', re.compile(
            r'\b(and|break|do|else|elseif|end|false|for|function|if|in|local'
            r'|nil|not|or|repeat|return|then|true|until|while)\b')),
        ('IDENT', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*')),
        ('OP', re.compile(_OPERATORS)),
        ('SPACE', re.compile(r'\s+')),
    ]

    def __init__(self, code):
        """Store the source text and reset the scan position.

        Args:
            code: Source text to tokenize.
        """
        self.code = code
        self.tokens = []
        self.pos = 0
        # Per-instance copy so callers may adjust the rules of one lexer.
        self.rules = list(self._RULES)

    def tokenize(self):
        """Scan ``self.code`` from ``self.pos`` and return the token list.

        Returns:
            ``self.tokens``: a list of ``(kind, text)`` tuples, with SPACE
            and COMMENT matches omitted.
        """
        while self.pos < len(self.code):
            for name, regex in self.rules:
                match = regex.match(self.code, self.pos)
                if match:
                    if name not in ('SPACE', 'COMMENT'):
                        self.tokens.append((name, match.group(0)))
                    self.pos = match.end()
                    break
            else:
                # No rule matched: drop the character and keep going.
                self.pos += 1
        return self.tokens
class Parser:
    """Best-effort recursive-descent parser over ``(kind, text)`` tokens.

    Recognises local and plain assignments, ``return``, function calls,
    method calls, field/index access and unary/binary expressions, and
    produces plain ``dict`` AST nodes.  Tokens that do not begin a
    recognised statement are skipped instead of raising an error.
    """

    _UNARY_OPS = ('-', '#', 'not')

    # Binary operator -> (precedence, right_associative); a higher number
    # binds tighter.  Comparison < concatenation < additive < multiplicative.
    _BINARY_OPS = {
        '==': (1, False), '~=': (1, False),
        '<': (1, False), '>': (1, False),
        '<=': (1, False), '>=': (1, False),
        '..': (2, True),
        '+': (3, False), '-': (3, False),
        '*': (4, False), '/': (4, False),
    }

    def __init__(self, tokens):
        """Store the token sequence and start at its first element."""
        self.tokens = tokens
        self.pos = 0

    def peek(self, offset=0):
        """Return the token ``offset`` places ahead without consuming it.

        Returns ``(None, None)`` when that position is past the end, so
        callers can index the result unconditionally.
        """
        idx = self.pos + offset
        return self.tokens[idx] if idx < len(self.tokens) else (None, None)

    def consume(self, t=None, v=None):
        """Consume and return the current token if it matches.

        Args:
            t: Required token kind, or None to accept any kind.
            v: Required token text, or None to accept any text.

        Returns:
            The consumed token, or None (position unchanged) at end of
            input or on a mismatch.
        """
        tk = self.peek()
        if not tk or not tk[0]:
            return None
        if t and tk[0] != t:
            return None
        if v and tk[1] != v:
            return None
        self.pos += 1
        return tk

    def parse(self):
        """Parse all tokens and return the list of statement nodes."""
        nodes = []
        while self.pos < len(self.tokens):
            start = self.pos
            node = self.parse_statement()
            if node:
                nodes.append(node)
            elif self.pos == start:
                # Skip a token only when the statement parser made no
                # progress; after e.g. ';' it has already advanced, and
                # skipping again would drop the start of the next statement.
                self.pos += 1
        return nodes

    def parse_statement(self):
        """Parse one statement at the current position.

        Returns:
            An ``assign``, ``return``, ``call`` or ``method_call`` node, or
            None when no statement is recognised here (a lone ``;`` is
            consumed and also yields None).
        """
        tk = self.peek()
        if not tk or not tk[0]:
            return None

        if tk[1] == 'local':
            self.consume()
            ident = self.consume('IDENT')
            if not ident:
                return None
            if self.peek()[1] == '=':
                self.consume()
                value = self.parse_expression()
            else:
                # A declaration without initialiser is recorded as nil.
                value = {'type': 'KEYWORD', 'value': 'nil'}
            return {'type': 'assign', 'name': ident[1], 'value': value, 'local': True}

        if tk[1] == 'return':
            self.consume()
            return {'type': 'return', 'value': self.parse_expression()}

        if tk[1] == ';':
            self.consume()
            return None

        start = self.pos
        expr = self.parse_prefix_expression()
        if expr:
            if self.peek()[1] == '=':
                self.consume()
                return {'type': 'assign', 'target': expr, 'value': self.parse_expression()}
            if expr['type'] in ('call', 'method_call'):
                return expr
        # Not an assignment or call statement: rewind so the caller decides.
        self.pos = start
        return None

    def _parse_call_args(self):
        """Parse a comma-separated argument list after '(' was consumed.

        Consumes the closing ')' when present and returns the list of
        argument nodes (entries are None where an argument failed to parse).
        """
        args = []
        if self.peek()[1] != ')':
            args.append(self.parse_expression())
            while self.peek()[1] == ',':
                self.consume()
                args.append(self.parse_expression())
        self.consume('OP', ')')
        return args

    def parse_prefix_expression(self):
        """Parse an identifier followed by any chain of suffixes.

        Suffixes are ``.name``, ``:name`` (optionally with a call),
        ``[expr]`` and ``(args)``.  Returns None when the current token is
        not an identifier.
        """
        tk = self.peek()
        if not tk or tk[0] != 'IDENT':
            return None
        expr = {'type': 'variable', 'name': self.consume()[1]}
        while True:
            nt = self.peek()
            if nt[1] == '.':
                self.consume()
                member = self.consume('IDENT')
                if not member:
                    break
                expr = {'type': 'index', 'base': expr,
                        'key': {'type': 'STRING', 'value': '"' + member[1] + '"'}}
            elif nt[1] == ':':
                self.consume()
                member = self.consume('IDENT')
                if not member:
                    break
                if self.peek()[1] == '(':
                    self.consume()
                    args = self._parse_call_args()
                    expr = {'type': 'method_call', 'base': expr,
                            'method': member[1], 'args': args}
                else:
                    # ':name' without a call is treated as a field access.
                    expr = {'type': 'index', 'base': expr,
                            'key': {'type': 'STRING', 'value': '"' + member[1] + '"'}}
            elif nt[1] == '[':
                self.consume()
                key = self.parse_expression()
                self.consume('OP', ']')
                expr = {'type': 'index', 'base': expr, 'key': key}
            elif nt[1] == '(':
                self.consume()
                args = self._parse_call_args()
                expr = {'type': 'call', 'func': expr, 'args': args}
            else:
                break
        return expr

    def parse_expression(self):
        """Parse a binary expression honouring operator precedence.

        Returns:
            A node dict, or None when no expression starts here.
        """
        return self._parse_binary(1)

    def _parse_binary(self, min_prec):
        """Precedence-climbing loop for operators of at least ``min_prec``."""
        left = self.parse_primary_expression()
        if not left:
            return None
        while True:
            info = self._BINARY_OPS.get(self.peek()[1])
            if info is None or info[0] < min_prec:
                break
            prec, right_assoc = info
            op = self.consume()[1]
            # Left-associative operators require a strictly tighter right side.
            right = self._parse_binary(prec if right_assoc else prec + 1)
            if not right:
                # Dangling operator: keep what was parsed so far.
                break
            left = {'type': 'binary', 'op': op, 'left': left, 'right': right}
        return left

    def parse_primary_expression(self):
        """Parse a unary expression, literal, parenthesised or prefix expression.

        Returns:
            A node dict, or None when nothing parseable starts here.
        """
        tk = self.peek()
        if not tk:
            return None
        if tk[1] in self._UNARY_OPS:
            op = self.consume()[1]
            sub = self.parse_primary_expression()
            return {'type': 'unary', 'op': op, 'operand': sub} if sub else None
        if tk[0] in ('STRING', 'NUMBER'):
            self.consume()
            return {'type': tk[0], 'value': tk[1]}
        if tk[1] in ('true', 'false', 'nil'):
            return {'type': 'KEYWORD', 'value': self.consume()[1]}
        if tk[1] == '(':
            self.consume()
            inner = self.parse_expression()
            self.consume('OP', ')')
            return inner
        return self.parse_prefix_expression()