38029-vm/core/parser.py
import re


class Lexer:
    """Minimal lexer for a Lua-like language."""

    def __init__(self, code):
        self.code = code
        self.tokens = []
        self.pos = 0
        # Using chr() to build regex parts and avoid tool-induced corruption.
        LB = chr(91)   # [
        RB = chr(93)   # ]
        DQ = chr(34)   # "
        SQ = chr(39)   # '
        BS = chr(92)   # backslash
        # List every operator individually to avoid character set issues;
        # longer operators come first so '==' wins over '='.
        OP_LIST = (r'==|~=|<=|>=|\.\.\.|\.\.|>>|<<|\+|-|\*|/|%|\^|#|=|<|>|\(|\)|\{|\}|'
                   + BS + LB + '|' + BS + RB + r'|;|:|,|\.')
        self.rules = [
            # Long comments --[[ ... ]] (brackets escaped so they match literally)
            # or single-line comments that stop at the end of the line.
            ('COMMENT', re.compile('--' + BS + LB + BS + LB + '.*?' + BS + RB + BS + RB
                                   + '|--[^' + BS + 'n]*', re.DOTALL)),
            # Double-quoted, single-quoted, or long-bracket [[ ... ]] strings.
            ('STRING', re.compile(DQ + '(?:' + BS + BS + '.|[^' + DQ + BS + BS + '])*' + DQ
                                  + '|' + SQ + '(?:' + BS + BS + '.|[^' + SQ + BS + BS + '])*' + SQ
                                  + '|' + BS + LB + BS + LB + '.*?' + BS + RB + BS + RB, re.DOTALL)),
            ('NUMBER', re.compile(r'\d+\.?\d*')),
            ('KEYWORD', re.compile(r'\b(and|break|do|else|elseif|end|false|for|function|if|in|'
                                   r'local|nil|not|or|repeat|return|then|true|until|while)\b')),
            ('IDENT', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*')),
            ('OP', re.compile(OP_LIST)),
            ('SPACE', re.compile(r'\s+')),
        ]

    def tokenize(self):
        while self.pos < len(self.code):
            match = None
            for name, regex in self.rules:
                match = regex.match(self.code, self.pos)
                if match:
                    # Whitespace and comments are dropped; everything else is kept.
                    if name != 'SPACE' and name != 'COMMENT':
                        self.tokens.append((name, match.group(0)))
                    self.pos = match.end()
                    break
            if not match:
                # Skip characters that no rule recognizes.
                self.pos += 1
        return self.tokens
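
# Example (illustrative, not from the original file): Lexer('local n = 1 -- count').tokenize()
# yields [('KEYWORD', 'local'), ('IDENT', 'n'), ('OP', '='), ('NUMBER', '1')];
# whitespace and the trailing comment are discarded.
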
class Parser:
    """Builds a flat list of statement nodes from the token stream."""

    def __init__(self, tokens):
        self.tokens = tokens
        self.pos = 0

    def peek(self, offset=0):
        index = self.pos + offset
        return self.tokens[index] if index < len(self.tokens) else (None, None)

    def consume(self, expected_type=None, expected_value=None):
        token = self.peek()
        if not token or not token[0]:
            return None
        if expected_type and token[0] != expected_type:
            return None
        if expected_value and token[1] != expected_value:
            return None
        self.pos += 1
        return token

    def parse(self):
        nodes = []
        while self.pos < len(self.tokens):
            node = self.parse_statement()
            if node:
                nodes.append(node)
            else:
                # Skip tokens that do not start a recognized statement.
                self.pos += 1
        return nodes

    def parse_statement(self):
        token = self.peek()
        if not token or not token[0]:
            return None
        # local <ident> = <expr>
        if token[1] == 'local':
            self.consume()
            ident = self.consume('IDENT')
            if ident:
                if self.peek()[1] == '=':
                    self.consume()
                    val = self.parse_expression()
                    return {'type': 'assign', 'name': ident[1], 'value': val, 'local': True}
            return None
        if token[0] == 'IDENT':
            ident = self.consume()[1]
            next_token = self.peek()
            # <ident>(<arg>, <arg>, ...)
            if next_token[1] == '(':
                self.consume()
                args = []
                while self.peek()[1] and self.peek()[1] != ')':
                    args.append(self.parse_expression())
                    if self.peek()[1] == ',':
                        self.consume()
                self.consume('OP', ')')
                return {'type': 'call', 'name': ident, 'args': args}
            # <ident> = <expr>
            elif next_token[1] == '=':
                self.consume()
                val = self.parse_expression()
                return {'type': 'assign', 'name': ident, 'value': val, 'local': False}
        return None

    def parse_expression(self):
        # Expressions are reduced to a single token; no operators or precedence yet.
        token = self.consume()
        if not token:
            return None
        return {'type': token[0], 'value': token[1]}
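
# A minimal usage sketch (not part of the original module): tokenize and parse a
# small Lua-like snippet to show the shapes of the tokens and AST nodes. The
# sample source below is illustrative only.
if __name__ == '__main__':
    sample = 'local x = 10\nprint(x, "hello")'
    tokens = Lexer(sample).tokenize()
    print(tokens)   # e.g. [('KEYWORD', 'local'), ('IDENT', 'x'), ('OP', '='), ...]
    ast = Parser(tokens).parse()
    print(ast)      # an 'assign' node followed by a 'call' node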