117 lines
5.9 KiB
Python
117 lines
5.9 KiB
Python
import re
|
|
|
|
class Lexer:
    """Regex-driven tokenizer for Lua-style source text.

    ``tokenize()`` walks ``code`` from left to right, trying each rule in
    ``rules`` in order at the current position and taking the first one
    that matches.  Whitespace and comments are recognised but dropped;
    every other match is recorded as a ``(kind, text)`` tuple where
    ``kind`` is one of ``STRING``, ``NUMBER``, ``KEYWORD``, ``IDENT`` or
    ``OP``.  Characters that no rule matches are skipped silently.
    """

    # Long bracket: ``[[ ... ]]`` or a levelled form such as ``[==[ ... ]==]``.
    # The brackets MUST be escaped; an unescaped ``[[.*?]]`` is read by ``re``
    # as a character class followed by a literal ``]`` and never matches a
    # real long string.  The back-reference makes the closing level match the
    # opening one.
    _LONG_BRACKET = r'\[(=*)\[.*?\]\1\]'

    # Multi-character operators are listed before their single-character
    # prefixes so that, e.g., ``==`` is not split into two ``=`` tokens.
    _OPERATORS = (
        r'==|~=|<=|>=|\.\.\.|\.\.|>>|<<|\+|\-|\*|/|%|\^|#|=|\<|\>'
        r'|\(|\)|\{|\}|\[|\]|;|:|,|\.'
    )

    # Order matters: comments and strings are tried first so their contents
    # are never tokenized, and KEYWORD precedes IDENT so reserved words win.
    _RULES = (
        # A line comment stops at the newline.  ``[^\n]*`` is used instead of
        # ``.*`` because DOTALL (needed for the long form) would otherwise let
        # a single ``--`` comment swallow the rest of the input.
        ('COMMENT', re.compile(r'--' + _LONG_BRACKET + r'|--[^\n]*', re.DOTALL)),
        ('STRING', re.compile(
            r'"(?:\\.|[^"\\])*"'
            r"|'(?:\\.|[^'\\])*'"
            r'|' + _LONG_BRACKET,
            re.DOTALL,
        )),
        ('NUMBER', re.compile(r'\b\d+\.?\d*(?:[eE][+-]?\d+)?\b|\b0x[0-9a-fA-F]+\b')),
        ('KEYWORD', re.compile(
            r'\b(and|break|do|else|elseif|end|false|for|function|if|in|local'
            r'|nil|not|or|repeat|return|then|true|until|while)\b'
        )),
        ('IDENT', re.compile(r'[a-zA-Z_][a-zA-Z0-9_]*')),
        ('OP', re.compile(_OPERATORS)),
        ('SPACE', re.compile(r'\s+')),
    )

    # Rule names that are matched but never emitted as tokens.
    _SKIPPED = frozenset({'SPACE', 'COMMENT'})

    def __init__(self, code):
        """Store the source text and reset the scanning state.

        Args:
            code: The source text to tokenize.
        """
        self.code = code
        self.tokens = []
        self.pos = 0
        # Kept as a per-instance list, as before, so callers may adjust the
        # rule set of one lexer without affecting others.
        self.rules = list(self._RULES)

    def tokenize(self):
        """Scan the remaining input and return the accumulated token list.

        Returns:
            ``self.tokens``: a list of ``(kind, text)`` tuples in source
            order, excluding whitespace and comments.
        """
        code = self.code
        end = len(code)
        while self.pos < end:
            for name, regex in self.rules:
                match = regex.match(code, self.pos)
                if match:
                    if name not in self._SKIPPED:
                        self.tokens.append((name, match.group(0)))
                    self.pos = match.end()
                    break
            else:
                # No rule matched here: skip one character and carry on
                # (best-effort lexing rather than raising).
                self.pos += 1
        return self.tokens
|
|
|
|
class Parser:
    """Best-effort recursive-descent parser over ``(kind, text)`` tokens.

    The parser understands a small statement subset: ``local`` declarations
    of a single name, ``return`` with one expression, assignments to a
    prefix expression, and call / method-call statements.  Anything it does
    not recognise is skipped one token at a time instead of raising.

    AST nodes are plain dicts with a ``'type'`` key: ``assign``, ``return``,
    ``variable``, ``index``, ``call``, ``method_call``, ``binary``,
    ``unary``, and literal nodes typed ``STRING``, ``NUMBER`` or ``KEYWORD``.
    """

    # Binary operators mapped to binding strength (higher binds tighter),
    # following the relative order in the Lua reference manual.
    _BINARY_PRECEDENCE = {
        'or': 1,
        'and': 2,
        '<': 3, '>': 3, '<=': 3, '>=': 3, '~=': 3, '==': 3,
        '<<': 4, '>>': 4,
        '..': 5,
        '+': 6, '-': 6,
        '*': 7, '/': 7, '%': 7,
        '^': 9,
    }
    # Concatenation and exponentiation group to the right.
    _RIGHT_ASSOCIATIVE = frozenset({'..', '^'})
    _UNARY_OPERATORS = frozenset({'-', '#', 'not'})
    # Unary operators sit between the multiplicative operators and ``^``,
    # so ``-x ^ 2`` parses as ``-(x ^ 2)``.
    _UNARY_PRECEDENCE = 8

    def __init__(self, tokens):
        """Remember the token list and start at its first element.

        Args:
            tokens: Sequence of ``(kind, text)`` pairs.
        """
        self.tokens = tokens
        self.pos = 0

    def peek(self, offset=0):
        """Return the token ``offset`` places ahead without consuming it.

        Returns ``(None, None)`` when the requested index lies outside the
        token list, so callers can index the result unconditionally.
        """
        idx = self.pos + offset
        if 0 <= idx < len(self.tokens):
            return self.tokens[idx]
        return (None, None)

    def consume(self, t=None, v=None):
        """Consume and return the current token if it matches the filters.

        Args:
            t: Required token kind, or ``None`` to accept any kind.
            v: Required token text, or ``None`` to accept any text.

        Returns:
            The consumed token, or ``None`` (leaving the position unchanged)
            at end of input or when a filter does not match.
        """
        tk = self.peek()
        if not tk or not tk[0]:
            return None
        if t and tk[0] != t:
            return None
        if v and tk[1] != v:
            return None
        self.pos += 1
        return tk

    def parse(self):
        """Parse every statement in the token list.

        Returns:
            A list of statement nodes in source order.
        """
        nodes = []
        while self.pos < len(self.tokens):
            start = self.pos
            node = self.parse_statement()
            if node:
                nodes.append(node)
            elif self.pos == start:
                # Nothing was recognised and nothing was consumed: drop one
                # token so the loop always makes progress.  When the
                # statement parser already advanced (e.g. past ``;``), no
                # extra token is skipped, so the following statement is
                # not lost.
                self.pos += 1
        return nodes

    def parse_statement(self):
        """Parse one statement at the current position.

        Returns:
            A statement node, or ``None`` when no supported statement starts
            here.  ``None`` is also returned after consuming a lone ``;`` or
            a ``local`` that is not followed by an identifier.
        """
        tk = self.peek()
        if not tk or not tk[0]:
            return None

        if tk[1] == 'local':
            self.consume()
            ident = self.consume('IDENT')
            if not ident:
                return None
            if self.peek()[1] == '=':
                self.consume()
                value = self.parse_expression()
            else:
                # A declaration without an initializer is recorded as an
                # explicit assignment of nil.
                value = {'type': 'KEYWORD', 'value': 'nil'}
            return {'type': 'assign', 'name': ident[1], 'value': value, 'local': True}

        if tk[1] == 'return':
            self.consume()
            return {'type': 'return', 'value': self.parse_expression()}

        if tk[1] == ';':
            # Empty statement: consume the separator and produce no node.
            self.consume()
            return None

        start = self.pos
        expr = self.parse_prefix_expression()
        if expr:
            if self.peek()[1] == '=':
                self.consume()
                return {'type': 'assign', 'target': expr, 'value': self.parse_expression()}
            if expr['type'] in ('call', 'method_call'):
                return expr
        # Not a statement we understand: rewind so the caller decides how
        # to recover.
        self.pos = start
        return None

    def _parse_call_args(self):
        """Parse a comma-separated argument list after ``(`` was consumed.

        Consumes the closing ``)`` when it is present.  An argument that
        cannot be parsed is recorded as ``None``.
        """
        args = []
        if self.peek()[1] != ')':
            args.append(self.parse_expression())
            while self.peek()[1] == ',':
                self.consume()
                args.append(self.parse_expression())
        self.consume('OP', ')')
        return args

    def parse_prefix_expression(self):
        """Parse an identifier followed by any chain of suffixes.

        Supported suffixes are ``.name``, ``:name(args)``, ``[expr]`` and
        ``(args)``.  ``.name`` is represented as an ``index`` node whose key
        is a double-quoted STRING literal.

        Returns:
            The resulting node, or ``None`` if the current token is not an
            identifier.
        """
        tk = self.peek()
        if not tk or tk[0] != 'IDENT':
            return None
        expr = {'type': 'variable', 'name': self.consume()[1]}

        while True:
            suffix = self.peek()[1]
            if suffix == '.':
                self.consume()
                member = self.consume('IDENT')
                if not member:
                    break
                expr = {
                    'type': 'index',
                    'base': expr,
                    'key': {'type': 'STRING', 'value': '"' + member[1] + '"'},
                }
            elif suffix == ':':
                self.consume()
                member = self.consume('IDENT')
                if not member:
                    break
                if self.peek()[1] == '(':
                    self.consume()
                    args = self._parse_call_args()
                    expr = {'type': 'method_call', 'base': expr, 'method': member[1], 'args': args}
                else:
                    # ``a:b`` without a call is kept as a plain field access.
                    expr = {
                        'type': 'index',
                        'base': expr,
                        'key': {'type': 'STRING', 'value': '"' + member[1] + '"'},
                    }
            elif suffix == '[':
                self.consume()
                key = self.parse_expression()
                self.consume('OP', ']')
                expr = {'type': 'index', 'base': expr, 'key': key}
            elif suffix == '(':
                self.consume()
                args = self._parse_call_args()
                expr = {'type': 'call', 'func': expr, 'args': args}
            else:
                break
        return expr

    def parse_expression(self, min_prec=1):
        """Parse a binary-operator expression by precedence climbing.

        Args:
            min_prec: Lowest operator precedence this call may consume.
                The default parses a complete expression; recursive calls
                raise it to enforce precedence and associativity.

        Returns:
            An expression node, or ``None`` if no operand starts here.  If
            an operator is not followed by a parsable operand, the operator
            is consumed and the expression parsed so far is returned.
        """
        left = self.parse_primary_expression()
        if not left:
            return None
        while True:
            kind, op = self.peek()
            # Restrict to OP / KEYWORD tokens so other token kinds can never
            # be mistaken for an operator.
            prec = self._BINARY_PRECEDENCE.get(op) if kind in ('OP', 'KEYWORD') else None
            if prec is None or prec < min_prec:
                break
            self.consume()
            # Left-associative operators must not re-accept their own level
            # on the right-hand side; right-associative ones may.
            next_min = prec if op in self._RIGHT_ASSOCIATIVE else prec + 1
            right = self.parse_expression(next_min)
            if not right:
                break
            left = {'type': 'binary', 'op': op, 'left': left, 'right': right}
        return left

    def parse_primary_expression(self):
        """Parse a unary expression, literal, parenthesized or prefix expression.

        Returns:
            An expression node, or ``None`` if nothing parsable starts here.
        """
        tk = self.peek()
        if not tk:
            return None
        if tk[1] in self._UNARY_OPERATORS:
            op = self.consume()[1]
            # The operand may absorb ``^`` (which binds tighter than unary
            # operators) but nothing looser.
            sub = self.parse_expression(self._UNARY_PRECEDENCE)
            return {'type': 'unary', 'op': op, 'operand': sub} if sub else None
        if tk[0] in ('STRING', 'NUMBER'):
            self.consume()
            return {'type': tk[0], 'value': tk[1]}
        if tk[1] in ('true', 'false', 'nil'):
            return {'type': 'KEYWORD', 'value': self.consume()[1]}
        if tk[1] == '(':
            self.consume()
            inner = self.parse_expression()
            self.consume('OP', ')')
            return inner
        return self.parse_prefix_expression()