Flatlogic Bot ca3a32f23e V 4
2026-02-16 06:55:36 +00:00

311 lines
7.6 KiB
JavaScript

import { startsWith, endsWith, stringIncludes, arrayIncludes } from './compat'
/**
 * Advances `position` by `len` characters of `str`, mutating it in place.
 *
 * The index is moved to `index + len` up front; the line and column counters
 * are then updated for each character that was stepped over.
 *
 * @param {{index: number, line: number, column: number}} position - Cursor to advance.
 * @param {string} str - Text being scanned.
 * @param {number} len - Number of characters to consume from the current index.
 * @returns {undefined}
 */
export function feedPosition(position, str, len) {
  const from = position.index
  const to = from + len
  position.index = to
  for (let offset = from; offset < to; offset++) {
    if (str.charAt(offset) !== '\n') {
      position.column++
      continue
    }
    // A newline begins a new line, so the column counter restarts at zero.
    position.line++
    position.column = 0
  }
}
/**
 * Advances `position` to the absolute index `end` by feeding it the number of
 * characters that lie between its current index and `end`.
 *
 * @param {{index: number, line: number, column: number}} position - Cursor to advance.
 * @param {string} str - Text being scanned.
 * @param {number} end - Absolute index the cursor should land on.
 * @returns {undefined} Whatever `feedPosition` returns.
 */
export function jumpPosition(position, str, end) {
  const distance = end - position.index
  return feedPosition(position, str, distance)
}
/**
 * Creates a fresh cursor pointing at the very beginning of the input.
 *
 * @returns {{index: number, column: number, line: number}} A new object with
 *   every counter set to zero.
 */
export function makeInitialPosition() {
  const origin = { index: 0, column: 0, line: 0 }
  return origin
}
/**
 * Takes a snapshot of a cursor so later mutation of the original does not
 * affect the copy.
 *
 * @param {{index: number, line: number, column: number}} position - Cursor to copy.
 * @returns {{index: number, line: number, column: number}} A new object
 *   carrying only the `index`, `line` and `column` values.
 */
export function copyPosition(position) {
  const { index, line, column } = position
  return { index, line, column }
}
/**
 * Tokenizes `str` and returns the resulting token list.
 *
 * Builds the lexer state (input, options, a zeroed cursor and an empty token
 * array), runs `lex` over it, and hands back the tokens that were collected.
 *
 * @param {string} str - Markup to tokenize.
 * @param {object} options - Lexer options; `lex` reads `childlessTags` from it.
 * @returns {Array<object>} The tokens pushed onto the state while lexing.
 */
export default function lexer(str, options) {
  const tokens = []
  const position = makeInitialPosition()
  const state = { str, options, position, tokens }
  lex(state)
  return state.tokens
}
/**
 * Main lexing loop: repeatedly consumes text, comments and tags until the
 * cursor reaches the end of the input.
 *
 * @param {object} state - Lexer state with `str`, `options.childlessTags`,
 *   `position` and `tokens`; mutated in place by the sub-lexers.
 */
export function lex(state) {
  const { str, options } = state
  const { childlessTags } = options
  const total = str.length

  while (state.position.index < total) {
    const before = state.position.index
    lexText(state)
    // Text was consumed, so loop again before looking for markup.
    if (state.position.index !== before) continue

    // The cursor sits on "<"; "!--" right after it marks a comment.
    if (startsWith(str, '!--', before + 1)) {
      lexComment(state)
      continue
    }

    const tagName = lexTag(state)
    // Tags listed in childlessTags have their content handled by lexSkipTag.
    if (arrayIncludes(childlessTags, tagName.toLowerCase())) {
      lexSkipTag(tagName, state)
    }
  }
}
const alphanumeric = /[A-Za-z0-9]/
/**
 * Finds where the current run of text ends, i.e. the index of the next "<"
 * that is immediately followed by "/", "!" or an ASCII letter or digit.
 * Any other "<" is treated as ordinary text and skipped.
 *
 * @param {string} str - Text being scanned.
 * @param {number} index - Index to start searching from.
 * @returns {number} Index of the qualifying "<", or -1 when there is none.
 */
export function findTextEnd(str, index) {
  let searchFrom = index
  for (;;) {
    const candidate = str.indexOf('<', searchFrom)
    if (candidate === -1) {
      return -1
    }
    const following = str.charAt(candidate + 1)
    const opensMarkup =
      following === '/' || following === '!' || alphanumeric.test(following)
    if (opensMarkup) {
      return candidate
    }
    // A stray "<": resume the search just past it.
    searchFrom = candidate + 1
  }
}
/**
 * Lexes a run of plain text starting at the cursor and pushes a `text` token.
 *
 * Does nothing when markup starts exactly at the cursor. When no further
 * markup is found, the text runs to the end of the input.
 *
 * @param {object} state - Lexer state with `str`, `position` and `tokens`;
 *   `position` is advanced past the text and `tokens` receives the new token.
 */
export function lexText(state) {
  const { str, position } = state
  const found = findTextEnd(str, position.index)
  if (found === position.index) return

  const textEnd = found === -1 ? str.length : found
  const start = copyPosition(position)
  const content = str.slice(position.index, textEnd)
  jumpPosition(position, str, textEnd)

  state.tokens.push({
    type: 'text',
    content,
    position: { start, end: copyPosition(position) },
  })
}
/**
 * Lexes a comment whose "<!--" opener starts at the cursor and pushes a
 * `comment` token holding the text between the opener and "-->".
 *
 * When the closer is missing, the comment extends to the end of the input.
 *
 * @param {object} state - Lexer state with `str`, `position` and `tokens`;
 *   `position` is advanced past the whole comment.
 */
export function lexComment(state) {
  const { str, position } = state
  const opener = '<!--'
  const closer = '-->'

  const start = copyPosition(position)
  feedPosition(position, str, opener.length)

  const closerAt = str.indexOf(closer, position.index)
  const isTerminated = closerAt !== -1
  const contentEnd = isTerminated ? closerAt : str.length
  const commentEnd = isTerminated ? closerAt + closer.length : str.length

  const content = str.slice(position.index, contentEnd)
  jumpPosition(position, str, commentEnd)

  const end = copyPosition(position)
  state.tokens.push({ type: 'comment', content, position: { start, end } })
}
/**
 * Lexes one tag starting at the cursor, pushing in order: a `tag-start`
 * token, the tag name token, any attribute tokens, and a `tag-end` token.
 *
 * @param {object} state - Lexer state with `str`, `position` and `tokens`;
 *   `position` is advanced past the tag.
 * @returns {string} The tag name as returned by `lexTagName`.
 */
export function lexTag(state) {
  const { str, position } = state

  // Opening delimiter: "</" (two characters) or "<" (one character).
  const isClosingTag = str.charAt(position.index + 1) === '/'
  const start = copyPosition(position)
  feedPosition(position, str, isClosingTag ? 2 : 1)
  state.tokens.push({
    type: 'tag-start',
    close: isClosingTag,
    position: { start },
  })

  const tagName = lexTagName(state)
  lexTagAttributes(state)

  // Closing delimiter: a leading "/" means two characters are consumed,
  // otherwise one.
  const isSelfClosing = str.charAt(position.index) === '/'
  feedPosition(position, str, isSelfClosing ? 2 : 1)
  state.tokens.push({
    type: 'tag-end',
    close: isSelfClosing,
    position: { end: copyPosition(position) },
  })

  return tagName
}
// See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#special-white-space
const whitespace = /\s/
/**
 * Tells whether `char` contains a character matched by the `\s` class.
 *
 * @param {string} char - Character to test.
 * @returns {boolean} `true` when the whitespace pattern matches.
 */
export function isWhitespaceChar(char) {
  const matchesWhitespace = whitespace.test(char)
  return matchesWhitespace
}
/**
 * Lexes the tag name at the cursor and pushes a `tag` token carrying it.
 *
 * Leading whitespace, "/" and ">" characters are skipped first; the name then
 * runs until the next such character or the end of the input.
 *
 * @param {object} state - Lexer state with `str`, `position` and `tokens`;
 *   `position` is advanced to the end of the name.
 * @returns {string} The tag name exactly as written in the input.
 */
export function lexTagName(state) {
  const { str, position } = state
  const total = str.length
  // Characters that can never be part of a tag name.
  const isDelimiter = (ch) => isWhitespaceChar(ch) || ch === '/' || ch === '>'

  let nameStart = position.index
  while (nameStart < total && isDelimiter(str.charAt(nameStart))) {
    nameStart++
  }

  // The character at nameStart is taken unconditionally, so when no name
  // character remains nameEnd lands one past the end of the input.
  let nameEnd = nameStart + 1
  while (nameEnd < total && !isDelimiter(str.charAt(nameEnd))) {
    nameEnd++
  }

  jumpPosition(position, str, nameEnd)
  const tagName = str.slice(nameStart, nameEnd)
  state.tokens.push({ type: 'tag', content: tagName })
  return tagName
}
/**
 * Lexes the attribute section of a tag and pushes one `attribute` token per
 * attribute.
 *
 * Phase 1 splits the input into whitespace-separated words, stopping at the
 * first unquoted "/" or ">". Quoted sections (single or double quotes) are
 * kept inside their word, whitespace and all. A word still pending when the
 * input runs out without reaching "/" or ">" is not recorded.
 *
 * Phase 2 stitches words that belong together:
 *   - `key` `=value`    -> `key=value`
 *   - `key` `=` `value` -> `key=value`
 *   - `key=` `value`    -> `key=value` (when `value` contains no "=")
 *   - `key=`            -> `key`
 *
 * @param {object} state - Lexer state with `str`, `position` and `tokens`;
 *   `position` is advanced to where scanning stopped.
 */
export function lexTagAttributes(state) {
  const { str, position, tokens } = state
  const total = str.length
  const words = [] // "key", "key=value", "key='value'", etc
  let openQuote = null // the quote character currently open, if any
  let wordStart = position.index
  let cursor = position.index

  for (; cursor < total; cursor++) {
    const ch = str.charAt(cursor)

    if (openQuote) {
      // Inside quotes everything is literal until the matching quote.
      if (ch === openQuote) openQuote = null
      continue
    }

    if (ch === '/' || ch === '>') {
      // End of the tag: flush the pending word and leave the cursor here.
      if (cursor !== wordStart) words.push(str.slice(wordStart, cursor))
      break
    }

    if (isWhitespaceChar(ch)) {
      if (cursor !== wordStart) words.push(str.slice(wordStart, cursor))
      wordStart = cursor + 1
      continue
    }

    if (ch === "'" || ch === '"') openQuote = ch
  }
  jumpPosition(position, str, cursor)

  const emit = (content) => {
    tokens.push({ type: 'attribute', content })
  }
  const count = words.length

  for (let i = 0; i < count; i++) {
    const word = words[i]

    if (word.indexOf('=') === -1) {
      const next = words[i + 1]
      if (next && startsWith(next, '=')) {
        if (next.length > 1) {
          // `key` followed by `=value`
          emit(word + next)
          i += 1
          continue
        }
        // `key` followed by a lone `=`: the `=` word is consumed either way.
        const afterNext = words[i + 2]
        i += 1
        if (afterNext) {
          emit(word + '=' + afterNext)
          i += 1
          continue
        }
      }
    }

    if (endsWith(word, '=')) {
      const next = words[i + 1]
      if (next && !stringIncludes(next, '=')) {
        // `key=` followed by `value`
        emit(word + next)
        i += 1
        continue
      }
      // Dangling `key=`: drop the trailing "=".
      emit(word.slice(0, -1))
      continue
    }

    emit(word)
  }
}
const push = [].push
/**
 * Consumes the content of a tag as a single text token, up to the closing tag
 * whose name matches `tagName` (compared case-insensitively).
 *
 * Each "</" found is lexed as a tag on a throwaway state. A non-matching
 * candidate is skipped and the search resumes after it. On a match, the text
 * before it (if any) becomes one `text` token, followed by the closing tag's
 * own tokens. When no "</" remains, `lexText` is called on the real state
 * instead.
 *
 * @param {string} tagName - Name of the tag whose content is being skipped.
 * @param {object} state - Lexer state with `str`, `position` and `tokens`;
 *   `position` is advanced past whatever was consumed.
 */
export function lexSkipTag(tagName, state) {
  const { str, position, tokens } = state
  const expectedName = tagName.toLowerCase()
  const total = str.length

  for (let searchFrom = position.index; searchFrom < total; ) {
    const closeAt = str.indexOf('</', searchFrom)
    if (closeAt === -1) {
      lexText(state)
      return
    }

    // Lex the candidate on a scratch state so nothing is committed unless
    // it turns out to be the closing tag being looked for.
    const probePosition = copyPosition(position)
    jumpPosition(probePosition, str, closeAt)
    const probe = { str, position: probePosition, tokens: [] }
    const candidateName = lexTag(probe)

    if (candidateName.toLowerCase() !== expectedName) {
      searchFrom = probe.position.index
      continue
    }

    if (closeAt !== position.index) {
      const textStart = copyPosition(position)
      jumpPosition(position, str, closeAt)
      const textEnd = copyPosition(position)
      tokens.push({
        type: 'text',
        content: str.slice(textStart.index, closeAt),
        position: { start: textStart, end: textEnd },
      })
    }

    push.apply(tokens, probe.tokens)
    jumpPosition(position, str, probe.position.index)
    return
  }
}