// 347 lines · 8.7 KiB · JavaScript
"use strict";
|
|
|
|
// CommonJS export table. The `__esModule` marker is set first, then every
// lexer function is exposed by name; `lexer` is also the default export.
// All of these are function declarations further down the file, so they are
// hoisted and already defined when these assignments run.
Object.defineProperty(exports, "__esModule", {
  value: true
});
exports.copyPosition = copyPosition;
exports["default"] = lexer;
exports.feedPosition = feedPosition;
exports.findTextEnd = findTextEnd;
exports.isWhitespaceChar = isWhitespaceChar;
exports.jumpPosition = jumpPosition;
exports.lex = lex;
exports.lexComment = lexComment;
exports.lexSkipTag = lexSkipTag;
exports.lexTag = lexTag;
exports.lexTagAttributes = lexTagAttributes;
exports.lexTagName = lexTagName;
exports.lexText = lexText;
exports.makeInitialPosition = makeInitialPosition;
|
|
var _compat = require("./compat");
|
|
/**
 * Advances `position` over the next `len` characters of `str`, updating the
 * absolute index and the zero-based line/column counters in place.
 *
 * A '\n' increments `line` and resets `column` to 0; every other character
 * increments `column`. The advance is clamped to the end of `str`, so a
 * request that would run past the input leaves the position at `str.length`
 * instead of counting characters that do not exist.
 *
 * @param {{index: number, line: number, column: number}} position - Mutated in place.
 * @param {string} str - The full input being lexed.
 * @param {number} len - Number of characters to consume from `position.index`.
 * @returns {undefined}
 */
function feedPosition(position, str, len) {
  var start = position.index;
  // Never move forward past the end of the input. A non-positive `len` is
  // passed through untouched (index moves, counters do not), as before.
  var end = Math.min(start + len, Math.max(start, str.length));
  position.index = end;
  for (var i = start; i < end; i++) {
    if (str.charAt(i) === '\n') {
      position.line++;
      position.column = 0;
    } else {
      position.column++;
    }
  }
}
|
|
/**
 * Moves `position` to the absolute index `end` by feeding it the characters
 * between its current index and `end`.
 *
 * @param {{index: number, line: number, column: number}} position - Mutated in place.
 * @param {string} str - The full input being lexed.
 * @param {number} end - Absolute index to move to.
 * @returns {*} Whatever `feedPosition` returns.
 */
function jumpPosition(position, str, end) {
  return feedPosition(position, str, end - position.index);
}
|
|
/**
 * Creates a fresh position pointing at the very start of the input.
 *
 * @returns {{index: number, column: number, line: number}} A new object with
 *   all three counters set to 0.
 */
function makeInitialPosition() {
  return {
    index: 0,
    column: 0,
    line: 0
  };
}
|
|
/**
 * Takes a snapshot of `position` so it can be stored on a token while the
 * live position keeps being mutated.
 *
 * @param {{index: number, line: number, column: number}} position
 * @returns {{index: number, line: number, column: number}} A new object
 *   holding only the `index`, `line` and `column` of `position`.
 */
function copyPosition(position) {
  return {
    index: position.index,
    line: position.line,
    column: position.column
  };
}
|
|
/**
 * Entry point: lexes `str` from the beginning and returns the token list.
 *
 * @param {string} str - The input to lex.
 * @param {Object} options - Lexer options; `lex` reads `options.childlessTags`.
 * @returns {Array<Object>} The tokens collected in the lexer state.
 */
function lexer(str, options) {
  var state = {
    str: str,
    options: options,
    position: makeInitialPosition(),
    tokens: []
  };
  lex(state);
  return state.tokens;
}
|
|
/**
 * Main lexing loop: consumes `state.str` from `state.position` to the end,
 * appending tokens to `state.tokens`.
 *
 * Each iteration first tries to lex a run of text. If that consumed nothing,
 * the position is sitting on a tag-like "<": a "<!--" prefix is lexed as a
 * comment, anything else as a tag. When the lower-cased tag name is listed in
 * `state.options.childlessTags`, the tag's content is handed to `lexSkipTag`
 * instead of being lexed as markup.
 *
 * @param {{str: string, options: {childlessTags: Array<string>},
 *          position: Object, tokens: Array<Object>}} state
 */
function lex(state) {
  var str = state.str;
  var childlessTags = state.options.childlessTags;
  var len = str.length;
  while (state.position.index < len) {
    var start = state.position.index;
    lexText(state);
    if (state.position.index !== start) {
      // Text was consumed; look for more from the new position.
      continue;
    }
    // `start` is the "<"; the comment marker is checked right after it.
    if ((0, _compat.startsWith)(str, '!--', start + 1)) {
      lexComment(state);
      continue;
    }
    var tagName = lexTag(state);
    if ((0, _compat.arrayIncludes)(childlessTags, tagName.toLowerCase())) {
      lexSkipTag(tagName, state);
    }
  }
}
|
|
var alphanumeric = /[A-Za-z0-9]/;

/**
 * Finds where the text run starting at `index` ends, i.e. the index of the
 * next "<" that is immediately followed by "/", "!" or an ASCII letter/digit.
 * Any other "<" (followed by a space, at the very end of the input, ...) is
 * treated as plain text and skipped.
 *
 * @param {string} str - The input being lexed.
 * @param {number} index - Index to start searching from.
 * @returns {number} Index of the tag-like "<", or -1 when there is none.
 */
function findTextEnd(str, index) {
  for (var textEnd = str.indexOf('<', index); textEnd !== -1; textEnd = str.indexOf('<', textEnd + 1)) {
    var next = str.charAt(textEnd + 1);
    if (next === '/' || next === '!' || alphanumeric.test(next)) {
      return textEnd;
    }
  }
  return -1;
}
|
|
/**
 * Lexes a run of plain text starting at `state.position` and appends a
 * `text` token (content plus start/end positions) to `state.tokens`.
 *
 * Emits nothing and leaves the position untouched when a tag-like "<" sits
 * exactly at the current position. When no tag-like "<" remains, the text
 * runs to the end of the input.
 *
 * @param {{str: string, position: Object, tokens: Array<Object>}} state
 */
function lexText(state) {
  var str = state.str;
  var position = state.position;
  var textEnd = findTextEnd(str, position.index);
  if (textEnd === position.index) {
    return;
  }
  if (textEnd === -1) {
    textEnd = str.length;
  }
  var start = copyPosition(position);
  var content = str.slice(position.index, textEnd);
  jumpPosition(position, str, textEnd);
  state.tokens.push({
    type: 'text',
    content: content,
    position: {
      start: start,
      end: copyPosition(position)
    }
  });
}
|
|
/**
 * Lexes a comment whose "<!--" starts at `state.position` and appends a
 * `comment` token to `state.tokens`.
 *
 * The token's content is the text between "<!--" and the next "-->". If the
 * comment is never closed, the content runs to the end of the input. The
 * position ends up just after "-->" (or at the end of the input).
 *
 * @param {{str: string, position: Object, tokens: Array<Object>}} state
 */
function lexComment(state) {
  var str = state.str;
  var position = state.position;
  var start = copyPosition(position);
  feedPosition(position, str, 4); // "<!--".length
  var contentStart = position.index;
  var contentEnd = str.indexOf('-->', contentStart);
  var commentEnd;
  if (contentEnd === -1) {
    // Unterminated comment: swallow the rest of the input.
    contentEnd = commentEnd = str.length;
  } else {
    commentEnd = contentEnd + 3; // "-->".length
  }
  var content = str.slice(contentStart, contentEnd);
  jumpPosition(position, str, commentEnd);
  state.tokens.push({
    type: 'comment',
    content: content,
    position: {
      start: start,
      end: copyPosition(position)
    }
  });
}
|
|
/**
 * Lexes one complete tag starting at the "<" under `state.position`.
 *
 * Appends, in order: a `tag-start` token (with `close: true` for "</"), the
 * `tag` name token, any `attribute` tokens, and a `tag-end` token (with
 * `close: true` when the tag ends in "/").
 *
 * @param {{str: string, position: Object, tokens: Array<Object>}} state
 * @returns {string} The tag name exactly as written in the input.
 */
function lexTag(state) {
  var str = state.str;
  var position = state.position;

  // Opening delimiter: "<" or "</".
  var isClosingTag = str.charAt(position.index + 1) === '/';
  var start = copyPosition(position);
  feedPosition(position, str, isClosingTag ? 2 : 1);
  state.tokens.push({
    type: 'tag-start',
    close: isClosingTag,
    position: {
      start: start
    }
  });

  var tagName = lexTagName(state);
  lexTagAttributes(state);

  // Closing delimiter: ">" is consumed as one character, "/" as two.
  var isSelfClosing = str.charAt(position.index) === '/';
  feedPosition(position, str, isSelfClosing ? 2 : 1);
  state.tokens.push({
    type: 'tag-end',
    close: isSelfClosing,
    position: {
      end: copyPosition(position)
    }
  });

  return tagName;
}
|
|
|
|
// See https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#special-white-space
|
|
var whitespace = /\s/;

/**
 * Tells whether `_char3` contains a character matched by the regex `\s`.
 * Callers in this file pass a single character.
 *
 * @param {string} _char3 - The character to test.
 * @returns {boolean} True when `\s` matches.
 */
function isWhitespaceChar(_char3) {
  return whitespace.test(_char3);
}
|
|
/**
 * Lexes the tag name that follows "<" or "</" and appends a `tag` token
 * holding it to `state.tokens`.
 *
 * Leading whitespace, "/" and ">" characters are skipped first; the name then
 * runs up to (not including) the next whitespace, "/" or ">". The position is
 * moved to the end of the name.
 *
 * @param {{str: string, position: Object, tokens: Array<Object>}} state
 * @returns {string} The tag name as written ('' if the input ended first).
 */
function lexTagName(state) {
  var str = state.str;
  var position = state.position;
  var len = str.length;

  // A character that cannot be part of a tag name.
  function isDelimiter(ch) {
    return isWhitespaceChar(ch) || ch === '/' || ch === '>';
  }

  var start = position.index;
  while (start < len && isDelimiter(str.charAt(start))) {
    start++;
  }

  // The character at `start` is already known to belong to the name, so the
  // scan for the end begins one past it.
  var end = start + 1;
  while (end < len && !isDelimiter(str.charAt(end))) {
    end++;
  }

  jumpPosition(position, str, end);
  var tagName = str.slice(start, end);
  state.tokens.push({
    type: 'tag',
    content: tagName
  });
  return tagName;
}
|
|
/**
 * Lexes the attribute section of a tag, from `state.position` up to (not
 * including) the "/" or ">" that ends the tag, and appends one `attribute`
 * token per attribute to `state.tokens`.
 *
 * Works in two passes:
 *  1. Split the section into whitespace-separated words. Whitespace, "/" and
 *     ">" inside single or double quotes do not split. A word still open when
 *     the input ends (no "/" or ">" found) is not recorded.
 *  2. Stitch words back together so that `key = value`, `key =value` and
 *     `key= value` each become a single `key=value` attribute. A dangling
 *     `key=` or `key =` with no value is emitted as just `key`.
 *
 * @param {{str: string, position: Object, tokens: Array<Object>}} state
 */
function lexTagAttributes(state) {
  var str = state.str;
  var position = state.position;
  var tokens = state.tokens;
  var len = str.length;
  var cursor = position.index;
  var quote = null; // active quote character, or null when outside quotes
  var wordBegin = cursor; // index where the current word starts
  var words = []; // "key", "key=value", "key='value'", "=", etc

  // Pass 1: split into words.
  for (; cursor < len; cursor++) {
    var ch = str.charAt(cursor);
    if (quote) {
      // Inside quotes everything is literal until the matching quote.
      if (ch === quote) {
        quote = null;
      }
      continue;
    }
    var isTagEnd = ch === '/' || ch === '>';
    if (isTagEnd || isWhitespaceChar(ch)) {
      if (cursor !== wordBegin) {
        words.push(str.slice(wordBegin, cursor));
      }
      if (isTagEnd) {
        break; // leave the cursor on the "/" or ">"
      }
      wordBegin = cursor + 1;
      continue;
    }
    if (ch === "'" || ch === '"') {
      quote = ch;
    }
  }
  jumpPosition(position, str, cursor);

  function emit(content) {
    tokens.push({
      type: 'attribute',
      content: content
    });
  }

  // Pass 2: stitch words into attributes.
  var wLen = words.length;
  var i = 0;
  while (i < wLen) {
    var word = words[i];
    var next = words[i + 1];

    if (word.indexOf('=') === -1) {
      if (next && (0, _compat.startsWith)(next, '=')) {
        if (next.length > 1) {
          // key =value
          emit(word + next);
          i += 2;
          continue;
        }
        var afterEquals = words[i + 2];
        if (afterEquals) {
          // key = value
          emit(word + '=' + afterEquals);
          i += 3;
          continue;
        }
        // key = (nothing after): keep the key, drop the lone "=".
        emit(word);
        i += 2;
        continue;
      }
      // Plain boolean-style attribute.
      emit(word);
      i += 1;
      continue;
    }

    if ((0, _compat.endsWith)(word, '=')) {
      if (next && !(0, _compat.stringIncludes)(next, '=')) {
        // key= value
        emit(word + next);
        i += 2;
        continue;
      }
      // key= with no usable value: strip the trailing "=".
      emit(word.slice(0, -1));
      i += 1;
      continue;
    }

    // Already a complete key=value word.
    emit(word);
    i += 1;
  }
}
|
|
var push = [].push;

/**
 * Lexes the content of a tag whose body must not be parsed as markup. `lex`
 * calls this for tags listed in `options.childlessTags`.
 *
 * Searches forward for a closing tag with the same name (case-insensitive).
 * Everything before it is emitted as one `text` token, followed by the tokens
 * of the closing tag itself. Closing tags with a different name are treated as
 * part of the content. If no "</" is found at all, the remainder is handed to
 * `lexText`.
 *
 * @param {string} tagName - Name of the tag that was just opened.
 * @param {{str: string, position: Object, tokens: Array<Object>}} state
 */
function lexSkipTag(tagName, state) {
  var str = state.str;
  var position = state.position;
  var tokens = state.tokens;
  var safeTagName = tagName.toLowerCase();
  var len = str.length;
  var index = position.index;

  while (index < len) {
    var nextTag = str.indexOf('</', index);
    if (nextTag === -1) {
      lexText(state);
      return;
    }

    // Lex the candidate closing tag against a scratch state so nothing is
    // committed unless its name matches.
    var tagState = {
      str: str,
      position: copyPosition(position),
      tokens: []
    };
    jumpPosition(tagState.position, str, nextTag);
    var name = lexTag(tagState);

    if (safeTagName !== name.toLowerCase()) {
      // Some other closing tag: keep searching after it.
      index = tagState.position.index;
      continue;
    }

    if (nextTag !== position.index) {
      var textStart = copyPosition(position);
      jumpPosition(position, str, nextTag);
      tokens.push({
        type: 'text',
        content: str.slice(textStart.index, nextTag),
        position: {
          start: textStart,
          end: copyPosition(position)
        }
      });
    }

    push.apply(tokens, tagState.tokens);
    jumpPosition(position, str, tagState.position.index);
    return;
  }
}
|
|
//# sourceMappingURL=lexer.js.map
|