Flatlogic Bot ca3a32f23e V 4
2026-02-16 06:55:36 +00:00

437 lines
13 KiB
JavaScript

import test from 'ava'
import lexer, {
lexText,
lexComment,
lexTag,
lexTagName,
lexTagAttributes,
lexSkipTag,
findTextEnd,
isWhitespaceChar,
} from '../lib/lexer'
/**
 * Builds the position record used as an expected value throughout these
 * tests. Every fixture is a single-line string, so the line is fixed at 0
 * and the column mirrors the character index.
 *
 * @param {number} index - Zero-based offset into the fixture string.
 * @returns {{index: number, line: number, column: number}} Position record.
 */
function ps(index) {
  const position = { index: index, line: 0, column: index }
  return position
}
// End-to-end check: a plain element is lexed into start/name/end tag tokens
// wrapped around one text token, each carrying the expected positions.
test('lexer should return tokens', (t) => {
  const html = '<h1>Test case</h1>'
  const actual = lexer(html, { childlessTags: [] })
  const expected = [
    { type: 'tag-start', close: false, position: { start: ps(0) } },
    { type: 'tag', content: 'h1' },
    { type: 'tag-end', close: false, position: { end: ps(4) } },
    {
      type: 'text',
      content: 'Test case',
      position: { start: ps(4), end: ps(13) },
    },
    { type: 'tag-start', close: true, position: { start: ps(13) } },
    { type: 'tag', content: 'h1' },
    { type: 'tag-end', close: false, position: { end: ps(html.length) } },
  ]
  t.deepEqual(actual, expected)
})
// A "<" only opens a tag when a tag name follows it: "<=" stays plain text,
// while "<a" starts a tag whose remaining word is lexed as an attribute.
test('lexer should parse tags beginning with alphanumeric names', (t) => {
  const lexerOptions = { childlessTags: [] }

  // Case 1: "<" followed by "=" is not a tag opener.
  const comparison = '2 <= 4 >'
  const comparisonTokens = lexer(comparison, lexerOptions)
  t.deepEqual(comparisonTokens, [
    {
      type: 'text',
      content: '2 <= 4 >',
      position: { start: ps(0), end: ps(comparison.length) },
    },
  ])

  // Case 2: "<" followed by a letter opens a tag.
  const tagged = '2 <a 4 >'
  const taggedTokens = lexer(tagged, { childlessTags: [] })
  t.deepEqual(taggedTokens, [
    { type: 'text', content: '2 ', position: { start: ps(0), end: ps(2) } },
    { type: 'tag-start', close: false, position: { start: ps(2) } },
    { type: 'tag', content: 'a' },
    { type: 'attribute', content: '4' },
    { type: 'tag-end', close: false, position: { end: ps(tagged.length) } },
  ])
})
// The body of a tag listed in `childlessTags` must come back as one raw text
// token, even when it contains markup such as <img/>.
test('lexer should skip lexing the content of childless tags', (t) => {
  const html = '<template>Hello <img/></template>'
  const actual = lexer(html, { childlessTags: ['template'] })
  const expected = [
    { type: 'tag-start', close: false, position: { start: ps(0) } },
    { type: 'tag', content: 'template' },
    { type: 'tag-end', close: false, position: { end: ps(10) } },
    {
      type: 'text',
      content: 'Hello <img/>',
      position: { start: ps(10), end: ps(22) },
    },
    { type: 'tag-start', close: true, position: { start: ps(22) } },
    { type: 'tag', content: 'template' },
    { type: 'tag-end', close: false, position: { end: ps(html.length) } },
  ]
  t.deepEqual(actual, expected)
})
// Table of [input, expected index]; every search starts at offset 0.
// -1 is the expected result when no tag opener exists in the input.
test('findTextEnd should find the end of the text segment', (t) => {
  const cases = [
    ['</end', 0],
    ['<= 4', -1],
    ['a<b', 1],
    ['<= <= <=', -1],
  ]
  for (const [input, expectedEnd] of cases) {
    t.is(findTextEnd(input, 0), expectedEnd)
  }
})
// lexText should consume everything up to the next "<" and emit it as a
// single text token, leaving the cursor on that "<".
test('lexText should tokenize the next text segment', (t) => {
  const input = 'text that ends<x>'
  const tagOpenAt = input.indexOf('<')
  const state = { str: input, position: ps(0), tokens: [] }

  lexText(state)

  t.is(state.position.index, tagOpenAt)
  t.deepEqual(state.tokens[0], {
    type: 'text',
    content: 'text that ends',
    position: { start: ps(0), end: ps(14) },
  })
})
// The cursor starts at index 4, so the leading "abcd" must not appear in the
// emitted text token.
test('lexText should tokenize from the current position', (t) => {
  const input = 'abcdtext that ends<x>'
  const tagOpenAt = input.indexOf('<')
  const state = { str: input, position: ps(4), tokens: [] }

  lexText(state)

  t.is(state.position.index, tagOpenAt)
  t.deepEqual(state.tokens[0], {
    type: 'text',
    content: 'text that ends',
    position: { start: ps(4), end: ps(18) },
  })
})
// With no "<" anywhere in the input, the whole string is a single text token
// and the cursor finishes at the end of the string.
test('lexText should tokenize safely to string end', (t) => {
  const input = 'text that does not end'
  const inputEnd = input.length
  const state = { str: input, position: ps(0), tokens: [] }

  lexText(state)

  t.is(state.position.index, inputEnd)
  t.deepEqual(state.tokens[0], {
    type: 'text',
    content: 'text that does not end',
    position: { start: ps(0), end: ps(input.length) },
  })
})
// When the cursor already sits on a tag opener there is no text to collect,
// so lexText must leave the cursor where it is and push no token.
test('lexText should not add a token for an empty text', (t) => {
  const str = ' <x>never reach here</x>'
  // Derive the offset from the fixture instead of hard-coding it: a literal
  // offset of 2 lands on the "x" after "<" in this string, which would make
  // the text segment non-empty and defeat the purpose of the test.
  const start = str.indexOf('<')
  const finish = start
  const state = { str, position: ps(start), tokens: [] }

  lexText(state)

  t.is(state.position.index, finish)
  t.is(state.tokens.length, 0)
})
// A complete comment yields one token holding the text between the
// delimiters; the cursor ends on the first character after "-->".
test('lexComment should tokenize the next comment', (t) => {
  const input = '<!-- this is a comment -->abcd'
  const afterComment = input.indexOf('abcd')
  const state = { str: input, position: ps(0), tokens: [] }

  lexComment(state)

  t.is(state.position.index, afterComment)
  const expected = {
    type: 'comment',
    content: ' this is a comment ',
    position: { start: ps(0), end: ps(afterComment) },
  }
  t.deepEqual(state.tokens[0], expected)
})
// An unterminated comment should run to the end of the input rather than
// failing; the token content is everything after the opening delimiter.
test('lexComment should tokenize safely to string end', (t) => {
  const input = '<!-- this is a comment'
  const inputEnd = input.length
  const state = { str: input, position: ps(0), tokens: [] }

  lexComment(state)

  t.is(state.position.index, inputEnd)
  const expected = {
    type: 'comment',
    content: ' this is a comment',
    position: { start: ps(0), end: ps(inputEnd) },
  }
  t.deepEqual(state.tokens[0], expected)
})
// The cursor starts on the comment opener at index 4; the preceding "abcd"
// must be ignored and the cursor must stop on the following "<x>".
test('lexComment should tokenize from current position', (t) => {
  const input = 'abcd<!-- comment text --><x>'
  const afterComment = input.indexOf('<x>')
  const state = { str: input, position: ps(4), tokens: [] }

  lexComment(state)

  t.is(state.position.index, afterComment)
  const expected = {
    type: 'comment',
    content: ' comment text ',
    position: { start: ps(4), end: ps(afterComment) },
  }
  t.deepEqual(state.tokens[0], expected)
})
// Unlike empty text, an empty comment still produces a token; its content is
// the empty string.
test('lexComment should add a token for an empty comment', (t) => {
  const input = '<!---->'
  const inputEnd = input.length
  const state = { str: input, position: ps(0), tokens: [] }

  lexComment(state)

  t.is(state.position.index, inputEnd)
  const expected = {
    type: 'comment',
    content: '',
    position: { start: ps(0), end: ps(inputEnd) },
  }
  t.deepEqual(state.tokens[0], expected)
})
// A self-closing tag produces tag-start, tag and tag-end tokens, with
// `close: true` on the tag-end; the cursor stops right after "/>".
test('lexTag should tokenize the next tag', (t) => {
  const input = '<img/>abcd'
  const afterTag = input.indexOf('abcd')
  const state = { str: input, position: ps(0), tokens: [] }

  lexTag(state)

  t.is(state.position.index, afterTag)
  const expected = [
    { type: 'tag-start', close: false, position: { start: ps(0) } },
    { type: 'tag', content: 'img' }, // not a part of this test
    { type: 'tag-end', close: true, position: { end: ps(afterTag) } },
  ]
  t.deepEqual(state.tokens, expected)
})
// Only the leading name "h1" is consumed; the cursor stops at index 2, before
// the attribute text.
test('lexTagName should tokenize the next tag name', (t) => {
  const input = 'h1 id="title"> test'
  const nameEnd = 2
  const state = { str: input, position: ps(0), tokens: [] }

  lexTagName(state)

  t.is(state.position.index, nameEnd)
  const expected = { type: 'tag', content: 'h1' }
  t.deepEqual(state.tokens[0], expected)
})
// The leading ">", "/" and space are skipped before the name is read; the
// name then runs to the end of the input.
test('lexTagName should ignore leading not-tagname characters', (t) => {
  const input = '>/ div'
  const state = { str: input, position: ps(0), tokens: [] }

  lexTagName(state)

  t.is(state.position.index, input.length)
  const expected = { type: 'tag', content: 'div' }
  t.deepEqual(state.tokens[0], expected)
})
// Quoted, bare and hyphenated attributes each become one token; lexing stops
// on the ">" that closes the tag.
test('lexTagAttributes should tokenize attributes until tag end', (t) => {
  const attribute = (content) => ({ type: 'attribute', content })
  const input = 'yes="no" maybe data-type="array">abcd'
  const tagCloseAt = input.indexOf('>abcd')
  const state = { str: input, position: ps(0), tokens: [] }

  lexTagAttributes(state)

  t.is(state.position.index, tagCloseAt)
  t.deepEqual(state.tokens, [
    attribute('yes="no"'),
    attribute('maybe'),
    attribute('data-type="array"'),
  ])
})
// Whitespace on either side of "=" must be dropped, so each attribute token
// comes out in the compact name="value" form.
test('lexTagAttributes should tokenize independent of whitespace', (t) => {
  const attribute = (content) => ({ type: 'attribute', content })
  const input = 'yes = "no" maybe data-type= "array" key ="value" >abcd'
  const tagCloseAt = input.indexOf('>abcd')
  const state = { str: input, position: ps(0), tokens: [] }

  lexTagAttributes(state)

  t.is(state.position.index, tagCloseAt)
  t.deepEqual(state.tokens, [
    attribute('yes="no"'),
    attribute('maybe'),
    attribute('data-type="array"'),
    attribute('key="value"'),
  ])
})
// "foo=" has no value before the next attribute starts, so it is expected to
// come out as the bare name "foo" followed by a normal bar="baz" token.
test('lexTagAttributes should handle an unset attribute name', (t) => {
  const attribute = (content) => ({ type: 'attribute', content })
  const input = '<div foo= bar="baz"></div>'
  // Index 4 is just past "<div", where the attribute list begins.
  const state = { str: input, position: ps(4), tokens: [] }

  lexTagAttributes(state)

  t.is(state.position.index, input.indexOf('></div>'))
  t.deepEqual(state.tokens, [attribute('foo'), attribute('bar="baz"')])
})
// A newline between attributes must act as a separator, just like a space.
test('lexTagAttributes should handle newline separated attributes', (t) => {
  const attribute = (content) => ({ type: 'attribute', content })
  const input = '<div foo="bar"\nbaz="bat"></div>'
  // Index 4 is just past "<div", where the attribute list begins.
  const state = { str: input, position: ps(4), tokens: [] }

  lexTagAttributes(state)

  t.is(state.position.index, input.indexOf('></div>'))
  t.deepEqual(state.tokens, [attribute('foo="bar"'), attribute('baz="bat"')])
})
// A tab between attributes must act as a separator, just like a space.
test('lexTagAttributes should handle tab separated attributes', (t) => {
  const attribute = (content) => ({ type: 'attribute', content })
  const input = '<div foo="bar"\tbaz="bat"></div>'
  // Index 4 is just past "<div", where the attribute list begins.
  const state = { str: input, position: ps(4), tokens: [] }

  lexTagAttributes(state)

  t.is(state.position.index, input.indexOf('></div>'))
  t.deepEqual(state.tokens, [attribute('foo="bar"'), attribute('baz="bat"')])
})
// Leading space, newline and tab before the first attribute must be skipped
// without producing any token of their own.
test('lexTagAttributes should handle prefixed spacing', (t) => {
  const input = ' \n\tyes="no">abcd'
  const tagCloseAt = input.indexOf('>abcd')
  const state = { str: input, position: ps(0), tokens: [] }

  lexTagAttributes(state)

  t.is(state.position.index, tagCloseAt)
  const expected = [{ type: 'attribute', content: 'yes="no"' }]
  t.deepEqual(state.tokens, expected)
})
// Values without quotes are accepted as single words, with or without
// whitespace around the "=".
test('lexTagAttributes should handle unquoted one-word values', (t) => {
  const attribute = (content) => ({ type: 'attribute', content })
  const input = 'num=8 ham = steak>abcd'
  const tagCloseAt = input.indexOf('>abcd')
  const state = { str: input, position: ps(0), tokens: [] }

  lexTagAttributes(state)

  t.is(state.position.index, tagCloseAt)
  t.deepEqual(state.tokens, [attribute('num=8'), attribute('ham=steak')])
})
// An "=" with nothing after it before the tag closes should yield only the
// bare attribute name.
test('lexTagAttributes should handle incomplete attributes', (t) => {
  const input = 'x = >abcd'
  const tagCloseAt = input.indexOf('>abcd')
  const state = { str: input, position: ps(0), tokens: [] }

  lexTagAttributes(state)

  t.is(state.position.index, tagCloseAt)
  const expected = [{ type: 'attribute', content: 'x' }]
  t.deepEqual(state.tokens, expected)
})
test('lexSkipTag should tokenize as text until the matching tag name', (t) => {
const str = 'abcd<test><h1>Test case</h1></test><x>'
const finish = str.indexOf('<x>')
const state = { str, position: ps(10), tokens: [] }
lexSkipTag('test', state)
t.is(state.position.index, finish)
t.deepEqual(state.tokens, [
{
type: 'text',
content: '<h1>Test case</h1>',
position: { start: ps(10), end: ps(28) },
},
{ type: 'tag-start', close: true, position: { start: ps(28) } },
{ type: 'tag', content: 'test' },
{ type: 'tag-end', close: false, position: { end: ps(finish) } },
])
})
// The closing tag is matched without regard to letter case, and the emitted
// tag token keeps the casing found in the closing tag ("TeSt").
test('lexSkipTag should stop at the case-insensitive matching tag name', (t) => {
  const input = '<tEsT>proving <???> the point</TeSt><x>'
  const afterClose = input.indexOf('<x>')
  const state = { str: input, position: ps(6), tokens: [] }

  lexSkipTag('tEsT', state)

  t.is(state.position.index, afterClose)
  const expected = [
    {
      type: 'text',
      content: 'proving <???> the point',
      position: { start: ps(6), end: ps(29) },
    },
    { type: 'tag-start', close: true, position: { start: ps(29) } },
    { type: 'tag', content: 'TeSt' },
    { type: 'tag-end', close: false, position: { end: ps(afterClose) } },
  ]
  t.deepEqual(state.tokens, expected)
})
// With no closing tag present, the remaining input becomes one text token
// and no closing-tag tokens are emitted.
test('lexSkipTag should auto-close if the end tag is not found', (t) => {
  const input = '<script>This never ends'
  const state = { str: input, position: ps(8), tokens: [] }

  lexSkipTag('script', state)

  t.is(state.position.index, input.length)
  const expected = [
    {
      type: 'text',
      content: 'This never ends',
      position: { start: ps(8), end: ps(input.length) },
    },
  ]
  t.deepEqual(state.tokens, expected)
})
// A closing tag with a different name ("</nothing>") must stay inside the
// text token; only the real </script> ends the skipped region.
test('lexSkipTag should handle finding a stray "</" [resilience]', (t) => {
  const input = '<script>proving </nothing></script>'
  const state = { str: input, position: ps(8), tokens: [] }

  lexSkipTag('script', state)

  t.is(state.position.index, input.length)
  const expected = [
    {
      type: 'text',
      content: 'proving </nothing>',
      position: { start: ps(8), end: ps(26) },
    },
    { type: 'tag-start', close: true, position: { start: ps(26) } },
    { type: 'tag', content: 'script' },
    { type: 'tag-end', close: false, position: { end: ps(input.length) } },
  ]
  t.deepEqual(state.tokens, expected)
})
// When the closing tag follows immediately, no text token is emitted; only
// the closing-tag tokens are.
test('lexSkipTag should not add an empty inner text node', (t) => {
  const input = '<script></script>'
  const state = { str: input, position: ps(8), tokens: [] }

  lexSkipTag('script', state)

  t.is(state.position.index, input.length)
  const expected = [
    { type: 'tag-start', close: true, position: { start: ps(8) } },
    { type: 'tag', content: 'script' },
    { type: 'tag-end', close: false, position: { end: ps(input.length) } },
  ]
  t.deepEqual(state.tokens, expected)
})
// Table of [character, expected result] for isWhitespaceChar: space, newline
// and tab count as whitespace, a letter does not.
test('isWhitespace should work', (t) => {
  const cases = [
    [' ', true],
    ['\n', true],
    ['\t', true],
    ['x', false],
  ]
  for (const [char, expected] of cases) {
    t.is(isWhitespaceChar(char), expected)
  }
})