import test from 'ava'
import lexer, {
  lexText,
  lexComment,
  lexTag,
  lexTagName,
  lexTagAttributes,
  lexSkipTag,
  findTextEnd,
  isWhitespaceChar,
} from '../lib/lexer'

// Every fixture in this file is single-line, so a position's line is always 0
// and its column equals its index.
function ps(index) {
  return { index, line: 0, column: index }
}

test('lexer should return tokens', (t) => {
  const str = '<h1>Test case</h1>'
  const options = { childlessTags: [] }
  const tokens = lexer(str, options)
  t.deepEqual(tokens, [
    { type: 'tag-start', close: false, position: { start: ps(0) } },
    { type: 'tag', content: 'h1' },
    { type: 'tag-end', close: false, position: { end: ps(4) } },
    {
      type: 'text',
      content: 'Test case',
      position: { start: ps(4), end: ps(13) },
    },
    { type: 'tag-start', close: true, position: { start: ps(13) } },
    { type: 'tag', content: 'h1' },
    { type: 'tag-end', close: false, position: { end: ps(str.length) } },
  ])
})

test('lexer should parse tags beginning with alphanumeric names', (t) => {
  {
    const str = '2 <= 4 >'
    const options = { childlessTags: [] }
    const tokens = lexer(str, options)
    t.deepEqual(tokens, [
      {
        type: 'text',
        content: '2 <= 4 >',
        position: { start: ps(0), end: ps(str.length) },
      },
    ])
  }

  {
    const str = '2 <a 4>'
    const options = { childlessTags: [] }
    const tokens = lexer(str, options)
    t.deepEqual(tokens, [
      { type: 'text', content: '2 ', position: { start: ps(0), end: ps(2) } },
      { type: 'tag-start', close: false, position: { start: ps(2) } },
      { type: 'tag', content: 'a' },
      { type: 'attribute', content: '4' },
      { type: 'tag-end', close: false, position: { end: ps(str.length) } },
    ])
  }
})

test('lexer should skip lexing the content of childless tags', (t) => {
  const str = '<template>Hello <img/></template>'
  const options = { childlessTags: ['template'] }
  const tokens = lexer(str, options)
  t.deepEqual(tokens, [
    { type: 'tag-start', close: false, position: { start: ps(0) } },
    { type: 'tag', content: 'template' },
    { type: 'tag-end', close: false, position: { end: ps(10) } },
    {
      type: 'text',
      content: 'Hello <img/>',
      position: { start: ps(10), end: ps(22) },
    },
    { type: 'tag-start', close: true, position: { start: ps(22) } },
    { type: 'tag', content: 'template' },
    { type: 'tag-end', close: false, position: { end: ps(str.length) } },
  ])
})

test('findTextEnd should find the end of the text segment', (t) => {
  // '<' ends a text segment only when it starts a tag; '<=' stays text,
  // matching the '2 <= 4 >' lexer test above.
  t.is(findTextEnd('<div>', 0), 0)
  t.is(findTextEnd('abcd</div>', 0), 4)
  t.is(findTextEnd('a <= b <div>', 0), 7)
})
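// A reference sketch of findTextEnd, inferred from the assertions above. The
// name `referenceFindTextEnd` and the -1 return for tag-less input are our
// assumptions; ../lib/lexer remains the source of truth.
function referenceFindTextEnd(str, index) {
  while (true) {
    const textEnd = str.indexOf('<', index)
    // no tag start at all: the text runs to the end of the string
    if (textEnd === -1) return -1
    const char = str.charAt(textEnd + 1)
    // only '</', '<!', or '<name' end a text segment, so '2 <= 4' stays text
    if (char === '/' || char === '!' || /[a-zA-Z0-9]/.test(char)) return textEnd
    index = textEnd + 1
  }
}

test('findTextEnd should agree with the reference sketch', (t) => {
  for (const str of ['<div>', 'abcd</div>', 'a <= b <div>']) {
    t.is(findTextEnd(str, 0), referenceFindTextEnd(str, 0))
  }
})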
test('lexText should tokenize the next text segment', (t) => {
  const str = 'text that ends<x>'
  const finish = str.indexOf('<')
  const state = { str, position: ps(0), tokens: [] }
  lexText(state)
  t.is(state.position.index, finish)
  const token = state.tokens[0]
  t.deepEqual(token, {
    type: 'text',
    content: 'text that ends',
    position: {
      start: ps(0),
      end: ps(14),
    },
  })
})

test('lexText should tokenize from the current position', (t) => {
  const str = 'abcdtext that ends<x>'
  const finish = str.indexOf('<')
  const state = { str, position: ps(4), tokens: [] }
  lexText(state)
  t.is(state.position.index, finish)
  const token = state.tokens[0]
  t.deepEqual(token, {
    type: 'text',
    content: 'text that ends',
    position: {
      start: ps(4),
      end: ps(18),
    },
  })
})

test('lexText should tokenize safely to string end', (t) => {
  const str = 'text that does not end'
  const finish = str.length
  const state = { str, position: ps(0), tokens: [] }
  lexText(state)
  t.is(state.position.index, finish)
  const token = state.tokens[0]
  t.deepEqual(token, {
    type: 'text',
    content: 'text that does not end',
    position: {
      start: ps(0),
      end: ps(str.length),
    },
  })
})

test('lexText should not add a token for an empty text', (t) => {
  const str = 'ab<x> never reach here'
  const start = 2
  const finish = 2
  // lexText called at the '<' should stop immediately and emit nothing
  const state = { str, position: ps(start), tokens: [] }
  lexText(state)
  t.is(state.position.index, finish)
  t.is(state.tokens.length, 0)
})

test('lexComment should tokenize the next comment', (t) => {
  const str = '<!-- this is a comment -->abcd'
  const finish = str.indexOf('abcd')
  const state = { str, position: ps(0), tokens: [] }
  lexComment(state)
  t.is(state.position.index, finish)
  t.deepEqual(state.tokens[0], {
    type: 'comment',
    content: ' this is a comment ',
    position: {
      start: ps(0),
      end: ps(finish),
    },
  })
})

test('lexComment should tokenize safely to string end', (t) => {
  const str = 'abcd<!-- comment text '
  const finish = str.length
  const state = { str, position: ps(4), tokens: [] }
  lexComment(state)
  t.is(state.position.index, finish)
  t.deepEqual(state.tokens[0], {
    type: 'comment',
    content: ' comment text ',
    position: {
      start: ps(4),
      end: ps(finish),
    },
  })
})

test('lexComment should add a token for an empty comment', (t) => {
  const str = '<!---->'
  const finish = str.length
  const state = { str, position: ps(0), tokens: [] }
  lexComment(state)
  t.is(state.position.index, finish)
  t.deepEqual(state.tokens[0], {
    type: 'comment',
    content: '',
    position: {
      start: ps(0),
      end: ps(finish),
    },
  })
})

test('lexTag should tokenize the next tag', (t) => {
  const str = '<img/>abcd'
  const finish = str.indexOf('abcd')
  const state = { str, position: ps(0), tokens: [] }
  lexTag(state)
  t.is(state.position.index, finish)
  t.deepEqual(state.tokens, [
    { type: 'tag-start', close: false, position: { start: ps(0) } },
    { type: 'tag', content: 'img' },
    // attribute tokens would appear here; not a part of this test
    { type: 'tag-end', close: true, position: { end: ps(finish) } },
  ])
})

test('lexTagName should tokenize the next tag name', (t) => {
  const str = 'h1 id="title"> test'
  const finish = 2
  const state = { str, position: ps(0), tokens: [] }
  lexTagName(state)
  t.is(state.position.index, finish)
  t.deepEqual(state.tokens[0], {
    type: 'tag',
    content: 'h1',
  })
})

test('lexTagName should ignore leading not-tagname characters', (t) => {
  const str = '>/ div'
  const state = { str, position: ps(0), tokens: [] }
  lexTagName(state)
  t.is(state.position.index, str.length)
  t.deepEqual(state.tokens[0], {
    type: 'tag',
    content: 'div',
  })
})
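// The tests below pin down how lexTagAttributes normalizes whitespace:
// 'key = "value"', 'key= "value"', and 'key ="value"' all tokenize to the
// single content 'key="value"', bare names like 'maybe' pass through
// unchanged, and a dangling '=' with no value is dropped.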
test('lexTagAttributes should tokenize attributes until tag end', (t) => {
  const str = 'yes="no" maybe data-type="array">abcd'
  const finish = str.indexOf('>abcd')
  const state = { str, position: ps(0), tokens: [] }
  lexTagAttributes(state)
  t.is(state.position.index, finish)
  t.deepEqual(state.tokens, [
    { type: 'attribute', content: 'yes="no"' },
    { type: 'attribute', content: 'maybe' },
    { type: 'attribute', content: 'data-type="array"' },
  ])
})

test('lexTagAttributes should tokenize independent of whitespace', (t) => {
  const str = 'yes = "no" maybe data-type= "array" key ="value" >abcd'
  const finish = str.indexOf('>abcd')
  const state = { str, position: ps(0), tokens: [] }
  lexTagAttributes(state)
  t.is(state.position.index, finish)
  t.deepEqual(state.tokens, [
    { type: 'attribute', content: 'yes="no"' },
    { type: 'attribute', content: 'maybe' },
    { type: 'attribute', content: 'data-type="array"' },
    { type: 'attribute', content: 'key="value"' },
  ])
})

test('lexTagAttributes should handle an unset attribute name', (t) => {
  const str = '<div foo bar="baz">'
  const state = { str, position: ps(4), tokens: [] }
  lexTagAttributes(state)
  t.is(state.position.index, str.indexOf('>'))
  t.deepEqual(state.tokens, [
    { type: 'attribute', content: 'foo' },
    { type: 'attribute', content: 'bar="baz"' },
  ])
})

test('lexTagAttributes should handle newline separated attributes', (t) => {
  const str = '<div foo="bar"\nbaz="bat">'
  const state = { str, position: ps(4), tokens: [] }
  lexTagAttributes(state)
  t.is(state.position.index, str.indexOf('>'))
  t.deepEqual(state.tokens, [
    { type: 'attribute', content: 'foo="bar"' },
    { type: 'attribute', content: 'baz="bat"' },
  ])
})

test('lexTagAttributes should handle tab separated attributes', (t) => {
  const str = '<div foo="bar"\tbaz="bat">'
  const state = { str, position: ps(4), tokens: [] }
  lexTagAttributes(state)
  t.is(state.position.index, str.indexOf('>'))
  t.deepEqual(state.tokens, [
    { type: 'attribute', content: 'foo="bar"' },
    { type: 'attribute', content: 'baz="bat"' },
  ])
})

test('lexTagAttributes should handle prefixed spacing', (t) => {
  const str = ' \n\tyes="no">abcd'
  const finish = str.indexOf('>abcd')
  const state = { str, position: ps(0), tokens: [] }
  lexTagAttributes(state)
  t.is(state.position.index, finish)
  t.deepEqual(state.tokens, [{ type: 'attribute', content: 'yes="no"' }])
})

test('lexTagAttributes should handle unquoted one-word values', (t) => {
  const str = 'num=8 ham = steak>abcd'
  const finish = str.indexOf('>abcd')
  const state = { str, position: ps(0), tokens: [] }
  lexTagAttributes(state)
  t.is(state.position.index, finish)
  t.deepEqual(state.tokens, [
    { type: 'attribute', content: 'num=8' },
    { type: 'attribute', content: 'ham=steak' },
  ])
})

test('lexTagAttributes should handle incomplete attributes', (t) => {
  const str = 'x = >abcd'
  const finish = str.indexOf('>abcd')
  const state = { str, position: ps(0), tokens: [] }
  lexTagAttributes(state)
  t.is(state.position.index, finish)
  t.deepEqual(state.tokens, [{ type: 'attribute', content: 'x' }])
})

test('lexSkipTag should tokenize as text until the matching tag name', (t) => {
  const str = 'abcd<test><h1>Test case</h1></test>'
  const finish = str.length
  const state = { str, position: ps(10), tokens: [] }
  lexSkipTag('test', state)
  t.is(state.position.index, finish)
  t.deepEqual(state.tokens, [
    {
      type: 'text',
      content: '<h1>Test case</h1>',
      position: { start: ps(10), end: ps(28) },
    },
    { type: 'tag-start', close: true, position: { start: ps(28) } },
    { type: 'tag', content: 'test' },
    { type: 'tag-end', close: false, position: { end: ps(finish) } },
  ])
})
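// lexSkipTag consumes everything up to the matching close tag as a single
// raw text token, which is why the '<h1>' markup above is never tokenized.
// The tests below cover its edge cases: the close-tag match is
// case-insensitive, and a missing close tag auto-closes at end of input.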
test('lexSkipTag should stop at the case-insensitive matching tag name', (t) => {
  const str = '<test>proving <wbr/>the point</TeSt>'
  const finish = str.length
  const state = { str, position: ps(6), tokens: [] }
  lexSkipTag('tEsT', state)
  t.is(state.position.index, finish)
  t.deepEqual(state.tokens, [
    {
      type: 'text',
      content: 'proving <wbr/>the point',
      position: { start: ps(6), end: ps(29) },
    },
    { type: 'tag-start', close: true, position: { start: ps(29) } },
    { type: 'tag', content: 'TeSt' },
    { type: 'tag-end', close: false, position: { end: ps(finish) } },
  ])
})

test('lexSkipTag should auto-close if the end tag is not found', (t) => {
  const str = '<script>proving <a point/>'
  const state = { str, position: ps(8), tokens: [] }
  lexSkipTag('script', state)
  t.is(state.position.index, str.length)
  t.deepEqual(state.tokens, [
    {
      type: 'text',
      content: 'proving <a point/>',
      position: { start: ps(8), end: ps(26) },
    },
    { type: 'tag-start', close: true, position: { start: ps(26) } },
    { type: 'tag', content: 'script' },
    { type: 'tag-end', close: false, position: { end: ps(str.length) } },
  ])
})

test('lexSkipTag should not add an empty inner text node', (t) => {
  const str = '<script></script>'
  const state = { str, position: ps(8), tokens: [] }
  lexSkipTag('script', state)
  t.is(state.position.index, str.length)
  t.deepEqual(state.tokens, [
    { type: 'tag-start', close: true, position: { start: ps(8) } },
    { type: 'tag', content: 'script' },
    { type: 'tag-end', close: false, position: { end: ps(str.length) } },
  ])
})

test('isWhitespaceChar should work', (t) => {
  t.is(isWhitespaceChar(' '), true)
  t.is(isWhitespaceChar('\n'), true)
  t.is(isWhitespaceChar('\t'), true)
  t.is(isWhitespaceChar('x'), false)
})
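// A usage sketch, not a case from the original suite: any tag name listed in
// options.childlessTags should get the same raw-text treatment the
// 'template' test above exercises. 'script' and the inner '1 < 2' are our
// illustrative choices.
test('lexer should treat configured childless tags as raw text', (t) => {
  const str = '<script>1 < 2</script>'
  const options = { childlessTags: ['script'] }
  const tokens = lexer(str, options)
  t.deepEqual(tokens, [
    { type: 'tag-start', close: false, position: { start: ps(0) } },
    { type: 'tag', content: 'script' },
    { type: 'tag-end', close: false, position: { end: ps(8) } },
    {
      type: 'text',
      content: '1 < 2',
      position: { start: ps(8), end: ps(13) },
    },
    { type: 'tag-start', close: true, position: { start: ps(13) } },
    { type: 'tag', content: 'script' },
    { type: 'tag-end', close: false, position: { end: ps(str.length) } },
  ])
})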