// 38465-vm/bot/node_modules/himalaya/src/lexer.js

import { startsWith, endsWith, stringIncludes, arrayIncludes } from './compat'

/**
 * Moves `position` forward by `len` characters of `str`, mutating it in place.
 *
 * `position.index` is advanced by `len`. For every character stepped over, a
 * "\n" increments `position.line` and resets `position.column` to 0; any other
 * character increments `position.column`.
 *
 * @param {{index: number, line: number, column: number}} position - cursor to mutate
 * @param {string} str - the text the cursor walks over
 * @param {number} len - number of characters to step over
 */
export function feedPosition(position, str, len) {
  const from = position.index
  const to = from + len
  position.index = to
  for (let cursor = from; cursor < to; cursor += 1) {
    if (str.charAt(cursor) === '\n') {
      position.line += 1
      position.column = 0
      continue
    }
    position.column += 1
  }
}

/**
 * Moves `position` forward to the absolute index `end` of `str` by feeding it
 * the distance between `end` and the current `position.index`.
 *
 * @param {{index: number, line: number, column: number}} position - cursor to mutate
 * @param {string} str - the text the cursor walks over
 * @param {number} end - absolute index to move the cursor to
 * @returns whatever `feedPosition` returns
 */
export function jumpPosition(position, str, end) {
  return feedPosition(position, str, end - position.index)
}

/**
 * Creates a fresh cursor pointing at the very beginning of the input:
 * index 0, column 0, line 0.
 *
 * @returns {{index: number, column: number, line: number}} a new position object
 */
export function makeInitialPosition() {
  const origin = { index: 0, column: 0, line: 0 }
  return origin
}

/**
 * Takes a snapshot of a cursor so later mutation of `position` does not affect
 * the copy. Only `index`, `line` and `column` are carried over.
 *
 * @param {{index: number, line: number, column: number}} position - cursor to copy
 * @returns {{index: number, line: number, column: number}} a new, independent object
 */
export function copyPosition(position) {
  const { index, line, column } = position
  return { index, line, column }
}

/**
 * Entry point: tokenizes `str` and returns the resulting token list.
 *
 * Builds the lexer state (input, options, a cursor at the start of the input
 * and an empty token array), runs `lex` over it and hands back the tokens.
 *
 * @param {string} str - the markup to tokenize
 * @param {object} options - lexer options; `lex` reads `options.childlessTags`
 * @returns {Array<object>} the tokens collected while lexing
 */
export default function lexer(str, options) {
  const position = makeInitialPosition()
  const state = { str, options, position, tokens: [] }
  lex(state)
  return state.tokens
}

/**
 * Main lexing loop: consumes `state.str` from `state.position` to the end,
 * appending tokens to `state.tokens`.
 *
 * Each iteration first tries to lex a run of text. When no text was consumed
 * the cursor sits on a "<" that opens either a comment ("<!--") or a tag.
 * After an opening tag whose lower-cased name is listed in
 * `options.childlessTags`, the content up to the matching closing tag is lexed
 * as raw text via `lexSkipTag`.
 *
 * @param {object} state - lexer state with `str`, `options.childlessTags`,
 *   `position` and `tokens`
 */
export function lex(state) {
  const {
    str,
    options: { childlessTags },
  } = state
  const len = str.length
  while (state.position.index < len) {
    const start = state.position.index
    lexText(state)
    if (state.position.index !== start) {
      // Text was consumed; look for more text or markup on the next pass.
      continue
    }

    const isComment = startsWith(str, '!--', start + 1)
    if (isComment) {
      lexComment(state)
      continue
    }

    // Must be read before lexTag moves the cursor past the "</".
    const isClosingTag = str.charAt(start + 1) === '/'
    const tagName = lexTag(state)
    // Only an opening tag has content to skip. Entering skip mode after a
    // stray closing tag (e.g. a lone "</script>") would swallow all following
    // markup up to the next matching closing tag as text.
    if (!isClosingTag && arrayIncludes(childlessTags, tagName.toLowerCase())) {
      lexSkipTag(tagName, state)
    }
  }
}

const alphanumeric = /[A-Za-z0-9]/

/**
 * Finds where the text run starting at `index` ends, i.e. the index of the
 * next "<" that is immediately followed by "/", "!" or an ASCII letter/digit.
 * Any other "<" is considered part of the text and skipped.
 *
 * @param {string} str - the input being lexed
 * @param {number} index - where to start searching
 * @returns {number} index of the terminating "<", or -1 when there is none
 */
export function findTextEnd(str, index) {
  for (
    let candidate = str.indexOf('<', index);
    candidate !== -1;
    candidate = str.indexOf('<', candidate + 1)
  ) {
    const next = str.charAt(candidate + 1)
    const opensMarkup = next === '/' || next === '!' || alphanumeric.test(next)
    if (opensMarkup) {
      return candidate
    }
  }
  return -1
}

/**
 * Lexes one run of text starting at the cursor and pushes a `text` token.
 *
 * The run ends at the index reported by `findTextEnd`, or at the end of the
 * input when there is none. Nothing is pushed and the cursor does not move
 * when the run would end exactly at the cursor.
 *
 * @param {object} state - lexer state with `str`, `position` and `tokens`
 */
export function lexText(state) {
  const { str, position } = state
  const found = findTextEnd(str, position.index)
  if (found === position.index) {
    return
  }
  const textEnd = found === -1 ? str.length : found

  const start = copyPosition(position)
  const content = str.slice(start.index, textEnd)
  jumpPosition(position, str, textEnd)
  state.tokens.push({
    type: 'text',
    content,
    position: { start, end: copyPosition(position) },
  })
}

/**
 * Lexes a comment whose "<!--" opener starts at the cursor and pushes a
 * `comment` token holding the text between the opener and the next "-->".
 *
 * When no "-->" follows, the comment content runs to the end of the input.
 * The cursor ends up just past the "-->" (or at the end of the input).
 *
 * @param {object} state - lexer state with `str`, `position` and `tokens`
 */
export function lexComment(state) {
  const { str, position } = state
  const start = copyPosition(position)
  feedPosition(position, str, '<!--'.length)

  const closer = str.indexOf('-->', position.index)
  const isTerminated = closer !== -1
  const contentEnd = isTerminated ? closer : str.length
  const commentEnd = isTerminated ? closer + '-->'.length : str.length

  const content = str.slice(position.index, contentEnd)
  jumpPosition(position, str, commentEnd)
  const end = copyPosition(position)
  state.tokens.push({ type: 'comment', content, position: { start, end } })
}

/**
 * Lexes one tag starting at the cursor (which sits on "<") and pushes, in
 * order: a `tag-start` token, the tag-name token, any attribute tokens and a
 * `tag-end` token.
 *
 * `tag-start.close` is true when the tag begins with "</"; `tag-end.close` is
 * true when the character after the attributes is "/" (as in "/>").
 *
 * @param {object} state - lexer state with `str`, `position` and `tokens`
 * @returns {string} the tag name as returned by `lexTagName`
 */
export function lexTag(state) {
  const { str, position } = state

  // Opening delimiter: "<" or "</".
  const start = copyPosition(position)
  const isClosingTag = str.charAt(position.index + 1) === '/'
  feedPosition(position, str, isClosingTag ? 2 : 1)
  state.tokens.push({
    type: 'tag-start',
    close: isClosingTag,
    position: { start },
  })

  const tagName = lexTagName(state)
  lexTagAttributes(state)

  // Closing delimiter: ">" or "/>".
  const isSelfClosing = str.charAt(position.index) === '/'
  feedPosition(position, str, isSelfClosing ? 2 : 1)
  state.tokens.push({
    type: 'tag-end',
    close: isSelfClosing,
    position: { end: copyPosition(position) },
  })

  return tagName
}

// The set of characters matched by `\s` is listed at
// https://developer.mozilla.org/en-US/docs/Web/JavaScript/Guide/Regular_Expressions#special-white-space
const whitespace = /\s/

/**
 * Tells whether `char` contains a character matched by the `\s` regex class.
 *
 * @param {string} char - the character to classify
 * @returns {boolean} true when `\s` matches
 */
export function isWhitespaceChar(char) {
  const matchesWhitespace = whitespace.test(char)
  return matchesWhitespace
}

/**
 * Lexes the tag name at the cursor and pushes a `tag` token with it.
 *
 * Leading whitespace, "/" and ">" characters are skipped. The name then runs
 * until the next whitespace, "/" or ">" (or the end of the input). The cursor
 * is moved to the end of the name.
 *
 * @param {object} state - lexer state with `str`, `position` and `tokens`
 * @returns {string} the tag name exactly as written in the input
 */
export function lexTagName(state) {
  const { str, position } = state
  const len = str.length
  // Characters that can never be part of a tag name.
  const isDelimiter = (char) =>
    isWhitespaceChar(char) || char === '/' || char === '>'

  let nameStart = position.index
  while (nameStart < len && isDelimiter(str.charAt(nameStart))) {
    nameStart += 1
  }

  // The character at nameStart is already known to belong to the name, so the
  // scan for the end begins one past it.
  let nameEnd = nameStart + 1
  while (nameEnd < len && !isDelimiter(str.charAt(nameEnd))) {
    nameEnd += 1
  }

  jumpPosition(position, str, nameEnd)
  const content = str.slice(nameStart, nameEnd)
  state.tokens.push({ type: 'tag', content })
  return content
}

/**
 * Lexes the attribute section of a tag and pushes one `attribute` token per
 * attribute onto `state.tokens`.
 *
 * Phase 1 splits the text between the tag name and the end of the tag into
 * whitespace-separated "words", treating quoted sections as opaque. The scan
 * stops at ">" or at "/>" and leaves the cursor on that delimiter, or at the
 * end of the input when the tag is never closed.
 *
 * Phase 2 stitches words back together so that `key = value`, `key =value`
 * and `key= value` each produce a single `key=value` attribute token.
 *
 * @param {object} state - lexer state with `str`, `position` and `tokens`
 */
export function lexTagAttributes(state) {
  const { str, position, tokens } = state
  let cursor = position.index
  let quote = null // null, single-, or double-quote
  let wordBegin = cursor // index of word start
  const words = [] // "key", "key=value", "key='value'", etc
  const len = str.length
  while (cursor < len) {
    const char = str.charAt(cursor)
    if (quote) {
      // Inside a quoted value everything is literal until the matching quote.
      const isQuoteEnd = char === quote
      if (isQuoteEnd) {
        quote = null
      }
      cursor++
      continue
    }

    // A "/" ends the tag only as the first half of "/>". A "/" anywhere else
    // (e.g. the unquoted value in `href=/foo`) is ordinary attribute text;
    // treating it as the tag end would truncate the attribute and make the
    // caller consume the "/" plus one unrelated character as the delimiter.
    const isTagEnd =
      char === '>' || (char === '/' && str.charAt(cursor + 1) === '>')
    if (isTagEnd) {
      if (cursor !== wordBegin) {
        words.push(str.slice(wordBegin, cursor))
      }
      break
    }

    const isWordEnd = isWhitespaceChar(char)
    if (isWordEnd) {
      if (cursor !== wordBegin) {
        words.push(str.slice(wordBegin, cursor))
      }
      wordBegin = cursor + 1
      cursor++
      continue
    }

    const isQuoteStart = char === "'" || char === '"'
    if (isQuoteStart) {
      quote = char
      cursor++
      continue
    }

    cursor++
  }
  jumpPosition(position, str, cursor)

  const wLen = words.length
  const type = 'attribute'
  for (let i = 0; i < wLen; i++) {
    const word = words[i]
    const isNotPair = word.indexOf('=') === -1
    if (isNotPair) {
      const secondWord = words[i + 1]
      if (secondWord && startsWith(secondWord, '=')) {
        if (secondWord.length > 1) {
          // `key =value`: the next word carries both "=" and the value.
          const newWord = word + secondWord
          tokens.push({ type, content: newWord })
          i += 1
          continue
        }
        // `key = value`: the next word is a lone "="; skip over it.
        const thirdWord = words[i + 2]
        i += 1
        if (thirdWord) {
          const newWord = word + '=' + thirdWord
          tokens.push({ type, content: newWord })
          i += 1
          continue
        }
      }
    }
    if (endsWith(word, '=')) {
      // `key= value`: take the following word as the value unless it looks
      // like a pair of its own.
      const secondWord = words[i + 1]
      if (secondWord && !stringIncludes(secondWord, '=')) {
        const newWord = word + secondWord
        tokens.push({ type, content: newWord })
        i += 1
        continue
      }

      // Dangling "=" with no usable value: emit the key alone.
      const newWord = word.slice(0, -1)
      tokens.push({ type, content: newWord })
      continue
    }

    tokens.push({ type, content: word })
  }
}

const push = [].push

/**
 * Lexes the content of a childless tag (one whose body is treated as raw
 * text) up to and including its matching closing tag.
 *
 * Every "</" after the cursor is probed by lexing it as a tag into a scratch
 * state. The first one whose name matches `tagName` case-insensitively ends
 * the search: the text before it (if any) is pushed as a single `text` token,
 * followed by the tokens of the closing tag, and the cursor moves past it.
 * When the input contains no further "</" at all, `lexText` is run instead.
 *
 * @param {string} tagName - name of the tag whose content is being skipped
 * @param {object} state - lexer state with `str`, `position` and `tokens`
 */
export function lexSkipTag(tagName, state) {
  const { str, position, tokens } = state
  const wantedName = tagName.toLowerCase()
  const len = str.length
  for (let searchFrom = position.index; searchFrom < len; ) {
    const closeAt = str.indexOf('</', searchFrom)
    if (closeAt === -1) {
      lexText(state)
      return
    }

    // Lex the candidate closing tag into a scratch state so that nothing is
    // committed to the real token list unless the name matches.
    const probePosition = copyPosition(position)
    jumpPosition(probePosition, str, closeAt)
    const probe = { str, position: probePosition, tokens: [] }
    const closingName = lexTag(probe)
    if (closingName.toLowerCase() !== wantedName) {
      searchFrom = probe.position.index
      continue
    }

    const hasText = closeAt !== position.index
    if (hasText) {
      const textStart = copyPosition(position)
      jumpPosition(position, str, closeAt)
      const textEnd = copyPosition(position)
      tokens.push({
        type: 'text',
        content: str.slice(textStart.index, closeAt),
        position: { start: textStart, end: textEnd },
      })
    }

    push.apply(tokens, probe.tokens)
    jumpPosition(position, str, probe.position.index)
    return
  }
}