/** * @typedef {import('micromark-util-types').Code} Code * @typedef {import('micromark-util-types').Construct} Construct * @typedef {import('micromark-util-types').State} State * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext * @typedef {import('micromark-util-types').Tokenizer} Tokenizer */ import {factorySpace} from 'micromark-factory-space' import { asciiAlpha, asciiAlphanumeric, markdownLineEnding, markdownLineEndingOrSpace, markdownSpace } from 'micromark-util-character' /** @type {Construct} */ export const htmlText = { name: 'htmlText', tokenize: tokenizeHtmlText } /** * @this {TokenizeContext} * @type {Tokenizer} */ function tokenizeHtmlText(effects, ok, nok) { const self = this /** @type {NonNullable | undefined} */ let marker /** @type {number} */ let index /** @type {State} */ let returnState return start /** * Start of HTML (text). * * ```markdown * > | a c * ^ * ``` * * @type {State} */ function start(code) { effects.enter('htmlText') effects.enter('htmlTextData') effects.consume(code) return open } /** * After `<`, at tag name or other stuff. * * ```markdown * > | a c * ^ * > | a c * ^ * > | a c * ^ * ``` * * @type {State} */ function open(code) { if (code === 33) { effects.consume(code) return declarationOpen } if (code === 47) { effects.consume(code) return tagCloseStart } if (code === 63) { effects.consume(code) return instruction } // ASCII alphabetical. if (asciiAlpha(code)) { effects.consume(code) return tagOpen } return nok(code) } /** * After ` | a c * ^ * > | a c * ^ * > | a &<]]> c * ^ * ``` * * @type {State} */ function declarationOpen(code) { if (code === 45) { effects.consume(code) return commentOpenInside } if (code === 91) { effects.consume(code) index = 0 return cdataOpenInside } if (asciiAlpha(code)) { effects.consume(code) return declaration } return nok(code) } /** * In a comment, after ` | a c * ^ * ``` * * @type {State} */ function commentOpenInside(code) { if (code === 45) { effects.consume(code) return commentEnd } return nok(code) } /** * In comment. * * ```markdown * > | a c * ^ * ``` * * @type {State} */ function comment(code) { if (code === null) { return nok(code) } if (code === 45) { effects.consume(code) return commentClose } if (markdownLineEnding(code)) { returnState = comment return lineEndingBefore(code) } effects.consume(code) return comment } /** * In comment, after `-`. * * ```markdown * > | a c * ^ * ``` * * @type {State} */ function commentClose(code) { if (code === 45) { effects.consume(code) return commentEnd } return comment(code) } /** * In comment, after `--`. * * ```markdown * > | a c * ^ * ``` * * @type {State} */ function commentEnd(code) { return code === 62 ? end(code) : code === 45 ? commentClose(code) : comment(code) } /** * After ` | a &<]]> b * ^^^^^^ * ``` * * @type {State} */ function cdataOpenInside(code) { const value = 'CDATA[' if (code === value.charCodeAt(index++)) { effects.consume(code) return index === value.length ? cdata : cdataOpenInside } return nok(code) } /** * In CDATA. * * ```markdown * > | a &<]]> b * ^^^ * ``` * * @type {State} */ function cdata(code) { if (code === null) { return nok(code) } if (code === 93) { effects.consume(code) return cdataClose } if (markdownLineEnding(code)) { returnState = cdata return lineEndingBefore(code) } effects.consume(code) return cdata } /** * In CDATA, after `]`, at another `]`. * * ```markdown * > | a &<]]> b * ^ * ``` * * @type {State} */ function cdataClose(code) { if (code === 93) { effects.consume(code) return cdataEnd } return cdata(code) } /** * In CDATA, after `]]`, at `>`. * * ```markdown * > | a &<]]> b * ^ * ``` * * @type {State} */ function cdataEnd(code) { if (code === 62) { return end(code) } if (code === 93) { effects.consume(code) return cdataEnd } return cdata(code) } /** * In declaration. * * ```markdown * > | a c * ^ * ``` * * @type {State} */ function declaration(code) { if (code === null || code === 62) { return end(code) } if (markdownLineEnding(code)) { returnState = declaration return lineEndingBefore(code) } effects.consume(code) return declaration } /** * In instruction. * * ```markdown * > | a c * ^ * ``` * * @type {State} */ function instruction(code) { if (code === null) { return nok(code) } if (code === 63) { effects.consume(code) return instructionClose } if (markdownLineEnding(code)) { returnState = instruction return lineEndingBefore(code) } effects.consume(code) return instruction } /** * In instruction, after `?`, at `>`. * * ```markdown * > | a c * ^ * ``` * * @type {State} */ function instructionClose(code) { return code === 62 ? end(code) : instruction(code) } /** * After ` | a c * ^ * ``` * * @type {State} */ function tagCloseStart(code) { // ASCII alphabetical. if (asciiAlpha(code)) { effects.consume(code) return tagClose } return nok(code) } /** * After ` | a c * ^ * ``` * * @type {State} */ function tagClose(code) { // ASCII alphanumerical and `-`. if (code === 45 || asciiAlphanumeric(code)) { effects.consume(code) return tagClose } return tagCloseBetween(code) } /** * In closing tag, after tag name. * * ```markdown * > | a c * ^ * ``` * * @type {State} */ function tagCloseBetween(code) { if (markdownLineEnding(code)) { returnState = tagCloseBetween return lineEndingBefore(code) } if (markdownSpace(code)) { effects.consume(code) return tagCloseBetween } return end(code) } /** * After ` | a c * ^ * ``` * * @type {State} */ function tagOpen(code) { // ASCII alphanumerical and `-`. if (code === 45 || asciiAlphanumeric(code)) { effects.consume(code) return tagOpen } if (code === 47 || code === 62 || markdownLineEndingOrSpace(code)) { return tagOpenBetween(code) } return nok(code) } /** * In opening tag, after tag name. * * ```markdown * > | a c * ^ * ``` * * @type {State} */ function tagOpenBetween(code) { if (code === 47) { effects.consume(code) return end } // ASCII alphabetical and `:` and `_`. if (code === 58 || code === 95 || asciiAlpha(code)) { effects.consume(code) return tagOpenAttributeName } if (markdownLineEnding(code)) { returnState = tagOpenBetween return lineEndingBefore(code) } if (markdownSpace(code)) { effects.consume(code) return tagOpenBetween } return end(code) } /** * In attribute name. * * ```markdown * > | a d * ^ * ``` * * @type {State} */ function tagOpenAttributeName(code) { // ASCII alphabetical and `-`, `.`, `:`, and `_`. if ( code === 45 || code === 46 || code === 58 || code === 95 || asciiAlphanumeric(code) ) { effects.consume(code) return tagOpenAttributeName } return tagOpenAttributeNameAfter(code) } /** * After attribute name, before initializer, the end of the tag, or * whitespace. * * ```markdown * > | a d * ^ * ``` * * @type {State} */ function tagOpenAttributeNameAfter(code) { if (code === 61) { effects.consume(code) return tagOpenAttributeValueBefore } if (markdownLineEnding(code)) { returnState = tagOpenAttributeNameAfter return lineEndingBefore(code) } if (markdownSpace(code)) { effects.consume(code) return tagOpenAttributeNameAfter } return tagOpenBetween(code) } /** * Before unquoted, double quoted, or single quoted attribute value, allowing * whitespace. * * ```markdown * > | a e * ^ * ``` * * @type {State} */ function tagOpenAttributeValueBefore(code) { if ( code === null || code === 60 || code === 61 || code === 62 || code === 96 ) { return nok(code) } if (code === 34 || code === 39) { effects.consume(code) marker = code return tagOpenAttributeValueQuoted } if (markdownLineEnding(code)) { returnState = tagOpenAttributeValueBefore return lineEndingBefore(code) } if (markdownSpace(code)) { effects.consume(code) return tagOpenAttributeValueBefore } effects.consume(code) return tagOpenAttributeValueUnquoted } /** * In double or single quoted attribute value. * * ```markdown * > | a e * ^ * ``` * * @type {State} */ function tagOpenAttributeValueQuoted(code) { if (code === marker) { effects.consume(code) marker = undefined return tagOpenAttributeValueQuotedAfter } if (code === null) { return nok(code) } if (markdownLineEnding(code)) { returnState = tagOpenAttributeValueQuoted return lineEndingBefore(code) } effects.consume(code) return tagOpenAttributeValueQuoted } /** * In unquoted attribute value. * * ```markdown * > | a e * ^ * ``` * * @type {State} */ function tagOpenAttributeValueUnquoted(code) { if ( code === null || code === 34 || code === 39 || code === 60 || code === 61 || code === 96 ) { return nok(code) } if (code === 47 || code === 62 || markdownLineEndingOrSpace(code)) { return tagOpenBetween(code) } effects.consume(code) return tagOpenAttributeValueUnquoted } /** * After double or single quoted attribute value, before whitespace or the end * of the tag. * * ```markdown * > | a e * ^ * ``` * * @type {State} */ function tagOpenAttributeValueQuotedAfter(code) { if (code === 47 || code === 62 || markdownLineEndingOrSpace(code)) { return tagOpenBetween(code) } return nok(code) } /** * In certain circumstances of a tag where only an `>` is allowed. * * ```markdown * > | a e * ^ * ``` * * @type {State} */ function end(code) { if (code === 62) { effects.consume(code) effects.exit('htmlTextData') effects.exit('htmlText') return ok } return nok(code) } /** * At eol. * * > 👉 **Note**: we can’t have blank lines in text, so no need to worry about * > empty tokens. * * ```markdown * > | a * ``` * * @type {State} */ function lineEndingBefore(code) { effects.exit('htmlTextData') effects.enter('lineEnding') effects.consume(code) effects.exit('lineEnding') return lineEndingAfter } /** * After eol, at optional whitespace. * * > 👉 **Note**: we can’t have blank lines in text, so no need to worry about * > empty tokens. * * ```markdown * | a * ^ * ``` * * @type {State} */ function lineEndingAfter(code) { // Always populated by defaults. return markdownSpace(code) ? factorySpace( effects, lineEndingAfterPrefix, 'linePrefix', self.parser.constructs.disable.null.includes('codeIndented') ? undefined : 4 )(code) : lineEndingAfterPrefix(code) } /** * After eol, after optional whitespace. * * > 👉 **Note**: we can’t have blank lines in text, so no need to worry about * > empty tokens. * * ```markdown * | a * ^ * ``` * * @type {State} */ function lineEndingAfterPrefix(code) { effects.enter('htmlTextData') return returnState(code) } }