/**
 * @typedef {import('micromark-util-types').Code} Code
 * @typedef {import('micromark-util-types').Construct} Construct
 * @typedef {import('micromark-util-types').Resolver} Resolver
 * @typedef {import('micromark-util-types').State} State
 * @typedef {import('micromark-util-types').TokenizeContext} TokenizeContext
 * @typedef {import('micromark-util-types').Tokenizer} Tokenizer
 */

import {
  asciiAlpha,
  asciiAlphanumeric,
  markdownLineEnding,
  markdownLineEndingOrSpace,
  markdownSpace
} from 'micromark-util-character'
import {htmlBlockNames, htmlRawNames} from 'micromark-util-html-tag-name'
import {blankLine} from './blank-line.js'

/**
 * Construct for HTML (flow).
 *
 * @type {Construct}
 */
export const htmlFlow = {
  name: 'htmlFlow',
  tokenize: tokenizeHtmlFlow,
  resolveTo: resolveToHtmlFlow,
  concrete: true
}

/**
 * Partial construct: a line ending followed by a blank line.
 *
 * @type {Construct}
 */
const blankLineBefore = {
  tokenize: tokenizeBlankLineBefore,
  partial: true
}

/**
 * Partial construct: a line ending followed by a line that is not lazy.
 *
 * @type {Construct}
 */
const nonLazyContinuationStart = {
  tokenize: tokenizeNonLazyContinuationStart,
  partial: true
}

/**
 * Merge a line prefix that sits directly before the HTML into the HTML
 * tokens, so that the HTML starts where the prefix started.
 *
 * The events are changed in place and the same array is returned.
 *
 * @type {Resolver}
 */
function resolveToHtmlFlow(events) {
  let index = events.length

  // Walk back to the most recent `enter` of `htmlFlow`.
  // If there is none, `index` ends at `-1` and nothing below applies.
  while (index--) {
    if (events[index][0] === 'enter' && events[index][1].type === 'htmlFlow') {
      break
    }
  }

  if (index > 1 && events[index - 2][1].type === 'linePrefix') {
    // Add the prefix start to the HTML token.
    events[index][1].start = events[index - 2][1].start
    // Add the prefix start to the HTML line token.
    events[index + 1][1].start = events[index - 2][1].start
    // Remove the line prefix.
    events.splice(index - 2, 2)
  }

  return events
}

/**
 * Tokenize HTML (flow).
 *
 * Each inner function is one state: it receives the current character code
 * and returns the next state (or the result of `ok` / `nok`).
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeHtmlFlow(effects, ok, nok) {
  const self = this
  /**
   * Kind of HTML that was opened; `continuation` uses it to find the end:
   * `1` raw (tag name in `htmlRawNames`), `2` comment, `3` instruction,
   * `4` declaration, `5` CDATA, `6` basic (tag name in `htmlBlockNames`),
   * `7` complete (any other tag).
   *
   * @type {number}
   */
  let marker
  /**
   * Whether the opening sequence was `</`.
   *
   * @type {boolean}
   */
  let closingTag
  /**
   * Tag name collected so far.
   *
   * @type {string}
   */
  let buffer
  /**
   * Position in `CDATA[` while matching a CDATA opening.
   *
   * @type {number}
   */
  let index
  /**
   * Quote (`"` or `'`) that opened the current attribute value.
   *
   * @type {Code}
   */
  let markerB

  return start

  /**
   * Start of HTML (flow).
   *
   * ```markdown
   * > | <x />
   *     ^
   * ```
   *
   * @type {State}
   */
  function start(code) {
    // To do: parse indent like `markdown-rs`.
    return before(code)
  }

  /**
   * At `<`, after optional whitespace.
   *
   * ```markdown
   * > | <x />
   *     ^
   * ```
   *
   * @type {State}
   */
  function before(code) {
    effects.enter('htmlFlow')
    effects.enter('htmlFlowData')
    effects.consume(code)
    return open
  }

  /**
   * After `<`, at tag name or other stuff.
   *
   * ```markdown
   * > | <x />
   *      ^
   * > | <!doctype>
   *      ^
   * > | <!--xxx-->
   *      ^
   * ```
   *
   * @type {State}
   */
  function open(code) {
    // `!`
    if (code === 33) {
      effects.consume(code)
      return declarationOpen
    }

    // `/`
    if (code === 47) {
      effects.consume(code)
      closingTag = true
      return tagCloseStart
    }

    // `?`
    if (code === 63) {
      effects.consume(code)
      marker = 3
      // To do:
      // tokenizer.concrete = true
      // To do: use `markdown-rs` style interrupt.
      // While we’re in an instruction instead of a declaration, we’re on a `?`
      // right now, so we do need to search for `>`, similar to declarations.
      return self.interrupt ? ok : continuationDeclarationInside
    }

    // ASCII alphabetical.
    if (asciiAlpha(code)) {
      effects.consume(code)
      // @ts-expect-error: not null.
      buffer = String.fromCharCode(code)
      return tagName
    }

    return nok(code)
  }

  /**
   * After `<!`, at declaration, comment, or CDATA.
   *
   * ```markdown
   * > | <!doctype>
   *       ^
   * > | <!--xxx-->
   *       ^
   * > | <![CDATA[>&<]]>
   *       ^
   * ```
   *
   * @type {State}
   */
  function declarationOpen(code) {
    // `-`
    if (code === 45) {
      effects.consume(code)
      marker = 2
      return commentOpenInside
    }

    // `[`
    if (code === 91) {
      effects.consume(code)
      marker = 5
      index = 0
      return cdataOpenInside
    }

    // ASCII alphabetical.
    if (asciiAlpha(code)) {
      effects.consume(code)
      marker = 4
      // // Do not form containers.
      // tokenizer.concrete = true
      return self.interrupt ? ok : continuationDeclarationInside
    }

    return nok(code)
  }

  /**
   * After `<!-`, inside a comment, at another `-`.
   *
   * ```markdown
   * > | <!--xxx-->
   *        ^
   * ```
   *
   * @type {State}
   */
  function commentOpenInside(code) {
    // `-`
    if (code === 45) {
      effects.consume(code)
      // // Do not form containers.
      // tokenizer.concrete = true
      return self.interrupt ? ok : continuationDeclarationInside
    }

    return nok(code)
  }

  /**
   * After `<![`, inside CDATA, expecting `CDATA[`.
   *
   * ```markdown
   * > | <![CDATA[>&<]]>
   *        ^^^^^^
   * ```
   *
   * @type {State}
   */
  function cdataOpenInside(code) {
    const value = 'CDATA['

    if (code === value.charCodeAt(index++)) {
      effects.consume(code)

      if (index === value.length) {
        // // Do not form containers.
        // tokenizer.concrete = true
        return self.interrupt ? ok : continuation
      }

      return cdataOpenInside
    }

    return nok(code)
  }

  /**
   * After `</`, in closing tag, at tag name.
   *
   * ```markdown
   * > | </x>
   *       ^
   * ```
   *
   * @type {State}
   */
  function tagCloseStart(code) {
    if (asciiAlpha(code)) {
      effects.consume(code)
      // @ts-expect-error: not null.
      buffer = String.fromCharCode(code)
      return tagName
    }

    return nok(code)
  }

  /**
   * In tag name.
   *
   * ```markdown
   * > | <ab>
   *      ^^
   * > | </ab>
   *       ^^
   * ```
   *
   * @type {State}
   */
  function tagName(code) {
    // The name ends at eof, `/`, `>`, or whitespace (including eols).
    if (
      code === null ||
      code === 47 ||
      code === 62 ||
      markdownLineEndingOrSpace(code)
    ) {
      const slash = code === 47
      const name = buffer.toLowerCase()

      // Raw: only for opening tags that are not followed by a slash.
      if (!slash && !closingTag && htmlRawNames.includes(name)) {
        marker = 1
        // // Do not form containers.
        // tokenizer.concrete = true
        return self.interrupt ? ok(code) : continuation(code)
      }

      // Basic.
      if (htmlBlockNames.includes(name)) {
        marker = 6

        if (slash) {
          effects.consume(code)
          return basicSelfClosing
        }

        // // Do not form containers.
        // tokenizer.concrete = true
        return self.interrupt ? ok(code) : continuation(code)
      }

      marker = 7
      // Do not support complete HTML when interrupting.
      return self.interrupt && !self.parser.lazy[self.now().line]
        ? nok(code)
        : closingTag
        ? completeClosingTagAfter(code)
        : completeAttributeNameBefore(code)
    }

    // ASCII alphanumerical and `-`.
    if (code === 45 || asciiAlphanumeric(code)) {
      effects.consume(code)
      buffer += String.fromCharCode(code)
      return tagName
    }

    return nok(code)
  }

  /**
   * After closing slash of a basic tag name.
   *
   * ```markdown
   * > | <div/>
   *          ^
   * ```
   *
   * @type {State}
   */
  function basicSelfClosing(code) {
    // `>`
    if (code === 62) {
      effects.consume(code)
      // // Do not form containers.
      // tokenizer.concrete = true
      return self.interrupt ? ok : continuation
    }

    return nok(code)
  }

  /**
   * After the tag name of a complete closing tag: optional whitespace, then
   * `>` is expected.
   *
   * ```markdown
   * > | </x>
   *        ^
   * ```
   *
   * @type {State}
   */
  function completeClosingTagAfter(code) {
    if (markdownSpace(code)) {
      effects.consume(code)
      return completeClosingTagAfter
    }

    return completeEnd(code)
  }

  /**
   * At an attribute name.
   *
   * At first, this state is used after a complete tag name, after whitespace,
   * where it expects optional attributes or the end of the tag.
   * It is also reused after attributes, when expecting more optional
   * attributes.
   *
   * ```markdown
   * > | <a />
   *        ^
   * > | <a :b>
   *        ^
   * > | <a _b>
   *        ^
   * > | <a b>
   *        ^
   * > | <a >
   *        ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeNameBefore(code) {
    // `/`
    if (code === 47) {
      effects.consume(code)
      return completeEnd
    }

    // ASCII alphabetical and `:` and `_`.
    if (code === 58 || code === 95 || asciiAlpha(code)) {
      effects.consume(code)
      return completeAttributeName
    }

    if (markdownSpace(code)) {
      effects.consume(code)
      return completeAttributeNameBefore
    }

    return completeEnd(code)
  }

  /**
   * In attribute name.
   *
   * ```markdown
   * > | <a :b>
   *         ^
   * > | <a _b>
   *         ^
   * > | <a b>
   *         ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeName(code) {
    // ASCII alphanumerical and `-`, `.`, `:`, and `_`.
    if (
      code === 45 ||
      code === 46 ||
      code === 58 ||
      code === 95 ||
      asciiAlphanumeric(code)
    ) {
      effects.consume(code)
      return completeAttributeName
    }

    return completeAttributeNameAfter(code)
  }

  /**
   * After attribute name, at an optional initializer, the end of the tag, or
   * whitespace.
   *
   * ```markdown
   * > | <a b>
   *         ^
   * > | <a b=c>
   *         ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeNameAfter(code) {
    // `=`
    if (code === 61) {
      effects.consume(code)
      return completeAttributeValueBefore
    }

    if (markdownSpace(code)) {
      effects.consume(code)
      return completeAttributeNameAfter
    }

    return completeAttributeNameBefore(code)
  }

  /**
   * Before unquoted, double quoted, or single quoted attribute value, allowing
   * whitespace.
   *
   * ```markdown
   * > | <a b=c>
   *          ^
   * > | <a b="c">
   *          ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeValueBefore(code) {
    // Eof, `<`, `=`, `>`, and `` ` `` cannot start a value.
    if (
      code === null ||
      code === 60 ||
      code === 61 ||
      code === 62 ||
      code === 96
    ) {
      return nok(code)
    }

    // `"` or `'`: remember which quote has to close the value.
    if (code === 34 || code === 39) {
      effects.consume(code)
      markerB = code
      return completeAttributeValueQuoted
    }

    if (markdownSpace(code)) {
      effects.consume(code)
      return completeAttributeValueBefore
    }

    return completeAttributeValueUnquoted(code)
  }

  /**
   * In double or single quoted attribute value.
   *
   * ```markdown
   * > | <a b="c">
   *           ^
   * > | <a b='c'>
   *           ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeValueQuoted(code) {
    if (code === markerB) {
      effects.consume(code)
      markerB = null
      return completeAttributeValueQuotedAfter
    }

    // A quoted value may not run past the end of the line here.
    if (code === null || markdownLineEnding(code)) {
      return nok(code)
    }

    effects.consume(code)
    return completeAttributeValueQuoted
  }

  /**
   * In unquoted attribute value.
   *
   * ```markdown
   * > | <a b=c>
   *          ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeValueUnquoted(code) {
    // Eof, `"`, `'`, `/`, `<`, `=`, `>`, `` ` ``, and whitespace end the value.
    if (
      code === null ||
      code === 34 ||
      code === 39 ||
      code === 47 ||
      code === 60 ||
      code === 61 ||
      code === 62 ||
      code === 96 ||
      markdownLineEndingOrSpace(code)
    ) {
      return completeAttributeNameAfter(code)
    }

    effects.consume(code)
    return completeAttributeValueUnquoted
  }

  /**
   * After double or single quoted attribute value, before whitespace or the
   * end of the tag.
   *
   * ```markdown
   * > | <a b="c">
   *             ^
   * ```
   *
   * @type {State}
   */
  function completeAttributeValueQuotedAfter(code) {
    // `/`, `>`, or whitespace.
    if (code === 47 || code === 62 || markdownSpace(code)) {
      return completeAttributeNameBefore(code)
    }

    return nok(code)
  }

  /**
   * In certain circumstances of a complete tag where only an `>` is allowed.
   *
   * ```markdown
   * > | <a b="c">
   *             ^
   * ```
   *
   * @type {State}
   */
  function completeEnd(code) {
    // `>`
    if (code === 62) {
      effects.consume(code)
      return completeAfter
    }

    return nok(code)
  }

  /**
   * After `>` in a complete tag: only whitespace may follow on the line.
   *
   * ```markdown
   * > | <x>
   *        ^
   * ```
   *
   * @type {State}
   */
  function completeAfter(code) {
    if (code === null || markdownLineEnding(code)) {
      // // Do not form containers.
      // tokenizer.concrete = true
      return continuation(code)
    }

    if (markdownSpace(code)) {
      effects.consume(code)
      return completeAfter
    }

    return nok(code)
  }

  /**
   * In continuation of any HTML kind.
   *
   * Which character can start the closing sequence depends on `marker`.
   *
   * ```markdown
   * > | <!--xxx-->
   *         ^
   * ```
   *
   * @type {State}
   */
  function continuation(code) {
    // `-` in a comment.
    if (code === 45 && marker === 2) {
      effects.consume(code)
      return continuationCommentInside
    }

    // `<` in raw.
    if (code === 60 && marker === 1) {
      effects.consume(code)
      return continuationRawTagOpen
    }

    // `>` in a declaration.
    if (code === 62 && marker === 4) {
      effects.consume(code)
      return continuationClose
    }

    // `?` in an instruction.
    if (code === 63 && marker === 3) {
      effects.consume(code)
      return continuationDeclarationInside
    }

    // `]` in CDATA.
    if (code === 93 && marker === 5) {
      effects.consume(code)
      return continuationCdataInside
    }

    // Basic and complete HTML end when the next line is blank.
    if (markdownLineEnding(code) && (marker === 6 || marker === 7)) {
      effects.exit('htmlFlowData')
      return effects.check(
        blankLineBefore,
        continuationAfter,
        continuationStart
      )(code)
    }

    if (code === null || markdownLineEnding(code)) {
      effects.exit('htmlFlowData')
      return continuationStart(code)
    }

    effects.consume(code)
    return continuation
  }

  /**
   * In continuation, at eol.
   *
   * Continues on the next line if `nonLazyContinuationStart` matches, and
   * otherwise ends the HTML.
   *
   * ```markdown
   * > | <x>
   *        ^
   *   | asd
   * ```
   *
   * @type {State}
   */
  function continuationStart(code) {
    return effects.check(
      nonLazyContinuationStart,
      continuationStartNonLazy,
      continuationAfter
    )(code)
  }

  /**
   * In continuation, at eol, before non-lazy content.
   *
   * ```markdown
   * > | <x>
   *        ^
   *   | asd
   * ```
   *
   * @type {State}
   */
  function continuationStartNonLazy(code) {
    effects.enter('lineEnding')
    effects.consume(code)
    effects.exit('lineEnding')
    return continuationBefore
  }

  /**
   * In continuation, before non-lazy content.
   *
   * ```markdown
   *   | <x>
   * > | asd
   *     ^
   * ```
   *
   * @type {State}
   */
  function continuationBefore(code) {
    // Empty line (or eof): no data token is opened for it.
    if (code === null || markdownLineEnding(code)) {
      return continuationStart(code)
    }

    effects.enter('htmlFlowData')
    return continuation(code)
  }

  /**
   * In comment continuation, after one `-`, expecting another.
   *
   * ```markdown
   * > | <!--xxx-->
   *             ^
   * ```
   *
   * @type {State}
   */
  function continuationCommentInside(code) {
    // `-`
    if (code === 45) {
      effects.consume(code)
      return continuationDeclarationInside
    }

    return continuation(code)
  }

  /**
   * In raw continuation, after `<`, at `/`.
   *
   * ```markdown
   * > | <script>console.log(1)</script>
   *                            ^
   * ```
   *
   * @type {State}
   */
  function continuationRawTagOpen(code) {
    // `/`
    if (code === 47) {
      effects.consume(code)
      buffer = ''
      return continuationRawEndTag
    }

    return continuation(code)
  }

  /**
   * In raw continuation, after `</`, in a raw tag name.
   *
   * ```markdown
   * > | <script>console.log(1)</script>
   *                             ^^^^^^
   * ```
   *
   * @type {State}
   */
  function continuationRawEndTag(code) {
    // `>`
    if (code === 62) {
      const name = buffer.toLowerCase()

      if (htmlRawNames.includes(name)) {
        effects.consume(code)
        return continuationClose
      }

      return continuation(code)
    }

    // Collect at most 8 letters of the name.
    if (asciiAlpha(code) && buffer.length < 8) {
      effects.consume(code)
      // @ts-expect-error: not null.
      buffer += String.fromCharCode(code)
      return continuationRawEndTag
    }

    return continuation(code)
  }

  /**
   * In cdata continuation, after `]`, expecting `]>`.
   *
   * ```markdown
   * > | <![CDATA[>&<]]>
   *                  ^
   * ```
   *
   * @type {State}
   */
  function continuationCdataInside(code) {
    // `]`
    if (code === 93) {
      effects.consume(code)
      return continuationDeclarationInside
    }

    return continuation(code)
  }

  /**
   * In declaration or instruction continuation, at `>`.
   *
   * ```markdown
   * > | <!-->
   *         ^
   * > | <?>
   *       ^
   * > | <!q>
   *        ^
   * > | <!--ab-->
   *             ^
   * > | <![CDATA[>&<]]>
   *                   ^
   * ```
   *
   * @type {State}
   */
  function continuationDeclarationInside(code) {
    // `>`
    if (code === 62) {
      effects.consume(code)
      return continuationClose
    }

    // More dashes.
    if (code === 45 && marker === 2) {
      effects.consume(code)
      return continuationDeclarationInside
    }

    return continuation(code)
  }

  /**
   * In closed continuation: everything we get until the eol/eof is part of it.
   *
   * ```markdown
   * > | <!doctype>
   *               ^
   * ```
   *
   * @type {State}
   */
  function continuationClose(code) {
    if (code === null || markdownLineEnding(code)) {
      effects.exit('htmlFlowData')
      return continuationAfter(code)
    }

    effects.consume(code)
    return continuationClose
  }

  /**
   * Done.
   *
   * ```markdown
   * > | <!doctype>
   *               ^
   * ```
   *
   * @type {State}
   */
  function continuationAfter(code) {
    effects.exit('htmlFlow')
    // // Feel free to interrupt.
    // tokenizer.interrupt = false
    // // No longer concrete.
    // tokenizer.concrete = false
    return ok(code)
  }
}

/**
 * Tokenize a line ending that is followed by a non-lazy line.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeNonLazyContinuationStart(effects, ok, nok) {
  const self = this

  return start

  /**
   * At eol, before continuation.
   *
   * ```markdown
   * > | <x>
   *        ^
   *   | asd
   * ```
   *
   * @type {State}
   */
  function start(code) {
    if (markdownLineEnding(code)) {
      effects.enter('lineEnding')
      effects.consume(code)
      effects.exit('lineEnding')
      return after
    }

    return nok(code)
  }

  /**
   * A continuation.
   *
   * Fails when the parser has marked the current line as lazy.
   *
   * ```markdown
   *   | <x>
   * > | asd
   *     ^
   * ```
   *
   * @type {State}
   */
  function after(code) {
    return self.parser.lazy[self.now().line] ? nok(code) : ok(code)
  }
}

/**
 * Tokenize a line ending that is followed by a blank line.
 *
 * @this {TokenizeContext}
 * @type {Tokenizer}
 */
function tokenizeBlankLineBefore(effects, ok, nok) {
  return start

  /**
   * Before eol, expecting blank line.
   *
   * The only visible use is from `continuation`, which has already checked
   * that `code` is a line ending, so it is consumed without another check.
   *
   * ```markdown
   * > | <div>
   *          ^
   *   |
   * ```
   *
   * @type {State}
   */
  function start(code) {
    effects.enter('lineEnding')
    effects.consume(code)
    effects.exit('lineEnding')
    return effects.attempt(blankLine, ok, nok)
  }
}