/**
 * @typedef {import('micromark-util-types').Chunk} Chunk
 * @typedef {import('micromark-util-types').Event} Event
 * @typedef {import('micromark-util-types').Token} Token
 */

import {splice} from 'micromark-util-chunked'
import {codes, types} from 'micromark-util-symbol'
import {ok as assert} from 'devlop'

/**
 * Tokenize subcontent.
 *
 * @param {Array<Event>} events
 *   List of events.
 * @returns {boolean}
 *   Whether done: `true` when no subtokens were found, `false` when
 *   subcontent was expanded and another pass is needed.
 */
// eslint-disable-next-line complexity
export function subtokenize(events) {
  /** @type {Record<string, number>} */
  const jumps = {}
  let index = -1
  /** @type {Event} */
  let event
  /** @type {number | undefined} */
  let lineIndex
  /** @type {number} */
  let otherIndex
  /** @type {Event} */
  let otherEvent
  /** @type {Array<Event>} */
  let parameters
  /** @type {Array<Event>} */
  let subevents
  /** @type {boolean | undefined} */
  let more

  while (++index < events.length) {
    while (index in jumps) {
      index = jumps[index]
    }

    event = events[index]

    // Add a hook for the GFM tasklist extension, which needs to know if text
    // is in the first content of a list item.
    if (
      index &&
      event[1].type === types.chunkFlow &&
      events[index - 1][1].type === types.listItemPrefix
    ) {
      assert(event[1]._tokenizer, 'expected `_tokenizer` on subtokens')
      subevents = event[1]._tokenizer.events
      otherIndex = 0

      if (
        otherIndex < subevents.length &&
        subevents[otherIndex][1].type === types.lineEndingBlank
      ) {
        otherIndex += 2
      }

      if (
        otherIndex < subevents.length &&
        subevents[otherIndex][1].type === types.content
      ) {
        while (++otherIndex < subevents.length) {
          if (subevents[otherIndex][1].type === types.content) {
            break
          }

          if (subevents[otherIndex][1].type === types.chunkText) {
            subevents[otherIndex][1]._isInFirstContentOfListItem = true
            otherIndex++
          }
        }
      }
    }

    // Enter.
    if (event[0] === 'enter') {
      if (event[1].contentType) {
        Object.assign(jumps, subcontent(events, index))
        index = jumps[index]
        more = true
      }
    }
    // Exit.
    else if (event[1]._container) {
      otherIndex = index
      lineIndex = undefined

      while (otherIndex--) {
        otherEvent = events[otherIndex]

        if (
          otherEvent[1].type === types.lineEnding ||
          otherEvent[1].type === types.lineEndingBlank
        ) {
          if (otherEvent[0] === 'enter') {
            if (lineIndex) {
              events[lineIndex][1].type = types.lineEndingBlank
            }

            otherEvent[1].type = types.lineEnding
            lineIndex = otherIndex
          }
        } else {
          break
        }
      }

      if (lineIndex) {
        // Fix position.
        event[1].end = Object.assign({}, events[lineIndex][1].start)

        // Switch container exit w/ line endings.
        parameters = events.slice(lineIndex, index)
        parameters.unshift(event)
        splice(events, lineIndex, index - lineIndex + 1, parameters)
      }
    }
  }

  return !more
}

/**
 * Tokenize embedded tokens.
 *
 * @param {Array<Event>} events
 * @param {number} eventIndex
 * @returns {Record<string, number>}
 */
function subcontent(events, eventIndex) {
  const token = events[eventIndex][1]
  const context = events[eventIndex][2]
  let startPosition = eventIndex - 1
  /** @type {Array<number>} */
  const startPositions = []
  assert(token.contentType, 'expected `contentType` on subtokens')
  const tokenizer =
    token._tokenizer || context.parser[token.contentType](token.start)
  const childEvents = tokenizer.events
  /** @type {Array<[number, number]>} */
  const jumps = []
  /** @type {Record<string, number>} */
  const gaps = {}

  /** @type {Array<Chunk>} */
  let stream
  /** @type {Token | undefined} */
  let previous
  let index = -1
  /** @type {Token | undefined} */
  let current = token
  let adjust = 0
  let start = 0
  const breaks = [start]

  // Loop forward through the linked tokens to pass them in order to the
  // subtokenizer.
  while (current) {
    // Find the position of the event for this token.
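    // The linked tokens occur in document order, so a forward linear scan is
    // enough; `startPosition` only ever advances across iterations.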
    while (events[++startPosition][1] !== current) {
      // Empty.
    }

    assert(
      !previous || current.previous === previous,
      'expected previous to match'
    )
    assert(!previous || previous.next === current, 'expected next to match')

    startPositions.push(startPosition)

    if (!current._tokenizer) {
      stream = context.sliceStream(current)

      if (!current.next) {
        stream.push(codes.eof)
      }

      if (previous) {
        tokenizer.defineSkip(current.start)
      }

      if (current._isInFirstContentOfListItem) {
        tokenizer._gfmTasklistFirstContentOfListItem = true
      }

      tokenizer.write(stream)

      if (current._isInFirstContentOfListItem) {
        tokenizer._gfmTasklistFirstContentOfListItem = undefined
      }
    }

    // Unravel the next token.
    previous = current
    current = current.next
  }

  // Now, loop back through all events (and linked tokens), to figure out which
  // parts belong where.
  current = token

  while (++index < childEvents.length) {
    if (
      // Find a void token that includes a break.
      childEvents[index][0] === 'exit' &&
      childEvents[index - 1][0] === 'enter' &&
      childEvents[index][1].type === childEvents[index - 1][1].type &&
      childEvents[index][1].start.line !== childEvents[index][1].end.line
    ) {
      assert(current, 'expected a current token')
      start = index + 1
      breaks.push(start)
      // Help GC.
      current._tokenizer = undefined
      current.previous = undefined
      current = current.next
    }
  }

  // Help GC.
  tokenizer.events = []

  // If there’s one more token (which is the case for lines that end in an
  // EOF), that’s perfect: the last point we found starts it.
  // If there isn’t, make sure any remaining content is added to it.
  if (current) {
    // Help GC.
    current._tokenizer = undefined
    current.previous = undefined
    assert(!current.next, 'expected no next token')
  } else {
    breaks.pop()
  }

  // Now splice the events from the subtokenizer into the current events,
  // moving back to front so that splice indices aren’t affected.
  index = breaks.length

  while (index--) {
    const slice = childEvents.slice(breaks[index], breaks[index + 1])
    const start = startPositions.pop()
    assert(start !== undefined, 'expected a start position when splicing')
    jumps.unshift([start, start + slice.length - 1])
    splice(events, start, 2, slice)
  }

  index = -1

  while (++index < jumps.length) {
    gaps[adjust + jumps[index][0]] = adjust + jumps[index][1]
    adjust += jumps[index][1] - jumps[index][0] - 1
  }

  return gaps
}
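
// A minimal usage sketch, assuming this module is consumed as the
// `micromark-util-subtokenize` package: one call performs a single pass and
// returns `true` only once nothing was left to expand, so callers drive it
// in a loop. micromark’s own `postprocess` step works this way.
//
//   import {subtokenize} from 'micromark-util-subtokenize'
//
//   /**
//    * @param {Array<Event>} events
//    * @returns {Array<Event>}
//    */
//   function postprocess(events) {
//     while (!subtokenize(events)) {
//       // Keep expanding until no `contentType` tokens remain.
//     }
//
//     return events
//   }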