/**
 * @typedef {import('../types.js').SafeConfig} SafeConfig
 * @typedef {import('../types.js').State} State
 */

import {patternInScope} from './pattern-in-scope.js'

/**
 * Make a string safe for embedding in markdown constructs.
 *
 * In markdown, almost all punctuation characters can, in certain cases,
 * result in something.
 * Whether they do is highly subjective to where they happen and in what
 * they happen.
 *
 * To solve this, `mdast-util-to-markdown` tracks:
 *
 * * Characters before and after something;
 * * What “constructs” we are in.
 *
 * This information is then used by this function to escape or encode
 * special characters.
 *
 * @param {State} state
 *   Info passed around about the current state.
 * @param {string | null | undefined} input
 *   Raw value to make safe.
 * @param {SafeConfig} config
 *   Configuration.
 * @returns {string}
 *   Serialized markdown safe for embedding.
 */
export function safe(state, input, config) {
  // Normalize the surrounding context once, so that every use below (offsets
  // and lookahead) sees a string, even when a side is missing.
  const prefix = config.before || ''
  const suffix = config.after || ''
  const value = prefix + (input || '') + suffix
  /** @type {Array<number>} */
  const positions = []
  /** @type {Array<string>} */
  const result = []
  // Per matched position: whether *every* pattern that matched there was
  // conditional on the character before / after it.
  // A flag is cleared as soon as one unconditional pattern matches.
  /** @type {Record<number, {before: boolean, after: boolean}>} */
  const infos = {}
  let index = -1

  while (++index < state.unsafe.length) {
    const pattern = state.unsafe[index]

    if (!patternInScope(state.stack, pattern)) {
      continue
    }

    const expression = state.compilePattern(pattern)
    /** @type {RegExpExecArray | null} */
    let match

    while ((match = expression.exec(value))) {
      const before = 'before' in pattern || Boolean(pattern.atBreak)
      const after = 'after' in pattern
      // When there is a `before` condition, the first capture group holds
      // what was matched in front of the unsafe character: skip over it.
      const position = match.index + (before ? match[1].length : 0)
      // `infos` is filled in lockstep with `positions`, so a direct lookup
      // replaces a linear `positions.includes` scan.
      const info = infos[position]

      if (info) {
        if (info.before && !before) {
          info.before = false
        }

        if (info.after && !after) {
          info.after = false
        }
      } else {
        positions.push(position)
        infos[position] = {before, after}
      }
    }
  }

  positions.sort(numerical)

  // `start` and `end` delimit the part of `value` that came from `input`.
  let start = prefix.length
  const end = value.length - suffix.length
  index = -1

  while (++index < positions.length) {
    const position = positions[index]

    // Character before or after matched:
    if (position < start || position >= end) {
      continue
    }

    // If this character is supposed to be escaped because it has a condition on
    // the next character, and the next character is definitely being escaped,
    // then skip this escape.
    // The second half does the same for a condition on the previous character.
    if (
      (position + 1 < end &&
        positions[index + 1] === position + 1 &&
        infos[position].after &&
        !infos[position + 1].before &&
        !infos[position + 1].after) ||
      (positions[index - 1] === position - 1 &&
        infos[position].before &&
        !infos[position - 1].before &&
        !infos[position - 1].after)
    ) {
      continue
    }

    if (start !== position) {
      // If we have to use a character reference, an ampersand would be more
      // correct, but as backslashes only care about punctuation, either will
      // do the trick
      result.push(escapeBackslashes(value.slice(start, position), '\\'))
    }

    start = position

    if (
      /[!-/:-@[-`{-~]/.test(value.charAt(position)) &&
      (!config.encode || !config.encode.includes(value.charAt(position)))
    ) {
      // Character escape: only the backslash is pushed, the character itself
      // is emitted with the next slice (as `start` still points at it).
      result.push('\\')
    } else {
      // Character reference: replaces the character, so move past it.
      result.push(
        '&#x' + value.charCodeAt(position).toString(16).toUpperCase() + ';'
      )
      start++
    }
  }

  result.push(escapeBackslashes(value.slice(start, end), suffix))

  return result.join('')
}

/**
 * Compare two numbers, for sorting in ascending order.
 *
 * @param {number} a
 *   Left value.
 * @param {number} b
 *   Right value.
 * @returns {number}
 *   Negative if `a` is lower than `b`, positive if higher, `0` if equal.
 */
function numerical(a, b) {
  return a - b
}

/**
 * Double each backslash in `value` that is followed by ASCII punctuation, so
 * that it stays a literal backslash instead of forming a character escape.
 *
 * @param {string} value
 *   Text to escape backslashes in.
 * @param {string} after
 *   Text that follows `value`; only used as lookahead for a trailing
 *   backslash in `value`, never escaped or returned itself.
 * @returns {string}
 *   `value` with the needed backslashes added.
 */
function escapeBackslashes(value, after) {
  const expression = /\\(?=[!-/:-@[-`{-~])/g
  /** @type {Array<number>} */
  const positions = []
  /** @type {Array<string>} */
  const results = []
  // Guard against a missing `after`, which would otherwise be concatenated as
  // the text `undefined` or `null`.
  const whole = value + (after || '')
  let index = -1
  let start = 0
  /** @type {RegExpExecArray | null} */
  let match

  while ((match = expression.exec(whole))) {
    // Matches are found left to right: once one starts inside `after`, all
    // further ones do too.
    // Those must not add backslashes to the result, which only holds `value`.
    if (match.index >= value.length) {
      break
    }

    positions.push(match.index)
  }

  while (++index < positions.length) {
    if (start !== positions[index]) {
      results.push(value.slice(start, positions[index]))
    }

    // The extra backslash; the original one is part of the next slice.
    results.push('\\')
    start = positions[index]
  }

  results.push(value.slice(start))

  return results.join('')
}