'use strict'; const util = require('./util'); const defaultOptions = { allowBooleanAttributes: false, //A tag can have attributes without any value unpairedTags: [] }; //const tagsPattern = new RegExp("<\\/?([\\w:\\-_\.]+)\\s*\/?>","g"); exports.validate = function (xmlData, options) { options = Object.assign({}, defaultOptions, options); //xmlData = xmlData.replace(/(\r\n|\n|\r)/gm,"");//make it single line //xmlData = xmlData.replace(/(^\s*<\?xml.*?\?>)/g,"");//Remove XML starting tag //xmlData = xmlData.replace(/()/g,"");//Remove DOCTYPE const tags = []; let tagFound = false; //indicates that the root tag has been closed (aka. depth 0 has been reached) let reachedRoot = false; if (xmlData[0] === '\ufeff') { // check for byte order mark (BOM) xmlData = xmlData.substr(1); } for (let i = 0; i < xmlData.length; i++) { if (xmlData[i] === '<' && xmlData[i+1] === '?') { i+=2; i = readPI(xmlData,i); if (i.err) return i; }else if (xmlData[i] === '<') { //starting of tag //read until you reach to '>' avoiding any '>' in attribute value let tagStartPos = i; i++; if (xmlData[i] === '!') { i = readCommentAndCDATA(xmlData, i); continue; } else { let closingTag = false; if (xmlData[i] === '/') { //closing tag closingTag = true; i++; } //read tagname let tagName = ''; for (; i < xmlData.length && xmlData[i] !== '>' && xmlData[i] !== ' ' && xmlData[i] !== '\t' && xmlData[i] !== '\n' && xmlData[i] !== '\r'; i++ ) { tagName += xmlData[i]; } tagName = tagName.trim(); //console.log(tagName); if (tagName[tagName.length - 1] === '/') { //self closing tag without attributes tagName = tagName.substring(0, tagName.length - 1); //continue; i--; } if (!validateTagName(tagName)) { let msg; if (tagName.trim().length === 0) { msg = "Invalid space after '<'."; } else { msg = "Tag '"+tagName+"' is an invalid name."; } return getErrorObject('InvalidTag', msg, getLineNumberForPosition(xmlData, i)); } const result = readAttributeStr(xmlData, i); if (result === false) { return getErrorObject('InvalidAttr', "Attributes for '"+tagName+"' have open quote.", getLineNumberForPosition(xmlData, i)); } let attrStr = result.value; i = result.index; if (attrStr[attrStr.length - 1] === '/') { //self closing tag const attrStrStart = i - attrStr.length; attrStr = attrStr.substring(0, attrStr.length - 1); const isValid = validateAttributeString(attrStr, options); if (isValid === true) { tagFound = true; //continue; //text may presents after self closing tag } else { //the result from the nested function returns the position of the error within the attribute //in order to get the 'true' error line, we need to calculate the position where the attribute begins (i - attrStr.length) and then add the position within the attribute //this gives us the absolute index in the entire xml, which we can use to find the line at last return getErrorObject(isValid.err.code, isValid.err.msg, getLineNumberForPosition(xmlData, attrStrStart + isValid.err.line)); } } else if (closingTag) { if (!result.tagClosed) { return getErrorObject('InvalidTag', "Closing tag '"+tagName+"' doesn't have proper closing.", getLineNumberForPosition(xmlData, i)); } else if (attrStr.trim().length > 0) { return getErrorObject('InvalidTag', "Closing tag '"+tagName+"' can't have attributes or invalid starting.", getLineNumberForPosition(xmlData, tagStartPos)); } else { const otg = tags.pop(); if (tagName !== otg.tagName) { let openPos = getLineNumberForPosition(xmlData, otg.tagStartPos); return getErrorObject('InvalidTag', "Expected closing tag '"+otg.tagName+"' (opened in line "+openPos.line+", col "+openPos.col+") instead of closing tag '"+tagName+"'.", getLineNumberForPosition(xmlData, tagStartPos)); } //when there are no more tags, we reached the root level. if (tags.length == 0) { reachedRoot = true; } } } else { const isValid = validateAttributeString(attrStr, options); if (isValid !== true) { //the result from the nested function returns the position of the error within the attribute //in order to get the 'true' error line, we need to calculate the position where the attribute begins (i - attrStr.length) and then add the position within the attribute //this gives us the absolute index in the entire xml, which we can use to find the line at last return getErrorObject(isValid.err.code, isValid.err.msg, getLineNumberForPosition(xmlData, i - attrStr.length + isValid.err.line)); } //if the root level has been reached before ... if (reachedRoot === true) { return getErrorObject('InvalidXml', 'Multiple possible root nodes found.', getLineNumberForPosition(xmlData, i)); } else if(options.unpairedTags.indexOf(tagName) !== -1){ //don't push into stack } else { tags.push({tagName, tagStartPos}); } tagFound = true; } //skip tag text value //It may include comments and CDATA value for (i++; i < xmlData.length; i++) { if (xmlData[i] === '<') { if (xmlData[i + 1] === '!') { //comment or CADATA i++; i = readCommentAndCDATA(xmlData, i); continue; } else if (xmlData[i+1] === '?') { i = readPI(xmlData, ++i); if (i.err) return i; } else{ break; } } else if (xmlData[i] === '&') { const afterAmp = validateAmpersand(xmlData, i); if (afterAmp == -1) return getErrorObject('InvalidChar', "char '&' is not expected.", getLineNumberForPosition(xmlData, i)); i = afterAmp; }else{ if (reachedRoot === true && !isWhiteSpace(xmlData[i])) { return getErrorObject('InvalidXml', "Extra text at the end", getLineNumberForPosition(xmlData, i)); } } } //end of reading tag text value if (xmlData[i] === '<') { i--; } } } else { if ( isWhiteSpace(xmlData[i])) { continue; } return getErrorObject('InvalidChar', "char '"+xmlData[i]+"' is not expected.", getLineNumberForPosition(xmlData, i)); } } if (!tagFound) { return getErrorObject('InvalidXml', 'Start tag expected.', 1); }else if (tags.length == 1) { return getErrorObject('InvalidTag', "Unclosed tag '"+tags[0].tagName+"'.", getLineNumberForPosition(xmlData, tags[0].tagStartPos)); }else if (tags.length > 0) { return getErrorObject('InvalidXml', "Invalid '"+ JSON.stringify(tags.map(t => t.tagName), null, 4).replace(/\r?\n/g, '')+ "' found.", {line: 1, col: 1}); } return true; }; function isWhiteSpace(char){ return char === ' ' || char === '\t' || char === '\n' || char === '\r'; } /** * Read Processing insstructions and skip * @param {*} xmlData * @param {*} i */ function readPI(xmlData, i) { const start = i; for (; i < xmlData.length; i++) { if (xmlData[i] == '?' || xmlData[i] == ' ') { //tagname const tagname = xmlData.substr(start, i - start); if (i > 5 && tagname === 'xml') { return getErrorObject('InvalidXml', 'XML declaration allowed only at the start of the document.', getLineNumberForPosition(xmlData, i)); } else if (xmlData[i] == '?' && xmlData[i + 1] == '>') { //check if valid attribut string i++; break; } else { continue; } } } return i; } function readCommentAndCDATA(xmlData, i) { if (xmlData.length > i + 5 && xmlData[i + 1] === '-' && xmlData[i + 2] === '-') { //comment for (i += 3; i < xmlData.length; i++) { if (xmlData[i] === '-' && xmlData[i + 1] === '-' && xmlData[i + 2] === '>') { i += 2; break; } } } else if ( xmlData.length > i + 8 && xmlData[i + 1] === 'D' && xmlData[i + 2] === 'O' && xmlData[i + 3] === 'C' && xmlData[i + 4] === 'T' && xmlData[i + 5] === 'Y' && xmlData[i + 6] === 'P' && xmlData[i + 7] === 'E' ) { let angleBracketsCount = 1; for (i += 8; i < xmlData.length; i++) { if (xmlData[i] === '<') { angleBracketsCount++; } else if (xmlData[i] === '>') { angleBracketsCount--; if (angleBracketsCount === 0) { break; } } } } else if ( xmlData.length > i + 9 && xmlData[i + 1] === '[' && xmlData[i + 2] === 'C' && xmlData[i + 3] === 'D' && xmlData[i + 4] === 'A' && xmlData[i + 5] === 'T' && xmlData[i + 6] === 'A' && xmlData[i + 7] === '[' ) { for (i += 8; i < xmlData.length; i++) { if (xmlData[i] === ']' && xmlData[i + 1] === ']' && xmlData[i + 2] === '>') { i += 2; break; } } } return i; } const doubleQuote = '"'; const singleQuote = "'"; /** * Keep reading xmlData until '<' is found outside the attribute value. * @param {string} xmlData * @param {number} i */ function readAttributeStr(xmlData, i) { let attrStr = ''; let startChar = ''; let tagClosed = false; for (; i < xmlData.length; i++) { if (xmlData[i] === doubleQuote || xmlData[i] === singleQuote) { if (startChar === '') { startChar = xmlData[i]; } else if (startChar !== xmlData[i]) { //if vaue is enclosed with double quote then single quotes are allowed inside the value and vice versa } else { startChar = ''; } } else if (xmlData[i] === '>') { if (startChar === '') { tagClosed = true; break; } } attrStr += xmlData[i]; } if (startChar !== '') { return false; } return { value: attrStr, index: i, tagClosed: tagClosed }; } /** * Select all the attributes whether valid or invalid. */ const validAttrStrRegxp = new RegExp('(\\s*)([^\\s=]+)(\\s*=)?(\\s*([\'"])(([\\s\\S])*?)\\5)?', 'g'); //attr, ="sd", a="amit's", a="sd"b="saf", ab cd="" function validateAttributeString(attrStr, options) { //console.log("start:"+attrStr+":end"); //if(attrStr.trim().length === 0) return true; //empty string const matches = util.getAllMatches(attrStr, validAttrStrRegxp); const attrNames = {}; for (let i = 0; i < matches.length; i++) { if (matches[i][1].length === 0) { //nospace before attribute name: a="sd"b="saf" return getErrorObject('InvalidAttr', "Attribute '"+matches[i][2]+"' has no space in starting.", getPositionFromMatch(matches[i])) } else if (matches[i][3] !== undefined && matches[i][4] === undefined) { return getErrorObject('InvalidAttr', "Attribute '"+matches[i][2]+"' is without value.", getPositionFromMatch(matches[i])); } else if (matches[i][3] === undefined && !options.allowBooleanAttributes) { //independent attribute: ab return getErrorObject('InvalidAttr', "boolean attribute '"+matches[i][2]+"' is not allowed.", getPositionFromMatch(matches[i])); } /* else if(matches[i][6] === undefined){//attribute without value: ab= return { err: { code:"InvalidAttr",msg:"attribute " + matches[i][2] + " has no value assigned."}}; } */ const attrName = matches[i][2]; if (!validateAttrName(attrName)) { return getErrorObject('InvalidAttr', "Attribute '"+attrName+"' is an invalid name.", getPositionFromMatch(matches[i])); } if (!attrNames.hasOwnProperty(attrName)) { //check for duplicate attribute. attrNames[attrName] = 1; } else { return getErrorObject('InvalidAttr', "Attribute '"+attrName+"' is repeated.", getPositionFromMatch(matches[i])); } } return true; } function validateNumberAmpersand(xmlData, i) { let re = /\d/; if (xmlData[i] === 'x') { i++; re = /[\da-fA-F]/; } for (; i < xmlData.length; i++) { if (xmlData[i] === ';') return i; if (!xmlData[i].match(re)) break; } return -1; } function validateAmpersand(xmlData, i) { // https://www.w3.org/TR/xml/#dt-charref i++; if (xmlData[i] === ';') return -1; if (xmlData[i] === '#') { i++; return validateNumberAmpersand(xmlData, i); } let count = 0; for (; i < xmlData.length; i++, count++) { if (xmlData[i].match(/\w/) && count < 20) continue; if (xmlData[i] === ';') break; return -1; } return i; } function getErrorObject(code, message, lineNumber) { return { err: { code: code, msg: message, line: lineNumber.line || lineNumber, col: lineNumber.col, }, }; } function validateAttrName(attrName) { return util.isName(attrName); } // const startsWithXML = /^xml/i; function validateTagName(tagname) { return util.isName(tagname) /* && !tagname.match(startsWithXML) */; } //this function returns the line number for the character at the given index function getLineNumberForPosition(xmlData, index) { const lines = xmlData.substring(0, index).split(/\r?\n/); return { line: lines.length, // column number is last line's length + 1, because column numbering starts at 1: col: lines[lines.length - 1].length + 1 }; } //this function returns the position of the first character of match within attrStr function getPositionFromMatch(match) { return match.startIndex + match[1].length; }