// %PDF- %PDF-
// Directory : /var/www/html/node_modules/entities/lib/esm/
// Current File : /var/www/html/node_modules/entities/lib/esm/decode.js
import htmlDecodeTree from "./generated/decode-data-html.js";
import xmlDecodeTree from "./generated/decode-data-xml.js";
import decodeCodePoint from "./decode_codepoint.js";

// Re-export for use by eg. htmlparser2
export { htmlDecodeTree, xmlDecodeTree, decodeCodePoint };
export { replaceCodePoint, fromCodePoint } from "./decode_codepoint.js";

/** Character codes the decoder compares input characters against. */
var CharCodes;
(function (CharCodes) {
    CharCodes[CharCodes["NUM"] = 35] = "NUM";
    CharCodes[CharCodes["SEMI"] = 59] = "SEMI";
    CharCodes[CharCodes["ZERO"] = 48] = "ZERO";
    CharCodes[CharCodes["NINE"] = 57] = "NINE";
    CharCodes[CharCodes["LOWER_A"] = 97] = "LOWER_A";
    CharCodes[CharCodes["LOWER_F"] = 102] = "LOWER_F";
    CharCodes[CharCodes["LOWER_X"] = 120] = "LOWER_X";
    /** Bit that needs to be set to convert an upper case ASCII character to lower case */
    CharCodes[CharCodes["To_LOWER_BIT"] = 32] = "To_LOWER_BIT";
})(CharCodes || (CharCodes = {}));

/**
 * Bit masks applied to a decode-tree node.
 *
 * - `VALUE_LENGTH` (0xC000): top two bits; shifted right by 14 where it is read.
 * - `BRANCH_LENGTH` (0x3F80): next seven bits; shifted right by 7 to get the branch count.
 * - `JUMP_TABLE` (0x007F): low seven bits; the jump offset (or the single branch character).
 */
export var BinTrieFlags;
(function (BinTrieFlags) {
    BinTrieFlags[BinTrieFlags["VALUE_LENGTH"] = 49152] = "VALUE_LENGTH";
    BinTrieFlags[BinTrieFlags["BRANCH_LENGTH"] = 16256] = "BRANCH_LENGTH";
    BinTrieFlags[BinTrieFlags["JUMP_TABLE"] = 127] = "JUMP_TABLE";
})(BinTrieFlags || (BinTrieFlags = {}));

/**
 * Builds a decoder function bound to the given decode tree.
 *
 * @param decodeTree Indexable sequence of numeric trie nodes used to resolve named entities.
 * @returns A function `(str, strict) => string` that replaces the entities in `str`.
 *     When `strict` is truthy, an entity is only replaced if it is followed by a semicolon.
 */
function getDecoder(decodeTree) {
    return function decodeHTMLBinary(str, strict) {
        let ret = "";
        // Start of the input that has not been copied to `ret` yet.
        let lastIdx = 0;
        let strIdx = 0;

        while ((strIdx = str.indexOf("&", strIdx)) >= 0) {
            ret += str.slice(lastIdx, strIdx);
            // Keep pointing at the "&" so that an unrecognised entity is emitted verbatim.
            lastIdx = strIdx;
            // Skip the "&"
            strIdx += 1;

            // If we have a numeric entity, handle this separately.
            if (str.charCodeAt(strIdx) === CharCodes.NUM) {
                // Skip the leading "&#". For hex entities, also skip the leading "x".
                let start = strIdx + 1;
                let base = 10;
                let cp = str.charCodeAt(start);
                if ((cp | CharCodes.To_LOWER_BIT) === CharCodes.LOWER_X) {
                    base = 16;
                    strIdx += 1;
                    start += 1;
                }

                // Advance `strIdx` to the first character that is not a digit of `base`.
                do {
                    cp = str.charCodeAt(++strIdx);
                } while ((cp >= CharCodes.ZERO && cp <= CharCodes.NINE) ||
                    (base === 16 &&
                        (cp | CharCodes.To_LOWER_BIT) >= CharCodes.LOWER_A &&
                        (cp | CharCodes.To_LOWER_BIT) <= CharCodes.LOWER_F));

                // Only decode when at least one digit was consumed.
                if (start !== strIdx) {
                    const entity = str.substring(start, strIdx);
                    const parsed = parseInt(entity, base);
                    if (str.charCodeAt(strIdx) === CharCodes.SEMI) {
                        strIdx += 1;
                    }
                    else if (strict) {
                        // Missing semicolon in strict mode: leave the text untouched.
                        continue;
                    }
                    ret += decodeCodePoint(parsed);
                    lastIdx = strIdx;
                }
                continue;
            }

            // Named entity: walk the trie one input character at a time.
            let resultIdx = 0;
            // Number of characters consumed since the last accepted match (reset to 0 on a match).
            let excess = 1;
            let treeIdx = 0;
            let current = decodeTree[treeIdx];
            for (; strIdx < str.length; strIdx++, excess++) {
                treeIdx = determineBranch(decodeTree, current, treeIdx + 1, str.charCodeAt(strIdx));
                if (treeIdx < 0) {
                    break;
                }
                current = decodeTree[treeIdx];
                const masked = current & BinTrieFlags.VALUE_LENGTH;
                // If the branch is a value, store it and continue
                if (masked) {
                    // If we have a legacy entity while parsing strictly, just skip the number of bytes
                    if (!strict || str.charCodeAt(strIdx) === CharCodes.SEMI) {
                        resultIdx = treeIdx;
                        excess = 0;
                    }
                    // The mask is the number of bytes of the value, including the current byte.
                    const valueLength = (masked >> 14) - 1;
                    if (valueLength === 0) {
                        break;
                    }
                    treeIdx += valueLength;
                }
            }

            if (resultIdx !== 0) {
                const valueLength = (decodeTree[resultIdx] & BinTrieFlags.VALUE_LENGTH) >> 14;
                // 1: the character is stored in the node itself (flag bits cleared);
                // 2: it is the next entry; otherwise it is the next two entries.
                ret +=
                    valueLength === 1
                        ? String.fromCharCode(decodeTree[resultIdx] & ~BinTrieFlags.VALUE_LENGTH)
                        : valueLength === 2
                            ? String.fromCharCode(decodeTree[resultIdx + 1])
                            : String.fromCharCode(decodeTree[resultIdx + 1], decodeTree[resultIdx + 2]);
                // Rewind past any characters consumed after the accepted match.
                lastIdx = strIdx - excess + 1;
            }
        }

        return ret + str.slice(lastIdx);
    };
}

/**
 * Finds the index of the node reached from `current` by following `char`.
 *
 * @param decodeTree The decode tree being walked.
 * @param current The value of the current node.
 * @param nodeIdx Index of the entry directly after the current node.
 * @param char Character code to look up.
 * @returns The index of the next node, or -1 if there is no branch for `char`.
 */
export function determineBranch(decodeTree, current, nodeIdx, char) {
    const branchCount = (current & BinTrieFlags.BRANCH_LENGTH) >> 7;
    const jumpOffset = current & BinTrieFlags.JUMP_TABLE;

    // Case 1: Single branch encoded in jump offset
    if (branchCount === 0) {
        return jumpOffset !== 0 && char === jumpOffset ? nodeIdx : -1;
    }

    // Case 2: Multiple branches encoded in jump table
    if (jumpOffset) {
        const value = char - jumpOffset;
        // Table entries are stored off by one, so an entry of 0 yields -1 (no branch).
        return value < 0 || value >= branchCount
            ? -1
            : decodeTree[nodeIdx + value] - 1;
    }

    // Case 3: Multiple branches encoded in dictionary
    // Binary search for the character.
    let lo = nodeIdx;
    let hi = lo + branchCount - 1;
    while (lo <= hi) {
        const mid = (lo + hi) >>> 1;
        const midVal = decodeTree[mid];
        if (midVal < char) {
            lo = mid + 1;
        }
        else if (midVal > char) {
            hi = mid - 1;
        }
        else {
            // The destination for each key is stored `branchCount` entries after the key.
            return decodeTree[mid + branchCount];
        }
    }
    return -1;
}

const htmlDecoder = getDecoder(htmlDecodeTree);
const xmlDecoder = getDecoder(xmlDecodeTree);

/**
 * Decodes an HTML string, allowing for entities not terminated by a semi-colon.
 *
 * @param str The string to decode.
 * @returns The decoded string.
 */
export function decodeHTML(str) {
    return htmlDecoder(str, false);
}

/**
 * Decodes an HTML string, requiring all entities to be terminated by a semi-colon.
 *
 * @param str The string to decode.
 * @returns The decoded string.
 */
export function decodeHTMLStrict(str) {
    return htmlDecoder(str, true);
}

/**
 * Decodes an XML string, requiring all entities to be terminated by a semi-colon.
 *
 * @param str The string to decode.
 * @returns The decoded string.
 */
export function decodeXML(str) {
    return xmlDecoder(str, true);
}
//# sourceMappingURL=decode.js.map