157 lines
4.0 KiB
JavaScript
157 lines
4.0 KiB
JavaScript
'use strict'
|
|
|
|
var entities = require('character-entities-html4')
|
|
var legacy = require('character-entities-legacy')
|
|
var hexadecimal = require('is-hexadecimal')
|
|
var decimal = require('is-decimal')
|
|
var alphanumerical = require('is-alphanumerical')
|
|
var dangerous = require('./dangerous.json')
|
|
|
|
module.exports = encode
encode.escape = escape

// Cached `hasOwnProperty`, invoked as `own.call(object, key)` so lookups
// ignore anything inherited through the prototype chain.
var own = {}.hasOwnProperty

// List of enforced escapes: these characters are encoded by default, even
// when only escaping.
var escapes = ['"', "'", '<', '>', '&', '`']

// Map of characters to names (built by inverting the HTML 4 entity table).
var characters = construct()

// Default escapes, as an expression matching any character in `escapes`.
var defaultEscapes = toExpression(escapes)

// Surrogate pairs: a high surrogate followed by a low surrogate, i.e. one
// code point outside the Basic Multilingual Plane.
var surrogatePair = /[\uD800-\uDBFF][\uDC00-\uDFFF]/g

// Single code units encoded when not merely escaping.
// This is not only non-ASCII: it covers the ASCII control characters other
// than NUL, line feed and carriage return, then DEL, a few C1 controls, and
// everything from U+00A0 up to U+FFFF.
// eslint-disable-next-line no-control-regex, unicorn/no-hex-escape
var bmp = /[\x01-\t\x0B\f\x0E-\x1F\x7F\x81\x8D\x8F\x90\x9D\xA0-\uFFFF]/g
|
|
|
|
// Encode special characters in `value`.
//
// `options` is optional.  Read here:
//
// * `subset`: list of characters to encode instead of the default escapes;
//   when given, nothing else is encoded.
// * `escapeOnly`: when truthy, only the default escapes are encoded.
// * `omitOptionalSemicolons`: forwarded to the reference builders.
//
// The whole settings object is also handed to `one`, which reads the
// remaining options from it.
//
// Returns the encoded string.
function encode(value, options) {
  var settings = options || {}
  var subset = settings.subset
  var set = subset ? toExpression(subset) : defaultEscapes
  var escapeOnly = settings.escapeOnly
  var omit = settings.omitOptionalSemicolons

  // First pass: the requested subset, or the enforced escapes.
  value = value.replace(set, replace)

  if (subset || escapeOnly) {
    return value
  }

  // Surrogate pairs go first, so that the single-unit pass below never sees
  // (and splits) the two halves of one code point.
  return value
    .replace(surrogatePair, replaceSurrogatePair)
    .replace(bmp, replace)

  // Encode a surrogate pair as one hexadecimal reference to its code point.
  function replaceSurrogatePair(pair, pos, val) {
    return toHexReference(
      (pair.charCodeAt(0) - 0xd800) * 0x400 +
        pair.charCodeAt(1) -
        0xdc00 +
        0x10000,
      // The pair spans two code units, so the next character is two ahead.
      val.charAt(pos + 2),
      omit
    )
  }

  // Encode a single code unit, passing along the character that follows it.
  function replace(char, pos, val) {
    return one(char, val.charAt(pos + 1), settings)
  }
}
|
|
|
|
// Shortcut to escape special characters in HTML.
//
// Calls `encode` with `escapeOnly` and `useNamedReferences` turned on and
// returns its result.
function escape(value) {
  return encode(value, {escapeOnly: true, useNamedReferences: true})
}
|
|
|
|
// Encode `char` according to `options`.
//
// `next` is the character following `char` in the input (or an empty
// string); the reference builders use it to decide whether the closing
// semicolon may be dropped.
//
// Options read here: `useShortestReferences`, `useNamedReferences`,
// `omitOptionalSemicolons` and `attribute`.
//
// Returns a named reference when one is wanted and known for `char`, and
// otherwise a numeric reference.
function one(char, next, options) {
  var shortest = options.useShortestReferences
  var omit = options.omitOptionalSemicolons
  var named
  var code
  var numeric
  var decimalReference

  if ((shortest || options.useNamedReferences) && own.call(characters, char)) {
    named = toNamed(characters[char], next, omit, options.attribute)
  }

  // A numeric reference is needed when there is no named one, or when it has
  // to be compared against the named one for length.
  if (shortest || !named) {
    code = char.charCodeAt(0)
    numeric = toHexReference(code, next, omit)

    // Use the shortest numeric reference when requested.
    // A simple algorithm would use decimal for all code points under 100, as
    // those are shorter than hexadecimal:
    //
    // * `&#99;` vs `&#x63;` (decimal shorter)
    // * `&#100;` vs `&#x64;` (equal)
    //
    // However, because we take `next` into consideration when `omit` is used,
    // and it would be possible that decimals are shorter on bigger values as
    // well if `next` is hexadecimal but not decimal, we instead compare both.
    if (shortest) {
      decimalReference = toDecimalReference(code, next, omit)

      if (decimalReference.length < numeric.length) {
        numeric = decimalReference
      }
    }
  }

  // Without `shortest` a named reference always wins; with it, only when it
  // is strictly shorter than the best numeric reference.
  if (named && (!shortest || named.length < numeric.length)) {
    return named
  }

  return numeric
}
|
|
|
|
// Transform an entity `name` into a named character reference.
//
// The closing semicolon is left off only when all of the following hold:
//
// * `omit` is truthy;
// * `name` is a legacy entity;
// * `name` is not listed as dangerous;
// * outside attributes (`attribute` falsy) there are no further conditions,
//   but inside attributes `next` must exist and be neither `=` nor
//   alphanumerical.
//
// Returns `&name` or `&name;`.
function toNamed(name, next, omit, attribute) {
  var value = '&' + name

  if (
    omit &&
    own.call(legacy, name) &&
    dangerous.indexOf(name) === -1 &&
    (!attribute || (next && next !== '=' && !alphanumerical(next)))
  ) {
    return value
  }

  return value + ';'
}
|
|
|
|
// Transform `code` into a hexadecimal character reference.
//
// The digits are upper-cased.  The closing semicolon is dropped only when
// `omit` is truthy and `next` exists and is not a hexadecimal digit, as such
// a digit would otherwise be read as part of the reference.
function toHexReference(code, next, omit) {
  var value = '&#x' + code.toString(16).toUpperCase()

  return omit && next && !hexadecimal(next) ? value : value + ';'
}
|
|
|
|
// Transform `code` into a decimal character reference.
//
// The closing semicolon is dropped only when `omit` is truthy and `next`
// exists and is not a decimal digit, as such a digit would otherwise be read
// as part of the reference.
function toDecimalReference(code, next, omit) {
  var value = '&#' + String(code)

  return omit && next && !decimal(next) ? value : value + ';'
}
|
|
|
|
// Create a global expression matching any one of `characters`.
//
// All characters are placed in a single character class, so the ones with a
// special meaning inside a class (`\`, `]`, `^` and `-`) are escaped first.
// Without that, `[']']` never matches, `['^', 'a']` inverts the class,
// `['a', '-', 'z']` turns into a range, and `['\\']` throws.
function toExpression(characters) {
  var body = characters.join('').replace(/[\\\]^-]/g, '\\$&')

  return new RegExp('[' + body + ']', 'g')
}
|
|
|
|
// Construct the map of characters to entity names.
//
// Inverts `entities` (entity name to character), so that a character can be
// looked up directly.  When several names share a character, the one
// enumerated last is kept.
function construct() {
  var chars = {}
  var name

  for (name in entities) {
    chars[entities[name]] = name
  }

  return chars
}
|