Skip to content

Commit

Permalink
Update constants
Browse files Browse the repository at this point in the history
  • Loading branch information
asamuzaK committed Dec 20, 2023
1 parent b812fb6 commit 59cba66
Show file tree
Hide file tree
Showing 4 changed files with 33 additions and 22 deletions.
21 changes: 13 additions & 8 deletions src/mjs/constant.js
Original file line number Diff line number Diff line change
Expand Up @@ -8,15 +8,20 @@ export const TYPE_FROM = 8;
export const TYPE_TO = -1;

/* regexp */
export const REG_CHARSET = /^charset=([\w#&\-.;]+)$/;
export const REG_DATA_URL = /data:[\w#&+\-./;=]*,/;
export const REG_DATA_URL_BASE64 = /data:[\w#&+\-./;=]*base64,[\w+/\-=]+/i;
export const REG_DATA_URL_G = /data:[\w#&+\-./;=]*,[^"]+/g;
// Whole-string match: one or more of word chars (letters, digits, `_`), `+`,
// `/`, `=`, `-`. The trailing `-` in the class is a literal hyphen.
export const REG_B64 = /^[\w+/=-]+$/;
// Whole-string match of `charset=<value>`; capture group 1 is the value.
export const REG_CHARSET = /^charset=([\w#&.;-]+)$/;
// Unanchored: `data:` followed by an optional run of media type / parameter
// characters, up to and including the first `,` reachable through that run.
export const REG_DATA_URL = /data:[\w#&+./;=-]*,/;
/**
 * Matches a base64-encoded data URL anywhere in a string (case-insensitive):
 * `data:`, an optional run of media type / parameter characters, the literal
 * `base64,`, then one or more payload characters.
 *
 * The hyphen is placed last in each character class so it is a literal `-`
 * and needs no escaping. It must stay in the first class: media types and
 * parameters commonly contain hyphens (e.g. `application/x-javascript`,
 * `charset=utf-8`). `^` is intentionally not accepted there.
 */
export const REG_DATA_URL_B64 = /data:[\w#&+./;=-]*base64,[\w+/=-]+/i;
export const REG_DATA_URL_G = /data:[\w#&+./;=-]*,[^"]+/g;
export const REG_NUM_REF = /&#(x(?:00)?[\dA-F]{2}|0?\d{1,3});?/gi;
export const REG_MIME_DOM =
/^(?:application\/(?:[\w#&\-.;]+\+)?x|image\/svg\+x|text\/(?:ht|x))ml;?/;
export const REG_MIME_TEXT = /^text\/[\w#&\-.;]+/;
export const REG_SCHEME = /^[a-z][\da-z+\-.]*$/;
export const REG_SCHEME_CUSTOM = /^(?:ext|web)\+[a-z]+$/;
/^(?:application\/(?:[\w#&.;-]+\+)?x|image\/svg\+x|text\/(?:ht|x))ml;?/;
export const REG_MIME_TEXT = /^text\/[\w#&.;-]+/;
export const REG_SCHEME = /^[a-z][\da-z+.-]*$/;
export const REG_SCHEME_EXT = /^(?:ext|web)\+[a-z]+$/;
// Unanchored, case-sensitive: matches `javascript` or `vbscript` anywhere.
export const REG_SCRIPT = /(?:java|vb)script/;
// As REG_SCRIPT, but also matches `blob` anywhere in the string.
export const REG_SCRIPT_BLOB = /(?:java|vb)script|blob/;
// Matches a raw `<`, `>`, `"` or `'`, or its percent-encoded form
// (%22 = `"`, %27 = `'`, %3C = `<`, %3E = `>`). No `i` flag, so only
// upper-case hex letters (%3C / %3E) are matched.
export const REG_TAG_QUOT = /%(?:2[27]|3[CE])|[<>"']/;
// Global variant matching only the percent-encoded forms listed above.
export const REG_TAG_QUOT_ENC_G = /%(?:2[27]|3[CE])/g;
// Global: every raw `<`, `>`, `"`, `'` or whitespace character.
export const REG_TAG_QUOT_SPACE_G = /[<>"'\s]/g;
// Whole-string match of a single percent-encoded byte (`%` + two hex digits),
// case-insensitive.
export const REG_URL_ENC = /^%[\dA-F]{2}$/i;
12 changes: 6 additions & 6 deletions src/mjs/sanitizer.js
Original file line number Diff line number Diff line change
Expand Up @@ -10,8 +10,8 @@ import {
parseBase64, parseURLEncodedNumCharRef, URISchemes
} from './uri-util.js';
import {
HEX, REG_DATA_URL, REG_DATA_URL_BASE64, REG_DATA_URL_G, REG_MIME_DOM,
REG_SCRIPT_BLOB, REG_TAG_QUOT
HEX, REG_DATA_URL, REG_DATA_URL_B64, REG_DATA_URL_G, REG_MIME_DOM,
REG_SCRIPT_BLOB, REG_TAG_QUOT, REG_TAG_QUOT_ENC_G, REG_TAG_QUOT_SPACE_G
} from './constant.js';

/* typedef */
Expand Down Expand Up @@ -68,8 +68,8 @@ export class URLSanitizer extends URISchemes {
const items = [...matchedDataUrls].reverse();
for (const item of items) {
let [dataUrl] = item;
if (REG_DATA_URL_BASE64.test(dataUrl)) {
[dataUrl] = REG_DATA_URL_BASE64.exec(dataUrl);
if (REG_DATA_URL_B64.test(dataUrl)) {
[dataUrl] = REG_DATA_URL_B64.exec(dataUrl);
}
this.#nest++;
this.#recurse.add(dataUrl);
Expand Down Expand Up @@ -276,12 +276,12 @@ export class URLSanitizer extends URISchemes {
}
if (urlToSanitize) {
sanitizedUrl = urlToSanitize
.replace(/[<>"'\s]/g, getURLEncodedString)
.replace(REG_TAG_QUOT_SPACE_G, getURLEncodedString)
.replace(/%26/g, escapeURLEncodedHTMLChars);
if (finalize) {
if (!isDataUrl) {
sanitizedUrl = sanitizedUrl
.replace(/%(?:2(?:2|7)|3(?:C|E))/g, escapeURLEncodedHTMLChars);
.replace(REG_TAG_QUOT_ENC_G, escapeURLEncodedHTMLChars);
}
this.#nest = 0;
}
Expand Down
13 changes: 7 additions & 6 deletions src/mjs/uri-util.js
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@ import textChars from '../lib/file/text-chars.json' assert { type: 'json' };
import uriSchemes from '../lib/iana/uri-schemes.json' assert { type: 'json' };
import { getType, isString } from './common.js';
import { FileReader } from './file-reader.js';
import { HEX, REG_SCHEME, REG_SCHEME_CUSTOM, REG_SCRIPT } from './constant.js';
import {
HEX, REG_B64, REG_NUM_REF, REG_SCHEME, REG_SCHEME_EXT, REG_SCRIPT, REG_URL_ENC
} from './constant.js';

/**
* get URL encoded string
Expand All @@ -31,7 +33,7 @@ export const getURLEncodedString = str => {
* @returns {string} - escaped URL encoded HTML special char / URL encoded char
*/
export const escapeURLEncodedHTMLChars = ch => {
if (isString(ch) && /^%[\dA-F]{2}$/i.test(ch)) {
if (isString(ch) && REG_URL_ENC.test(ch)) {
ch = ch.toUpperCase();
}
const [amp, num, lt, gt, quot, apos] =
Expand Down Expand Up @@ -61,7 +63,7 @@ export const escapeURLEncodedHTMLChars = ch => {
export const parseBase64 = data => {
if (!isString(data)) {
throw new TypeError(`Expected String but got ${getType(data)}.`);
} else if (!/^[\w+/\-=]+$/.test(data)) {
} else if (!REG_B64.test(data)) {
throw new Error(`Invalid base64 data: ${data}`);
}
const bin = atob(data);
Expand Down Expand Up @@ -94,8 +96,7 @@ export const parseURLEncodedNumCharRef = (str, nest = 0) => {
let res = decodeURIComponent(str);
if (/&#/.test(res)) {
const textCharCodes = new Set(textChars);
const items =
[...res.matchAll(/&#(x(?:00)?[\dA-F]{2}|0?\d{1,3});?/gi)].reverse();
const items = [...res.matchAll(REG_NUM_REF)].reverse();
for (const item of items) {
const [numCharRef, value] = item;
let num;
Expand Down Expand Up @@ -209,7 +210,7 @@ export class URISchemes {
const { protocol } = new URL(uri);
const scheme = protocol.replace(/:$/, '');
const schemeParts = scheme.split('+');
res = (!REG_SCRIPT.test(scheme) && REG_SCHEME_CUSTOM.test(scheme)) ||
res = (!REG_SCRIPT.test(scheme) && REG_SCHEME_EXT.test(scheme)) ||
schemeParts.every(s => this.#schemes.has(s));
} catch (e) {
res = false;
Expand Down
9 changes: 7 additions & 2 deletions types/mjs/constant.d.ts
Original file line number Diff line number Diff line change
@@ -1,14 +1,19 @@
export const HEX: 16;
export const TYPE_FROM: 8;
export const TYPE_TO: -1;
export const REG_B64: RegExp;
export const REG_CHARSET: RegExp;
export const REG_DATA_URL: RegExp;
export const REG_DATA_URL_BASE64: RegExp;
export const REG_DATA_URL_B64: RegExp;
export const REG_DATA_URL_G: RegExp;
export const REG_NUM_REF: RegExp;
export const REG_MIME_DOM: RegExp;
export const REG_MIME_TEXT: RegExp;
export const REG_SCHEME: RegExp;
export const REG_SCHEME_CUSTOM: RegExp;
export const REG_SCHEME_EXT: RegExp;
export const REG_SCRIPT: RegExp;
export const REG_SCRIPT_BLOB: RegExp;
export const REG_TAG_QUOT: RegExp;
export const REG_TAG_QUOT_ENC_G: RegExp;
export const REG_TAG_QUOT_SPACE_G: RegExp;
export const REG_URL_ENC: RegExp;

0 comments on commit 59cba66

Please sign in to comment.