Skip to content

Commit

Permalink
Separate #applyRule; detect CustomEvent support; Bug fix for `par…
Browse files Browse the repository at this point in the history
…am` mode; Decoding func args
  • Loading branch information
PRO-2684 committed Apr 10, 2024
1 parent 0c1efe8 commit cdb3bed
Show file tree
Hide file tree
Showing 4 changed files with 158 additions and 126 deletions.
12 changes: 7 additions & 5 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ purifier.importRules(rules); // Import rules
const additionalRules = {}; // You can also add your own rules
purifier.importRules(additionalRules);
purifier.addEventListener("statisticschange", e => { // Add an event listener for statistics change
console.log("Statistics changed to:", e.detail);
console.log("Statistics changed to:", e.detail || purifier.getStatistics());
});
purifier.purify("https://example.com/?utm_source=123").then(console.log); // Purify a URL
```
Expand Down Expand Up @@ -69,7 +69,8 @@ new Purlfy({
- `clearRules(): void`: Clear all imported rules.
- `getStatistics(): object`: Get statistics.
- `addEventListener("statisticschange", callback: function): void`: Add an event listener for statistics change.
- The `callback` function will receive an `Event` object with the `detail` property containing the new statistics. (`detail` might not work on nodejs - call `getStatistics`)
- The `callback` function will receive a `CustomEvent` or an `Event` object, depending on whether the platform supports `CustomEvent`.
- If the platform supports `CustomEvent`, the `detail` property of the event object will contain the new statistics.
- `removeEventListener("statisticschange", callback: function): void`: Remove an event listener for statistics change.

#### Properties
Expand Down Expand Up @@ -217,14 +218,15 @@ Under Blacklist mode, the parameters specified in `params` will be removed, and
Under Specific Parameter mode, pURLfy will:

1. Attempt to extract the parameters specified in `params` in order, until the first existing parameter is matched.
2. Decode the parameter value using the decoding functions specified in the `decode` array in order (if the `decode` value is invalid, this decoding function will be skipped).
2. Decode the parameter value using the decoding functions specified in the `decode` array in order (if any `decode` value is invalid or throws an error, it is considered a failure and the original URL is returned).
3. Use the final result as the new URL.
4. If `continue` is not set to `false`, purify the new URL again.

Currently supported `decode` functions are:
Some decoding functions support parameters; simply append them to the function name, separated by colons (`:`): `func:arg1:arg2...:argn`. The following decoding functions are currently supported:

- `url`: URL decoding (`decodeURIComponent`)
- `base64`: Base64 decoding (`decodeURIComponent(escape(atob(s)))`)
- `base64`: Base64 decoding (`decodeURIComponent(escape(atob(s.replaceAll('_', '/').replaceAll('-', '+'))))`)
- `slice:start:end`: String slicing (`s.slice(start, end)`), `start` and `end` will be converted to integers

#### 🟣 Regex Mode `regex`

Expand Down
14 changes: 8 additions & 6 deletions README_zh.md
Original file line number Diff line number Diff line change
Expand Up @@ -32,7 +32,7 @@ const additionalRules = {}; // 你也可以添加自己的规则
purifier.importRules(additionalRules);
purifier.importRules(rules); // 导入规则
purifier.addEventListener("statisticschange", e => { // 添加统计数据变化的事件监听器
console.log("Statistics changed to:", e.detail);
console.log("Statistics changed to:", e.detail || purifier.getStatistics());
});
purifier.purify("https://example.com/?utm_source=123").then(console.log); // 净化一个 URL
```
Expand Down Expand Up @@ -69,7 +69,8 @@ new Purlfy({
- `clearRules(): void`: 清空所有已导入的规则
- `getStatistics(): object`: 获取统计数据
- `addEventListener("statisticschange", callback: function): void`: 添加统计数据变化的事件监听器
- `callback` 函数会接收一个 `Event` 对象,其中 `detail` 属性为新的统计数据
- 根据平台是否支持,`callback` 函数会接收一个 `CustomEvent` / `Event` 对象
- 若支持 `CustomEvent`,则其 `detail` 属性为新的统计数据
- `removeEventListener("statisticschange", callback: function): void`: 移除统计数据变化的事件监听器

#### 属性
Expand Down Expand Up @@ -217,14 +218,15 @@ new Purlfy({
取特定参数模式下,pURLfy 会:

1. 依次尝试取出 `params` 中指定的参数,直到匹配到第一个存在的参数
2. 使用 `decode` 数组中指定的解码函数依次对参数值进行解码 (若 `decode` 值无效,则跳过这个解码函数)
2. 使用 `decode` 数组中指定的解码函数依次对参数值进行解码 (若任一 `decode` 值无效或执行出错,则认定失败,返回原 URL)
3. 将最终的结果作为新的 URL
4. 若 `continue` 未被设置为 `false`,则再次净化新的 URL

`decode` 目前支持如下值:
部分解码函数支持传入参数,只需用 `:` 分隔即可:`func:arg1:arg2...:argn`。目前支持的解码函数如下:

- `url`: 解码 URL 编码 (`decodeURIComponent`)
- `base64`: 解码 Base64 编码 (`decodeURIComponent(escape(atob(s)))`)
- `url`: URL 解码 (`decodeURIComponent`)
- `base64`: Base64 解码 (`decodeURIComponent(escape(atob(s.replaceAll('_', '/').replaceAll('-', '+'))))`)
- `slice:start:end`: 截取字符串 (`s.slice(start, end)`),`start` 和 `end` 会被转换为整数

#### 🟣 正则模式 `regex`

Expand Down
250 changes: 139 additions & 111 deletions purlfy.js
Original file line number Diff line number Diff line change
Expand Up @@ -5,7 +5,8 @@ class Purlfy extends EventTarget {
#log = console.log.bind(console, "\x1b[38;2;220;20;60m[pURLfy]\x1b[0m");
#paramDecoders = {
"url": decodeURIComponent,
"base64": s => decodeURIComponent(escape(atob(s))),
"base64": s => decodeURIComponent(escape(atob(s.replaceAll('_', '/').replaceAll('-', '+')))),
"slice": (s, start, end) => s.slice(parseInt(start), end ? parseInt(end) : undefined),
};
#statistics = {
url: 0,
Expand Down Expand Up @@ -93,147 +94,174 @@ class Purlfy extends EventTarget {
return null;
}

#onStatisticsChange() {
this.dispatchEvent(new Event("statisticschange", {
detail: this.#statistics
}));
}

async purify(originalUrl) { // Purify the given URL based on `rules`
let shallContinue = true;
let url = originalUrl;
let firstRule = null;
let iteration = 0;
this.#log("Purifying URL:", url);
while (shallContinue && iteration++ < this.maxIterations) {
const logi = (...args) => this.#log(`[#${iteration}]`, ...args);
let urlObj;
if (URL.canParse(url)) {
urlObj = new URL(url);
} else {
logi(`Cannot parse URL ${url}`);
return url;
}
const protocol = urlObj.protocol;
if (protocol !== "http:" && protocol !== "https:") { // Not a valid HTTP URL
logi(`Not a HTTP URL: ${url}`);
return url;
}
const hostAndPath = urlObj.host + urlObj.pathname;
const parts = hostAndPath.split("/").filter(part => part !== "");
const rule = this.#matchRule(parts);
if (!rule) { // No matching rule found
logi(`No matching rule found for ${url}.`);
return url;
}
firstRule ??= rule;
logi(`Matching rule: ${rule.description} by ${rule.author}`);
const mode = rule.mode;
const paramsCntBefore = urlObj.searchParams.size;
shallContinue = false;
switch (mode) { // Purifies `urlObj` based on the rule
case "white": { // Whitelist mode
const newParams = new URLSearchParams();
for (const param of rule.params) {
if (urlObj.searchParams.has(param)) {
newParams.set(param, urlObj.searchParams.get(param));
}
async #applyRule(urlObj, rule, logFunc) { // Apply the given rule to the given URL object, returning the new URL object and whether to continue
const mode = rule.mode;
const lengthBefore = urlObj.href.length;
const paramsCntBefore = urlObj.searchParams.size;
let shallContinue = false;
switch (mode) { // Purifies `urlObj` based on the rule
case "white": { // Whitelist mode
const newParams = new URLSearchParams();
for (const param of rule.params) {
if (urlObj.searchParams.has(param)) {
newParams.set(param, urlObj.searchParams.get(param));
}
urlObj.search = newParams.toString();
break;
}
case "black": { // Blacklist mode
for (const param of rule.params) {
urlObj.searchParams.delete(param);
}
break;
urlObj.search = newParams.toString();
break;
}
case "black": { // Blacklist mode
for (const param of rule.params) {
urlObj.searchParams.delete(param);
}
case "param": { // Specific param mode
// Decode given parameter to be used as a new URL
let paramValue = null;
for (const param of rule.params) { // Find the first available parameter value
if (urlObj.searchParams.has(param)) {
paramValue = urlObj.searchParams.get(param);
break;
}
}
if (!paramValue) {
logi("Parameter(s) not found:", rule.params.join(", "));
break;
}
case "param": { // Specific param mode
// Decode given parameter to be used as a new URL
let paramValue = null;
for (const param of rule.params) { // Find the first available parameter value
if (urlObj.searchParams.has(param)) {
paramValue = urlObj.searchParams.get(param);
break;
}
let dest = paramValue;
for (const name of (rule.decode ?? ["url"])) {
const decoder = this.#paramDecoders[name] ?? (s => s);
dest = decoder(dest);
}
urlObj = new URL(dest);
shallContinue = rule.continue ?? true;
this.#statistics.decoded++;
break;
}
case "regex": { // Regex mode
logi("Regex mode not implemented yet");
if (!paramValue) {
logFunc("Parameter(s) not found:", rule.params.join(", "));
break;
}
case "redirect": { // Redirect mode
if (!this.redirectEnabled) {
logi("Redirect mode is disabled.");
let dest = paramValue;
let success = true;
for (const cmd of (rule.decode ?? ["url"])) {
const args = cmd.split(":");
const name = args[0];
const decoder = this.#paramDecoders[name];
if (!decoder) {
logFunc("Invalid decoder:", cmd);
success = false;
break;
}
let r = null;
try {
r = await fetch(url, {
method: "HEAD",
redirect: "manual"
});
dest = decoder(dest, ...args.slice(1));
} catch (e) {
logi("Error fetching URL:", e);
logFunc(`Error decoding parameter with decoder "${name}":`, e);
break;
}
if ((r.status === 301 || r.status === 302) && r.headers.has("location")) {
let dest = r.headers.get("location");
urlObj = new URL(dest);
shallContinue = rule.continue ?? true;
this.#statistics.redirected++;
}
}
if (!success) break;
if (URL.canParse(dest)) { // Valid URL
urlObj = new URL(dest);
} else { // Invalid URL
logFunc("Invalid URL:", dest);
break;
}
case "lambda": {
if (!this.lambdaEnabled) {
logi("Lambda mode is disabled.");
break;
}
try {
const lambda = new Function("url", rule.lambda);
urlObj = lambda(urlObj);
shallContinue = rule.continue ?? true;
} catch (e) {
logi("Error executing lambda:", e);
}
shallContinue = rule.continue ?? true;
this.#statistics.decoded++;
break;
}
case "regex": { // Regex mode
logFunc("Regex mode not implemented yet");
break;
}
case "redirect": { // Redirect mode
if (!this.redirectEnabled) {
logFunc("Redirect mode is disabled.");
break;
}
let r = null;
try {
r = await fetch(urlObj.href, {
method: "HEAD",
redirect: "manual"
});
} catch (e) {
logFunc("Error fetching URL:", e);
break;
}
default: {
logi("Invalid mode:", mode);
if ((r.status === 301 || r.status === 302) && r.headers.has("location")) {
let dest = r.headers.get("location");
urlObj = new URL(dest);
shallContinue = rule.continue ?? true;
this.#statistics.redirected++;
}
break;
}
case "lambda": {
if (!this.lambdaEnabled) {
logFunc("Lambda mode is disabled.");
break;
}
try {
const lambda = new Function("url", rule.lambda);
urlObj = lambda(urlObj);
shallContinue = rule.continue ?? true;
} catch (e) {
logFunc("Error executing lambda:", e);
}
break;
}
default: {
logFunc("Invalid mode:", mode);
break;
}
}
const paramsCntAfter = urlObj.searchParams.size;
this.#statistics.param += (["white", "black"].includes(mode)) ? (paramsCntBefore - paramsCntAfter) : 0;
this.#statistics.char += Math.max(lengthBefore - urlObj.href.length, 0); // Prevent negative char count
return [urlObj, shallContinue];
}

#onStatisticsChange() {
if (typeof CustomEvent === "function") {
this.dispatchEvent(new CustomEvent("statisticschange", {
detail: this.#statistics
}));
} else {
this.dispatchEvent(new Event("statisticschange"));
}
}

async purify(originalUrl) { // Purify the given URL based on `rules`
let shallContinue = true;
let firstRule = null;
let iteration = 0;
let urlObj;
this.#log("Purifying URL:", originalUrl);
if (URL.canParse(originalUrl)) {
urlObj = new URL(originalUrl);
} else {
log(`Cannot parse URL ${originalUrl}`);
return originalUrl;
}
while (shallContinue && iteration++ < this.maxIterations) {
const logi = (...args) => this.#log(`[#${iteration}]`, ...args);
const protocol = urlObj.protocol;
if (protocol !== "http:" && protocol !== "https:") { // Not a valid HTTP URL
logi(`Not a HTTP URL: ${urlObj.href}`);
return urlObj.href;
}
const hostAndPath = urlObj.host + urlObj.pathname;
const parts = hostAndPath.split("/").filter(part => part !== "");
const rule = this.#matchRule(parts);
if (!rule) { // No matching rule found
logi(`No matching rule found for ${urlObj.href}.`);
return urlObj.href;
}
firstRule ??= rule;
logi(`Matching rule: ${rule.description} by ${rule.author}`);
[urlObj, shallContinue] = await this.#applyRule(urlObj, rule, logi);
logi("Purified URL:", urlObj.href);
const paramsCntAfter = urlObj.searchParams.size;
this.#statistics.param += (["white", "black"].includes(mode)) ? (paramsCntBefore - paramsCntAfter) : 0;
this.#statistics.char += Math.max(url.length - urlObj.href.length, 0); // Prevent negative char count
url = urlObj.href;
}
if (originalUrl === url) { // No changes made
if (originalUrl === urlObj.href) { // No changes made
this.#log("No changes made.");
return {
url: url,
url: originalUrl,
rule: `* ${firstRule.description} by ${firstRule.author}`
};
}
this.#statistics.url++;
this.#onStatisticsChange();
return {
url: url,
url: urlObj.href,
rule: `${firstRule.description} by ${firstRule.author}`
};
}
Expand Down
Loading

0 comments on commit cdb3bed

Please sign in to comment.