From 22e6ec310562efd602a1c144bfdce362cccf8194 Mon Sep 17 00:00:00 2001 From: amishaj-eb <122432448+amishaj-eb@users.noreply.github.com> Date: Wed, 6 Dec 2023 12:07:50 +0530 Subject: [PATCH 1/3] Removing evidon references --- crawler_detect/resources/crawler_regex_list.txt | 1 - crawler_detect/resources/known_crawler_useragents.txt | 2 +- 2 files changed, 1 insertion(+), 2 deletions(-) diff --git a/crawler_detect/resources/crawler_regex_list.txt b/crawler_detect/resources/crawler_regex_list.txt index 8fa8ec6..22fa8af 100644 --- a/crawler_detect/resources/crawler_regex_list.txt +++ b/crawler_detect/resources/crawler_regex_list.txt @@ -188,7 +188,6 @@ Embedly europarchive\.org evc-batch\/[0-9] EventMachine HttpClient -Evidon Evrinid ExactSearch ExaleadCloudview diff --git a/crawler_detect/resources/known_crawler_useragents.txt b/crawler_detect/resources/known_crawler_useragents.txt index ea2c07a..a37e231 100644 --- a/crawler_detect/resources/known_crawler_useragents.txt +++ b/crawler_detect/resources/known_crawler_useragents.txt @@ -170,7 +170,7 @@ iqdb/0.1 (+http://iqdb.org/) Mozilla/5.0 (compatible; GimmeUSAbot/1.0; +https://gimmeusa.com/pages/crawler) Motoricerca-Robots.txt-Checker/1.0 (http://tool.motoricerca.info/robots-checker.phtml) Mozilla/5.0 (compatible; Tagoobot/3.0; +http://www.tagoo.ru) -Mozilla/5.0 (Windows NT 6.1; WOW64; rv:26.0) Gecko/20100101 Firefox/26.0 Evidon (lab@evidon.com) +Mozilla/5.0 (Windows NT 6.1; WOW64; rv:26.0) Gecko/20100101 Firefox Mozilla/5.0 (compatible; Nmap Scripting Engine; http://nmap.org/book/nse.html) yacybot (i386 Linux 2.6.32-22-generic; java 1.6.0_20; Europe/de) http://yacy.net/bot.html Mozilla/5.0 (Linux; Android 4.4.3; HTC One Build/KTU84L) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2125.111 Mobile Safari/537.36 DareBoost From e2b459122331e379fd1f78740cc80f576c04895f Mon Sep 17 00:00:00 2001 From: Ashish Date: Tue, 9 Jan 2024 05:40:57 +0530 Subject: [PATCH 2/3] changed incorrect useragent --- 
crawler_detect/resources/known_crawler_useragents.txt | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/crawler_detect/resources/known_crawler_useragents.txt b/crawler_detect/resources/known_crawler_useragents.txt index a37e231..67c3907 100644 --- a/crawler_detect/resources/known_crawler_useragents.txt +++ b/crawler_detect/resources/known_crawler_useragents.txt @@ -170,7 +170,7 @@ iqdb/0.1 (+http://iqdb.org/) Mozilla/5.0 (compatible; GimmeUSAbot/1.0; +https://gimmeusa.com/pages/crawler) Motoricerca-Robots.txt-Checker/1.0 (http://tool.motoricerca.info/robots-checker.phtml) Mozilla/5.0 (compatible; Tagoobot/3.0; +http://www.tagoo.ru) -Mozilla/5.0 (Windows NT 6.1; WOW64; rv:26.0) Gecko/20100101 Firefox +Mozilla/5.0 (Windows NT 6.1; WOW64; rv:26.0) Gecko/20100101 Firefox/26.0 Mozilla/5.0 (compatible; Nmap Scripting Engine; http://nmap.org/book/nse.html) yacybot (i386 Linux 2.6.32-22-generic; java 1.6.0_20; Europe/de) http://yacy.net/bot.html Mozilla/5.0 (Linux; Android 4.4.3; HTC One Build/KTU84L) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2125.111 Mobile Safari/537.36 DareBoost From a44977e5dd30a6b94c020cef9848c1b9815fb4dc Mon Sep 17 00:00:00 2001 From: Ashish Date: Tue, 9 Jan 2024 16:03:11 +0530 Subject: [PATCH 3/3] removed Evidon bot/crawler --- crawler_detect/resources/known_crawler_useragents.txt | 1 - 1 file changed, 1 deletion(-) diff --git a/crawler_detect/resources/known_crawler_useragents.txt b/crawler_detect/resources/known_crawler_useragents.txt index 67c3907..daf7314 100644 --- a/crawler_detect/resources/known_crawler_useragents.txt +++ b/crawler_detect/resources/known_crawler_useragents.txt @@ -170,7 +170,6 @@ iqdb/0.1 (+http://iqdb.org/) Mozilla/5.0 (compatible; GimmeUSAbot/1.0; +https://gimmeusa.com/pages/crawler) Motoricerca-Robots.txt-Checker/1.0 (http://tool.motoricerca.info/robots-checker.phtml) Mozilla/5.0 (compatible; Tagoobot/3.0; +http://www.tagoo.ru) -Mozilla/5.0 (Windows NT 6.1; WOW64; rv:26.0) Gecko/20100101 
Firefox/26.0 Mozilla/5.0 (compatible; Nmap Scripting Engine; http://nmap.org/book/nse.html) yacybot (i386 Linux 2.6.32-22-generic; java 1.6.0_20; Europe/de) http://yacy.net/bot.html Mozilla/5.0 (Linux; Android 4.4.3; HTC One Build/KTU84L) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/40.0.2125.111 Mobile Safari/537.36 DareBoost