From 2d5d6999541add350fb396ef02dc42ca3215049e Mon Sep 17 00:00:00 2001 From: Chris O'Hara Date: Thu, 31 Oct 2013 11:41:57 +1100 Subject: [PATCH] Remove the XSS filter. The xss() function was originally a port of the XSS filter from CodeIgniter. I added it to the library because there wasn't an alternative at the time. Unfortunately I don't have the time or expertise to maintain the XSS filter or keep merging upstream changes. If you need one for your app, I suggest looking at Caja sanitisation engine maintained by Google. (https://code.google.com/p/google-caja/ source/browse/trunk/src/com/google/caja/plugin/html-sanitizer.js) Closes #123, #138, #181, #206, #210, #221, #223, #226, #227, #231, #232 --- README.md | 5 - lib/filter.js | 6 -- lib/xss.js | 228 -------------------------------------------- package.json | 2 +- test/filter.test.js | 33 ------- validator-min.js | 2 +- validator.js | 209 +--------------------------------------- 7 files changed, 3 insertions(+), 482 deletions(-) delete mode 100755 lib/xss.js diff --git a/README.md b/README.md index 528e01139..36d36f544 100755 --- a/README.md +++ b/README.md @@ -34,7 +34,6 @@ var int = sanitize('0123').toInt(); //123 var bool = sanitize('true').toBoolean(); //true var str = sanitize(' \t\r hello \n').trim(); //'hello' var str = sanitize('aaaaaaaaab').ltrim('a'); //'b' -var str = sanitize(large_input_str).xss(); var str = sanitize('<a>').entityDecode(); //'' ``` @@ -58,7 +57,6 @@ get('/', function (req, res) { req.checkHeader('referer').contains('localhost'); //Sanitize user input - req.sanitize('textarea').xss(); req.sanitize('foo').toBoolean(); //etc. @@ -130,8 +128,6 @@ toBooleanStrict() //False unless str = '1' or 'true' entityDecode() //Decode HTML entities entityEncode() escape() //Escape &, <, >, and " -xss() //Remove common XSS attack vectors from user-supplied HTML -xss(true) //Remove common XSS attack vectors from images ``` ## Extending the library @@ -221,7 +217,6 @@ var errors = validator.getErrors(); // ['Invalid email', 'String is too small'] - [oris](https://github.com/orls) - Added in() - [mren](https://github.com/mren) - Decoupled rules - [Thorsten Basse](https://github.com/tbasse) - Cleanup and refinement of existing validators -- [Neal Poole](https://github.com/nealpoole) - Port the latest xss() updates from CodeIgniter ## LICENSE diff --git a/lib/filter.js b/lib/filter.js index f828c33ee..c739d8498 100755 --- a/lib/filter.js +++ b/lib/filter.js @@ -1,5 +1,4 @@ var entities = require('./entities'); -var xss = require('./xss'); var Filter = exports.Filter = function() {} @@ -28,11 +27,6 @@ Filter.prototype.convert = Filter.prototype.sanitize = function(str) { return this; } -Filter.prototype.xss = function(is_image) { - this.modify(xss.clean(this.str, is_image)); - return this.wrap(this.str); -} - Filter.prototype.entityDecode = function() { this.modify(entities.decode(this.str)); return this.wrap(this.str); diff --git a/lib/xss.js b/lib/xss.js deleted file mode 100755 index 4a2ed9e66..000000000 --- a/lib/xss.js +++ /dev/null @@ -1,228 +0,0 @@ -//This module is adapted from the CodeIgniter framework -//The license is available at http://codeigniter.com/ - -var html_entity_decode = require('./entities').decode; - -var never_allowed_str = { - 'document.cookie': '[removed]', - 'document.write': '[removed]', - '.parentNode': '[removed]', - '.innerHTML': '[removed]', - 'window.location': '[removed]', - '-moz-binding': '[removed]', - '': '-->', - '(': '<comment>' -}; - -var never_allowed_regex = { - 'javascript\\s*:': '[removed]', - 'expression\\s*(\\(|()': '[removed]', - 'vbscript\\s*:': '[removed]', - 'Redirect\\s+302': '[removed]', - "([\"'])?data\\s*:[^\\1]*?base64[^\\1]*?,[^\\1]*?\\1?": '[removed]' -}; - -var non_displayables = [ - /%0[0-8bcef]/g, // url encoded 00-08, 11, 12, 14, 15 - /%1[0-9a-f]/g, // url encoded 16-31 - /[\x00-\x08]/g, // 00-08 - /\x0b/g, /\x0c/g, // 11,12 - /[\x0e-\x1f]/g // 14-31 -]; - -var compact_words = [ - 'javascript', 'expression', 'vbscript', - 'script', 'base64', 'applet', 'alert', - 'document', 'write', 'cookie', 'window' -]; - -exports.clean = function(str, is_image) { - - //Remove invisible characters - str = remove_invisible_characters(str); - - //Protect query string variables in URLs => 901119URL5918AMP18930PROTECT8198 - var hash; - do { - // ensure str does not contain hash before inserting it - hash = xss_hash(); - } while(str.indexOf(hash) >= 0) - str = str.replace(/\&([a-z\_0-9\-]+)\=([a-z\_0-9\-]+)/ig, hash + '$1=$2'); - - //Validate standard character entities. Add a semicolon if missing. We do this to enable - //the conversion of entities to ASCII later. - str = str.replace(/(&#?[0-9a-z]{2,})([\x00-\x20])*;?/ig, '$1;$2'); - - //Validate UTF16 two byte encoding (x00) - just as above, adds a semicolon if missing. - str = str.replace(/(&#x?)([0-9A-F]+);?/ig, '$1$2;'); - - //Un-protect query string variables - str = str.replace(new RegExp(hash, 'g'), '&'); - - //Decode just in case stuff like this is submitted: - //Google - try{ - str = decodeURIComponent(str); - } - catch(error){ - // str was not actually URI-encoded - } - - //Convert character entities to ASCII - this permits our tests below to work reliably. - //We only convert entities that are within tags since these are the ones that will pose security problems. - str = str.replace(/[a-z]+=([\'\"]).*?\1/gi, function(m, match) { - return m.replace(match, convert_attribute(match)); - }); - str = str.replace(/<\w+.*/gi, function(m) { - return m.replace(m, html_entity_decode(m)); - }); - - //Remove invisible characters again - str = remove_invisible_characters(str); - - //Convert tabs to spaces - str = str.replace('\t', ' '); - - //Captured the converted string for later comparison - var converted_string = str; - - //Remove strings that are never allowed - for (var i in never_allowed_str) { - str = str.replace(new RegExp(i, "gi"), never_allowed_str[i]); - } - - //Remove regex patterns that are never allowed - for (var i in never_allowed_regex) { - str = str.replace(new RegExp(i, 'gi'), never_allowed_regex[i]); - } - - //Compact any exploded words like: j a v a s c r i p t - // We only want to do this when it is followed by a non-word character - for (var i = 0, l = compact_words.length; i < l; i++) { - var spacified = compact_words[i].split('').join('\\s*')+'\\s*'; - - str = str.replace(new RegExp('('+spacified+')(\\W)', 'ig'), function(m, compat, after) { - return compat.replace(/\s+/g, '') + after; - }); - } - - //Remove disallowed Javascript in links or img tags - do { - var original = str; - - if (str.match(/]*?)(>|$)/gi, function(m, attributes, end_tag) { - var filtered_attributes = filter_attributes(attributes.replace('<','').replace('>','')); - filtered_attributes = filtered_attributes.replace(/href=.*?(?:alert\(|alert(|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|]*?)(\s?\/?>|$)/gi, function(m, attributes, end_tag) { - var filtered_attributes = filter_attributes(attributes.replace('<','').replace('>','')); - filtered_attributes = filtered_attributes.replace(/src=.*?(?:alert\(|alert(|javascript:|livescript:|mocha:|charset=|window\.|document\.|\.cookie|/gi, '[removed]'); - } - - } while(original !== str); - - // Remove Evil HTML Attributes (like event handlers and style) - var event_handlers = ['\\bon\\w*', '\\bstyle', '\\bformaction']; - - //Adobe Photoshop puts XML metadata into JFIF images, including namespacing, - //so we have to allow this for images - if (!is_image) { - event_handlers.push('xmlns'); - } - - do { - var attribs = []; - var count = 0; - - attribs = attribs.concat(str.match(new RegExp("("+event_handlers.join('|')+")\\s*=\\s*(\\x22|\\x27)([^\\2]*?)(\\2)", 'ig'))); - attribs = attribs.concat(str.match(new RegExp("("+event_handlers.join('|')+")\\s*=\\s*([^\\s>]*)", 'ig'))); - attribs = attribs.filter(function(element) { return element !== null; }); - - if (attribs.length > 0) { - for (var i = 0; i < attribs.length; ++i) { - attribs[i] = attribs[i].replace(new RegExp('[.\\\\+*?\\[\\^\\]$(){}=!<>|:\\-]', 'g'), '\\$&') - } - - str = str.replace(new RegExp("(<]+?)([^A-Za-z<>\\-])(.*?)("+attribs.join('|')+")(.*?)([\\s><]?)([><]*)", 'i'), function(m, a, b, c, d, e, f, g, h) { - ++count; - return a + b + ' ' + d + f + g + h; - }); - } - } while (count > 0); - - //Sanitize naughty HTML elements - //If a tag containing any of the words in the list - //below is found, the tag gets converted to entities. - //So this: - //Becomes: <blink> - var naughty = 'alert|applet|audio|basefont|base|behavior|bgsound|blink|body|embed|expression|form|frameset|frame|head|html|ilayer|iframe|input|isindex|layer|link|meta|object|plaintext|style|script|textarea|title|video|xml|xss'; - str = str.replace(new RegExp('<(/*\\s*)('+naughty+')([^><]*)([><]*)', 'gi'), function(m, a, b, c, d) { - return '<' + a + b + c + d.replace('>','>').replace('<','<'); - }); - - //Sanitize naughty scripting elements Similar to above, only instead of looking for - //tags it looks for PHP and JavaScript commands that are disallowed. Rather than removing the - //code, it simply converts the parenthesis to entities rendering the code un-executable. - //For example: eval('some code') - //Becomes: eval('some code') - str = str.replace(/(alert|cmd|passthru|eval|exec|expression|system|fopen|fsockopen|file|file_get_contents|readfile|unlink)(\s*)\((.*?)\)/gi, '$1$2($3)'); - - //This adds a bit of extra precaution in case something got through the above filters - for (var i in never_allowed_str) { - str = str.replace(new RegExp(i, "gi"), never_allowed_str[i]); - } - for (var i in never_allowed_regex) { - str = str.replace(new RegExp(i, 'gi'), never_allowed_regex[i]); - } - - //Images are handled in a special way - if (is_image && str !== converted_string) { - throw new Error('Image may contain XSS'); - } - - return str; -} - -function remove_invisible_characters(str) { - for (var i = 0, l = non_displayables.length; i < l; i++) { - str = str.replace(non_displayables[i], ''); - } - return str; -} - -function xss_hash() { - var str = '', num = 10; - while (num--) str += String.fromCharCode(Math.random() * 25 | 97); - return str; -} - -function convert_attribute(str) { - return str.replace('>','>').replace('<','<').replace('\\','\\\\'); -} - -function filter_attributes(str) { - var result = ""; - - var match = str.match(/\s*[a-z-]+\s*=\s*(\x22|\x27)([^\1]*?)\1/ig); - if (match) { - for (var i = 0; i < match.length; ++i) { - result += match[i].replace(/\*.*?\*/g, ''); - } - } - - return result; -} - diff --git a/package.json b/package.json index f2b7bcd57..bf5f62f02 100644 --- a/package.json +++ b/package.json @@ -2,7 +2,7 @@ "description" : "Data validation, filtering and sanitization for node.js", "version" : "1.5.1", "homepage" : "http://github.com/chriso/node-validator", - "keywords" : ["validator", "validation", "assert", "params", "sanitization", "xss", "entities", "sanitize", "sanitisation", "input"], + "keywords" : ["validator", "validation", "assert", "params", "sanitization", "entities", "sanitize", "sanitisation", "input"], "author" : "Chris O'Hara ", "main" : "./lib", "directories" : { "lib" : "./lib" }, diff --git a/test/filter.test.js b/test/filter.test.js index 65f3b68c4..99e288b17 100755 --- a/test/filter.test.js +++ b/test/filter.test.js @@ -132,39 +132,6 @@ module.exports = { assert.equal('½', Filter.sanitize('½').entityEncode()); }, - 'test #xss()': function () { - //Need more tests! - assert.equal('[removed] foobar', Filter.sanitize('javascript : foobar').xss()); - assert.equal('[removed] foobar', Filter.sanitize('j a vasc ri pt: foobar').xss()); - assert.equal('some text', Filter.sanitize('some text').xss()); - - assert.equal(' This is a test', Filter.sanitize(' This is a test').xss()); - assert.equal('">test', Filter.sanitize('">test').xss()); - assert.equal('

You have won

Please click the link and enter your login details: http://good.com
', Filter.sanitize('

You have won

Please click the link and enter your login details: http://good.com
').xss()); - assert.equal('prompt(1);', Filter.sanitize('prompt(1);').xss()); - assert.equal('', Filter.sanitize('').xss()); - assert.equal('', Filter.sanitize('').xss()); - assert.equal('