From 2298bc4b1fdcdbe8f5fc8f405033e879fe9bcd4e Mon Sep 17 00:00:00 2001 From: Brian White Date: Wed, 18 Jan 2017 04:19:03 -0500 Subject: [PATCH] querystring: improve parse() performance PR-URL: https://github.com/nodejs/node/pull/10874 Reviewed-By: James M Snell --- benchmark/querystring/querystring-parse.js | 33 +++---- lib/querystring.js | 104 ++++++++++++--------- 2 files changed, 74 insertions(+), 63 deletions(-) diff --git a/benchmark/querystring/querystring-parse.js b/benchmark/querystring/querystring-parse.js index d78ef99f84f3d4..fe14d95a53f0a0 100644 --- a/benchmark/querystring/querystring-parse.js +++ b/benchmark/querystring/querystring-parse.js @@ -3,35 +3,26 @@ var common = require('../common.js'); var querystring = require('querystring'); var v8 = require('v8'); -var types = [ - 'noencode', - 'multicharsep', - 'encodemany', - 'encodelast', - 'multivalue', - 'multivaluemany', - 'manypairs' -]; +var inputs = { + noencode: 'foo=bar&baz=quux&xyzzy=thud', + multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud', + encodefake: 'foo=%©ar&baz=%A©uux&xyzzy=%©ud', + encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d', + encodelast: 'foo=bar&baz=quux&xyzzy=thu%64', + multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz', + multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' + + 'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz', + manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z' +}; var bench = common.createBenchmark(main, { - type: types, + type: Object.keys(inputs), n: [1e6], }); function main(conf) { var type = conf.type; var n = conf.n | 0; - - var inputs = { - noencode: 'foo=bar&baz=quux&xyzzy=thud', - multicharsep: 'foo=bar&&&&&&&&&&baz=quux&&&&&&&&&&xyzzy=thud', - encodemany: '%66%6F%6F=bar&%62%61%7A=quux&xyzzy=%74h%75d', - encodelast: 'foo=bar&baz=quux&xyzzy=thu%64', - multivalue: 'foo=bar&foo=baz&foo=quux&quuy=quuz', - multivaluemany: 'foo=bar&foo=baz&foo=quux&quuy=quuz&foo=abc&foo=def&' + - 'foo=ghi&foo=jkl&foo=mno&foo=pqr&foo=stu&foo=vwxyz', - manypairs: 'a&b&c&d&e&f&g&h&i&j&k&l&m&n&o&p&q&r&s&t&u&v&w&x&y&z' - }; var input = inputs[type]; // Force-optimize querystring.parse() so that the benchmark doesn't get diff --git a/lib/querystring.js b/lib/querystring.js index ff1b47fb51c614..c22c7c998dbd21 100644 --- a/lib/querystring.js +++ b/lib/querystring.js @@ -1,8 +1,19 @@ -// Query String Utilities - 'use strict'; -const QueryString = exports; +const QueryString = module.exports = { + unescapeBuffer, + // `unescape()` is a JS global, so we need to use a different local name + unescape: qsUnescape, + + // `escape()` is a JS global, so we need to use a different local name + escape: qsEscape, + + stringify, + encode: stringify, + + parse, + decode: parse +}; const Buffer = require('buffer').Buffer; // This constructor is used to store parsed query string values. Instantiating @@ -13,7 +24,7 @@ ParsedQueryString.prototype = Object.create(null); // a safe fast alternative to decodeURIComponent -QueryString.unescapeBuffer = function(s, decodeSpaces) { +function unescapeBuffer(s, decodeSpaces) { var out = Buffer.allocUnsafe(s.length); var state = 0; var n, m, hexchar; @@ -77,7 +88,7 @@ QueryString.unescapeBuffer = function(s, decodeSpaces) { // TODO support returning arbitrary buffers. return out.slice(0, outIndex - 1); -}; +} function qsUnescape(s, decodeSpaces) { @@ -87,13 +98,12 @@ function qsUnescape(s, decodeSpaces) { return QueryString.unescapeBuffer(s, decodeSpaces).toString(); } } -QueryString.unescape = qsUnescape; var hexTable = new Array(256); for (var i = 0; i < 256; ++i) hexTable[i] = '%' + ((i < 16 ? '0' : '') + i.toString(16)).toUpperCase(); -QueryString.escape = function(str) { +function qsEscape(str) { // replaces encodeURIComponent // http://www.ecma-international.org/ecma-262/5.1/#sec-15.1.3.4 if (typeof str !== 'string') { @@ -164,9 +174,9 @@ QueryString.escape = function(str) { if (lastPos < str.length) return out + str.slice(lastPos); return out; -}; +} -var stringifyPrimitive = function(v) { +function stringifyPrimitive(v) { if (typeof v === 'string') return v; if (typeof v === 'number' && isFinite(v)) @@ -174,10 +184,10 @@ var stringifyPrimitive = function(v) { if (typeof v === 'boolean') return v ? 'true' : 'false'; return ''; -}; +} -QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) { +function stringify(obj, sep, eq, options) { sep = sep || '&'; eq = eq || '='; @@ -215,34 +225,43 @@ QueryString.stringify = QueryString.encode = function(obj, sep, eq, options) { return fields; } return ''; -}; +} -// Parse a key/val string. -QueryString.parse = QueryString.decode = function(qs, sep, eq, options) { - sep = sep || '&'; - eq = eq || '='; +function charCodes(str) { + if (str.length === 0) return []; + if (str.length === 1) return [str.charCodeAt(0)]; + const ret = []; + for (var i = 0; i < str.length; ++i) + ret[ret.length] = str.charCodeAt(i); + return ret; +} +const defSepCodes = [38]; // & +const defEqCodes = [61]; // = +// Parse a key/val string. +function parse(qs, sep, eq, options) { const obj = new ParsedQueryString(); if (typeof qs !== 'string' || qs.length === 0) { return obj; } - if (typeof sep !== 'string') - sep += ''; - - const eqLen = eq.length; - const sepLen = sep.length; + var sepCodes = (!sep ? defSepCodes : charCodes(sep + '')); + var eqCodes = (!eq ? defEqCodes : charCodes(eq + '')); + const sepLen = sepCodes.length; + const eqLen = eqCodes.length; - var maxKeys = 1000; + var pairs = 1000; if (options && typeof options.maxKeys === 'number') { - maxKeys = options.maxKeys; + // -1 is used in place of a value like Infinity for meaning + // "unlimited pairs" because of additional checks V8 (at least as of v5.4) + // has to do when using variables that contain values like Infinity. Since + // `pairs` is always decremented and checked explicitly for 0, -1 works + // effectively the same as Infinity, while providing a significant + // performance boost. + pairs = (options.maxKeys > 0 ? options.maxKeys : -1); } - var pairs = Infinity; - if (maxKeys > 0) - pairs = maxKeys; - var decode = QueryString.unescape; if (options && typeof options.decodeURIComponent === 'function') { decode = options.decodeURIComponent; @@ -262,7 +281,7 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) { const code = qs.charCodeAt(i); // Try matching key/value pair separator (e.g. '&') - if (code === sep.charCodeAt(sepIdx)) { + if (code === sepCodes[sepIdx]) { if (++sepIdx === sepLen) { // Key/value pair separator match! const end = i - sepIdx + 1; @@ -284,10 +303,10 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) { keys[keys.length] = key; } else { const curValue = obj[key]; - // `instanceof Array` is used instead of Array.isArray() because it - // is ~15-20% faster with v8 4.7 and is safe to use because we are - // using it with values being created within this function - if (curValue instanceof Array) + // A simple Array-specific property check is enough here to + // distinguish from a string value and is faster and still safe since + // we are generating all of the values being assigned. + if (curValue.pop) curValue[curValue.length] = value; else obj[key] = [curValue, value]; @@ -322,7 +341,7 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) { // Try matching key/value separator (e.g. '=') if we haven't already if (eqIdx < eqLen) { - if (code === eq.charCodeAt(eqIdx)) { + if (code === eqCodes[eqIdx]) { if (++eqIdx === eqLen) { // Key/value separator match! const end = i - eqIdx + 1; @@ -354,12 +373,12 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) { if (code === 43/*+*/) { if (eqIdx < eqLen) { - if (i - lastPos > 0) + if (lastPos < i) key += qs.slice(lastPos, i); key += '%20'; keyEncoded = true; } else { - if (i - lastPos > 0) + if (lastPos < i) value += qs.slice(lastPos, i); value += '%20'; valEncoded = true; @@ -369,7 +388,7 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) { } // Check if we have leftover key or value data - if (pairs > 0 && (lastPos < qs.length || eqIdx > 0)) { + if (pairs !== 0 && (lastPos < qs.length || eqIdx > 0)) { if (lastPos < qs.length) { if (eqIdx < eqLen) key += qs.slice(lastPos); @@ -387,10 +406,10 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) { keys[keys.length] = key; } else { const curValue = obj[key]; - // `instanceof Array` is used instead of Array.isArray() because it - // is ~15-20% faster with v8 4.7 and is safe to use because we are - // using it with values being created within this function - if (curValue instanceof Array) + // A simple Array-specific property check is enough here to + // distinguish from a string value and is faster and still safe since + // we are generating all of the values being assigned. + if (curValue.pop) curValue[curValue.length] = value; else obj[key] = [curValue, value]; @@ -398,11 +417,12 @@ QueryString.parse = QueryString.decode = function(qs, sep, eq, options) { } return obj; -}; +} // v8 does not optimize functions with try-catch blocks, so we isolate them here -// to minimize the damage +// to minimize the damage (Note: no longer true as of V8 5.4 -- but still will +// not be inlined). function decodeStr(s, decoder) { try { return decoder(s);