Skip to content

Commit

Permalink
fix: speedup header parsing & bug fixes & nyc coverage
Browse files Browse the repository at this point in the history
  • Loading branch information
KhafraDev committed Jan 11, 2023
1 parent cb631ec commit b7b9645
Show file tree
Hide file tree
Showing 6 changed files with 151 additions and 77 deletions.
5 changes: 4 additions & 1 deletion lib/formdata/constants.js
Original file line number Diff line number Diff line change
Expand Up @@ -27,12 +27,15 @@ const chars = {

const emptyBuffer = Buffer.alloc(0)

const crlfBuffer = Buffer.from('\r\n')

const maxHeaderLength = 16 * 1024

module.exports = {
states,
chars,
headerStates,
emptyBuffer,
maxHeaderLength
maxHeaderLength,
crlfBuffer
}
153 changes: 81 additions & 72 deletions lib/formdata/parser.js
Original file line number Diff line number Diff line change
Expand Up @@ -2,12 +2,15 @@

const { Writable } = require('stream')
const { win32: { basename } } = require('path')
const { states, chars, headerStates, emptyBuffer, maxHeaderLength } = require('./constants')
const { states, chars, headerStates, emptyBuffer, maxHeaderLength, crlfBuffer } = require('./constants')
const { FileStream } = require('./filestream')
const {
collectHTTPQuotedStringLenient,
findCharacterType
findCharacterType,
hasHeaderValue,
isWhitespace
} = require('./util')
const { TextSearch } = require('./textsearch')
const {
parseMIMEType,
collectASequenceOfCodePoints,
Expand Down Expand Up @@ -43,13 +46,19 @@ class FormDataParser extends Writable {
},
headers: {
attributes: {}
}
},
headerEndSearch: new TextSearch(Buffer.from('\r\n\r\n')),
headerBuffersChecked: 0
}

#opts = {}

constructor (opts) {
super({ highWaterMark: opts?.highWaterMark })
super({
highWaterMark: opts?.highWaterMark,
autoDestroy: true,
emitClose: true
})

const contentType = opts.headers.get?.('content-type') ?? opts.headers['content-type']
const mimeType = contentType ? parseMIMEType(contentType) : null
Expand Down Expand Up @@ -114,6 +123,10 @@ class FormDataParser extends Writable {
this.#info.stream = null
}

if (this.#info.headerEndSearch.lookedAt) {
this.emit('error', new Error('Malformed part header'))
}

cb(err)
}

Expand Down Expand Up @@ -178,76 +191,60 @@ class FormDataParser extends Writable {
// arbitrary length. Therefore it's easier to read the headers, and
// then parse once we receive 2 CRLFs which marks the body's start.

if (this.#byteOffset < 4) {
if (this.#byteOffset < crlfBuffer.length * 2) {
return callback()
}

// number of bytes to remove from the last buffer
let bytesToRemove = 0

done: { // eslint-disable-line no-labels
this.#info.headerState = headerStates.DEFAULT

for (let j = 0; j < this.#buffers.length; j++) {
const buffer = this.#buffers[j]

for (let i = 0; i < buffer.length; i++) {
const byte = buffer[i]
const state = this.#info.headerState

if (byte === chars.cr && state === headerStates.DEFAULT) {
this.#info.headerState = headerStates.FIRST
} else if (byte === chars.lf && state === headerStates.FIRST) {
this.#info.headerState = headerStates.SECOND
} else if (byte === chars.cr && state === headerStates.SECOND) {
this.#info.headerState = headerStates.THIRD
} else if (byte === chars.lf && state === headerStates.THIRD) {
// Got \r\n\r\n which marks the end of the headers.
this.#state = states.READ_BODY
bytesToRemove += i

const removed = this.consume(bytesToRemove - 3) // remove \r\n\r
const headers = this.#info.headers

// This is a very lazy implementation.
if ('content-type' in headers && headers['content-type'].length === 0) {
const mimeUnparsed = removed.toString(this.#opts.defCharset)
const mimeType = parseMIMEType(mimeUnparsed)

if (mimeType !== 'failure') {
headers['content-type'] = mimeType.essence
}
}

this.#parseRawHeaders(removed)
this.consume(4)

break done // eslint-disable-line no-labels
} else {
this.#info.headerState = headerStates.DEFAULT

if (state === headerStates.SECOND) {
const removed = this.consume(bytesToRemove + i - 2)

if (this.#info.skipPart) {
// If we are meant to skip this part, we shouldn't
// parse the headers. We just need to consume the bytes.
continue
}

this.#parseRawHeaders(removed)
this.consume(2)
bytesToRemove -= removed.length + 2
}
}
}
let i = this.#info.headerBuffersChecked

while (!this.#info.headerEndSearch.finished && i < this.#buffers.length) {
this.#info.headerEndSearch.write(this.#buffers[i++])
this.#info.headerBuffersChecked++

bytesToRemove += buffer.length
if (this.#info.headerEndSearch.lookedAt > maxHeaderLength) {
this.#info.skipPart = 'Malformed part header'
break
}
}

if (!this.#info.headerEndSearch.finished) {
return callback()
}

const consumed = this.consume(this.#info.headerEndSearch.lookedAt)

this.#info.headerBuffersChecked = 0
this.#info.headerEndSearch.reset()

let index = consumed.indexOf(crlfBuffer)
let start = 0

while (index !== -1 && start < index) {
const buffer = consumed.subarray(start, index)
const valueIndex = buffer.indexOf(':')

if (valueIndex === -1) {
index = consumed.indexOf(crlfBuffer, index + buffer.length)
continue
} else if (!hasHeaderValue(buffer, valueIndex + 1)) {
// HTTP/1.1 header field values can be folded onto multiple lines if the
// continuation line begins with a space or horizontal tab.
index = consumed.indexOf(crlfBuffer, index + buffer.length)
continue
}

this.#parseRawHeaders(buffer)

start = index + 2
index = consumed.indexOf(crlfBuffer, start)

if (this.#info.skipPart) {
break
}
}

this.#state = states.READ_BODY

const { attributes, ...headers } = this.#info.headers

if (
Expand All @@ -257,6 +254,7 @@ class FormDataParser extends Writable {
!attributes.name
)
) {
this.#info.headers = { attributes: {} }
// https://www.rfc-editor.org/rfc/rfc7578#section-4.2
this.#info.skipPart = 'invalid content-disposition'
} else if (
Expand All @@ -266,13 +264,18 @@ class FormDataParser extends Writable {
) {
if (this.#info.limits.files >= this.#opts.limits.files) {
this.#info.skipPart = 'file'
this.#info.headers = { attributes: {} }
continue
}

// End the active stream if there is one
this.#info.stream?.push(null)
this.#info.stream = new FileStream({ highWaterMark: this.#opts.fileHwm })
this.#info.stream.truncated = false // TODO
this.#info.stream = new FileStream({
highWaterMark: this.#opts.fileHwm,
autoDestroy: true,
emitClose: true
})
this.#info.stream.truncated = false

this.emit(
'file',
Expand All @@ -284,6 +287,7 @@ class FormDataParser extends Writable {
mimeType: headers['content-type'] || 'application/octet-stream'
}
)
this.#info.headers = { attributes: {} }
}
} else if (this.#state === states.READ_BODY) {
// A part's body can contain CRLFs so they cannot be used to
Expand Down Expand Up @@ -432,6 +436,7 @@ class FormDataParser extends Writable {
}
)

this.#info.headers = { attributes: {} }
this.#info.limits.fields++
body.chunks.length = 0
body.length = 0
Expand Down Expand Up @@ -556,11 +561,15 @@ class FormDataParser extends Writable {
continue
}

if (buffer[position.position + 1] === chars[' ']) {
position.position += 2 // skip "; "
} else {
position.position++ // skip ":"
}
position.position++ // skip semicolon

// Header values can contain leading whitespace, make sure
// we remove it all.
collectASequenceOfCodePoints(
(char) => isWhitespace(char),
buffer,
position
)

const value = collectASequenceOfCodePoints(
(char) => {
Expand Down
47 changes: 47 additions & 0 deletions lib/formdata/textsearch.js
Original file line number Diff line number Diff line change
@@ -0,0 +1,47 @@
'use strict'

class TextSearch {
/** @param {Buffer} pattern */
constructor (pattern) {
this.pattern = pattern

this.back = 0
this.lookedAt = 0
}

/**
* @param {Buffer} chunk
*/
write (chunk) {
if (this.finished) {
return true
}

for (const byte of chunk) {
this.lookedAt++

if (byte !== this.pattern[this.back]) {
this.back = 0
} else {
if (++this.back === this.pattern.length) {
return true
}
}
}

return this.back === this.pattern.length
}

reset () {
this.back = 0
this.lookedAt = 0
}

get finished () {
return this.back === this.pattern.length
}
}

module.exports = {
TextSearch
}
18 changes: 17 additions & 1 deletion lib/formdata/util.js
Original file line number Diff line number Diff line change
Expand Up @@ -33,7 +33,23 @@ function findCharacterType (type) {
}
}

function isWhitespace (char) {
return char === 0x09 || char === 0x20 || char === 0x0D || char === 0x0A
}

function hasHeaderValue (chunk, start) {
for (let i = start; i < chunk.length; i++) {
if (!isWhitespace(chunk[i])) {
return true
}
}

return false
}

module.exports = {
collectHTTPQuotedStringLenient,
findCharacterType
findCharacterType,
isWhitespace,
hasHeaderValue
}
3 changes: 2 additions & 1 deletion package.json
Original file line number Diff line number Diff line change
Expand Up @@ -46,7 +46,8 @@
"build:wasm": "node build/wasm.js --docker",
"lint": "standard | snazzy",
"lint:fix": "standard --fix | snazzy",
"test": "npm run test:tap && npm run test:node-fetch && npm run test:fetch && npm run test:wpt && npm run test:websocket && npm run test:jest && tsd",
"test": "npm run test:tap && npm run test:node-fetch && npm run test:fetch && npm run test:wpt && npm run test:websocket && npm run test:busboy && npm run test:jest && tsd",
"test:busboy": "node test/busboy/test.js",
"test:node-fetch": "node scripts/verifyVersion.js 16 || mocha test/node-fetch",
"test:fetch": "node scripts/verifyVersion.js 16 || (npm run build:node && tap test/fetch/*.js && tap test/webidl/*.js)",
"test:jest": "node scripts/verifyVersion.js 14 || jest",
Expand Down
2 changes: 0 additions & 2 deletions test/busboy/test-types-multipart.js
Original file line number Diff line number Diff line change
Expand Up @@ -781,8 +781,6 @@ const tests = [
what: 'Files limit'
},
{
// Note: this test is very slow because we need to write > 64 KiB
// of data one byte at a time.
source: [
['-----------------------------paZqsnEHRufoShdX6fh0lUhXBP4k',
'Content-Disposition: form-data; ' +
Expand Down

0 comments on commit b7b9645

Please sign in to comment.