Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
121 changes: 83 additions & 38 deletions deps/dicer/lib/HeaderParser.js
Original file line number Diff line number Diff line change
Expand Up @@ -4,17 +4,13 @@ const EventEmitter = require('node:events').EventEmitter
const inherits = require('node:util').inherits
const getLimit = require('../../../lib/utils/getLimit')

const StreamSearch = require('../../streamsearch/sbmh')

const B_DCRLF = Buffer.from('\r\n\r\n')
const RE_CRLF = /\r\n/g
const RE_HDR = /^([^:]+):[ \t]?([\x00-\xFF]+)?$/ // eslint-disable-line no-control-regex
const S_DCRLF = '\r\n\r\n'

function HeaderParser (cfg) {
EventEmitter.call(this)

cfg = cfg || {}
const self = this
this.nread = 0
this.maxed = false
this.npairs = 0
Expand All @@ -23,40 +19,79 @@ function HeaderParser (cfg) {
this.buffer = ''
this.header = {}
this.finished = false
this.ss = new StreamSearch(B_DCRLF)
this.ss.on('info', function (isMatch, data, start, end) {
if (data && !self.maxed) {
if (self.nread + end - start >= self.maxHeaderSize) {
end = self.maxHeaderSize - self.nread + start
self.nread = self.maxHeaderSize
self.maxed = true
} else { self.nread += (end - start) }

self.buffer += data.toString('binary', start, end)
}
if (isMatch) { self._finish() }
})
this.tail = ''
}
inherits(HeaderParser, EventEmitter)

// Feeds one chunk of input to the parser and looks for the blank-line
// terminator (\r\n\r\n) that ends the header section.
// `data` may be a Buffer or a string; a string is converted byte-for-byte
// using the 'binary' encoding.
// When the terminator is located, the header is finalised through _finish()
// and the offset in `data` just past the terminator is returned. When it is
// not located, the function falls off the end and returns undefined.
HeaderParser.prototype.push = function (data) {
// NOTE(review): the next two lines go through this.ss, while everything below
// does its own terminator search. They look like the removed side of the diff
// rendered without -/+ markers — confirm against the merged file.
const r = this.ss.push(data)
if (this.finished) { return r }
if (!Buffer.isBuffer(data)) { data = Buffer.from(data, 'binary') }

// end       - value returned to the caller once the terminator is found
// appendEnd - number of leading bytes of `data` to add to this.buffer
// tail      - up to three trailing characters remembered from earlier chunks
let end = data.length
let appendEnd = data.length
let found = false
const tail = this.tail

// Case 1: the terminator straddles a chunk boundary. Try the longest possible
// overlap first: if `tail` ends with the first i characters of \r\n\r\n, check
// that `data` begins with the remaining (4 - i) characters.
for (let i = tail.length; i > 0; --i) {
if (tail.endsWith(S_DCRLF.slice(0, i))) {
// `data` must be long enough to hold the rest of the terminator.
let matched = data.length >= S_DCRLF.length - i
for (let j = i; matched && j < S_DCRLF.length; ++j) {
matched = data[j - i] === S_DCRLF.charCodeAt(j)
}
if (matched) {
// Only terminator bytes precede `end` in this chunk, so nothing is buffered.
end = S_DCRLF.length - i
appendEnd = 0
found = true
break
}
}
}

// Case 2: the whole terminator lies inside this chunk.
if (!found) {
const pos = data.indexOf(B_DCRLF)
if (pos !== -1) {
end = pos + B_DCRLF.length
appendEnd = pos
found = true
}
}

// Remember the last three characters seen so far (a four-character terminator
// can overlap the next chunk by at most three); short chunks are combined with
// the previous tail. Once the terminator is found the tail is cleared.
if (!found) {
this.tail = data.length >= 3
? data.toString('binary', data.length - 3)
: (tail + data.toString('binary')).slice(-3)
} else { this.tail = '' }

// Accumulate header text, truncating at maxHeaderSize. After the limit is hit
// `maxed` stays set and later chunks are no longer appended.
if (appendEnd !== 0 && !this.maxed) {
const remaining = this.maxHeaderSize - this.nread
if (appendEnd >= remaining) {
this.buffer += data.toString('binary', 0, remaining)
this.nread = this.maxHeaderSize
this.maxed = true
} else {
this.buffer += data.toString('binary', 0, appendEnd)
this.nread += appendEnd
}
}

if (found) {
this._finish()
return end
}
}

// Clears the finished flag, the accumulated header text, the parsed header map
// and the remembered cross-chunk tail.
HeaderParser.prototype.reset = function () {
this.finished = false
this.buffer = ''
this.header = {}
// NOTE(review): this.ss.reset() sits next to the this.tail reset; it looks like
// the removed side of the diff rendered without -/+ markers — confirm against
// the merged file.
this.ss.reset()
this.tail = ''
}

HeaderParser.prototype._finish = function () {
if (this.buffer) { this._parseHeader() }
this.ss.matches = this.ss.maxMatches
const header = this.header
this.header = {}
this.buffer = ''
this.tail = ''
this.finished = true
this.nread = this.npairs = 0
this.maxed = false
Expand All @@ -66,34 +101,44 @@ HeaderParser.prototype._finish = function () {
HeaderParser.prototype._parseHeader = function () {
if (this.npairs === this.maxHeaderPairs) { return }

const lines = this.buffer.split(RE_CRLF)
const len = lines.length
let m, h
const buffer = this.buffer
let h
let lineStart = 0

for (var i = 0; i < len; ++i) { // eslint-disable-line no-var
if (lines[i].length === 0) { continue }
if (lines[i][0] === '\t' || lines[i][0] === ' ') {
while (lineStart < buffer.length) {
let lineEnd = buffer.indexOf('\r\n', lineStart)
if (lineEnd === -1) { lineEnd = buffer.length }

if (lineEnd === lineStart) {
lineStart = lineEnd + 2
continue
}

if ((buffer[lineStart] === '\t' || buffer[lineStart] === ' ') && h) {
// folded header content
// RFC2822 says to just remove the CRLF and not the whitespace following
// it, so we follow the RFC and include the leading whitespace ...
if (h) {
this.header[h][this.header[h].length - 1] += lines[i]
continue
}
this.header[h][this.header[h].length - 1] += buffer.slice(lineStart, lineEnd)
lineStart = lineEnd + 2
continue
}

const posColon = lines[i].indexOf(':')
const posColon = buffer.indexOf(':', lineStart)
if (
posColon === -1 ||
posColon === 0
posColon === lineStart ||
posColon > lineEnd
) {
return
}
m = RE_HDR.exec(lines[i])
h = m[1].toLowerCase()
this.header[h] = this.header[h] || []
this.header[h].push((m[2] || ''))
h = buffer.slice(lineStart, posColon).toLowerCase()
let valueStart = posColon + 1
if (buffer[valueStart] === ' ' || buffer[valueStart] === '\t') { ++valueStart }
const values = this.header[h] || (this.header[h] = [])
values.push(buffer.slice(valueStart, lineEnd))
if (++this.npairs === this.maxHeaderPairs) { break }

lineStart = lineEnd + 2
}
}

Expand Down
19 changes: 12 additions & 7 deletions lib/types/multipart.js
Original file line number Diff line number Diff line change
Expand Up @@ -118,13 +118,18 @@ function Multipart (boy, cfg) {
let nsize = 0

if (header['content-type']) {
parsed = parseParams(header['content-type'][0])
if (parsed[0]) {
contype = parsed[0].toLowerCase()
for (i = 0, len = parsed.length; i < len; ++i) {
if (RE_CHARSET.test(parsed[i][0])) {
charset = parsed[i][1].toLowerCase()
break
const contentType = header['content-type'][0]
if (contentType.length !== 0 && contentType.indexOf(';') === -1 && contentType.indexOf(' ') === -1 && contentType.indexOf('\t') === -1) {
contype = contentType.toLowerCase()
} else {
parsed = parseParams(contentType)
if (parsed[0]) {
contype = parsed[0].toLowerCase()
for (i = 0, len = parsed.length; i < len; ++i) {
if (RE_CHARSET.test(parsed[i][0])) {
charset = parsed[i][1].toLowerCase()
break
}
}
}
}
Expand Down
8 changes: 8 additions & 0 deletions lib/utils/decodeText.js
Original file line number Diff line number Diff line change
Expand Up @@ -44,12 +44,20 @@ function getDecoder (charset) {
}
}

/**
 * Reports whether every UTF-16 code unit of `data` is within 7-bit ASCII.
 *
 * @param {string} data - Text to inspect.
 * @returns {boolean} `false` as soon as a code unit above 0x7F is met,
 *   `true` otherwise (an empty string is therefore ASCII).
 */
function isAscii (data) {
  let idx = 0
  const total = data.length
  while (idx < total) {
    if (data.charCodeAt(idx) > 0x7F) { return false }
    idx += 1
  }
  return true
}

const decoders = {
utf8: (data, sourceEncoding) => {
if (data.length === 0) {
return ''
}
if (typeof data === 'string') {
if (isAscii(data)) { return data }
data = Buffer.from(data, sourceEncoding)
}
return data.utf8Slice(0, data.length)
Expand Down
41 changes: 39 additions & 2 deletions lib/utils/parseParams.js
Original file line number Diff line number Diff line change
Expand Up @@ -495,12 +495,49 @@ function encodedReplacer (match) {
return EncodedLookup[match]
}

/**
 * Decodes `text` as UTF-8 only when it can contain multi-byte data.
 *
 * A string whose code units are all <= 0x7F is returned untouched. Otherwise
 * the whole string is passed to decodeText(text, 'binary', 'utf8').
 *
 * @param {string} text - Header text held as a binary string.
 * @returns {string} `text` itself for pure ASCII input, otherwise whatever
 *   decodeText returns.
 */
function decodeUtf8Text (text) {
  const total = text.length
  let idx = 0
  while (idx < total) {
    if (text.charCodeAt(idx) > 0x7F) {
      return decodeText(text, 'binary', 'utf8')
    }
    idx += 1
  }
  return text
}

const STATE_KEY = 0
const STATE_VALUE = 1
const STATE_CHARSET = 2
const STATE_LANG = 3

/**
 * Fast path for the two canonical Content-Disposition shapes:
 *
 *   form-data; name="..."
 *   form-data; name="..."; filename="..."
 *
 * Anything else makes the function return undefined: a different
 * disposition type, backslash escapes, extra parameters such as `filename*=`,
 * or stray text after a closing quote. parseParams treats undefined as
 * "use the general parser", so no part of the header is silently dropped.
 *
 * @param {string} str - Raw Content-Disposition header value.
 * @returns {Array|undefined} `['form-data', ['name', value]]`, optionally
 *   followed by `['filename', value]`, or undefined when the value is not in
 *   one of the canonical shapes.
 */
function parseFormDataDisposition (str) {
  const NAME_PREFIX = 'form-data; name="'
  const FILENAME_PREFIX = 'filename="'

  if (!str.startsWith(NAME_PREFIX)) { return }

  // A single scan covers both quoted values: escapes are left to the general
  // parser wherever they occur.
  const nameStart = NAME_PREFIX.length
  if (str.indexOf('\\', nameStart) !== -1) { return }

  const nameEnd = str.indexOf('"', nameStart)
  if (nameEnd === -1) { return }

  let pos = nameEnd + 1
  const hasMore = pos !== str.length
  // Only end-of-string or a parameter separator may directly follow the
  // closing quote; any other text would otherwise be thrown away.
  if (hasMore && str[pos] !== ';') { return }

  let fileStart = -1
  let fileEnd = -1
  if (hasMore) {
    ++pos
    while (str[pos] === ' ' || str[pos] === '\t') { ++pos }
    if (!str.startsWith(FILENAME_PREFIX, pos)) { return }

    fileStart = pos + FILENAME_PREFIX.length
    fileEnd = str.indexOf('"', fileStart)
    // The filename must be the last thing in the header; trailing parameters
    // (for example filename*=utf-8''...) need the general parser.
    if (fileEnd === -1 || fileEnd + 1 !== str.length) { return }
  }

  // Decode only after the whole value has been validated.
  const res = ['form-data', ['name', decodeUtf8Text(str.slice(nameStart, nameEnd))]]
  if (hasMore) {
    res.push(['filename', decodeUtf8Text(str.slice(fileStart, fileEnd))])
  }
  return res
}

function parseParams (str) {
if (str.indexOf(';') === -1) { return [decodeUtf8Text(str)] }

const fastFormData = parseFormDataDisposition(str)
if (fastFormData !== undefined) { return fastFormData }

const res = []
let state = STATE_KEY
let charset = ''
Expand Down Expand Up @@ -559,7 +596,7 @@ function parseParams (str) {
}
charset = ''
} else if (tmp.length) {
tmp = decodeText(tmp, 'binary', 'utf8')
tmp = decodeUtf8Text(tmp)
}
if (res[p] === undefined) { res[p] = tmp } else { res[p][1] = tmp }
tmp = ''
Expand All @@ -574,7 +611,7 @@ function parseParams (str) {
'binary',
charset)
} else if (tmp) {
tmp = decodeText(tmp, 'binary', 'utf8')
tmp = decodeUtf8Text(tmp)
}

if (res[p] === undefined) {
Expand Down
13 changes: 13 additions & 0 deletions test/dicer-headerparser.test.js
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,19 @@ test('dicer-headerparser', async t => {
expected: {},
what: 'No header'
},
{
source: ['Foo: bar\r', '\n\r', '\nextra'],
expected: { foo: ['bar'] },
what: 'Header terminator across chunks'
},
{
source: ['Foo: bar\r', '\n\r', '\nextra'],
cfg: {
maxHeaderSize: 0
},
expected: {},
what: 'Header terminator across chunks after max header size'
},
{
source: ['Content-Type:\t text/plain',
'Content-Length:0'
Expand Down
Loading