From a0666e896070765f9f1cefae047b830f72b85a74 Mon Sep 17 00:00:00 2001 From: Vasiliy Faronov Date: Wed, 9 Aug 2017 11:25:47 +0400 Subject: [PATCH] Fix parsing the Forwarded header (#2170) (#2173) This fixes #2170 by parsing Forwarded more carefully, while staying about as fast, simple, and robust in the face of potential injection attacks. (Speed was measured with IPython's %timeit on my laptop on a few typical and pathological header values.) In particular: - commas and semicolons are allowed inside quoted-strings; - empty forwarded-pairs (as in "for=_1;;by=_2") are allowed; - non-standard parameters are allowed (although this alone could be easily done in the previous parser). This still doesn't parse valid headers containing obs-text, which was an intentional decision in the previous parser (see comments) that I did not change. Also, the previous parser used to bail out of forwarded-elements containing duplicate parameter names. No rationale was given in the code, and I don't think this is important, so the new parser doesn't enforce this. --- CONTRIBUTORS.txt | 1 + aiohttp/web_request.py | 76 +++++++++++++++++---------------- changes/2170.bugfix | 1 + tests/test_web_request.py | 88 +++++++++++++++++++++++++++++++++++++-- 4 files changed, 127 insertions(+), 39 deletions(-) create mode 100644 changes/2170.bugfix diff --git a/CONTRIBUTORS.txt b/CONTRIBUTORS.txt index fe67c5e800d..1f50b3224e2 100644 --- a/CONTRIBUTORS.txt +++ b/CONTRIBUTORS.txt @@ -172,6 +172,7 @@ Thomas Grainger Tolga Tezel Vaibhav Sagar Vamsi Krishna Avula +Vasiliy Faronov Vasyl Baran Vikas Kawadia Vitalik Verhovodov diff --git a/aiohttp/web_request.py b/aiohttp/web_request.py index 9a6d9ebf0f0..ffae3b50a62 100644 --- a/aiohttp/web_request.py +++ b/aiohttp/web_request.py @@ -28,7 +28,7 @@ _TCHAR = string.digits + string.ascii_letters + r"!#$%&'*+.^_`|~-" # '-' at the end to prevent interpretation as range in a char class -_TOKEN = r'[{tchar}]*'.format(tchar=_TCHAR) +_TOKEN = r'[{tchar}]+'.format(tchar=_TCHAR) _QDTEXT = r'[{}]'.format( r''.join(chr(c) for c in (0x09, 0x20, 0x21) + tuple(range(0x23, 0x7F)))) @@ -40,12 +40,8 @@ _QUOTED_STRING = r'"(?:{quoted_pair}|{qdtext})*"'.format( qdtext=_QDTEXT, quoted_pair=_QUOTED_PAIR) -_FORWARDED_PARAMS = ( - r'[bB][yY]|[fF][oO][rR]|[hH][oO][sS][tT]|[pP][rR][oO][tT][oO]') - _FORWARDED_PAIR = ( - r'^({forwarded_params})=({token}|{quoted_string})$'.format( - forwarded_params=_FORWARDED_PARAMS, + r'({token})=({token}|{quoted_string})'.format( token=_TOKEN, quoted_string=_QUOTED_STRING)) @@ -209,37 +205,45 @@ def forwarded(self): Returns a tuple containing one or more immutable dicts """ - def _parse_forwarded(forwarded_headers): - for field_value in forwarded_headers: - # by=...;for=..., For=..., BY=... - for forwarded_elm in field_value.split(','): - # by=...;for=... - fvparams = dict() - forwarded_pairs = ( - _FORWARDED_PAIR_RE.findall(pair) - for pair in forwarded_elm.strip().split(';')) - for forwarded_pair in forwarded_pairs: - # by=... - if len(forwarded_pair) != 1: - # non-compliant syntax - break - param, value = forwarded_pair[0] - if param.lower() in fvparams: - # duplicate param in field-value - break - if value and value[0] == '"': - # quoted string: replace quotes and escape - # sequences - value = _QUOTED_PAIR_REPLACE_RE.sub( - r'\1', value[1:-1]) - fvparams[param.lower()] = value + elems = [] + for field_value in self._message.headers.getall(hdrs.FORWARDED, ()): + length = len(field_value) + pos = 0 + need_separator = False + elem = {} + elems.append(types.MappingProxyType(elem)) + while 0 <= pos < length: + match = _FORWARDED_PAIR_RE.match(field_value, pos) + if match is not None: # got a valid forwarded-pair + if need_separator: + # bad syntax here, skip to next comma + pos = field_value.find(',', pos) else: - yield types.MappingProxyType(fvparams) - continue - yield dict() - - return tuple( - _parse_forwarded(self._message.headers.getall(hdrs.FORWARDED, ()))) + (name, value) = match.groups() + if value[0] == '"': + # quoted string: remove quotes and unescape + value = _QUOTED_PAIR_REPLACE_RE.sub(r'\1', + value[1:-1]) + elem[name.lower()] = value + pos += len(match.group(0)) + need_separator = True + elif field_value[pos] == ',': # next forwarded-element + need_separator = False + elem = {} + elems.append(types.MappingProxyType(elem)) + pos += 1 + elif field_value[pos] == ';': # next forwarded-pair + need_separator = False + pos += 1 + elif field_value[pos] in ' \t': + # Allow whitespace even between forwarded-pairs, though + # RFC 7239 doesn't. This simplifies code and is in line + # with Postel's law. + pos += 1 + else: + # bad syntax here, skip to next comma + pos = field_value.find(',', pos) + return tuple(elems) @reify def _scheme(self): diff --git a/changes/2170.bugfix b/changes/2170.bugfix new file mode 100644 index 00000000000..35961c5651e --- /dev/null +++ b/changes/2170.bugfix @@ -0,0 +1 @@ +Fix parsing the Forwarded header according to RFC 7239. diff --git a/tests/test_web_request.py b/tests/test_web_request.py index e143b4e151b..2ea5655c9c3 100644 --- a/tests/test_web_request.py +++ b/tests/test_web_request.py @@ -290,6 +290,61 @@ def test_single_forwarded_header_quoted_escaped(): assert req.forwarded[0]['proto'] == 'lala land~ 123!&' +def test_single_forwarded_header_custom_param(): + header = r'BY=identifier;PROTO=https;SOME="other, \"value\""' + req = make_mocked_request('GET', '/', + headers=CIMultiDict({'Forwarded': header})) + assert len(req.forwarded) == 1 + assert req.forwarded[0]['by'] == 'identifier' + assert req.forwarded[0]['proto'] == 'https' + assert req.forwarded[0]['some'] == 'other, "value"' + + +def test_single_forwarded_header_empty_params(): + # This is allowed by the grammar given in RFC 7239 + header = ';For=identifier;;PROTO=https;;;' + req = make_mocked_request('GET', '/', + headers=CIMultiDict({'Forwarded': header})) + assert req.forwarded[0]['for'] == 'identifier' + assert req.forwarded[0]['proto'] == 'https' + + +def test_single_forwarded_header_bad_separator(): + header = 'BY=identifier PROTO=https' + req = make_mocked_request('GET', '/', + headers=CIMultiDict({'Forwarded': header})) + assert 'proto' not in req.forwarded[0] + + +def test_single_forwarded_header_injection1(): + # We might receive a header like this if we're sitting behind a reverse + # proxy that blindly appends a forwarded-element without checking + # the syntax of existing field-values. We should be able to recover + # the appended element anyway. + header = 'for=_injected;by=", for=_real' + req = make_mocked_request('GET', '/', + headers=CIMultiDict({'Forwarded': header})) + assert len(req.forwarded) == 2 + assert 'by' not in req.forwarded[0] + assert req.forwarded[1]['for'] == '_real' + + +def test_single_forwarded_header_injection2(): + header = 'very bad syntax, for=_real' + req = make_mocked_request('GET', '/', + headers=CIMultiDict({'Forwarded': header})) + assert len(req.forwarded) == 2 + assert 'for' not in req.forwarded[0] + assert req.forwarded[1]['for'] == '_real' + + +def test_single_forwarded_header_long_quoted_string(): + header = 'for="' + '\\\\' * 5000 + '"' + req = make_mocked_request('GET', '/', + headers=CIMultiDict({'Forwarded': header})) + assert req.forwarded[0]['for'] == '\\' * 5000 + + def test_multiple_forwarded_headers(): headers = CIMultiDict() headers.add('Forwarded', 'By=identifier1;for=identifier2, BY=identifier3') @@ -303,10 +358,37 @@ def test_multiple_forwarded_headers(): assert req.forwarded[2]['for'] == 'identifier5' +def test_multiple_forwarded_headers_bad_syntax(): + headers = CIMultiDict() + headers.add('Forwarded', 'for=_1;by=_2') + headers.add('Forwarded', 'invalid value') + headers.add('Forwarded', '') + headers.add('Forwarded', 'for=_3;by=_4') + req = make_mocked_request('GET', '/', headers=headers) + assert len(req.forwarded) == 4 + assert req.forwarded[0]['for'] == '_1' + assert 'for' not in req.forwarded[1] + assert 'for' not in req.forwarded[2] + assert req.forwarded[3]['by'] == '_4' + + +def test_multiple_forwarded_headers_injection(): + headers = CIMultiDict() + # This could be sent by an attacker, hoping to "shadow" the second header. + headers.add('Forwarded', 'for=_injected;by="') + # This is added by our trusted reverse proxy. + headers.add('Forwarded', 'for=_real;by=_actual_proxy') + req = make_mocked_request('GET', '/', headers=headers) + assert len(req.forwarded) == 2 + assert 'by' not in req.forwarded[0] + assert req.forwarded[1]['for'] == '_real' + assert req.forwarded[1]['by'] == '_actual_proxy' + + def test_https_scheme_by_forwarded_header(): + header = 'by=_1;for=_2;host=_3;proto=https' req = make_mocked_request('GET', '/', - headers=CIMultiDict( - {'Forwarded': 'by=;for=;host=;proto=https'})) + headers=CIMultiDict({'Forwarded': header})) assert "https" == req.scheme assert req.secure is True @@ -338,7 +420,7 @@ def test_https_scheme_by_x_forwarded_proto_header_no_tls(): def test_host_by_forwarded_header(): headers = CIMultiDict() headers.add('Forwarded', 'By=identifier1;for=identifier2, BY=identifier3') - headers.add('Forwarded', 'by=;for=;host=example.com') + headers.add('Forwarded', 'by=unknown;for=unknown;host=example.com') req = make_mocked_request('GET', '/', headers=headers) assert req.host == 'example.com'