From 198ff3347c6c39c25691202e1de71f7c13c4df81 Mon Sep 17 00:00:00 2001 From: Nikolay Kim Date: Mon, 20 Mar 2017 22:03:06 -0700 Subject: [PATCH] Content disposition with semicolon in filename #917 --- CHANGES.rst | 1 + aiohttp/multipart.py | 20 ++++++++++++++++++-- tests/test_multipart.py | 7 +++++++ 3 files changed, 26 insertions(+), 2 deletions(-) diff --git a/CHANGES.rst b/CHANGES.rst index 5f3a6a3ff90..695272babaa 100644 --- a/CHANGES.rst +++ b/CHANGES.rst @@ -5,6 +5,7 @@ Changes 2.1.0 (2017-xx-xx) ------------------ +- Content disposition with semicolon in filename #917 2.0.0 (2017-03-20) diff --git a/aiohttp/multipart.py b/aiohttp/multipart.py index c0aa90f5c85..658546a2d05 100644 --- a/aiohttp/multipart.py +++ b/aiohttp/multipart.py @@ -32,6 +32,7 @@ class BadContentDispositionParam(RuntimeWarning): def parse_content_disposition(header): + def is_token(string): return string and TOKEN >= set(string) @@ -63,7 +64,9 @@ def unescape(text, *, chars=''.join(map(re.escape, CHAR))): return None, {} params = {} - for item in parts: + while parts: + item = parts.pop(0) + if '=' not in item: warnings.warn(BadContentDispositionHeader(header)) return None, {} @@ -102,9 +105,22 @@ def unescape(text, *, chars=''.join(map(re.escape, CHAR))): continue else: + failed = True if is_quoted(value): + failed = False value = unescape(value[1:-1].lstrip('\\/')) - elif not is_token(value): + elif is_token(value): + failed = False + elif parts: + # maybe just ; in filename, in any case this is just + # one case fix, for proper fix we need to redesign parser + _value = '%s;%s' % (value, parts[0]) + if is_quoted(_value): + parts.pop(0) + value = unescape(_value[1:-1].lstrip('\\/')) + failed = False + + if failed: warnings.warn(BadContentDispositionHeader(header)) return None, {} diff --git a/tests/test_multipart.py b/tests/test_multipart.py index dc709b0629c..312abbd409e 100644 --- a/tests/test_multipart.py +++ b/tests/test_multipart.py @@ -1057,6 +1057,13 @@ def test_inlonlyquoted(self): self.assertEqual(None, disptype) self.assertEqual({}, params) + def test_semicolon(self): + disptype, params = parse_content_disposition( + 'form-data; name="data"; filename="file ; name.mp4"') + self.assertEqual(disptype, 'form-data') + self.assertEqual( + params, {'name': 'data', 'filename': 'file ; name.mp4'}) + def test_inlwithasciifilename(self): disptype, params = parse_content_disposition( 'inline; filename="foo.html"')