From 9c6fce3ac8db13d0c47e58bbf08335b83e74b442 Mon Sep 17 00:00:00 2001 From: Alice Bevan-McGregor Date: Thu, 25 Mar 2021 14:28:38 -0400 Subject: [PATCH] Support for IDNA / Punycode Unicode encoding for host names. The Unicode version is stored natively and is accessible via `URI.host`; however, the string and repr forms will present the IDNA-encoded version. Tests TBD. --- uri/part/host.py | 13 +++++++++++++ 1 file changed, 13 insertions(+) diff --git a/uri/part/host.py b/uri/part/host.py index 3f65e7e..52cb246 100644 --- a/uri/part/host.py +++ b/uri/part/host.py @@ -11,6 +11,11 @@ class HostPart(ProxyPart): def render(self, obj, value): result = super(HostPart, self).render(obj, value) + try: + result.encode('ascii') + except UnicodeEncodeError: + result = result.encode('idna').decode('ascii') + if result: try: # Identify and armour IPv6 address literals. inet_pton(AF_INET6, value) @@ -20,3 +25,11 @@ def render(self, obj, value): result = '[' + result + ']' return result + + def __set__(self, obj, value): + if isinstance(value, bytes): + value = value.decode('idna') + elif value.startswith('xn--'): + value = value.encode('ascii').decode('idna') + + super().__set__(obj, value)