python · vstinner · Oct 14, 2020 · Sep 12, 2020 · Sep 12, 2020 · Sep 12, 2020
diff --git a/Lib/encodings/__init__.py b/Lib/encodings/__init__.py
@@ -61,7 +61,8 @@ def normalize_encoding(encoding):
         if c.isalnum() or c == '.':
             if punct and chars:
                 chars.append('_')
-            chars.append(c)
+            if c.isascii():
+                chars.append(c)
             punct = False
         else:
             punct = True

diff --git a/Lib/test/test_codecs.py b/Lib/test/test_codecs.py
@@ -3440,5 +3440,21 @@ def search_function(encoding):
         self.assertEqual(NOT_FOUND, codecs.lookup('a\xe9\u20ac-8'))
 
 
+class EncodingNormalizationTest(unittest.TestCase):
+
+    def test_normalization(self):
+        # encodings.normalize_encoding() ignores non-ASCII characters.
+        out = encodings.normalize_encoding('utf\xE9\u20AC\U0010ffff-8')
+        self.assertEqual(out, 'utf_8')
+        out = encodings.normalize_encoding('utf_8')
+        self.assertEqual(out, 'utf_8')
+        out = encodings.normalize_encoding('utf   8')
+        self.assertEqual(out, 'utf_8')
+        out = encodings.normalize_encoding('UTF 8')
+        self.assertEqual(out, 'UTF_8')
+        out = encodings.normalize_encoding('utf...8')
+        self.assertEqual(out, 'utf...8')
+
+
 if __name__ == "__main__":
     unittest.main()
diff --git a/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst b/Misc/NEWS.d/next/Library/2020-09-13-02-02-18.bpo-39337.L3NXTt.rst
@@ -0,0 +1 @@
+:func:`encodings.normalize_encoding` now ignores non-ASCII characters.
Original file line number	Diff line number	Diff line change
		@@ -0,0 +1 @@
		:func:`encodings.normalize_encoding` now ignores non-ASCII characters.
Comment thread shihai1991 marked this conversation as resolved. Outdated