Skip to content

Commit 5ae7bab

Browse files
vstinner authored and hroncok committed
00415: [CVE-2023-27043] pythongh-102988: Reject malformed addresses in email.parseaddr() (python#111116)
Detect email address parsing errors and return an empty tuple to indicate the parsing error (old API). Add an optional 'strict' parameter to the getaddresses() and parseaddr() functions. Patch by Thomas Dwyer.

Co-Authored-By: Thomas Dwyer <github@tomd.tel>

Changes for Python 2:
- Define encoding for test_email
- Adjust import so we don't need to change the tests
- Do not use f-strings
- Do not use SubTest
- KW only function arguments are not supported
1 parent 004e7da commit 5ae7bab

File tree

4 files changed

+342
-23
lines changed

4 files changed

+342
-23
lines changed

Doc/library/email.utils.rst

Lines changed: 15 additions & 4 deletions
Original file line numberDiff line numberDiff line change
@@ -21,13 +21,18 @@ There are several useful utilities provided in the :mod:`email.utils` module:
2121
begins with angle brackets, they are stripped off.
2222

2323

24-
.. function:: parseaddr(address)
24+
.. function:: parseaddr(address, strict=True)
2525

2626
Parse address -- which should be the value of some address-containing field such
2727
as :mailheader:`To` or :mailheader:`Cc` -- into its constituent *realname* and
2828
*email address* parts. Returns a tuple of that information, unless the parse
2929
fails, in which case a 2-tuple of ``('', '')`` is returned.
3030

31+
If *strict* is true, use a strict parser which rejects malformed inputs.
32+
33+
.. versionchanged:: 3.13
34+
Add *strict* optional parameter and reject malformed inputs by default.
35+
3136

3237
.. function:: formataddr(pair)
3338

@@ -37,12 +42,15 @@ There are several useful utilities provided in the :mod:`email.utils` module:
3742
second element is returned unmodified.
3843

3944

40-
.. function:: getaddresses(fieldvalues)
45+
.. function:: getaddresses(fieldvalues, strict=True)
4146

4247
This method returns a list of 2-tuples of the form returned by ``parseaddr()``.
4348
*fieldvalues* is a sequence of header field values as might be returned by
44-
:meth:`Message.get_all <email.message.Message.get_all>`. Here's a simple
45-
example that gets all the recipients of a message::
49+
:meth:`Message.get_all <email.message.Message.get_all>`.
50+
51+
If *strict* is true, use a strict parser which rejects malformed inputs.
52+
53+
Here's a simple example that gets all the recipients of a message::
4654

4755
from email.utils import getaddresses
4856

@@ -52,6 +60,9 @@ There are several useful utilities provided in the :mod:`email.utils` module:
5260
resent_ccs = msg.get_all('resent-cc', [])
5361
all_recipients = getaddresses(tos + ccs + resent_tos + resent_ccs)
5462

63+
.. versionchanged:: 3.13
64+
Add *strict* optional parameter and reject malformed inputs by default.
65+
5566

5667
.. function:: parsedate(date)
5768

Lib/email/test/test_email.py

Lines changed: 177 additions & 9 deletions
Original file line numberDiff line numberDiff line change
@@ -1,3 +1,4 @@
1+
# -*- coding: utf-8 -*-
12
# Copyright (C) 2001-2010 Python Software Foundation
23
# Contact: email-sig@python.org
34
# email package unit tests
@@ -30,7 +31,7 @@
3031
from email.MIMEBase import MIMEBase
3132
from email.MIMEMessage import MIMEMessage
3233
from email.MIMEMultipart import MIMEMultipart
33-
from email import Utils
34+
from email import Utils, utils
3435
from email import Errors
3536
from email import Encoders
3637
from email import Iterators
@@ -2414,15 +2415,135 @@ def test_getaddresses(self):
24142415
[('Al Person', 'aperson@dom.ain'),
24152416
('Bud Person', 'bperson@dom.ain')])
24162417

2418+
def test_parsing_errors(self):
2419+
"""Test for parsing errors from CVE-2023-27043 and CVE-2019-16056"""
2420+
alice = 'alice@example.org'
2421+
bob = 'bob@example.com'
2422+
empty = ('', '')
2423+
2424+
# Test utils.getaddresses() and utils.parseaddr() on malformed email
2425+
# addresses: default behavior (strict=True) rejects malformed address,
2426+
# whereas strict=False tolerates malformed addresses.
2427+
for invalid_separator, expected_non_strict in (
2428+
('(', [('<' + bob + '>', alice)]),
2429+
(')', [('', alice), empty, ('', bob)]),
2430+
('<', [('', alice), empty, ('', bob), empty]),
2431+
('>', [('', alice), empty, ('', bob)]),
2432+
('[', [('', alice + '[<' + bob + '>]')]),
2433+
(']', [('', alice), empty, ('', bob)]),
2434+
('@', [empty, empty, ('', bob)]),
2435+
(';', [('', alice), empty, ('', bob)]),
2436+
(':', [('', alice), ('', bob)]),
2437+
('.', [('', alice + '.'), ('', bob)]),
2438+
('"', [('', alice), ('', '<' + bob + '>')]),
2439+
):
2440+
address = alice + invalid_separator + '<' + bob + '>'
2441+
self.assertEqual(utils.getaddresses([address]),
2442+
[empty])
2443+
self.assertEqual(utils.getaddresses([address], strict=False),
2444+
expected_non_strict)
2445+
2446+
self.assertEqual(utils.parseaddr([address]),
2447+
empty)
2448+
self.assertEqual(utils.parseaddr([address], strict=False),
2449+
('', address))
2450+
2451+
# Comma (',') is treated differently depending on strict parameter.
2452+
# Comma without quotes.
2453+
address = alice + ',<' + bob + '>'
2454+
self.assertEqual(utils.getaddresses([address]),
2455+
[('', alice), ('', bob)])
2456+
self.assertEqual(utils.getaddresses([address], strict=False),
2457+
[('', alice), ('', bob)])
2458+
self.assertEqual(utils.parseaddr([address]),
2459+
empty)
2460+
self.assertEqual(utils.parseaddr([address], strict=False),
2461+
('', address))
2462+
2463+
# Real name between quotes containing comma.
2464+
address = '"Alice, alice@example.org" <bob@example.com>'
2465+
expected_strict = ('Alice, alice@example.org', 'bob@example.com')
2466+
self.assertEqual(utils.getaddresses([address]), [expected_strict])
2467+
self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
2468+
self.assertEqual(utils.parseaddr([address]), expected_strict)
2469+
self.assertEqual(utils.parseaddr([address], strict=False),
2470+
('', address))
2471+
2472+
# Valid parenthesis in comments.
2473+
address = 'alice@example.org (Alice)'
2474+
expected_strict = ('Alice', 'alice@example.org')
2475+
self.assertEqual(utils.getaddresses([address]), [expected_strict])
2476+
self.assertEqual(utils.getaddresses([address], strict=False), [expected_strict])
2477+
self.assertEqual(utils.parseaddr([address]), expected_strict)
2478+
self.assertEqual(utils.parseaddr([address], strict=False),
2479+
('', address))
2480+
2481+
# Invalid parenthesis in comments.
2482+
address = 'alice@example.org )Alice('
2483+
self.assertEqual(utils.getaddresses([address]), [empty])
2484+
self.assertEqual(utils.getaddresses([address], strict=False),
2485+
[('', 'alice@example.org'), ('', ''), ('', 'Alice')])
2486+
self.assertEqual(utils.parseaddr([address]), empty)
2487+
self.assertEqual(utils.parseaddr([address], strict=False),
2488+
('', address))
2489+
2490+
# Two addresses with quotes separated by comma.
2491+
address = '"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>'
2492+
self.assertEqual(utils.getaddresses([address]),
2493+
[('Jane Doe', 'jane@example.net'),
2494+
('John Doe', 'john@example.net')])
2495+
self.assertEqual(utils.getaddresses([address], strict=False),
2496+
[('Jane Doe', 'jane@example.net'),
2497+
('John Doe', 'john@example.net')])
2498+
self.assertEqual(utils.parseaddr([address]), empty)
2499+
self.assertEqual(utils.parseaddr([address], strict=False),
2500+
('', address))
2501+
2502+
# Test email.utils.supports_strict_parsing attribute
2503+
self.assertEqual(email.utils.supports_strict_parsing, True)
2504+
24172505
def test_getaddresses_nasty(self):
2418-
eq = self.assertEqual
2419-
eq(Utils.getaddresses(['foo: ;']), [('', '')])
2420-
eq(Utils.getaddresses(
2421-
['[]*-- =~$']),
2422-
[('', ''), ('', ''), ('', '*--')])
2423-
eq(Utils.getaddresses(
2424-
['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>']),
2425-
[('', ''), ('Jason R. Mastaler', 'jason@dom.ain')])
2506+
for addresses, expected in (
2507+
(['"Sürname, Firstname" <to@example.com>'],
2508+
[('Sürname, Firstname', 'to@example.com')]),
2509+
2510+
(['foo: ;'],
2511+
[('', '')]),
2512+
2513+
(['foo: ;', '"Jason R. Mastaler" <jason@dom.ain>'],
2514+
[('', ''), ('Jason R. Mastaler', 'jason@dom.ain')]),
2515+
2516+
([r'Pete(A nice \) chap) <pete(his account)@silly.test(his host)>'],
2517+
[('Pete (A nice ) chap his account his host)', 'pete@silly.test')]),
2518+
2519+
(['(Empty list)(start)Undisclosed recipients :(nobody(I know))'],
2520+
[('', '')]),
2521+
2522+
(['Mary <@machine.tld:mary@example.net>, , jdoe@test . example'],
2523+
[('Mary', 'mary@example.net'), ('', ''), ('', 'jdoe@test.example')]),
2524+
2525+
(['John Doe <jdoe@machine(comment). example>'],
2526+
[('John Doe (comment)', 'jdoe@machine.example')]),
2527+
2528+
(['"Mary Smith: Personal Account" <smith@home.example>'],
2529+
[('Mary Smith: Personal Account', 'smith@home.example')]),
2530+
2531+
(['Undisclosed recipients:;'],
2532+
[('', '')]),
2533+
2534+
([r'<boss@nil.test>, "Giant; \"Big\" Box" <bob@example.net>'],
2535+
[('', 'boss@nil.test'), ('Giant; "Big" Box', 'bob@example.net')]),
2536+
):
2537+
self.assertEqual(utils.getaddresses(addresses),
2538+
expected)
2539+
self.assertEqual(utils.getaddresses(addresses, strict=False),
2540+
expected)
2541+
2542+
addresses = ['[]*-- =~$']
2543+
self.assertEqual(utils.getaddresses(addresses),
2544+
[('', '')])
2545+
self.assertEqual(utils.getaddresses(addresses, strict=False),
2546+
[('', ''), ('', ''), ('', '*--')])
24262547

24272548
def test_getaddresses_embedded_comment(self):
24282549
"""Test proper handling of a nested comment"""
@@ -2533,6 +2654,53 @@ def test_partial_falls_inside_message_delivery_status(self):
25332654
text/rfc822-headers
25342655
""")
25352656

2657+
def test_iter_escaped_chars(self):
2658+
self.assertEqual(list(utils._iter_escaped_chars(r'a\\b\"c\\"d')),
2659+
[(0, 'a'),
2660+
(2, '\\\\'),
2661+
(3, 'b'),
2662+
(5, '\\"'),
2663+
(6, 'c'),
2664+
(8, '\\\\'),
2665+
(9, '"'),
2666+
(10, 'd')])
2667+
self.assertEqual(list(utils._iter_escaped_chars('a\\')),
2668+
[(0, 'a'), (1, '\\')])
2669+
2670+
def test_strip_quoted_realnames(self):
2671+
def check(addr, expected):
2672+
self.assertEqual(utils._strip_quoted_realnames(addr), expected)
2673+
2674+
check('"Jane Doe" <jane@example.net>, "John Doe" <john@example.net>',
2675+
' <jane@example.net>, <john@example.net>')
2676+
check(r'"Jane \"Doe\"." <jane@example.net>',
2677+
' <jane@example.net>')
2678+
2679+
# special cases
2680+
check(r'before"name"after', 'beforeafter')
2681+
check(r'before"name"', 'before')
2682+
check(r'b"name"', 'b') # single char
2683+
check(r'"name"after', 'after')
2684+
check(r'"name"a', 'a') # single char
2685+
check(r'"name"', '')
2686+
2687+
# no change
2688+
for addr in (
2689+
'Jane Doe <jane@example.net>, John Doe <john@example.net>',
2690+
'lone " quote',
2691+
):
2692+
self.assertEqual(utils._strip_quoted_realnames(addr), addr)
2693+
2694+
2695+
def test_check_parenthesis(self):
2696+
addr = 'alice@example.net'
2697+
self.assertTrue(utils._check_parenthesis(addr + ' (Alice)'))
2698+
self.assertFalse(utils._check_parenthesis(addr + ' )Alice('))
2699+
self.assertFalse(utils._check_parenthesis(addr + ' (Alice))'))
2700+
self.assertFalse(utils._check_parenthesis(addr + ' ((Alice)'))
2701+
2702+
# Ignore real name between quotes
2703+
self.assertTrue(utils._check_parenthesis('")Alice((" ' + addr))
25362704

25372705

25382706
# Test the iterator/generators

0 commit comments

Comments
 (0)