Skip to content

Commit 7febbe6

Browse files
gh-144001: Support ignorechars in binascii.a2b_base64() and base64.b64decode() (GH-144024)
1 parent 9181d77 commit 7febbe6

File tree

13 files changed

+303
-116
lines changed

13 files changed

+303
-116
lines changed

Doc/library/base64.rst

Lines changed: 12 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -73,6 +73,7 @@ POST request.
7373

7474

7575
.. function:: b64decode(s, altchars=None, validate=False)
76+
b64decode(s, altchars=None, validate=True, *, ignorechars)
7677
7778
Decode the Base64 encoded :term:`bytes-like object` or ASCII string
7879
*s* and return the decoded :class:`bytes`.
@@ -84,11 +85,17 @@ POST request.
8485
A :exc:`binascii.Error` exception is raised
8586
if *s* is incorrectly padded.
8687

87-
If *validate* is false (the default), characters that are neither
88+
If *ignorechars* is specified, it should be a :term:`bytes-like object`
89+
containing characters to ignore from the input when *validate* is true.
90+
The default value of *validate* is ``True`` if *ignorechars* is specified,
91+
``False`` otherwise.
92+
93+
If *validate* is false, characters that are neither
8894
in the normal base-64 alphabet nor the alternative alphabet are
8995
discarded prior to the padding check, but the ``+`` and ``/`` characters
9096
keep their meaning if they are not in *altchars* (they will be discarded
9197
in future Python versions).
98+
9299
If *validate* is true, these non-alphabet characters in the input
93100
result in a :exc:`binascii.Error`.
94101

@@ -99,6 +106,10 @@ POST request.
99106
is now deprecated.
100107

101108

109+
.. versionchanged:: next
110+
Added the *ignorechars* parameter.
111+
112+
102113
.. function:: standard_b64encode(s)
103114

104115
Encode :term:`bytes-like object` *s* using the standard Base64 alphabet

Doc/library/binascii.rst

Lines changed: 9 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -49,10 +49,16 @@ The :mod:`binascii` module defines the following functions:
4949

5050

5151
.. function:: a2b_base64(string, /, *, strict_mode=False)
52+
a2b_base64(string, /, *, strict_mode=True, ignorechars)
5253
5354
Convert a block of base64 data back to binary and return the binary data. More
5455
than one line may be passed at a time.
5556

57+
If *ignorechars* is specified, it should be a :term:`bytes-like object`
58+
containing characters to ignore from the input when *strict_mode* is true.
59+
The default value of *strict_mode* is ``True`` if *ignorechars* is specified,
60+
``False`` otherwise.
61+
5662
If *strict_mode* is true, only valid base64 data will be converted. Invalid base64
5763
data will raise :exc:`binascii.Error`.
5864

@@ -66,6 +72,9 @@ The :mod:`binascii` module defines the following functions:
6672
.. versionchanged:: 3.11
6773
Added the *strict_mode* parameter.
6874

75+
.. versionchanged:: next
76+
Added the *ignorechars* parameter.
77+
6978

7079
.. function:: b2a_base64(data, *, wrapcol=0, newline=True)
7180

Doc/whatsnew/3.15.rst

Lines changed: 5 additions & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -444,13 +444,18 @@ base64
444444
* Added the *wrapcol* parameter in :func:`~base64.b64encode`.
445445
(Contributed by Serhiy Storchaka in :gh:`143214`.)
446446

447+
* Added the *ignorechars* parameter in :func:`~base64.b64decode`.
448+
(Contributed by Serhiy Storchaka in :gh:`144001`.)
447449

448450
binascii
449451
--------
450452

451453
* Added the *wrapcol* parameter in :func:`~binascii.b2a_base64`.
452454
(Contributed by Serhiy Storchaka in :gh:`143214`.)
453455

456+
* Added the *ignorechars* parameter in :func:`~binascii.a2b_base64`.
457+
(Contributed by Serhiy Storchaka in :gh:`144001`.)
458+
454459

455460
calendar
456461
--------

Include/internal/pycore_global_objects_fini_generated.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_global_strings.h

Lines changed: 1 addition & 0 deletions
Original file line numberDiff line numberDiff line change
@@ -520,6 +520,7 @@ struct _Py_global_strings {
520520
STRUCT_FOR_ID(ident)
521521
STRUCT_FOR_ID(identity_hint)
522522
STRUCT_FOR_ID(ignore)
523+
STRUCT_FOR_ID(ignorechars)
523524
STRUCT_FOR_ID(imag)
524525
STRUCT_FOR_ID(implieslink)
525526
STRUCT_FOR_ID(importlib)

Include/internal/pycore_runtime_init_generated.h

Lines changed: 1 addition & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Include/internal/pycore_unicodeobject_generated.h

Lines changed: 4 additions & 0 deletions
Some generated files are not rendered by default. Learn more about customizing how changed files appear on GitHub.

Lib/base64.py

Lines changed: 41 additions & 13 deletions
Original file line numberDiff line numberDiff line change
@@ -26,6 +26,8 @@
2626
]
2727

2828

29+
_NOT_SPECIFIED = ['NOT SPECIFIED']
30+
2931
bytes_types = (bytes, bytearray) # Types acceptable as binary data
3032

3133
def _bytes_from_decode_data(s):
@@ -62,7 +64,7 @@ def b64encode(s, altchars=None, *, wrapcol=0):
6264
return encoded
6365

6466

65-
def b64decode(s, altchars=None, validate=False):
67+
def b64decode(s, altchars=None, validate=_NOT_SPECIFIED, *, ignorechars=_NOT_SPECIFIED):
6668
"""Decode the Base64 encoded bytes-like object or ASCII string s.
6769
6870
Optional altchars must be a bytes-like object or ASCII string of length 2
@@ -72,38 +74,64 @@ def b64decode(s, altchars=None, validate=False):
7274
The result is returned as a bytes object. A binascii.Error is raised if
7375
s is incorrectly padded.
7476
75-
If validate is false (the default), characters that are neither in the
76-
normal base-64 alphabet nor the alternative alphabet are discarded prior
77-
to the padding check. If validate is true, these non-alphabet characters
78-
in the input result in a binascii.Error.
77+
If ignorechars is specified, it should be a bytes-like object containing
78+
characters to ignore from the input. The default value of validate is
79+
True if ignorechars is specified, False otherwise.
80+
81+
If validate is false, characters that are neither in the normal base-64
82+
alphabet nor the alternative alphabet are discarded prior to the
83+
padding check. If validate is true, these non-alphabet characters in
84+
the input result in a binascii.Error if they are not in ignorechars.
7985
For more information about the strict base64 check, see:
8086
8187
https://docs.python.org/3.11/library/binascii.html#binascii.a2b_base64
8288
"""
8389
s = _bytes_from_decode_data(s)
90+
if validate is _NOT_SPECIFIED:
91+
validate = ignorechars is not _NOT_SPECIFIED
92+
if ignorechars is _NOT_SPECIFIED:
93+
ignorechars = b''
8494
badchar = None
95+
badchar_strict = False
8596
if altchars is not None:
8697
altchars = _bytes_from_decode_data(altchars)
8798
if len(altchars) != 2:
8899
raise ValueError(f'invalid altchars: {altchars!r}')
89100
for b in b'+/':
90101
if b not in altchars and b in s:
91-
badchar = b
92-
break
102+
if badchar is None:
103+
badchar = b
104+
if not validate:
105+
break
106+
if not isinstance(ignorechars, (bytes, bytearray)):
107+
ignorechars = memoryview(ignorechars).cast('B')
108+
if b not in ignorechars:
109+
badchar_strict = True
110+
badchar = b
111+
break
93112
s = s.translate(bytes.maketrans(altchars, b'+/'))
94-
result = binascii.a2b_base64(s, strict_mode=validate)
113+
result = binascii.a2b_base64(s, strict_mode=validate,
114+
ignorechars=ignorechars)
95115
if badchar is not None:
96116
import warnings
97-
if validate:
117+
if badchar_strict:
98118
warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
99119
f'with altchars={altchars!r} and validate=True '
100120
f'will be an error in future Python versions',
101121
DeprecationWarning, stacklevel=2)
102122
else:
103-
warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
104-
f'with altchars={altchars!r} and validate=False '
105-
f'will be discarded in future Python versions',
106-
FutureWarning, stacklevel=2)
123+
ignorechars = bytes(ignorechars)
124+
if ignorechars:
125+
warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
126+
f'with altchars={altchars!r} '
127+
f'and ignorechars={ignorechars!r} '
128+
f'will be discarded in future Python versions',
129+
FutureWarning, stacklevel=2)
130+
else:
131+
warnings.warn(f'invalid character {chr(badchar)!a} in Base64 data '
132+
f'with altchars={altchars!r} and validate=False '
133+
f'will be discarded in future Python versions',
134+
FutureWarning, stacklevel=2)
107135
return result
108136

109137

Lib/test/test_base64.py

Lines changed: 81 additions & 25 deletions
Original file line numberDiff line numberDiff line change
@@ -303,22 +303,26 @@ def test_b64decode_padding_error(self):
303303

304304
def test_b64decode_invalid_chars(self):
305305
# issue 1466065: Test some invalid characters.
306-
tests = ((b'%3d==', b'\xdd'),
307-
(b'$3d==', b'\xdd'),
308-
(b'[==', b''),
309-
(b'YW]3=', b'am'),
310-
(b'3{d==', b'\xdd'),
311-
(b'3d}==', b'\xdd'),
312-
(b'@@', b''),
313-
(b'!', b''),
314-
(b"YWJj\n", b"abc"),
315-
(b'YWJj\nYWI=', b'abcab'))
306+
tests = ((b'%3d==', b'\xdd', b'%$'),
307+
(b'$3d==', b'\xdd', b'%$'),
308+
(b'[==', b'', None),
309+
(b'YW]3=', b'am', b']'),
310+
(b'3{d==', b'\xdd', b'{}'),
311+
(b'3d}==', b'\xdd', b'{}'),
312+
(b'@@', b'', b'@!'),
313+
(b'!', b'', b'@!'),
314+
(b"YWJj\n", b"abc", b'\n'),
315+
(b'YWJj\nYWI=', b'abcab', b'\n'),
316+
(b'YW\nJj', b'abc', b'\n'),
317+
(b'YW\nJj', b'abc', bytearray(b'\n')),
318+
(b'YW\nJj', b'abc', memoryview(b'\n')),
319+
)
316320
funcs = (
317321
base64.b64decode,
318322
base64.standard_b64decode,
319323
base64.urlsafe_b64decode,
320324
)
321-
for bstr, res in tests:
325+
for bstr, res, ignorechars in tests:
322326
for func in funcs:
323327
with self.subTest(bstr=bstr, func=func):
324328
self.assertEqual(func(bstr), res)
@@ -327,24 +331,76 @@ def test_b64decode_invalid_chars(self):
327331
base64.b64decode(bstr, validate=True)
328332
with self.assertRaises(binascii.Error):
329333
base64.b64decode(bstr.decode('ascii'), validate=True)
334+
with self.assertRaises(binascii.Error):
335+
# Even empty ignorechars enables the strict mode.
336+
base64.b64decode(bstr, ignorechars=b'')
337+
if ignorechars is not None:
338+
r = base64.b64decode(bstr, ignorechars=ignorechars)
339+
self.assertEqual(r, res)
340+
341+
with self.assertRaises(TypeError):
342+
base64.b64decode(b'', ignorechars='')
343+
with self.assertRaises(TypeError):
344+
base64.b64decode(b'', ignorechars=[])
345+
with self.assertRaises(TypeError):
346+
base64.b64decode(b'', ignorechars=None)
330347

331348
# Normal alphabet characters will be discarded when an alternative alphabet is given
332-
with self.assertWarns(FutureWarning):
333-
self.assertEqual(base64.b64decode(b'++++', altchars=b'-_'),
334-
b'\xfb\xef\xbe')
335-
with self.assertWarns(FutureWarning):
336-
self.assertEqual(base64.b64decode(b'////', altchars=b'-_'),
337-
b'\xff\xff\xff')
338-
with self.assertWarns(DeprecationWarning):
339-
self.assertEqual(base64.b64decode(b'++++', altchars=b'-_', validate=True),
340-
b'\xfb\xef\xbe')
341-
with self.assertWarns(DeprecationWarning):
342-
self.assertEqual(base64.b64decode(b'////', altchars=b'-_', validate=True),
343-
b'\xff\xff\xff')
344-
with self.assertWarns(FutureWarning):
349+
discarded = ("invalid character %a in Base64 data with %s "
350+
"will be discarded in future Python versions")
351+
error = ("invalid character %a in Base64 data with %s "
352+
"will be an error in future Python versions")
353+
with self.assertWarns(FutureWarning) as cm:
354+
r = base64.b64decode(b'++++', altchars=b'-_')
355+
self.assertEqual(r, b'\xfb\xef\xbe')
356+
self.assertEqual(str(cm.warning),
357+
discarded % ('+', "altchars=b'-_' and validate=False"))
358+
with self.assertWarns(FutureWarning) as cm:
359+
r = base64.b64decode(b'////', altchars=b'-_')
360+
self.assertEqual(r, b'\xff\xff\xff')
361+
self.assertEqual(str(cm.warning),
362+
discarded % ('/', "altchars=b'-_' and validate=False"))
363+
with self.assertWarns(DeprecationWarning) as cm:
364+
r = base64.b64decode(b'++++', altchars=b'-_', validate=True)
365+
self.assertEqual(r, b'\xfb\xef\xbe')
366+
self.assertEqual(str(cm.warning),
367+
error % ('+', "altchars=b'-_' and validate=True"))
368+
with self.assertWarns(DeprecationWarning) as cm:
369+
r = base64.b64decode(b'////', altchars=b'-_', validate=True)
370+
self.assertEqual(r, b'\xff\xff\xff')
371+
self.assertEqual(str(cm.warning),
372+
error % ('/', "altchars=b'-_' and validate=True"))
373+
with self.assertWarns(FutureWarning) as cm:
374+
r = base64.b64decode(b'++++', altchars=b'-_', ignorechars=b'+')
375+
self.assertEqual(r, b'\xfb\xef\xbe')
376+
self.assertEqual(str(cm.warning),
377+
discarded % ('+', "altchars=b'-_' and ignorechars=b'+'"))
378+
with self.assertWarns(FutureWarning) as cm:
379+
r = base64.b64decode(b'////', altchars=b'-_', ignorechars=b'/')
380+
self.assertEqual(r, b'\xff\xff\xff')
381+
self.assertEqual(str(cm.warning),
382+
discarded % ('/', "altchars=b'-_' and ignorechars=b'/'"))
383+
with self.assertWarns(DeprecationWarning) as cm:
384+
r = base64.b64decode(b'++++////', altchars=b'-_', ignorechars=b'+')
385+
self.assertEqual(r, b'\xfb\xef\xbe\xff\xff\xff')
386+
self.assertEqual(str(cm.warning),
387+
error % ('/', "altchars=b'-_' and validate=True"))
388+
with self.assertWarns(DeprecationWarning) as cm:
389+
r = base64.b64decode(b'++++////', altchars=b'-_', ignorechars=b'/')
390+
self.assertEqual(r, b'\xfb\xef\xbe\xff\xff\xff')
391+
self.assertEqual(str(cm.warning),
392+
error % ('+', "altchars=b'-_' and validate=True"))
393+
394+
with self.assertWarns(FutureWarning) as cm:
345395
self.assertEqual(base64.urlsafe_b64decode(b'++++'), b'\xfb\xef\xbe')
346-
with self.assertWarns(FutureWarning):
396+
self.assertEqual(str(cm.warning),
397+
"invalid character '+' in URL-safe Base64 data "
398+
"will be discarded in future Python versions")
399+
with self.assertWarns(FutureWarning) as cm:
347400
self.assertEqual(base64.urlsafe_b64decode(b'////'), b'\xff\xff\xff')
401+
self.assertEqual(str(cm.warning),
402+
"invalid character '/' in URL-safe Base64 data "
403+
"will be discarded in future Python versions")
348404
with self.assertRaises(binascii.Error):
349405
base64.b64decode(b'+/!', altchars=b'-_')
350406

0 commit comments

Comments
 (0)