Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
8 changes: 8 additions & 0 deletions CHANGES/11112.bugfix.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
Fixed cookie parsing to be more lenient when handling cookies with special characters
in names or values. Cookies with characters like ``{``, ``}``, and ``/`` in names are now
accepted instead of causing a :exc:`~http.cookies.CookieError` and 500 errors. Additionally,
cookies with mismatched quotes in values are now parsed correctly, and quoted cookie
values are now handled consistently whether or not they include special attributes
like ``Domain``. Also fixed :class:`~aiohttp.CookieJar` to ensure shared cookies (domain="", path="")
respect the ``quote_cookie`` parameter, making cookie quoting behavior consistent for
all cookies -- by :user:`bdraco`.
1 change: 1 addition & 0 deletions CHANGES/11114.misc.rst
Original file line number Diff line number Diff line change
@@ -0,0 +1 @@
Downgraded the logging level for connector close errors from ERROR to DEBUG, as such errors are expected with TLS 1.3 connections -- by :user:`bdraco`.
1 change: 1 addition & 0 deletions CHANGES/2683.bugfix.rst
1 change: 1 addition & 0 deletions CHANGES/5397.bugfix.rst
1 change: 1 addition & 0 deletions CHANGES/7993.bugfix.rst
221 changes: 221 additions & 0 deletions aiohttp/_cookie_helpers.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,221 @@
"""
Internal cookie handling helpers.

This module contains internal utilities for cookie parsing and manipulation.
These are not part of the public API and may change without notice.
"""

import re
import sys
from http.cookies import Morsel
from typing import List, Optional, Sequence, Tuple, cast

from .log import internal_logger

__all__ = ("parse_cookie_headers", "preserve_morsel_with_coded_value")

# --- Cookie parsing constants ---------------------------------------------
#
# Cookie-name validator. RFC 6265 defines cookie-name as an RFC 2616
# Section 2.2 token, but servers in the wild send names containing
# characters such as {} [] () (see #2683). The accepted set is therefore
# deliberately wider than the RFC, while still rejecting names that are
# obviously malformed.
_COOKIE_NAME_RE = re.compile(r"^[!#$%&\'()*+\-./0-9:<=>?@A-Z\[\]^_`a-z{|}~]+$")

# Attribute names recognised on a cookie (AKA Morsel._reserved).
_COOKIE_KNOWN_ATTRS = frozenset(
    {
        "path",
        "domain",
        "max-age",
        "expires",
        "secure",
        "httponly",
        "samesite",
        "partitioned",
        "version",
        "comment",
    }
)

# Subset of the known attributes that are flags (AKA Morsel._flags).
_COOKIE_BOOL_ATTRS = frozenset({"secure", "httponly", "partitioned"})

# SimpleCookie's pattern for parsing cookies with relaxed validation
# Based on http.cookies pattern but extended to allow more characters in cookie names
# to handle real-world cookies (fixes #2683)
#
# Each match consumes one ``key`` or ``key=value`` item together with its
# trailing separator (whitespace, ``;`` or end of string). Named groups:
#   key - the cookie or attribute name (at least one character)
#   val - the raw value, still carrying its quotes if it was quoted;
#         None when the item has no ``=value`` part
# re.VERBOSE allows the inline layout and comments inside the pattern;
# re.ASCII restricts \w, \d and \s to ASCII characters.
_COOKIE_PATTERN = re.compile(
    r"""
\s* # Optional whitespace at start of cookie
(?P<key> # Start of group 'key'
# aiohttp has extended to include [] for compatibility with real-world cookies
[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]+? # Any word of at least one letter
) # End of group 'key'
( # Optional group: there may not be a value.
\s*=\s* # Equal Sign
(?P<val> # Start of group 'val'
"(?:[^\\"]|\\.)*" # Any double-quoted string (properly closed)
| # or
"[^";]* # Unmatched opening quote (differs from SimpleCookie - issue #7993)
| # or
# Special case for "expires" attr - RFC 822, RFC 850, RFC 1036, RFC 1123
(\w{3,6}day|\w{3}),\s # Day of the week or abbreviated day (with comma)
[\w\d\s-]{9,11}\s[\d:]{8}\s # Date and time in specific format
(GMT|[+-]\d{4}) # Timezone: GMT or RFC 2822 offset like -0000, +0100
# NOTE: RFC 2822 timezone support is an aiohttp extension
# for issue #4493 - SimpleCookie does NOT support this
| # or
# ANSI C asctime() format: "Wed Jun 9 10:18:14 2021"
# NOTE: This is an aiohttp extension for issue #4327 - SimpleCookie does NOT support this format
\w{3}\s+\w{3}\s+[\s\d]\d\s+\d{2}:\d{2}:\d{2}\s+\d{4}
| # or
[\w\d!#%&'~_`><@,:/\$\*\+\-\.\^\|\)\(\?\}\{\=\[\]]* # Any word or empty string
) # End of group 'val'
)? # End of optional value group
\s* # Any number of spaces.
(\s+|;|$) # Ending either at space, semicolon, or EOS.
""",
    re.VERBOSE | re.ASCII,
)


def preserve_morsel_with_coded_value(cookie: Morsel[str]) -> Morsel[str]:
    """
    Preserve a Morsel's coded_value exactly as received from the server.

    This function ensures that cookie encoding is preserved exactly as sent by
    the server, which is critical for compatibility with old servers that have
    strict requirements about cookie formats.

    This addresses the issue described in https://github.com/aio-libs/aiohttp/pull/1453
    where Python's SimpleCookie would re-encode cookies, breaking authentication
    with certain servers.

    Only ``key``, ``value`` and ``coded_value`` are carried over; the returned
    Morsel has default (empty) attributes and is always a new object, so the
    input is never mutated.

    Args:
        cookie: A Morsel object from SimpleCookie

    Returns:
        A new Morsel object with preserved coded_value

    """
    # Always start from a fresh Morsel. Looking the cookie name up in the
    # source Morsel's attribute mapping would hand back a plain string when
    # the name collides with a reserved attribute (e.g. "path"), and the
    # __setstate__ call below would then fail.
    mrsl_val: Morsel[str] = Morsel()
    # We use __setstate__ instead of the public set() API because it allows us to
    # bypass validation and set already validated state. This is more stable than
    # setting protected attributes directly and unlikely to change since it would
    # break pickling.
    mrsl_val.__setstate__(  # type: ignore[attr-defined]
        {"key": cookie.key, "value": cookie.value, "coded_value": cookie.coded_value}
    )
    return mrsl_val


# Matches one backslash escape inside a quoted cookie value: either a
# three-digit octal code (\000-\377) or a backslash followed by any single
# character. Bound to .sub once so the pattern lookup is not repeated per call.
_UNQUOTE_ESCAPE_SUB = re.compile(r"\\(?:([0-3][0-7][0-7])|(.))").sub


def _unquote_replace(m: "re.Match[str]") -> str:
    """Return the character represented by one matched escape sequence."""
    if m[1]:
        # Octal escape such as \073 (";") or \054 (",")
        return chr(int(m[1], 8))
    # Any other escaped character stands for itself (\" -> ", \\ -> \)
    return m[2]


def _unquote(text: str) -> str:
    """
    Unquote a cookie value.

    Vendored from http.cookies._unquote to ensure compatibility.

    A value is only unquoted when it is wrapped in a matching pair of double
    quotes; anything else (including a lone ``"`` or a value with an
    unmatched opening quote) is returned unchanged. Inside the quotes,
    ``\\NNN`` octal escapes are decoded to the corresponding character and
    ``\\x`` is replaced by ``x``.

    Args:
        text: The raw cookie value, possibly double-quoted

    Returns:
        The unquoted, unescaped value

    """
    # If there are no surrounding quotes, return as-is
    if len(text) < 2 or text[0] != '"' or text[-1] != '"':
        return text
    # Strip the quotes and decode every escape in a single left-to-right pass
    # so that sequences like \\073 are not decoded twice.
    return _UNQUOTE_ESCAPE_SUB(_unquote_replace, text[1:-1])


def _set_morsel_attr(morsel: Morsel[str], name: str, value: object) -> None:
    """Set reserved attribute *name* on *morsel*, tolerating old Pythons."""
    if name == "partitioned" and sys.version_info < (3, 14):
        # Before Python 3.14 Morsel does not list "partitioned" among its
        # reserved attributes and Morsel.__setitem__ rejects it, so store
        # the value through the underlying dict instead.
        dict.__setitem__(morsel, name, value)
    else:
        morsel[name] = value


def parse_cookie_headers(headers: Sequence[str]) -> List[Tuple[str, Morsel[str]]]:
    """
    Parse cookie headers using a vendored version of SimpleCookie parsing.

    This implementation is based on SimpleCookie.__parse_string to ensure
    compatibility with how SimpleCookie parses cookies, including handling
    of malformed cookies with missing semicolons.

    This function is used for both Cookie and Set-Cookie headers in order to be
    forgiving. Ideally we would have followed RFC 6265 Section 5.2 (for Cookie
    headers) and RFC 6265 Section 4.2.1 (for Set-Cookie headers), but the
    real world data makes it impossible since we need to be a bit more forgiving.

    NOTE: This implementation differs from SimpleCookie in handling unmatched quotes.
    SimpleCookie will stop parsing when it encounters a cookie value with an unmatched
    quote (e.g., 'cookie="value'), causing subsequent cookies to be silently dropped.
    This implementation handles unmatched quotes more gracefully to prevent cookie loss.
    See https://github.com/aio-libs/aiohttp/issues/7993

    Args:
        headers: Raw header values; empty strings are skipped

    Returns:
        A list of ``(name, morsel)`` tuples in the order the cookies were
        found. Cookies with an illegal name are logged and left out. Parsing
        of a single header stops early (keeping what was parsed so far) when
        an attribute appears before any cookie, a non-boolean attribute has
        no value, or a non-attribute item has no value.

    """
    parsed_cookies: List[Tuple[str, Morsel[str]]] = []

    for header in headers:
        if not header:
            continue

        # Parse cookie string using SimpleCookie's algorithm
        i = 0
        n = len(header)
        # Morsel that following attributes attach to; None after a rejected
        # cookie so that its attributes are discarded with it.
        current_morsel: Optional[Morsel[str]] = None
        morsel_seen = False

        while 0 <= i < n:
            # Start looking for a cookie
            match = _COOKIE_PATTERN.match(header, i)
            if not match:
                # No more cookies
                break

            key, value = match.group("key"), match.group("val")
            i = match.end(0)
            lower_key = key.lower()

            if key[0] == "$":
                if not morsel_seen:
                    # We ignore attributes which pertain to the cookie
                    # mechanism as a whole, such as "$Version".
                    continue
                # Process as attribute
                if current_morsel is not None:
                    attr_lower_key = lower_key[1:]
                    if attr_lower_key in _COOKIE_KNOWN_ATTRS:
                        # Go through the helper so "$Partitioned" cannot
                        # raise CookieError on Python < 3.14.
                        _set_morsel_attr(current_morsel, attr_lower_key, value or "")
            elif lower_key in _COOKIE_KNOWN_ATTRS:
                if not morsel_seen:
                    # Invalid cookie string - attribute before cookie
                    break
                if lower_key in _COOKIE_BOOL_ATTRS:
                    # Boolean attribute with any value should be True
                    if current_morsel is not None:
                        _set_morsel_attr(current_morsel, lower_key, True)
                elif value is None:
                    # Invalid cookie string - non-boolean attribute without value
                    break
                elif current_morsel is not None:
                    # Regular attribute with value
                    current_morsel[lower_key] = _unquote(value)
            elif value is not None:
                # This is a cookie name=value pair
                # Validate the name. Reserved attribute names never reach this
                # branch (they are handled above, case-insensitively), so only
                # the character check is needed.
                if not _COOKIE_NAME_RE.match(key):
                    internal_logger.warning(
                        "Can not load cookies: Illegal cookie name %r", key
                    )
                    current_morsel = None
                else:
                    # Create new morsel
                    current_morsel = Morsel()
                    # Preserve the original value as coded_value (with quotes if present)
                    # We use __setstate__ instead of the public set() API because it allows us to
                    # bypass validation and set already validated state. This is more stable than
                    # setting protected attributes directly and unlikely to change since it would
                    # break pickling.
                    current_morsel.__setstate__(  # type: ignore[attr-defined]
                        {"key": key, "value": _unquote(value), "coded_value": value}
                    )
                    parsed_cookies.append((key, current_morsel))
                    morsel_seen = True
            else:
                # Invalid cookie string - no value for non-attribute
                break

    return parsed_cookies
26 changes: 4 additions & 22 deletions aiohttp/abc.py
Original file line number Diff line number Diff line change
Expand Up @@ -2,7 +2,7 @@
import socket
from abc import ABC, abstractmethod
from collections.abc import Sized
from http.cookies import BaseCookie, CookieError, Morsel, SimpleCookie
from http.cookies import BaseCookie, Morsel
from typing import (
TYPE_CHECKING,
Any,
Expand All @@ -22,7 +22,7 @@
from multidict import CIMultiDict
from yarl import URL

from .log import client_logger
from ._cookie_helpers import parse_cookie_headers
from .typedefs import LooseCookies

if TYPE_CHECKING:
Expand Down Expand Up @@ -193,26 +193,8 @@ def update_cookies(self, cookies: LooseCookies, response_url: URL = URL()) -> No
def update_cookies_from_headers(
self, headers: Sequence[str], response_url: URL
) -> None:
"""
Update cookies from raw Set-Cookie headers.

Default implementation parses each header separately to preserve
cookies with same name but different domain/path.
"""
# Default implementation for backward compatibility
cookies_to_update: List[Tuple[str, Morsel[str]]] = []
for cookie_header in headers:
tmp_cookie = SimpleCookie()
try:
tmp_cookie.load(cookie_header)
# Collect all cookies as tuples (name, morsel)
for name, morsel in tmp_cookie.items():
cookies_to_update.append((name, morsel))
except CookieError as exc:
client_logger.warning("Can not load response cookies: %s", exc)

# Update all cookies at once for efficiency
if cookies_to_update:
"""Update cookies from raw Set-Cookie headers."""
if headers and (cookies_to_update := parse_cookie_headers(headers)):
self.update_cookies(cookies_to_update, response_url)

@abstractmethod
Expand Down
21 changes: 9 additions & 12 deletions aiohttp/client_reqrep.py
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@
import warnings
from collections.abc import Mapping
from hashlib import md5, sha1, sha256
from http.cookies import CookieError, Morsel, SimpleCookie
from http.cookies import Morsel, SimpleCookie
from types import MappingProxyType, TracebackType
from typing import (
TYPE_CHECKING,
Expand All @@ -30,6 +30,7 @@
from yarl import URL

from . import hdrs, helpers, http, multipart, payload
from ._cookie_helpers import parse_cookie_headers, preserve_morsel_with_coded_value
from .abc import AbstractStreamWriter
from .client_exceptions import (
ClientConnectionError,
Expand Down Expand Up @@ -64,7 +65,6 @@
HttpVersion11,
StreamWriter,
)
from .log import client_logger
from .streams import StreamReader
from .typedefs import (
DEFAULT_JSON_DECODER,
Expand Down Expand Up @@ -313,11 +313,9 @@ def cookies(self) -> SimpleCookie:
if self._raw_cookie_headers is not None:
# Parse cookies for response.cookies (SimpleCookie for backward compatibility)
cookies = SimpleCookie()
for hdr in self._raw_cookie_headers:
try:
cookies.load(hdr)
except CookieError as exc:
client_logger.warning("Can not load response cookies: %s", exc)
# Use parse_cookie_headers for more lenient parsing that handles
# malformed cookies better than SimpleCookie.load
cookies.update(parse_cookie_headers(self._raw_cookie_headers))
self._cookies = cookies
else:
self._cookies = SimpleCookie()
Expand Down Expand Up @@ -1016,7 +1014,8 @@ def update_cookies(self, cookies: Optional[LooseCookies]) -> None:

c = SimpleCookie()
if hdrs.COOKIE in self.headers:
c.load(self.headers.get(hdrs.COOKIE, ""))
# parse_cookie_headers already preserves coded values
c.update(parse_cookie_headers((self.headers.get(hdrs.COOKIE, ""),)))
del self.headers[hdrs.COOKIE]

if isinstance(cookies, Mapping):
Expand All @@ -1025,10 +1024,8 @@ def update_cookies(self, cookies: Optional[LooseCookies]) -> None:
iter_cookies = cookies # type: ignore[assignment]
for name, value in iter_cookies:
if isinstance(value, Morsel):
# Preserve coded_value
mrsl_val = value.get(value.key, Morsel())
mrsl_val.set(value.key, value.value, value.coded_value)
c[name] = mrsl_val
# Use helper to preserve coded_value exactly as sent by server
c[name] = preserve_morsel_with_coded_value(value)
else:
c[name] = value # type: ignore[assignment]

Expand Down
4 changes: 2 additions & 2 deletions aiohttp/connector.py
Original file line number Diff line number Diff line change
@@ -1,6 +1,5 @@
import asyncio
import functools
import logging
import random
import socket
import sys
Expand Down Expand Up @@ -60,6 +59,7 @@
set_exception,
set_result,
)
from .log import client_logger
from .resolver import DefaultResolver

if sys.version_info >= (3, 12):
Expand Down Expand Up @@ -439,7 +439,7 @@ async def close(self) -> None:
for res in results:
if isinstance(res, Exception):
err_msg = "Error while closing connector: " + repr(res)
logging.error(err_msg)
client_logger.debug(err_msg)

def _close_immediately(self) -> List[Awaitable[object]]:
waiters: List[Awaitable[object]] = []
Expand Down
Loading
Loading