Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
27 changes: 16 additions & 11 deletions jigsawstack/async_request.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,9 +35,13 @@ def __init__(
self.base_url = config.get("base_url")
self.api_key = config.get("api_key")
self.data = data
self.headers = config.get("headers", None) or {"Content-Type": "application/json"}
# Defensive copy: we must not mutate the caller's config dict. Without
# this, __get_headers() could permanently strip keys (e.g. Content-Type)
# from the shared config that all service-class methods reuse.
raw_headers = config.get("headers", None) or {"Content-Type": "application/json"}
self.headers: Dict[str, str] = dict(raw_headers)
self.stream = stream
self.files = files # Store files for multipart requests
self.files = files

def __convert_params(
self, params: Union[Dict[Any, Any], List[Dict[Any, Any]]]
Expand Down Expand Up @@ -175,19 +179,22 @@ def __get_headers(self) -> Dict[str, str]:
"x-api-key": f"{self.api_key}",
}

# only add Content-Type if not using multipart (files)
# Only add Content-Type if not using multipart (files)
if not self.files and not self.data:
h["Content-Type"] = "application/json"

_headers = h.copy()
# Work on a local copy so we never mutate self.headers (which itself is
# already a copy of the original config dict — see __init__).
caller_headers = dict(self.headers)

# don't override Content-Type if using multipart
if self.files and "Content-Type" in self.headers:
self.headers.pop("Content-Type")
# Strip Content-Type for multipart requests so that aiohttp can set
# the correct multipart/form-data boundary itself.
if self.files:
caller_headers.pop("Content-Type", None)

_headers.update(self.headers)
h.update(caller_headers)

return _headers
return h

async def perform_streaming(self) -> AsyncGenerator[Union[T, str], None]:
"""
Expand Down Expand Up @@ -253,8 +260,6 @@ async def make_request(
_form_data.add_field("file", BytesIO(files["file"]), filename="upload")
if params and isinstance(params, dict):
_form_data.add_field("body", json.dumps(params), content_type="application/json")

headers.pop("Content-Type", None)
elif data: # raw data request
_data = data
else: # pure JSON request
Expand Down
22 changes: 14 additions & 8 deletions jigsawstack/request.py
Original file line number Diff line number Diff line change
Expand Up @@ -35,7 +35,11 @@ def __init__(
self.base_url = config.get("base_url")
self.api_key = config.get("api_key")
self.data = data
self.headers = config.get("headers", None) or {"Content-Type": "application/json"}
# Defensive copy: we must not mutate the caller's config dict. Without
# this, __get_headers() could permanently strip keys (e.g. Content-Type)
# from the shared config that all service-class methods reuse.
raw_headers = config.get("headers", None) or {"Content-Type": "application/json"}
self.headers: Dict[str, str] = dict(raw_headers)
self.stream = stream
self.files = files

Expand Down Expand Up @@ -160,15 +164,18 @@ def __get_headers(self) -> Dict[Any, Any]:
if not self.files and not self.data:
h["Content-Type"] = "application/json"

_headers = h.copy()
# Work on a local copy so we never mutate self.headers (which itself is
# already a copy of the original config dict — see __init__).
caller_headers = dict(self.headers)

# Don't override Content-Type if using multipart
if self.files and "Content-Type" in self.headers:
self.headers.pop("Content-Type")
# Strip Content-Type for multipart requests so that the requests
# library can set the correct multipart/form-data boundary itself.
if self.files:
caller_headers.pop("Content-Type", None)

_headers.update(self.headers)
h.update(caller_headers)

return _headers
return h

def perform_streaming(self) -> Generator[Union[T, str], None, None]:
"""Is the main function that makes the HTTP request
Expand Down Expand Up @@ -261,7 +268,6 @@ def make_request(self, url: str) -> requests.Response:
_files = files
if params and isinstance(params, dict):
_data = {"body": json.dumps(params)}
headers.pop("Content-Type", None) # let requests set it for multipart
elif data: # raw data request
_data = data
else: # pure JSON request
Expand Down
153 changes: 153 additions & 0 deletions tests/test_headers_mutation.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,153 @@
"""
tests/test_headers_mutation.py

Regression tests for the __get_headers mutation bug.

Both Request and AsyncRequest were holding a direct reference to the headers
dict inside the caller's config TypedDict and calling .pop("Content-Type") on
it during multipart file-upload requests. That permanently modified the shared
config, so every subsequent request reusing the same config would go out without
Content-Type.

These tests are self-contained and require no API key or network access.
"""

from typing import Optional

from jigsawstack.async_request import AsyncRequest, AsyncRequestConfig
from jigsawstack.request import Request, RequestConfig


# ---------------------------------------------------------------------------
# Helpers
# ---------------------------------------------------------------------------

def _sync_config(extra_headers: Optional[dict] = None) -> RequestConfig:
headers = {"Content-Type": "application/json"}
if extra_headers:
headers.update(extra_headers)
return RequestConfig(base_url="http://test", api_key="key", headers=headers)


def _async_config(extra_headers: Optional[dict] = None) -> AsyncRequestConfig:
headers = {"Content-Type": "application/json"}
if extra_headers:
headers.update(extra_headers)
return AsyncRequestConfig(base_url="http://test", api_key="key", headers=headers)


# ---------------------------------------------------------------------------
# Sync Request
# ---------------------------------------------------------------------------

class TestSyncRequestHeadersMutation:
def test_multipart_does_not_mutate_config_headers(self):
"""A multipart request must not remove Content-Type from the config dict."""
config = _sync_config()
r = Request(config=config, path="/test", params={}, verb="post", files={"file": b"data"})
r._Request__get_headers()
assert "Content-Type" in config["headers"], (
"__get_headers() stripped Content-Type from the caller's config dict"
)

def test_repeated_multipart_calls_do_not_degrade(self):
"""Calling __get_headers() multiple times must always produce the same result."""
config = _sync_config()
r = Request(config=config, path="/test", params={}, verb="post", files={"file": b"data"})
first = r._Request__get_headers()
second = r._Request__get_headers()
assert first == second, "__get_headers() returned different results on second call"

def test_multipart_output_omits_content_type(self):
"""Outgoing headers for a multipart request must not contain Content-Type
so that the requests library can insert the correct multipart boundary."""
config = _sync_config()
r = Request(config=config, path="/test", params={}, verb="post", files={"file": b"data"})
out = r._Request__get_headers()
assert "Content-Type" not in out, (
"Content-Type should be absent from multipart outgoing headers"
)

def test_json_request_includes_content_type(self):
"""Plain JSON requests must still set Content-Type: application/json."""
config = _sync_config()
r = Request(config=config, path="/test", params={"k": "v"}, verb="post")
out = r._Request__get_headers()
assert out.get("Content-Type") == "application/json"

def test_custom_headers_propagated(self):
"""Any extra caller-supplied headers must survive into the outgoing dict."""
config = _sync_config(extra_headers={"x-custom": "value"})
r = Request(config=config, path="/test", params={}, verb="post")
out = r._Request__get_headers()
assert out.get("x-custom") == "value"

def test_second_request_on_same_config_unaffected(self):
"""A second Request built from the same config after a multipart call
must still produce correct headers — the config was not poisoned."""
config = _sync_config()
# First request: multipart — used to trigger the bug
r1 = Request(config=config, path="/test", params={}, verb="post", files={"file": b"x"})
r1._Request__get_headers()
# Second request: plain JSON — previously would be missing Content-Type
r2 = Request(config=config, path="/test", params={"k": "v"}, verb="post")
out = r2._Request__get_headers()
assert out.get("Content-Type") == "application/json", (
"Config was mutated by the first multipart request; second request lost Content-Type"
)


# ---------------------------------------------------------------------------
# Async Request
# ---------------------------------------------------------------------------

class TestAsyncRequestHeadersMutation:
def test_multipart_does_not_mutate_config_headers(self):
"""A multipart request must not remove Content-Type from the config dict."""
config = _async_config()
r = AsyncRequest(config=config, path="/test", params={}, verb="post", files={"file": b"data"})
r._AsyncRequest__get_headers()
assert "Content-Type" in config["headers"], (
"__get_headers() stripped Content-Type from the caller's async config dict"
)

def test_repeated_multipart_calls_do_not_degrade(self):
"""Calling __get_headers() multiple times must always produce the same result."""
config = _async_config()
r = AsyncRequest(config=config, path="/test", params={}, verb="post", files={"file": b"data"})
first = r._AsyncRequest__get_headers()
second = r._AsyncRequest__get_headers()
assert first == second, "__get_headers() returned different results on second call"

def test_multipart_output_omits_content_type(self):
"""Outgoing headers for a multipart request must not contain Content-Type."""
config = _async_config()
r = AsyncRequest(config=config, path="/test", params={}, verb="post", files={"file": b"data"})
out = r._AsyncRequest__get_headers()
assert "Content-Type" not in out

def test_json_request_includes_content_type(self):
"""Plain JSON requests must still set Content-Type: application/json."""
config = _async_config()
r = AsyncRequest(config=config, path="/test", params={"k": "v"}, verb="post")
out = r._AsyncRequest__get_headers()
assert out.get("Content-Type") == "application/json"

def test_custom_headers_propagated(self):
"""Any extra caller-supplied headers must survive into the outgoing dict."""
config = _async_config(extra_headers={"x-custom": "value"})
r = AsyncRequest(config=config, path="/test", params={}, verb="post")
out = r._AsyncRequest__get_headers()
assert out.get("x-custom") == "value"

def test_second_request_on_same_config_unaffected(self):
"""A second AsyncRequest built from the same config after a multipart call
must still produce correct headers."""
config = _async_config()
r1 = AsyncRequest(config=config, path="/test", params={}, verb="post", files={"file": b"x"})
r1._AsyncRequest__get_headers()
r2 = AsyncRequest(config=config, path="/test", params={"k": "v"}, verb="post")
out = r2._AsyncRequest__get_headers()
assert out.get("Content-Type") == "application/json", (
"Config was mutated by the first multipart request; second request lost Content-Type"
)