Skip to content

Commit 74ed873

Browse files
thodson-usgs and claude committed
feat(http): Migrate from requests to httpx
Swap the package's HTTP client: `requests.Session` → `httpx.Client`, `requests.Response` → `httpx.Response`. Same call patterns, same response shape; only the underlying transport changes.

Headers, timeout, and redirect defaults are centralized in `dataretrieval.utils.HTTPX_DEFAULTS` (`httpx.Timeout(60.0, connect=10.0)`, `follow_redirects=True`). The chunker, paginated-loop helpers, and OGC waterdata fetchers route through one `httpx.Client` that the chunker publishes on the `_chunked_client` ContextVar so paginated sub-requests reuse the connection pool across a single chunked call.

Three httpx behavior diffs handled defensively:

* `httpx.InvalidURL` is raised client-side when a URL component exceeds httpx's 64 KB cap. Caught by `_safe_request_bytes` (treats "too big to construct" as "doesn't fit", so the planner's halving loop keeps shrinking) AND by `_issue` / `_classify_chunk_error` (treats a runtime InvalidURL as `ServiceInterrupted` so partial state remains recoverable via `.call.resume()`). Note that `httpx.InvalidURL` does NOT inherit from `httpx.HTTPError` — it needs an explicit catch.
* `httpx.Response.elapsed` is only populated once the response is closed; `pytest-httpx` mock responses don't populate it. The new `_safe_elapsed` helper falls back to `timedelta(0)`.
* `httpx.Response.url` is a read-only property. The new `_set_response_url` helper rewrites it by reseating the bound request, with a fallback path for `Mock`-shaped test responses.

Tests migrate from `requests_mock` to native `pytest-httpx`. The new `tests/conftest.py` is ~30 lines configuring pytest-httpx strict-mode relaxations.

Backwards-compat:

* `BaseMetadata.header` is now `httpx.Headers` instead of `requests.structures.CaseInsensitiveDict`. Case-insensitive `.get(...)` still works; literal dict equality (`md.header == {"k": "v"}`) no longer holds because `httpx.Headers` carries auto-added entries.
* `BaseMetadata.url` is coerced to `str`.
* `RequestExceedsQuota` and `API_USGS_LIMIT` are removed — the chunker no longer pre-empts on `x-ratelimit-remaining`. A natural 429 still surfaces as `QuotaExhausted` via `_classify_chunk_error`, carrying partial state for `.call.resume()`.
* The CI flaky-rerun regex now matches `httpx.ConnectError` as well as the legacy `ConnectionError` string.

Test count: 404 mocked tests passing, 2 skipped, ruff clean.

Co-Authored-By: Claude Opus 4.7 <noreply@anthropic.com>
1 parent 51bdc6f commit 74ed873

23 files changed

Lines changed: 1116 additions & 924 deletions

dataretrieval/nadp.py

Lines changed: 4 additions & 2 deletions
Original file line numberDiff line numberDiff line change
@@ -34,7 +34,9 @@
3434
import warnings
3535
import zipfile
3636

37-
import requests
37+
import httpx
38+
39+
from dataretrieval.utils import HTTPX_DEFAULTS
3840

3941
_DEPRECATION_MESSAGE = (
4042
"The `nadp` module is deprecated and will be removed from `dataretrieval` "
@@ -213,7 +215,7 @@ def get_zip(url, filename):
213215
"""
214216
_warn_deprecated()
215217

216-
req = requests.get(url + filename)
218+
req = httpx.get(url + filename, **HTTPX_DEFAULTS)
217219
req.raise_for_status()
218220

219221
# z = zipfile.ZipFile(io.BytesIO(req.content))

dataretrieval/nldi.py

Lines changed: 9 additions & 1 deletion
Original file line numberDiff line numberDiff line change
@@ -20,7 +20,7 @@ def _query_nldi(url, query_params, error_message):
2020
# A helper function to query the NLDI API
2121
response = query(url, payload=query_params)
2222
if response.status_code != 200:
23-
raise ValueError(f"{error_message}. Error reason: {response.reason}")
23+
raise ValueError(f"{error_message}. Error reason: {response.reason_phrase}")
2424

2525
response_data = {}
2626
try:
@@ -453,6 +453,14 @@ def _validate_data_source(data_source: str):
453453
available_data_sources = _query_nldi(
454454
url, {}, "Error getting available data sources"
455455
)
456+
if not isinstance(available_data_sources, list) or not all(
457+
isinstance(ds, dict) and "source" in ds for ds in available_data_sources
458+
):
459+
raise ValueError(
460+
"NLDI data-source catalog returned an unexpected shape; "
461+
"expected a list of {'source': ..., ...} objects, got: "
462+
f"{available_data_sources!r}"
463+
)
456464
_AVAILABLE_DATA_SOURCES = [ds["source"] for ds in available_data_sources]
457465

458466
if data_source not in _AVAILABLE_DATA_SOURCES:

dataretrieval/nwis.py

Lines changed: 8 additions & 10 deletions
Original file line numberDiff line numberDiff line change
@@ -11,8 +11,8 @@
1111
import warnings
1212
from json import JSONDecodeError
1313

14+
import httpx
1415
import pandas as pd
15-
import requests
1616

1717
from dataretrieval.rdb import read_rdb
1818
from dataretrieval.utils import BaseMetadata
@@ -110,7 +110,7 @@ def wrapper(*args, **kwargs):
110110
return wrapper
111111

112112

113-
def _parse_json_or_raise(response: requests.Response) -> pd.DataFrame:
113+
def _parse_json_or_raise(response: httpx.Response) -> pd.DataFrame:
114114
"""Parse a JSON NWIS response, raising a helpful error on HTML responses."""
115115
try:
116116
return _read_json(response.json())
@@ -364,9 +364,7 @@ def get_stats(
364364

365365

366366
@_deprecated
367-
def query_waterdata(
368-
service: str, ssl_check: bool = True, **kwargs
369-
) -> requests.models.Response:
367+
def query_waterdata(service: str, ssl_check: bool = True, **kwargs) -> httpx.Response:
370368
"""
371369
Queries waterdata.
372370
@@ -382,7 +380,7 @@ def query_waterdata(
382380
383381
Returns
384382
-------
385-
request: ``requests.models.Response``
383+
request: ``httpx.Response``
386384
The response object from the API request to the web service
387385
"""
388386
major_params = ["site_no", "state_cd"]
@@ -412,7 +410,7 @@ def query_waterdata(
412410
@_deprecated
413411
def query_waterservices(
414412
service: str, ssl_check: bool = True, **kwargs
415-
) -> requests.models.Response:
413+
) -> httpx.Response:
416414
"""
417415
Queries waterservices.usgs.gov
418416
@@ -451,7 +449,7 @@ def query_waterservices(
451449
452450
Returns
453451
-------
454-
request: ``requests.models.Response``
452+
request: ``httpx.Response``
455453
The response object from the API request to the web service
456454
457455
"""
@@ -1123,7 +1121,7 @@ class NWIS_Metadata(BaseMetadata):
11231121
Response url
11241122
query_time: datetime.timedelta
11251123
Response elapsed time
1126-
header: requests.structures.CaseInsensitiveDict
1124+
header: httpx.Headers
11271125
Response headers
11281126
comments: str | None
11291127
Metadata comments, if any
@@ -1143,7 +1141,7 @@ def __init__(self, response, **parameters) -> None:
11431141
Parameters
11441142
----------
11451143
response: Response
1146-
Response object from requests module
1144+
Response object from httpx module
11471145
parameters: unpacked dictionary
11481146
Unpacked dictionary of the parameters supplied in the request
11491147

dataretrieval/streamstats.py

Lines changed: 5 additions & 3 deletions
Original file line numberDiff line numberDiff line change
@@ -7,7 +7,9 @@
77

88
import json
99

10-
import requests
10+
import httpx
11+
12+
from dataretrieval.utils import HTTPX_DEFAULTS
1113

1214

1315
def download_workspace(workspaceID, format=""):
@@ -32,7 +34,7 @@ def download_workspace(workspaceID, format=""):
3234
payload = {"workspaceID": workspaceID, "format": format}
3335
url = "https://streamstats.usgs.gov/streamstatsservices/download"
3436

35-
r = requests.get(url, params=payload)
37+
r = httpx.get(url, params=payload, **HTTPX_DEFAULTS)
3638

3739
r.raise_for_status()
3840
return r
@@ -125,7 +127,7 @@ def get_watershed(
125127
}
126128
url = "https://streamstats.usgs.gov/streamstatsservices/watershed.geojson"
127129

128-
r = requests.get(url, params=payload)
130+
r = httpx.get(url, params=payload, **HTTPX_DEFAULTS)
129131

130132
r.raise_for_status()
131133

dataretrieval/utils.py

Lines changed: 48 additions & 30 deletions
Original file line numberDiff line numberDiff line change
@@ -5,12 +5,17 @@
55
import warnings
66
from collections.abc import Iterable
77

8+
import httpx
89
import pandas as pd
9-
import requests
1010

1111
import dataretrieval
1212
from dataretrieval.codes import tz
1313

14+
HTTPX_DEFAULTS = {
15+
"follow_redirects": True,
16+
"timeout": httpx.Timeout(60.0, connect=10.0),
17+
}
18+
1419

1520
def to_str(listlike, delimiter=","):
1621
"""Translates list-like objects into strings.
@@ -205,7 +210,7 @@ class BaseMetadata:
205210
Response url
206211
query_time: datetime.timedelta
207212
Response elapsed time
208-
header: requests.structures.CaseInsensitiveDict
213+
header: httpx.Headers
209214
Response headers
210215
211216
"""
@@ -216,7 +221,7 @@ def __init__(self, response) -> None:
216221
Parameters
217222
----------
218223
response: Response
219-
Response object from requests module
224+
Response object from httpx module
220225
221226
Returns
222227
-------
@@ -225,8 +230,8 @@ def __init__(self, response) -> None:
225230
226231
"""
227232

228-
# These are built from the API response
229-
self.url = response.url
233+
# Coerce httpx.URL -> str: BaseMetadata.url has always been str.
234+
self.url = str(response.url)
230235
self.query_time = response.elapsed
231236
self.header = response.headers
232237
self.comment = None
@@ -254,18 +259,37 @@ def __repr__(self) -> str:
254259
return f"{type(self).__name__}(url={self.url})"
255260

256261

262+
_URL_TOO_LONG_EXAMPLE = """
263+
# n is the number of chunks to divide the query into \n
264+
split_list = np.array_split(site_list, n)
265+
data_list = [] # list to store chunk results in \n
266+
# loop through chunks and make requests \n
267+
for site_list in split_list: \n
268+
data = nwis.get_record(sites=site_list, service='dv', \n
269+
start=start, end=end) \n
270+
data_list.append(data) # append results to list"""
271+
272+
273+
def _url_too_long_error(detail: str) -> ValueError:
274+
return ValueError(
275+
"Request URL too long. Modify your query to use fewer sites. "
276+
f"{detail}. Pseudo-code example of how to split your query: "
277+
f"\n {_URL_TOO_LONG_EXAMPLE}"
278+
)
279+
280+
257281
def query(url, payload, delimiter=",", ssl_check=True):
258282
"""Send a query.
259283
260-
Wrapper for requests.get that handles errors, converts listed
284+
Wrapper for httpx.get that handles errors, converts listed
261285
query parameters to comma separated strings, and returns response.
262286
263287
Parameters
264288
----------
265289
url: string
266290
URL to query
267291
payload: dict
268-
query parameters passed to ``requests.get``
292+
query parameters passed to ``httpx.get``
269293
delimiter: string
270294
delimiter to use with lists
271295
ssl_check: bool
@@ -275,19 +299,27 @@ def query(url, payload, delimiter=",", ssl_check=True):
275299
Returns
276300
-------
277301
string: query response
278-
The response from the API query ``requests.get`` function call.
302+
The response from the API query ``httpx.get`` function call.
279303
"""
280304

281305
for key, value in payload.items():
282306
payload[key] = to_str(value, delimiter)
283-
# for index in range(len(payload)):
284-
# key, value = payload[index]
285-
# payload[index] = (key, to_str(value))
307+
# httpx serializes None params as ``foo=``; USGS rejects with 400.
308+
# Drop them. (``to_str`` returns None for non-iterable scalars like bools.)
309+
payload = {k: v for k, v in payload.items() if v is not None}
286310

287-
# define the user agent for the query
288311
user_agent = {"user-agent": f"python-dataretrieval/{dataretrieval.__version__}"}
289312

290-
response = requests.get(url, params=payload, headers=user_agent, verify=ssl_check)
313+
try:
314+
response = httpx.get(
315+
url,
316+
params=payload,
317+
headers=user_agent,
318+
verify=ssl_check,
319+
**HTTPX_DEFAULTS,
320+
)
321+
except httpx.InvalidURL as exc:
322+
raise _url_too_long_error(f"httpx rejected the URL client-side: {exc}") from exc
291323

292324
if response.status_code == 400:
293325
raise ValueError(
@@ -299,24 +331,10 @@ def query(url, payload, delimiter=",", ssl_check=True):
299331
+ f"URL: {response.url}"
300332
)
301333
elif response.status_code == 414:
302-
_reason = response.reason
303-
_example = """
304-
# n is the number of chunks to divide the query into \n
305-
split_list = np.array_split(site_list, n)
306-
data_list = [] # list to store chunk results in \n
307-
# loop through chunks and make requests \n
308-
for site_list in split_list: \n
309-
data = nwis.get_record(sites=site_list, service='dv', \n
310-
start=start, end=end) \n
311-
data_list.append(data) # append results to list"""
312-
raise ValueError(
313-
"Request URL too long. Modify your query to use fewer sites. "
314-
+ f"API response reason: {_reason}. Pseudo-code example of how to "
315-
+ f"split your query: \n {_example}"
316-
)
317-
elif response.status_code in [500, 502, 503]:
334+
raise _url_too_long_error(f"API response reason: {response.reason_phrase}")
335+
elif 500 <= response.status_code < 600:
318336
raise ValueError(
319-
f"Service Unavailable: {response.status_code} {response.reason}. "
337+
f"Service Unavailable: {response.status_code} {response.reason_phrase}. "
320338
+ f"The service at {response.url} may be down or experiencing issues."
321339
)
322340

dataretrieval/waterdata/api.py

Lines changed: 22 additions & 14 deletions
Original file line numberDiff line numberDiff line change
@@ -13,11 +13,15 @@
1313
from typing import get_args
1414
from urllib.parse import quote
1515

16+
import httpx
1617
import pandas as pd
17-
import requests
18-
from requests.models import PreparedRequest
1918

20-
from dataretrieval.utils import BaseMetadata, _attach_datetime_columns, to_str
19+
from dataretrieval.utils import (
20+
HTTPX_DEFAULTS,
21+
BaseMetadata,
22+
_attach_datetime_columns,
23+
to_str,
24+
)
2125
from dataretrieval.waterdata.filters import FILTER_LANG
2226
from dataretrieval.waterdata.types import (
2327
CODE_SERVICES,
@@ -2110,7 +2114,7 @@ def get_codes(code_service: CODE_SERVICES) -> pd.DataFrame:
21102114

21112115
url = f"{SAMPLES_URL}/codeservice/{code_service}?mimeType=application%2Fjson"
21122116

2113-
response = requests.get(url, headers=_default_headers())
2117+
response = httpx.get(url, headers=_default_headers(), **HTTPX_DEFAULTS)
21142118

21152119
response.raise_for_status()
21162120

@@ -2336,12 +2340,14 @@ def get_samples(
23362340

23372341
url = f"{SAMPLES_URL}/{service}/{profile}"
23382342

2339-
req = PreparedRequest()
2340-
req.prepare_url(url, params=params)
2341-
logger.debug("Request: %s", req.url)
2343+
logger.debug("Request: %s", httpx.URL(url).copy_merge_params(params))
23422344

2343-
response = requests.get(
2344-
url, params=params, verify=ssl_check, headers=_default_headers()
2345+
response = httpx.get(
2346+
url,
2347+
params=params,
2348+
verify=ssl_check,
2349+
headers=_default_headers(),
2350+
**HTTPX_DEFAULTS,
23452351
)
23462352

23472353
response.raise_for_status()
@@ -2408,12 +2414,14 @@ def get_samples_summary(
24082414
url = f"{SAMPLES_URL}/summary/{quote(monitoringLocationIdentifier, safe='')}"
24092415
params = {"mimeType": "text/csv"}
24102416

2411-
req = PreparedRequest()
2412-
req.prepare_url(url, params=params)
2413-
logger.debug("Request: %s", req.url)
2417+
logger.debug("Request: %s", httpx.URL(url).copy_merge_params(params))
24142418

2415-
response = requests.get(
2416-
url, params=params, verify=ssl_check, headers=_default_headers()
2419+
response = httpx.get(
2420+
url,
2421+
params=params,
2422+
verify=ssl_check,
2423+
headers=_default_headers(),
2424+
**HTTPX_DEFAULTS,
24172425
)
24182426

24192427
response.raise_for_status()

0 commit comments

Comments
 (0)