Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 4 additions & 2 deletions dataretrieval/nadp.py
Original file line number Diff line number Diff line change
Expand Up @@ -34,7 +34,9 @@
import warnings
import zipfile

import requests
import httpx

from dataretrieval.utils import HTTPX_DEFAULTS

_DEPRECATION_MESSAGE = (
"The `nadp` module is deprecated and will be removed from `dataretrieval` "
Expand Down Expand Up @@ -213,7 +215,7 @@ def get_zip(url, filename):
"""
_warn_deprecated()

req = requests.get(url + filename)
req = httpx.get(url + filename, **HTTPX_DEFAULTS)
req.raise_for_status()

# z = zipfile.ZipFile(io.BytesIO(req.content))
Expand Down
10 changes: 9 additions & 1 deletion dataretrieval/nldi.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,7 +20,7 @@ def _query_nldi(url, query_params, error_message):
# A helper function to query the NLDI API
response = query(url, payload=query_params)
if response.status_code != 200:
raise ValueError(f"{error_message}. Error reason: {response.reason}")
raise ValueError(f"{error_message}. Error reason: {response.reason_phrase}")

response_data = {}
try:
Expand Down Expand Up @@ -453,6 +453,14 @@ def _validate_data_source(data_source: str):
available_data_sources = _query_nldi(
url, {}, "Error getting available data sources"
)
if not isinstance(available_data_sources, list) or not all(
isinstance(ds, dict) and "source" in ds for ds in available_data_sources
):
raise ValueError(
"NLDI data-source catalog returned an unexpected shape; "
"expected a list of {'source': ..., ...} objects, got: "
f"{available_data_sources!r}"
)
_AVAILABLE_DATA_SOURCES = [ds["source"] for ds in available_data_sources]

if data_source not in _AVAILABLE_DATA_SOURCES:
Expand Down
18 changes: 8 additions & 10 deletions dataretrieval/nwis.py
Original file line number Diff line number Diff line change
Expand Up @@ -11,8 +11,8 @@
import warnings
from json import JSONDecodeError

import httpx
import pandas as pd
import requests

from dataretrieval.rdb import read_rdb
from dataretrieval.utils import BaseMetadata
Expand Down Expand Up @@ -110,7 +110,7 @@ def wrapper(*args, **kwargs):
return wrapper


def _parse_json_or_raise(response: requests.Response) -> pd.DataFrame:
def _parse_json_or_raise(response: httpx.Response) -> pd.DataFrame:
"""Parse a JSON NWIS response, raising a helpful error on HTML responses."""
try:
return _read_json(response.json())
Expand Down Expand Up @@ -364,9 +364,7 @@ def get_stats(


@_deprecated
def query_waterdata(
service: str, ssl_check: bool = True, **kwargs
) -> requests.models.Response:
def query_waterdata(service: str, ssl_check: bool = True, **kwargs) -> httpx.Response:
"""
Queries waterdata.

Expand All @@ -382,7 +380,7 @@ def query_waterdata(

Returns
-------
request: ``requests.models.Response``
request: ``httpx.Response``
The response object from the API request to the web service
"""
major_params = ["site_no", "state_cd"]
Expand Down Expand Up @@ -412,7 +410,7 @@ def query_waterdata(
@_deprecated
def query_waterservices(
service: str, ssl_check: bool = True, **kwargs
) -> requests.models.Response:
) -> httpx.Response:
"""
Queries waterservices.usgs.gov

Expand Down Expand Up @@ -451,7 +449,7 @@ def query_waterservices(

Returns
-------
request: ``requests.models.Response``
request: ``httpx.Response``
The response object from the API request to the web service

"""
Expand Down Expand Up @@ -1123,7 +1121,7 @@ class NWIS_Metadata(BaseMetadata):
Response url
query_time: datetime.timedelta
Response elapsed time
header: requests.structures.CaseInsensitiveDict
header: httpx.Headers
Response headers
comments: str | None
Metadata comments, if any
Expand All @@ -1143,7 +1141,7 @@ def __init__(self, response, **parameters) -> None:
Parameters
----------
response: Response
Response object from requests module
Response object from httpx module
parameters: unpacked dictionary
Unpacked dictionary of the parameters supplied in the request

Expand Down
8 changes: 5 additions & 3 deletions dataretrieval/streamstats.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,7 +7,9 @@

import json

import requests
import httpx

from dataretrieval.utils import HTTPX_DEFAULTS


def download_workspace(workspaceID, format=""):
Expand All @@ -32,7 +34,7 @@ def download_workspace(workspaceID, format=""):
payload = {"workspaceID": workspaceID, "format": format}
url = "https://streamstats.usgs.gov/streamstatsservices/download"

r = requests.get(url, params=payload)
r = httpx.get(url, params=payload, **HTTPX_DEFAULTS)

r.raise_for_status()
return r
Expand Down Expand Up @@ -125,7 +127,7 @@ def get_watershed(
}
url = "https://streamstats.usgs.gov/streamstatsservices/watershed.geojson"

r = requests.get(url, params=payload)
r = httpx.get(url, params=payload, **HTTPX_DEFAULTS)

r.raise_for_status()

Expand Down
78 changes: 48 additions & 30 deletions dataretrieval/utils.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,12 +5,17 @@
import warnings
from collections.abc import Iterable

import httpx
import pandas as pd
import requests

import dataretrieval
from dataretrieval.codes import tz

# Keyword arguments shared by the package's httpx calls (unpacked at each call
# site as ``**HTTPX_DEFAULTS``).
# - follow_redirects: enabled explicitly because httpx, unlike requests, does
#   not follow redirects by default.
# - timeout: 60 s is the default for each timeout phase, with the connect
#   phase overridden to 10 s.
HTTPX_DEFAULTS = {
"follow_redirects": True,
"timeout": httpx.Timeout(60.0, connect=10.0),
}


def to_str(listlike, delimiter=","):
"""Translates list-like objects into strings.
Expand Down Expand Up @@ -205,7 +210,7 @@ class BaseMetadata:
Response url
query_time: datetime.timedelta
Response elapsed time
header: requests.structures.CaseInsensitiveDict
header: httpx.Headers
Response headers

"""
Expand All @@ -216,7 +221,7 @@ def __init__(self, response) -> None:
Parameters
----------
response: Response
Response object from requests module
Response object from httpx module

Returns
-------
Expand All @@ -225,8 +230,8 @@ def __init__(self, response) -> None:

"""

# These are built from the API response
self.url = response.url
# Coerce httpx.URL -> str: BaseMetadata.url has always been str.
self.url = str(response.url)
self.query_time = response.elapsed
self.header = response.headers
self.comment = None
Expand Down Expand Up @@ -254,18 +259,37 @@ def __repr__(self) -> str:
return f"{type(self).__name__}(url={self.url})"


# Pseudo-code embedded in the "Request URL too long" error message built by
# ``_url_too_long_error``; it illustrates splitting a large site list into
# chunks and issuing one request per chunk.
# NOTE: this is a regular (non-raw) string, so every literal ``\n`` below is an
# escape sequence that emits a newline in addition to the physical line break.
_URL_TOO_LONG_EXAMPLE = """
# n is the number of chunks to divide the query into \n
split_list = np.array_split(site_list, n)
data_list = [] # list to store chunk results in \n
# loop through chunks and make requests \n
for site_list in split_list: \n
data = nwis.get_record(sites=site_list, service='dv', \n
start=start, end=end) \n
data_list.append(data) # append results to list"""


def _url_too_long_error(detail: str) -> ValueError:
    """Build the error used when a request URL is too long.

    The exception is returned, not raised, so the caller decides how to
    raise it (for example with ``raise ... from exc``).

    Parameters
    ----------
    detail: string
        Short explanation of where the failure was detected; it is inserted
        into the message between the headline and the pseudo-code example.

    Returns
    -------
    error: ValueError
        Exception whose message combines the headline, ``detail`` and the
        ``_URL_TOO_LONG_EXAMPLE`` pseudo-code.
    """
    headline = "Request URL too long. Modify your query to use fewer sites. "
    guidance = f"{detail}. Pseudo-code example of how to split your query: "
    message = headline + guidance + f"\n {_URL_TOO_LONG_EXAMPLE}"
    return ValueError(message)


def query(url, payload, delimiter=",", ssl_check=True):
"""Send a query.

Wrapper for requests.get that handles errors, converts listed
Wrapper for httpx.get that handles errors, converts listed
query parameters to comma separated strings, and returns response.

Parameters
----------
url: string
URL to query
payload: dict
query parameters passed to ``requests.get``
query parameters passed to ``httpx.get``
delimiter: string
delimiter to use with lists
ssl_check: bool
Expand All @@ -275,19 +299,27 @@ def query(url, payload, delimiter=",", ssl_check=True):
Returns
-------
string: query response
The response from the API query ``requests.get`` function call.
The response from the API query ``httpx.get`` function call.
"""

for key, value in payload.items():
payload[key] = to_str(value, delimiter)
# for index in range(len(payload)):
# key, value = payload[index]
# payload[index] = (key, to_str(value))
# httpx serializes None params as ``foo=``; USGS rejects with 400.
# Drop them. (``to_str`` returns None for non-iterable scalars like bools.)
payload = {k: v for k, v in payload.items() if v is not None}

# define the user agent for the query
user_agent = {"user-agent": f"python-dataretrieval/{dataretrieval.__version__}"}

response = requests.get(url, params=payload, headers=user_agent, verify=ssl_check)
try:
response = httpx.get(
url,
params=payload,
headers=user_agent,
verify=ssl_check,
**HTTPX_DEFAULTS,
)
except httpx.InvalidURL as exc:
raise _url_too_long_error(f"httpx rejected the URL client-side: {exc}") from exc

if response.status_code == 400:
raise ValueError(
Expand All @@ -299,24 +331,10 @@ def query(url, payload, delimiter=",", ssl_check=True):
+ f"URL: {response.url}"
)
elif response.status_code == 414:
_reason = response.reason
_example = """
# n is the number of chunks to divide the query into \n
split_list = np.array_split(site_list, n)
data_list = [] # list to store chunk results in \n
# loop through chunks and make requests \n
for site_list in split_list: \n
data = nwis.get_record(sites=site_list, service='dv', \n
start=start, end=end) \n
data_list.append(data) # append results to list"""
raise ValueError(
"Request URL too long. Modify your query to use fewer sites. "
+ f"API response reason: {_reason}. Pseudo-code example of how to "
+ f"split your query: \n {_example}"
)
elif response.status_code in [500, 502, 503]:
raise _url_too_long_error(f"API response reason: {response.reason_phrase}")
elif 500 <= response.status_code < 600:
raise ValueError(
f"Service Unavailable: {response.status_code} {response.reason}. "
f"Service Unavailable: {response.status_code} {response.reason_phrase}. "
+ f"The service at {response.url} may be down or experiencing issues."
)

Expand Down
36 changes: 22 additions & 14 deletions dataretrieval/waterdata/api.py
Original file line number Diff line number Diff line change
Expand Up @@ -13,11 +13,15 @@
from typing import get_args
from urllib.parse import quote

import httpx
import pandas as pd
import requests
from requests.models import PreparedRequest

from dataretrieval.utils import BaseMetadata, _attach_datetime_columns, to_str
from dataretrieval.utils import (
HTTPX_DEFAULTS,
BaseMetadata,
_attach_datetime_columns,
to_str,
)
from dataretrieval.waterdata.filters import FILTER_LANG
from dataretrieval.waterdata.types import (
CODE_SERVICES,
Expand Down Expand Up @@ -2110,7 +2114,7 @@ def get_codes(code_service: CODE_SERVICES) -> pd.DataFrame:

url = f"{SAMPLES_URL}/codeservice/{code_service}?mimeType=application%2Fjson"

response = requests.get(url, headers=_default_headers())
response = httpx.get(url, headers=_default_headers(), **HTTPX_DEFAULTS)

response.raise_for_status()

Expand Down Expand Up @@ -2336,12 +2340,14 @@ def get_samples(

url = f"{SAMPLES_URL}/{service}/{profile}"

req = PreparedRequest()
req.prepare_url(url, params=params)
logger.debug("Request: %s", req.url)
logger.debug("Request: %s", httpx.URL(url).copy_merge_params(params))

response = requests.get(
url, params=params, verify=ssl_check, headers=_default_headers()
response = httpx.get(
url,
params=params,
verify=ssl_check,
headers=_default_headers(),
**HTTPX_DEFAULTS,
)

response.raise_for_status()
Expand Down Expand Up @@ -2408,12 +2414,14 @@ def get_samples_summary(
url = f"{SAMPLES_URL}/summary/{quote(monitoringLocationIdentifier, safe='')}"
params = {"mimeType": "text/csv"}

req = PreparedRequest()
req.prepare_url(url, params=params)
logger.debug("Request: %s", req.url)
logger.debug("Request: %s", httpx.URL(url).copy_merge_params(params))

response = requests.get(
url, params=params, verify=ssl_check, headers=_default_headers()
response = httpx.get(
url,
params=params,
verify=ssl_check,
headers=_default_headers(),
**HTTPX_DEFAULTS,
)

response.raise_for_status()
Expand Down
Loading
Loading