Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
105 changes: 105 additions & 0 deletions README.md
Original file line number Diff line number Diff line change
Expand Up @@ -64,6 +64,68 @@ patent_client = PatentDataClient(config=config_from_env)
petition_client = FinalPetitionDecisionsClient(config=config_from_env)
```

### Advanced HTTP Configuration

Control timeout behavior, retry logic, and connection pooling using `HTTPConfig`:

```python
from pyUSPTO import PatentDataClient, USPTOConfig, HTTPConfig

# Create HTTP configuration
http_config = HTTPConfig(
timeout=60.0, # 60 second read timeout
connect_timeout=10.0, # 10 seconds to establish connection
max_retries=5, # Retry up to 5 times on failure
backoff_factor=2.0, # Exponential backoff between retries: 0, 4, 8, 16, 32 seconds (first retry is immediate)
retry_status_codes=[429, 500, 502, 503, 504], # Retry on these status codes
pool_connections=20, # Number of connection pools to cache (one per host)
pool_maxsize=20, # Max connections kept in each pool
custom_headers={ # Additional headers for all requests
"User-Agent": "MyApp/1.0",
"X-Tracking-ID": "abc123"
}
)

# Pass HTTPConfig via USPTOConfig
config = USPTOConfig(
api_key="your_api_key",
http_config=http_config
)

client = PatentDataClient(config=config)
```

Configure HTTP settings via environment variables:

```bash
export USPTO_REQUEST_TIMEOUT=60.0 # Read timeout
export USPTO_CONNECT_TIMEOUT=10.0 # Connection timeout
export USPTO_MAX_RETRIES=5 # Max retry attempts
export USPTO_BACKOFF_FACTOR=2.0 # Retry backoff multiplier
export USPTO_POOL_CONNECTIONS=20 # Number of connection pools to cache
export USPTO_POOL_MAXSIZE=20 # Max connections kept in each pool
```

Then create config from environment:

```python
config = USPTOConfig.from_env() # Reads both API and HTTP config from env
client = PatentDataClient(config=config)
```

Share HTTP configuration across multiple clients:

```python
# Create once, use multiple times
http_config = HTTPConfig(timeout=60.0, max_retries=5)

patent_config = USPTOConfig(api_key="key1", http_config=http_config)
petition_config = USPTOConfig(api_key="key2", http_config=http_config)

patent_client = PatentDataClient(config=patent_config)
petition_client = FinalPetitionDecisionsClient(config=petition_config)
```

### Patent Data API

```python
Expand All @@ -89,6 +151,49 @@ print(f"Decision Type: {decision.decision_type_code}")
print(f"Application: {decision.application_number_text}")
```

## Warning Control

The library uses Python's standard `warnings` module to report data parsing issues. This allows you to control how warnings are handled based on your needs.

### Warning Categories

All warnings inherit from `USPTODataWarning`:

- `USPTODateParseWarning`: Date/datetime string parsing failures
- `USPTOBooleanParseWarning`: Y/N boolean string parsing failures
- `USPTOTimezoneWarning`: Timezone-related issues
- `USPTOEnumParseWarning`: Enum value parsing failures

### Controlling Warnings

```python
import warnings
from pyUSPTO.warnings import (
USPTODataWarning,
USPTODateParseWarning,
USPTOBooleanParseWarning,
USPTOTimezoneWarning,
USPTOEnumParseWarning
)

# Suppress all pyUSPTO data warnings
warnings.filterwarnings('ignore', category=USPTODataWarning)

# Suppress only date parsing warnings
warnings.filterwarnings('ignore', category=USPTODateParseWarning)

# Turn warnings into errors (strict mode)
warnings.filterwarnings('error', category=USPTODataWarning)

# Show each matching warning only once, regardless of location
warnings.filterwarnings('once', category=USPTODataWarning)

# Always show every occurrence (Python's default shows only the first per location)
warnings.filterwarnings('always', category=USPTODataWarning)
```

The library parses permissively: a field that cannot be parsed is returned as `None`, so you can still retrieve partial data even when some fields have issues. Warnings inform you when this happens without stopping execution.

## Features

- Access to USPTO Bulk Data API, Patent Data API, and Final Petition Decisions API
Expand Down
15 changes: 15 additions & 0 deletions src/pyUSPTO/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -22,6 +22,7 @@
USPTOApiNotFoundError,
USPTOApiRateLimitError,
)
from pyUSPTO.http_config import HTTPConfig

# Import model implementations
from pyUSPTO.models.bulk_data import (
Expand All @@ -36,6 +37,13 @@
PetitionDecisionDocument,
PetitionDecisionResponse,
)
from pyUSPTO.warnings import (
USPTOBooleanParseWarning,
USPTODataWarning,
USPTODateParseWarning,
USPTOEnumParseWarning,
USPTOTimezoneWarning,
)

__all__ = [
# Base classes
Expand All @@ -44,6 +52,13 @@
"USPTOApiRateLimitError",
"USPTOApiNotFoundError",
"USPTOConfig",
"HTTPConfig",
# Warning classes
"USPTODataWarning",
"USPTODateParseWarning",
"USPTOBooleanParseWarning",
"USPTOTimezoneWarning",
"USPTOEnumParseWarning",
# Bulk Data API
"BulkDataClient",
"BulkDataResponse",
Expand Down
105 changes: 87 additions & 18 deletions src/pyUSPTO/clients/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -23,7 +23,15 @@
from requests.adapters import HTTPAdapter
from urllib3.util.retry import Retry

from pyUSPTO.exceptions import APIErrorArgs, USPTOApiError, get_api_exception
from pyUSPTO.config import USPTOConfig
from pyUSPTO.exceptions import (
APIErrorArgs,
USPTOApiError,
USPTOConnectionError,
USPTOTimeout,
get_api_exception,
)
from pyUSPTO.http_config import HTTPConfig


@runtime_checkable
Expand All @@ -47,32 +55,68 @@ def __init__(
self,
api_key: Optional[str] = None,
base_url: str = "",
config: Optional[USPTOConfig] = None,
):
"""
Initialize the BaseUSPTOClient.
"""Initialize the BaseUSPTOClient.

Args:
api_key: API key for authentication
base_url: The base URL of the API
config: Optional USPTOConfig instance
"""
# Handle config if provided
if config:
self.config = config
self.api_key = api_key or config.api_key
else:
# Backward compatibility: create minimal config
self.config = USPTOConfig(api_key=api_key)
self.api_key = api_key

self.base_url = base_url.rstrip("/")
self.api_key = api_key
self.session = requests.Session()

if api_key:
self.session.headers.update(
{"X-API-KEY": api_key, "content-type": "application/json"}
# Extract HTTP config for session creation
self.http_config = self.config.http_config

# Create session with HTTP config settings
self.session = self._create_session()

def _create_session(self) -> requests.Session:
"""Create configured HTTP session from HTTPConfig settings.

Returns:
Configured requests.Session instance
"""
session = requests.Session()

# Set API key and default headers
if self.api_key:
session.headers.update(
{"X-API-KEY": self.api_key, "content-type": "application/json"}
)

# Configure retries
# Apply custom headers from HTTP config
if self.http_config.custom_headers:
session.headers.update(self.http_config.custom_headers)

# Configure retry strategy from HTTP config
retry_strategy = Retry(
total=3,
backoff_factor=1,
status_forcelist=[429, 500, 502, 503, 504],
total=self.http_config.max_retries,
backoff_factor=self.http_config.backoff_factor,
status_forcelist=self.http_config.retry_status_codes,
)
adapter = HTTPAdapter(max_retries=retry_strategy)
self.session.mount("http://", adapter)
self.session.mount("https://", adapter)

# Create adapter with retry and connection pool settings
adapter = HTTPAdapter(
max_retries=retry_strategy,
pool_connections=self.http_config.pool_connections,
pool_maxsize=self.http_config.pool_maxsize,
)

session.mount("http://", adapter)
session.mount("https://", adapter)

return session

def _make_request(
self,
Expand Down Expand Up @@ -110,12 +154,21 @@ def _make_request(
base = custom_base_url if custom_base_url else self.base_url
url = f"{base}/{endpoint.lstrip('/')}"

# Get timeout from HTTP config
timeout = self.http_config.get_timeout_tuple()

try:
if method.upper() == "GET":
response = self.session.get(url=url, params=params, stream=stream)
response = self.session.get(
url=url, params=params, stream=stream, timeout=timeout
)
elif method.upper() == "POST":
response = self.session.post(
url=url, params=params, json=json_data, stream=stream
url=url,
params=params,
json=json_data,
stream=stream,
timeout=timeout,
)
else:
raise ValueError(f"Unsupported HTTP method: {method}")
Expand Down Expand Up @@ -147,9 +200,25 @@ def _make_request(
api_exception_to_raise = get_api_exception(error_args=current_error_args)
raise api_exception_to_raise from http_err

except requests.exceptions.Timeout as timeout_err:
# Specific handling for timeout errors
raise USPTOTimeout(
message=f"Request to '{url}' timed out",
api_short_error="Timeout",
error_details=str(timeout_err),
) from timeout_err

except requests.exceptions.ConnectionError as conn_err:
# Specific handling for connection errors (DNS, refused connection, etc.)
raise USPTOConnectionError(
message=f"Failed to connect to '{url}'",
api_short_error="Connection Error",
error_details=str(conn_err),
) from conn_err

except (
requests.exceptions.RequestException
) as req_err: # Catches non-HTTP errors from requests
) as req_err: # Catches other non-HTTP errors from requests
client_operation_message = (
f"API request to '{url}' failed" # 'url' is from _make_request scope
)
Expand Down
2 changes: 1 addition & 1 deletion src/pyUSPTO/clients/bulk_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -49,7 +49,7 @@ def __init__(
# Use provided base_url or get from config
base_url = base_url or self.config.bulk_data_base_url

super().__init__(api_key=api_key, base_url=base_url)
super().__init__(api_key=api_key, base_url=base_url, config=self.config)

def get_products(self, params: Optional[Dict[str, Any]] = None) -> BulkDataResponse:
"""
Expand Down
Loading
Loading