Skip to content
Merged

Wip #14

Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
54 changes: 28 additions & 26 deletions CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -7,37 +7,39 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0

## [Unreleased]

## [0.2.1]

### Added

- `USPTODataMismatchWarning` for API data validation
- `sanitize_application_number()` method supporting 8-digit and series code formats
- Optional `include_raw_data` parameter in `USPTOConfig` for debugging
- Content-Disposition header parsing with support for the extended `filename*` parameter (RFC 2231 / RFC 5987 encoding)
- `HTTPConfig` class for configurable timeouts, retries, and headers
- `USPTOTimeout` and `USPTOConnectionError` exceptions
- Document type filtering in `get_application_documents()`
- Utility module `models/utils.py` for shared model helpers

### Changed

- Response models now support optional `include_raw_data` parameter
- Replaced print statements with Python warnings module
- Refactored base client to use `HTTPConfig`

## [0.2.0]

### Added

- Full support for USPTO Final Petition Decisions API
- `FinalPetitionDecisionsClient` for interacting with petition decisions
- New data models for petition decisions:
- `PetitionDecision`: Complete petition decision information
- `PetitionDecisionDocument`: Document details and metadata
- `DocumentDownloadOption`: Download options for petition documents
- `PetitionDecisionResponse`: API response wrapper
- `PetitionDecisionDownloadResponse`: Download response wrapper
- Enums for petition decision data:
- `DecisionTypeCode`: Petition decision types
- `DocumentDirectionCategory`: Document direction categories
- Search capabilities with convenience parameters:
- Application number, patent number, technology center
- Decision date ranges, applicant names, inventor names
- Examiner names, decision types, and more
- Pagination support for petition decision searches
- Document download functionality for petition documents
- CSV and JSON export options for petition decisions
- Integration tests for petition decisions (17 tests)
- Unit tests for petition decision models and client (49 tests)
- Example usage file: `examples/petition_decisions_example.py`
- Configuration support for petition decisions base URL in `USPTOConfig`
- `FinalPetitionDecisionsClient` with search, pagination, and document download
- Data models: `PetitionDecision`, `PetitionDecisionDocument`, `PetitionDecisionResponse`
- Enums: `DecisionTypeCode`, `DocumentDirectionCategory`
- CSV and JSON export for petition decisions

## [0.1.2]

### Added

- Initial release of pyUSPTO
- Object Oriented Support for USPTO Patent Data API
- Basic Support for USPTO Bulk Data API
- Full type annotations and docstrings
- Comprehensive test suite
- Initial release
- USPTO Patent Data API support
- USPTO Bulk Data API support
2 changes: 2 additions & 0 deletions src/pyUSPTO/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -39,6 +39,7 @@
)
from pyUSPTO.warnings import (
USPTOBooleanParseWarning,
USPTODataMismatchWarning,
USPTODataWarning,
USPTODateParseWarning,
USPTOEnumParseWarning,
Expand All @@ -59,6 +60,7 @@
"USPTOBooleanParseWarning",
"USPTOTimezoneWarning",
"USPTOEnumParseWarning",
"USPTODataMismatchWarning",
# Bulk Data API
"BulkDataClient",
"BulkDataResponse",
Expand Down
76 changes: 68 additions & 8 deletions src/pyUSPTO/clients/base.py
Original file line number Diff line number Diff line change
Expand Up @@ -4,6 +4,7 @@
This module provides a base client class with common functionality for all USPTO API clients.
"""

import re
from pathlib import Path
from typing import (
Any,
Expand Down Expand Up @@ -39,7 +40,7 @@ class FromDictProtocol(Protocol):
"""Protocol for classes that can be created from a dictionary."""

@classmethod
def from_dict(cls, data: Dict[str, Any]) -> Any:
def from_dict(cls, data: Dict[str, Any], include_raw_data: bool = False) -> Any:
"""Create an object from a dictionary."""
...

Expand Down Expand Up @@ -177,12 +178,13 @@ def _make_request(

# Return the raw response for streaming requests
if stream:
# TODO: Handle Content-Disposition
return response

# Parse the response based on the specified class
if response_class:
parsed_response: T = response_class.from_dict(response.json())
parsed_response: T = response_class.from_dict(
response.json(), include_raw_data=self.config.include_raw_data
)
return parsed_response

# Return the raw JSON for other requests
Expand Down Expand Up @@ -270,37 +272,95 @@ def paginate_results(

offset += limit

@staticmethod
def _extract_filename_from_content_disposition(
    content_disposition: Optional[str],
) -> Optional[str]:
    """Extract a safe filename from a Content-Disposition header value.

    Supports both the extended ``filename*=charset'lang'value`` form
    (RFC 2231 / RFC 5987 encoding) and the plain ``filename=...`` form.
    The extended form is preferred when both are present. Parameter names
    are matched case-insensitively.

    The header is controlled by the remote server, so only the final path
    component of the advertised name is returned; directory parts such as
    ``../`` or a leading ``/`` are discarded.

    Args:
        content_disposition: The Content-Disposition header value.

    Returns:
        Optional[str]: The extracted filename, or None if the header is
        missing, carries no filename, or the filename is empty, ``.`` or
        ``..`` once directory components are removed.

    Examples:
        >>> _extract_filename_from_content_disposition('attachment; filename="document.pdf"')
        'document.pdf'
        >>> _extract_filename_from_content_disposition("attachment; filename*=UTF-8''file%20name.pdf")
        'file name.pdf'
        >>> _extract_filename_from_content_disposition('attachment; filename="../../etc/passwd"')
        'passwd'
    """
    if not content_disposition:
        return None

    from urllib.parse import unquote

    candidates = []

    # Extended form first: filename*=<charset>'<language>'<percent-encoded value>
    extended = re.search(
        r"filename\*\s*=\s*([^'\s;]*)'[^']*'([^;\s]+)",
        content_disposition,
        flags=re.IGNORECASE,
    )
    if extended:
        charset = extended.group(1) or "utf-8"
        try:
            candidates.append(
                unquote(extended.group(2), encoding=charset, errors="replace")
            )
        except LookupError:
            # Unknown charset label: decode as UTF-8 rather than fail.
            candidates.append(unquote(extended.group(2)))

    # Plain form: filename="quoted value" or filename=token
    plain = re.search(
        r'filename\s*=\s*(?:"([^"]*)"|([^;\s]+))',
        content_disposition,
        flags=re.IGNORECASE,
    )
    if plain:
        candidates.append(plain.group(1) or plain.group(2))

    for candidate in candidates:
        if not candidate:
            continue
        # Keep only the last path segment, treating both separators alike
        # so Windows-style paths cannot slip through either.
        name = candidate.replace("\\", "/").rsplit("/", 1)[-1]
        if name and name not in (".", ".."):
            return name

    return None

def _save_response_to_file(
    self, response: requests.Response, file_path: str, overwrite: bool = False
) -> str:
    """Save a streaming response to a file on disk.

    If ``file_path`` is an existing directory, the filename is taken from
    the response's Content-Disposition header and the file is written
    inside that directory.

    Args:
        response: Streaming response object from requests.
        file_path: Local path where the file should be saved. Can be a file
            path or an existing directory (in which case the filename from
            Content-Disposition is used).
        overwrite: Whether to overwrite existing files. Default False.

    Returns:
        str: Path to the saved file (the resolved path inside the directory
        when ``file_path`` was a directory).

    Raises:
        FileExistsError: If the target file exists and overwrite=False.
        ValueError: If file_path is a directory but no usable filename can
            be determined from the Content-Disposition header.
    """
    path = Path(file_path)

    # Directory target: derive the filename from the Content-Disposition header.
    if path.is_dir():
        content_disp = response.headers.get("Content-Disposition")
        filename = self._extract_filename_from_content_disposition(content_disp)
        if filename:
            # The header is server-controlled: keep only the final path
            # segment so the download cannot land outside the directory.
            filename = filename.replace("\\", "/").rsplit("/", 1)[-1]
        if not filename or filename in (".", ".."):
            raise ValueError(
                f"file_path is a directory ({file_path}) but Content-Disposition "
                "header does not contain a filename. Please provide a full file path."
            )
        path = path / filename

    # Refuse to clobber an existing file unless explicitly allowed.
    if path.exists() and not overwrite:
        raise FileExistsError(
            f"File already exists: {path}. Set overwrite=True to replace."
        )

    # Stream to disk in chunks so large downloads are never held in memory.
    with open(file=str(path), mode="wb") as f:
        for chunk in response.iter_content(chunk_size=8192):
            if chunk:  # Filter out keep-alive chunks
                f.write(chunk)
    return str(path)

def _download_file(self, url: str, file_path: str, overwrite: bool = False) -> str:
"""Download a file directly to disk.
Expand Down
97 changes: 87 additions & 10 deletions src/pyUSPTO/clients/patent_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
It allows you to search for and retrieve patent application data.
"""

import warnings
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional
from urllib.parse import urljoin, urlparse
Expand All @@ -30,6 +31,7 @@
StatusCodeCollection,
StatusCodeSearchResponse,
)
from pyUSPTO.warnings import USPTODataMismatchWarning


class PatentDataClient(BaseUSPTOClient[PatentDataResponse]):
Expand Down Expand Up @@ -67,7 +69,80 @@ def __init__(
api_key=api_key_to_use, base_url=effective_base_url, config=self.config
)

# TODO: def sanitize_application_no(inputNumber: str) -> str:
def sanitize_application_number(self, input_number: str) -> str:
    """Sanitize and validate a USPTO application number.

    Application numbers are either:
    - 8 digits (e.g., "16123456")
    - Series code format: 2 digits + "/" + 6 digits (e.g., "08/123456")

    Commas and all whitespace (spaces, tabs, non-breaking spaces, ...) are
    removed; the "/" of the series code format is preserved. Only the ASCII
    digits 0-9 are accepted, so look-alike Unicode digits are rejected.

    Args:
        input_number: Raw application number input. May include commas,
            spaces, or other whitespace formatting.

    Returns:
        str: Sanitized application number (either "NNNNNNNN" or "NN/NNNNNN").

    Raises:
        ValueError: If the input is empty or the format is invalid.

    Examples:
        >>> client.sanitize_application_number("16123456")
        '16123456'
        >>> client.sanitize_application_number("16,123,456")
        '16123456'
        >>> client.sanitize_application_number("08/123456")
        '08/123456'
        >>> client.sanitize_application_number("08/123,456")
        '08/123456'
    """
    if not input_number or not input_number.strip():
        raise ValueError("Application number cannot be empty")

    def _all_ascii_digits(text: str) -> bool:
        # str.isdigit() is also True for characters such as fullwidth or
        # superscript digits, which are not valid in an application number.
        return bool(text) and all(ch in "0123456789" for ch in text)

    # Remove thousands separators, then every run of whitespace.
    cleaned = "".join(input_number.replace(",", "").split())

    # Series code format (NN/NNNNNN)
    if "/" in cleaned:
        parts = cleaned.split("/")
        if len(parts) != 2:
            raise ValueError(
                f"Invalid application number format: {input_number}. "
                "Expected format: NNNNNNNN or NN/NNNNNN"
            )

        series, serial = parts
        if not (_all_ascii_digits(series) and _all_ascii_digits(serial)):
            raise ValueError(
                f"Invalid application number format: {input_number}. "
                "Series and serial must be numeric."
            )

        if len(series) != 2 or len(serial) != 6:
            raise ValueError(
                f"Invalid application number format: {input_number}. "
                "Expected series code format: NN/NNNNNN (2 digits / 6 digits)"
            )

        return cleaned

    # Standard 8-digit format
    if not _all_ascii_digits(cleaned):
        raise ValueError(
            f"Invalid application number format: {input_number}. "
            "Must contain only digits."
        )

    if len(cleaned) != 8:
        raise ValueError(
            f"Invalid application number format: {input_number}. "
            "Expected 8 digits."
        )

    return cleaned

def _get_wrapper_from_response(
self,
Expand All @@ -80,15 +155,17 @@ def _get_wrapper_from_response(

wrapper = response_data.patent_file_wrapper_data_bag[0]

# This should probably just raise an exception rather than print a warning.
# if (
# application_number_for_validation
# and wrapper.application_number_text != application_number_for_validation
# ):
# print(
# f"Warning: Fetched wrapper application number '{wrapper.application_number_text}' "
# f"does not match requested '{application_number_for_validation}'."
# )
if (
application_number_for_validation
and wrapper.application_number_text != application_number_for_validation
):
warnings.warn(
f"API returned application number '{wrapper.application_number_text}' "
f"but requested '{application_number_for_validation}'. "
f"This may indicate an API data inconsistency.",
USPTODataMismatchWarning,
stacklevel=2,
)
return wrapper

def search_applications(
Expand Down
24 changes: 14 additions & 10 deletions src/pyUSPTO/clients/petition_decisions.py
Original file line number Diff line number Diff line change
Expand Up @@ -6,6 +6,7 @@
decisions in publicly available patent applications and patents filed in 2001 or later.
"""

import warnings
from pathlib import Path
from typing import Any, Dict, Iterator, List, Optional, Union

Expand All @@ -19,6 +20,7 @@
PetitionDecisionDownloadResponse,
PetitionDecisionResponse,
)
from pyUSPTO.warnings import USPTODataMismatchWarning


class FinalPetitionDecisionsClient(BaseUSPTOClient[PetitionDecisionResponse]):
Expand Down Expand Up @@ -81,16 +83,18 @@ def _get_decision_from_response(

decision = response_data.petition_decision_data_bag[0]

# This should probably just raise an exception rather than print a warning.
# if (
# petition_decision_record_identifier_for_validation
# and decision.petition_decision_record_identifier
# != petition_decision_record_identifier_for_validation
# ):
# print(
# f"Warning: Fetched decision identifier '{decision.petition_decision_record_identifier}' "
# f"does not match requested '{petition_decision_record_identifier_for_validation}'."
# )
if (
petition_decision_record_identifier_for_validation
and decision.petition_decision_record_identifier
!= petition_decision_record_identifier_for_validation
):
warnings.warn(
f"API returned decision identifier '{decision.petition_decision_record_identifier}' "
f"but requested '{petition_decision_record_identifier_for_validation}'. "
f"This may indicate an API data inconsistency.",
USPTODataMismatchWarning,
stacklevel=2,
)
return decision

def search_decisions(
Expand Down
Loading
Loading