Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions .gitignore
Original file line number Diff line number Diff line change
Expand Up @@ -20,3 +20,4 @@ dist/
Python/
run_integration_tests.py
.plan.md
CLAUDE.md
9 changes: 6 additions & 3 deletions docs/source/conf.py
Original file line number Diff line number Diff line change
@@ -1,3 +1,5 @@
"""Docstring for docs.source.conf."""

# Configuration file for the Sphinx documentation builder.
#
# For the full list of built-in configuration values, see the documentation:
Expand All @@ -6,6 +8,10 @@
# -- Path setup --------------------------------------------------------------
import os
import sys

# The full version, including alpha/beta/rc tags.
# setuptools_scm generates the version at build time; read it back from the
# installed package metadata.
from importlib.metadata import version as get_version
from typing import Any

sys.path.insert(0, os.path.abspath("../.."))
Expand All @@ -17,9 +23,6 @@
copyright = "2025, Dunlap Codding, P.C."
author = "Andrew Piechocki"

# The full version, including alpha/beta/rc tags
# Use setuptools_scm to get the version
from importlib.metadata import version as get_version

release: str = get_version(distribution_name="pyUSPTO")
# for example take major/minor
Expand Down
221 changes: 145 additions & 76 deletions examples/bulk_data_example.py
Original file line number Diff line number Diff line change
@@ -1,15 +1,14 @@
"""Example usage of the uspto_api module for bulk data.
"""Example usage of the BulkDataClient.

This example demonstrates how to use the BulkDataClient to interact with the USPTO Bulk Data API.
It shows how to retrieve product information, search for products, and download files.
It shows how to search for products, retrieve product details, and download files.
"""

import os

import requests

from pyUSPTO.clients import BulkDataClient # Import from top-level package
from pyUSPTO.clients import BulkDataClient
from pyUSPTO.config import USPTOConfig
from pyUSPTO.models.bulk_data import FileData


def format_size(size_bytes: int | float) -> str:
Expand All @@ -34,93 +33,163 @@ def format_size(size_bytes: int | float) -> str:
return f"{size_bytes:.2f} {size_names[i]}"


# Method 1: Initialize the client with direct API key
# This approach is simple but less flexible
# ============================================================================
# Client Initialization Methods
# ============================================================================

# Method 1: Initialize with API key directly
print("Method 1: Initialize with direct API key")
api_key = "YOUR_API_KEY_HERE" # Replace with your actual API key
client = BulkDataClient(api_key=api_key)

# Method 2: Initialize the client with USPTOConfig
# This approach provides more configuration options
# Method 2: Initialize with USPTOConfig object
print("\nMethod 2: Initialize with USPTOConfig")
config = USPTOConfig(
api_key="YOUR_API_KEY_HERE", # Replace with your actual API key
bulk_data_base_url="https://api.uspto.gov/api/v1/datasets",
patent_data_base_url="https://api.uspto.gov/api/v1/patent",
)
config = USPTOConfig(api_key="YOUR_API_KEY_HERE")
client = BulkDataClient(config=config)

# Method 3: Initialize the client with environment variables
# This is the most secure approach for production use
print("\nMethod 3: Initialize with environment variables")
# Set environment variable (in a real scenario, this would be set outside the script)
os.environ["USPTO_API_KEY"] = "YOUR_API_KEY_HERE" # Replace with your actual API key
# Method 3: Initialize from environment variables (recommended for production)
print("\nMethod 3: Initialize from environment variables")
os.environ["USPTO_API_KEY"] = "YOUR_API_KEY_HERE" # Set this outside your script
config_from_env = USPTOConfig.from_env()
client = BulkDataClient(config=config_from_env)

print("\nBeginning API requests with configured client:")
print("\n" + "=" * 60)
print("Beginning API requests with configured client")
print("=" * 60)


# ============================================================================
# Example 1: Search for Products
# ============================================================================

print("\n--- Example 1: Search for Products ---")
# The Bulk Data API supports full-text search via the query parameter
# Field-specific queries (e.g., "productIdentifier:value") are not supported

# Get all available products
response = client.get_products()
print(f"Found {response.count} products")
# Search for patent-related products
response = client.search_products(query="patent", limit=5)
print(f"Found {response.count} products matching 'patent'")

# Display information about each product
for product in response.bulk_data_product_bag:
print(f"\nProduct: {product.product_title_text}")
print(f"ID: {product.product_identifier}")
print(f"Description: {product.product_description_text}")
print(f"Date range: {product.product_from_date} to {product.product_to_date}")
print(f"Total files: {product.product_file_total_quantity}")
print(f"Total size: {format_size(size_bytes=product.product_total_file_size)}")

# Get detailed product info with files included
print(f"\n Product: {product.product_title_text}")
print(f" ID: {product.product_identifier}")
print(f" Description: {product.product_description_text[:100]}...")
print(f" Total files: {product.product_file_total_quantity}")
print(f" Total size: {format_size(product.product_total_file_size)}")


# ============================================================================
# Example 2: Paginate Through All Products
# ============================================================================

print("\n--- Example 2: Paginate Through Products ---")
# Use pagination to iterate through all matching products

count = 0
for product in client.paginate_products(query="trademark", limit=10):
count += 1
print(f" {count}. {product.product_title_text} ({product.product_identifier})")
if count >= 20: # Limit output for example
print(" ... (stopping after 20 products)")
break


# ============================================================================
# Example 3: Get Product Details by ID
# ============================================================================

print("\n--- Example 3: Get Product by ID ---")
# Retrieve a specific product by its identifier
# Use include_files=True to get file listing

product_id = "PTGRXML" # Patent Grant Full-Text Data (No Images) - XML
product = client.get_product_by_id(product_id, include_files=True, latest=True)

print(f"Product: {product.product_title_text}")
print(f"Description: {product.product_description_text}")
print(f"Frequency: {product.product_frequency_text}")
print(f"Labels: {product.product_label_array_text}")
print(f"Categories: {product.product_dataset_category_array_text}")
print(f"Date range: {product.product_from_date} to {product.product_to_date}")


# ============================================================================
# Example 4: List Files for a Product
# ============================================================================

print("\n--- Example 4: List Files for a Product ---")
# Get product with files and display file details

if product.product_file_bag and product.product_file_bag.file_data_bag:
print(f"Found {len(product.product_file_bag.file_data_bag)} file(s):")

for file_data in product.product_file_bag.file_data_bag:
print(f"\n File: {file_data.file_name}")
print(f" Size: {format_size(file_data.file_size)}")
print(f" Type: {file_data.file_type_text}")
print(
f" Data range: {file_data.file_data_from_date} to {file_data.file_data_to_date}"
)
print(f" Released: {file_data.file_release_date}")
print(f" Download URI: {file_data.file_download_uri}")
else:
print("No files found for this product")


# ============================================================================
# Example 5: Download a File
# ============================================================================

print("\n--- Example 5: Download a File ---")
# Download a file from the product

min_file: FileData | None = None
last_bytes: float = float("inf")

if product.product_file_bag and product.product_file_bag.file_data_bag:
for file_data in product.product_file_bag.file_data_bag:
if file_data.file_size < last_bytes:
last_bytes = file_data.file_size
min_file = file_data

if min_file:
print(f"Downloading smallest file: {min_file.file_name}")
print(f"Size: {format_size(min_file.file_size)}")

try:
detailed_product = client.get_product_by_id(
product.product_identifier, include_files=True
# Download with extraction (default behavior for archives)
downloaded_path = client.download_file(
file_data=min_file,
destination="./downloads",
overwrite=True,
extract=True, # Auto-extract if it's a tar.gz or zip
)
if (
detailed_product.product_file_bag
and detailed_product.product_file_bag.file_data_bag
):
print(f"\nFiles ({detailed_product.product_file_bag.count}):")
for file_data in detailed_product.product_file_bag.file_data_bag:
print(f" - {file_data.file_name} ({format_size(file_data.file_size)})")
print(f" Type: {file_data.file_type_text}")
print(f" Released: {file_data.file_release_date}")
if file_data.file_download_uri:
print(f" Download URI: {file_data.file_download_uri}")
else:
print("\nNo files available for this product")
print(f"SUCCESS: Downloaded to {downloaded_path}")
except Exception as e:
print(f"\nError retrieving detailed product info: {e}")

# Search for products by date range
date_filtered = client.search_products(from_date="2025-01-01", to_date="2025-03-31")
print(f"\nFound {date_filtered.count} products in date range")

# Search for products by label
try:
# Using labels we saw in the API response
label_filtered = client.search_products(labels=["Patent"])
print(f"\nFound {label_filtered.count} products with label 'Patent'")
except requests.exceptions.HTTPError as e:
print(f"Error searching by labels: {e}")

# Get a specific product by ID
product_id = "PEDSJSON" # Using a real product ID from the output
try:
product = client.get_product_by_id(product_id, include_files=True)
print(f"\nRetrieved product: {product.product_title_text}")

# Download a file from this product
if product.product_file_bag and product.product_file_bag.file_data_bag:
file_to_download = product.product_file_bag.file_data_bag[0]
print(f"File download URI: {file_to_download.file_download_uri}")
print(f"ERROR: {e}")


# ============================================================================
# Example 6: Download Without Extraction
# ============================================================================

print("\n--- Example 6: Download Without Extraction ---")
# Download archive file without extracting

if product.product_file_bag and product.product_file_bag.file_data_bag and min_file:
try:
# Download without extraction
downloaded_path = client.download_file(
file_data=file_to_download, destination="./downloads"
file_data=min_file,
destination="./downloads",
overwrite=True,
extract=False, # Keep archive compressed
)
print(f"Downloaded file to: {downloaded_path}")
print(f"File size: {format_size(size_bytes=file_to_download.file_size)}")
print(f"SUCCESS: Archive saved to {downloaded_path}")
except Exception as e:
print(f"ERROR: {e}")


except Exception as e:
print(f"Error retrieving product {product_id}: {e}")
print("\n" + "=" * 60)
print("Examples complete!")
print("=" * 60)
97 changes: 97 additions & 0 deletions examples/error_handling_example.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,97 @@
"""Error handling example for pyUSPTO.

This example demonstrates how to handle common errors when using the USPTO API clients.
"""

import os
import time

from pyUSPTO.clients import PatentDataClient
from pyUSPTO.config import USPTOConfig
from pyUSPTO.exceptions import (
USPTOApiAuthError,
USPTOApiNotFoundError,
USPTOApiRateLimitError,
USPTOConnectionError,
USPTOTimeout,
)
from pyUSPTO.http_config import HTTPConfig

# Initialize client
# Read the key from the USPTO_API_KEY environment variable, falling back to a
# placeholder string when the variable is unset.
api_key = os.environ.get("USPTO_API_KEY", "YOUR_API_KEY_HERE")
client = PatentDataClient(api_key=api_key)

# Example 1: Handle authentication errors
print("Example 1: Authentication errors")
try:
    # This will fail with invalid API key
    bad_client = PatentDataClient(api_key="invalid_key")
    bad_client.search_applications(limit=1)
except USPTOApiAuthError as e:
    print(f"Authentication failed: {e}")
    print("Check your API key and try again.")

# Example 2: Handle not found errors
print("\nExample 2: Not found errors")
try:
    # Try to get a non-existent application
    client.get_application_by_number("99999999")
except USPTOApiNotFoundError as e:
    print(f"Resource not found: {e}")
    print("The application number may be invalid or not in the system.")

# Example 3: Handle rate limiting with retry
print("\nExample 3: Rate limiting")
# Bound the retries so a persistent rate limit is reported instead of raising
# out of the handler and aborting the remaining examples.
RATE_LIMIT_MAX_ATTEMPTS = 2  # the initial request plus one retry
RATE_LIMIT_WAIT_SECONDS = 60
for attempt in range(1, RATE_LIMIT_MAX_ATTEMPTS + 1):
    try:
        # If you hit rate limits, the API returns 429
        results = client.search_applications(limit=100)
    except USPTOApiRateLimitError as e:
        print(f"Rate limit exceeded: {e}")
        if attempt == RATE_LIMIT_MAX_ATTEMPTS:
            print("Still rate limited after retrying; giving up.")
            break
        print(f"Waiting {RATE_LIMIT_WAIT_SECONDS} seconds before retry...")
        time.sleep(RATE_LIMIT_WAIT_SECONDS)
    else:
        # Success path: keep the two original messages so the output shows
        # whether the first request or the retry succeeded.
        if attempt == 1:
            print(f"Retrieved {results.count} results")
        else:
            print(f"Retry successful: {results.count} results")
        break

# Example 4: Handle timeouts with custom configuration
print("\nExample 4: Timeout handling")
# Configure shorter timeout for demonstration
http_config = HTTPConfig(timeout=1)  # Very short timeout
config = USPTOConfig(api_key=api_key, http_config=http_config)
timeout_client = PatentDataClient(config=config)

try:
    timeout_client.search_applications(limit=100)
except USPTOTimeout as e:
    print(f"Request timed out: {e}")
    print("Consider increasing timeout or checking network connection.")

# Example 5: Handle connection errors
print("\nExample 5: Connection errors")
try:
    # This might fail due to network issues
    client.search_applications(limit=10)
except USPTOConnectionError as e:
    print(f"Connection error: {e}")
    print("Check your network connection and try again.")

# Example 6: Catch-all for unexpected errors
print("\nExample 6: General error handling")
# Specific handlers come first; the final `except Exception` only sees errors
# that none of the handlers above it matched.
try:
    results = client.search_applications(patent_number_q="10000000", limit=5)
    if results.count > 0:
        print(f"Found {results.count} matching patents")
    else:
        print("No results found")
except USPTOApiRateLimitError:
    print("Rate limited - wait and retry")
except USPTOApiAuthError:
    print("Authentication failed - check API key")
except USPTOTimeout:
    print("Request timed out - try again")
except USPTOConnectionError:
    print("Connection failed - check network")
except Exception as e:
    # Catch any other unexpected errors
    print(f"Unexpected error: {e}")
2 changes: 2 additions & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -72,8 +72,10 @@ dev = [

[project.urls]
GitHub = "https://github.com/DunlapCoddingPC/pyUSPTO"
homepage = "https://github.com/DunlapCoddingPC/pyUSPTO"
issues = "https://github.com/DunlapCoddingPC/pyUSPTO/issues"
Documentation = "https://pyuspto.readthedocs.io/en/latest/"
releasenotes = "https://github.com/DunlapCoddingPC/pyUSPTO/releases/latest"


[tool.setuptools_scm]
Expand Down
Loading
Loading