DunlapCoddingPC · dpieski · Nov 13, 2025 · Nov 11, 2025 · Nov 11, 2025 · Nov 11, 2025
diff --git a/CHANGELOG.md b/CHANGELOG.md
@@ -7,37 +7,39 @@ and this project adheres to [Semantic Versioning](https://semver.org/spec/v2.0.0
 
 ## [Unreleased]
 
+## [0.2.1]
+
+### Added
+
+- `USPTODataMismatchWarning` for API data validation
+- `sanitize_application_number()` method supporting 8-digit and series code formats
+- Optional `include_raw_data` parameter in `USPTOConfig` for debugging
+- Content-Disposition header parsing with RFC 2231 support
+- `HTTPConfig` class for configurable timeouts, retries, and headers
+- `USPTOTimeout` and `USPTOConnectionError` exceptions
+- Document type filtering in `get_application_documents()`
+- Utility module `models/utils.py` for shared model helpers
+
+### Changed
+
+- Response models now support optional `include_raw_data` parameter
+- Replaced print statements with the standard-library `warnings` module
+- Refactored base client to use `HTTPConfig`
+
+## [0.2.0]
+
 ### Added
 
 - Full support for USPTO Final Petition Decisions API
-- `FinalPetitionDecisionsClient` for interacting with petition decisions
-- New data models for petition decisions:
-  - `PetitionDecision`: Complete petition decision information
-  - `PetitionDecisionDocument`: Document details and metadata
-  - `DocumentDownloadOption`: Download options for petition documents
-  - `PetitionDecisionResponse`: API response wrapper
-  - `PetitionDecisionDownloadResponse`: Download response wrapper
-- Enums for petition decision data:
-  - `DecisionTypeCode`: Petition decision types
-  - `DocumentDirectionCategory`: Document direction categories
-- Search capabilities with convenience parameters:
-  - Application number, patent number, technology center
-  - Decision date ranges, applicant names, inventor names
-  - Examiner names, decision types, and more
-- Pagination support for petition decision searches
-- Document download functionality for petition documents
-- CSV and JSON export options for petition decisions
-- Integration tests for petition decisions (17 tests)
-- Unit tests for petition decision models and client (49 tests)
-- Example usage file: `examples/petition_decisions_example.py`
-- Configuration support for petition decisions base URL in `USPTOConfig`
+- `FinalPetitionDecisionsClient` with search, pagination, and document download
+- Data models: `PetitionDecision`, `PetitionDecisionDocument`, `PetitionDecisionResponse`
+- Enums: `DecisionTypeCode`, `DocumentDirectionCategory`
+- CSV and JSON export for petition decisions
 
 ## [0.1.2]
 
 ### Added
 
-- Initial release of pyUSPTO
-- Object Oriented Support for USPTO Patent Data API
-- Basic Support for USPTO Bulk Data API
-- Full type annotations and docstrings
-- Comprehensive test suite
+- Initial release
+- USPTO Patent Data API support
+- USPTO Bulk Data API support
diff --git a/src/pyUSPTO/__init__.py b/src/pyUSPTO/__init__.py
@@ -39,6 +39,7 @@
 )
 from pyUSPTO.warnings import (
     USPTOBooleanParseWarning,
+    USPTODataMismatchWarning,
     USPTODataWarning,
     USPTODateParseWarning,
     USPTOEnumParseWarning,
@@ -59,6 +60,7 @@
     "USPTOBooleanParseWarning",
     "USPTOTimezoneWarning",
     "USPTOEnumParseWarning",
+    "USPTODataMismatchWarning",
     # Bulk Data API
     "BulkDataClient",
     "BulkDataResponse",

diff --git a/src/pyUSPTO/clients/base.py b/src/pyUSPTO/clients/base.py
@@ -4,6 +4,7 @@
 This module provides a base client class with common functionality for all USPTO API clients.
 """
 
+import re
 from pathlib import Path
 from typing import (
     Any,
@@ -39,7 +40,7 @@ class FromDictProtocol(Protocol):
     """Protocol for classes that can be created from a dictionary."""
 
     @classmethod
-    def from_dict(cls, data: Dict[str, Any]) -> Any:
+    def from_dict(cls, data: Dict[str, Any], include_raw_data: bool = False) -> Any:
         """Create an object from a dictionary."""
         ...
 
@@ -177,12 +178,13 @@ def _make_request(
 
             # Return the raw response for streaming requests
             if stream:
-                # TODO: Handle Content-Disposition
                 return response
 
             # Parse the response based on the specified class
             if response_class:
-                parsed_response: T = response_class.from_dict(response.json())
+                parsed_response: T = response_class.from_dict(
+                    response.json(), include_raw_data=self.config.include_raw_data
+                )
                 return parsed_response
 
             # Return the raw JSON for other requests
@@ -270,37 +272,95 @@ def paginate_results(
 
             offset += limit
 
+    @staticmethod
+    def _extract_filename_from_content_disposition(
+        content_disposition: Optional[str],
+    ) -> Optional[str]:
+        """Extract a bare filename from a Content-Disposition header.
+
+        The RFC 2231/5987 form (``filename*=charset'lang'value``) wins over plain
+        ``filename=``; parameter names are matched case-insensitively. Directory
+        components are stripped because the value is server-controlled and callers
+        join it onto a local directory.
+
+        Args:
+            content_disposition: The Content-Disposition header value.
+
+        Returns:
+            Optional[str]: The filename, or None if absent, empty, "." or "..".
+
+        Examples:
+            >>> _extract_filename_from_content_disposition('attachment; filename="document.pdf"')
+            'document.pdf'
+            >>> _extract_filename_from_content_disposition("attachment; filename*=UTF-8''file%20name.pdf")
+            'file name.pdf'
+            >>> _extract_filename_from_content_disposition('attachment; filename="../../x.pdf"')
+            'x.pdf'
+        """
+        from urllib.parse import unquote
+
+        header = content_disposition or ""
+        # Extended form first: optional charset, optional language, %-encoded value.
+        ext = re.search(r"filename\*\s*=\s*([\w.:-]*)'[^']*'([^;\s]+)", header, re.I)
+        plain = re.search(r'filename\s*=\s*(?:"([^"]*)"|([^;\s]+))', header, re.I)
+        name = (plain.group(1) or plain.group(2) or "") if plain else ""
+        if ext:
+            try:
+                name = unquote(ext.group(2), encoding=ext.group(1) or "utf-8")
+            except LookupError:  # unknown charset label: fall back to UTF-8
+                name = unquote(ext.group(2))
+        # Keep only the last path component (handles both "/" and "\\" separators).
+        name = name.replace("\\", "/").rsplit("/", 1)[-1].strip()
+        return None if name in ("", ".", "..") else name
+
     def _save_response_to_file(
         self, response: requests.Response, file_path: str, overwrite: bool = False
     ) -> str:
         """Save a streaming response to a file on disk.
 
+        If file_path is an existing directory, the filename is taken from the
+        response's Content-Disposition header and the file is saved inside it.
+
         Args:
             response: Streaming response object from requests
-            file_path: Local path where file should be saved
+            file_path: Local path where file should be saved. Either a file path
+                or an existing directory (then the Content-Disposition filename is used).
             overwrite: Whether to overwrite existing files. Default False
 
         Returns:
             str: Path to the saved file
 
         Raises:
             FileExistsError: If file exists and overwrite=False
+            ValueError: If file_path is a directory and the header yields no filename
         """
-        # Check for existing file
         from pathlib import Path
 
         path = Path(file_path)
+
+        # Existing directory: take the filename from the Content-Disposition header
+        if path.is_dir():
+            content_disp = response.headers.get("Content-Disposition")
+            filename = self._extract_filename_from_content_disposition(content_disp)
+            if not filename:
+                raise ValueError(
+                    f"file_path is a directory ({file_path}) but Content-Disposition "
+                    "header does not contain a filename. Please provide a full file path."
+                )
+            path = path / filename
+
+        # Refuse to replace an existing file unless the caller opted in
         if path.exists() and not overwrite:
             raise FileExistsError(
-                f"File already exists: {file_path}. Set overwrite=True to replace."
+                f"File already exists: {path}. Set overwrite=True to replace."
             )
 
         # Save to disk with streaming
-        with open(file=file_path, mode="wb") as f:
+        with open(file=str(path), mode="wb") as f:
             for chunk in response.iter_content(chunk_size=8192):
                 if chunk:  # Filter out keep-alive chunks
                     f.write(chunk)
-        return file_path
+        return str(path)
 
     def _download_file(self, url: str, file_path: str, overwrite: bool = False) -> str:
         """Download a file directly to disk.

diff --git a/src/pyUSPTO/clients/patent_data.py b/src/pyUSPTO/clients/patent_data.py
@@ -5,6 +5,7 @@
 It allows you to search for and retrieve patent application data.
 """
 
+import warnings
 from pathlib import Path
 from typing import Any, Dict, Iterator, List, Optional
 from urllib.parse import urljoin, urlparse
@@ -30,6 +31,7 @@
     StatusCodeCollection,
     StatusCodeSearchResponse,
 )
+from pyUSPTO.warnings import USPTODataMismatchWarning
 
 
 class PatentDataClient(BaseUSPTOClient[PatentDataResponse]):
@@ -67,7 +69,80 @@ def __init__(
             api_key=api_key_to_use, base_url=effective_base_url, config=self.config
         )
 
-    # TODO: def sanitize_application_no(inputNumber: str) -> str:
+    def sanitize_application_number(self, input_number: str) -> str:
+        """Sanitize and validate a USPTO application number.
+
+        Application numbers are either:
+        - 8 digits (e.g., "16123456")
+        - Series code format: 2 digits + "/" + 6 digits (e.g., "08/123456")
+
+        Commas and all whitespace are removed; the "/" of the series code format
+        is preserved. Only ASCII digits 0-9 count as digits (``str.isdigit()``
+        alone would also accept characters such as "²" or full-width digits).
+
+        Args:
+            input_number: Raw application number input. May include commas
+                or whitespace as formatting.
+
+        Returns:
+            str: Sanitized application number (either "NNNNNNNN" or "NN/NNNNNN").
+
+        Raises:
+            ValueError: If the input is empty or the format is invalid.
+
+        Examples:
+            >>> client.sanitize_application_number("16,123,456")
+            '16123456'
+            >>> client.sanitize_application_number("08/123,456")
+            '08/123456'
+        """
+        if not input_number or not input_number.strip():
+            raise ValueError("Application number cannot be empty")
+
+        # split() with no arguments drops every run of whitespace, not just " ".
+        cleaned = "".join(input_number.split()).replace(",", "")
+
+        def _is_ascii_digits(text: str) -> bool:
+            return text.isascii() and text.isdigit()
+
+        # Series code format (NN/NNNNNN)
+        if "/" in cleaned:
+            parts = cleaned.split("/")
+            if len(parts) != 2:
+                raise ValueError(
+                    f"Invalid application number format: {input_number}. "
+                    "Expected format: NNNNNNNN or NN/NNNNNN"
+                )
+
+            series, serial = parts
+            if not (_is_ascii_digits(series) and _is_ascii_digits(serial)):
+                raise ValueError(
+                    f"Invalid application number format: {input_number}. "
+                    "Series and serial must be numeric."
+                )
+
+            if len(series) != 2 or len(serial) != 6:
+                raise ValueError(
+                    f"Invalid application number format: {input_number}. "
+                    "Expected series code format: NN/NNNNNN (2 digits / 6 digits)"
+                )
+
+            return cleaned
+
+        # Standard 8-digit format
+        if not _is_ascii_digits(cleaned):
+            raise ValueError(
+                f"Invalid application number format: {input_number}. "
+                "Must contain only digits."
+            )
+
+        if len(cleaned) != 8:
+            raise ValueError(
+                f"Invalid application number format: {input_number}. "
+                "Expected 8 digits."
+            )
+
+        return cleaned
 
     def _get_wrapper_from_response(
         self,
@@ -80,15 +155,17 @@ def _get_wrapper_from_response(
 
         wrapper = response_data.patent_file_wrapper_data_bag[0]
 
-        # This should probably just raise an exception rather than print a warning.
-        # if (
-        #     application_number_for_validation
-        #     and wrapper.application_number_text != application_number_for_validation
-        # ):
-        #     print(
-        #         f"Warning: Fetched wrapper application number '{wrapper.application_number_text}' "
-        #         f"does not match requested '{application_number_for_validation}'."
-        #     )
+        if (
+            application_number_for_validation
+            and wrapper.application_number_text != application_number_for_validation
+        ):
+            warnings.warn(
+                f"API returned application number '{wrapper.application_number_text}' "
+                f"but requested '{application_number_for_validation}'. "
+                f"This may indicate an API data inconsistency.",
+                USPTODataMismatchWarning,
+                stacklevel=2,
+            )
         return wrapper
 
     def search_applications(

diff --git a/src/pyUSPTO/clients/petition_decisions.py b/src/pyUSPTO/clients/petition_decisions.py
@@ -6,6 +6,7 @@
 decisions in publicly available patent applications and patents filed in 2001 or later.
 """
 
+import warnings
 from pathlib import Path
 from typing import Any, Dict, Iterator, List, Optional, Union
 
@@ -19,6 +20,7 @@
     PetitionDecisionDownloadResponse,
     PetitionDecisionResponse,
 )
+from pyUSPTO.warnings import USPTODataMismatchWarning
 
 
 class FinalPetitionDecisionsClient(BaseUSPTOClient[PetitionDecisionResponse]):
@@ -81,16 +83,18 @@ def _get_decision_from_response(
 
         decision = response_data.petition_decision_data_bag[0]
 
-        # This should probably just raise an exception rather than print a warning.
-        # if (
-        #     petition_decision_record_identifier_for_validation
-        #     and decision.petition_decision_record_identifier
-        #     != petition_decision_record_identifier_for_validation
-        # ):
-        #     print(
-        #         f"Warning: Fetched decision identifier '{decision.petition_decision_record_identifier}' "
-        #         f"does not match requested '{petition_decision_record_identifier_for_validation}'."
-        #     )
+        if (
+            petition_decision_record_identifier_for_validation
+            and decision.petition_decision_record_identifier
+            != petition_decision_record_identifier_for_validation
+        ):
+            warnings.warn(
+                f"API returned decision identifier '{decision.petition_decision_record_identifier}' "
+                f"but requested '{petition_decision_record_identifier_for_validation}'. "
+                f"This may indicate an API data inconsistency.",
+                USPTODataMismatchWarning,
+                stacklevel=2,
+            )
         return decision
 
     def search_decisions(