Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/cachekit/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import json
import logging
import os
import platform
import random
import threading
import time
Expand Down Expand Up @@ -170,7 +171,7 @@ def __init__(self, name: str, mask_sensitive: bool = True):

# Pre-computed values for performance
self._sampling_threshold = int(SAMPLING_RATE * 100)
self._hostname = os.uname().nodename
self._hostname = platform.node()
self._pid = os.getpid()

# PII patterns to mask (pre-compiled for speed)
Expand Down
63 changes: 50 additions & 13 deletions src/cachekit/serializers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from __future__ import annotations

import logging
from threading import Lock
from typing import Any
from typing import TYPE_CHECKING, Any

from cachekit._rust_serializer import ByteStorage

from .arrow_serializer import ArrowSerializer
from .auto_serializer import AutoSerializer
from .base import (
SerializationError,
Expand All @@ -16,8 +17,25 @@
from .orjson_serializer import OrjsonSerializer
from .standard_serializer import StandardSerializer

if TYPE_CHECKING:
from .arrow_serializer import ArrowSerializer

logger = logging.getLogger(__name__)

# Lazy import for optional ArrowSerializer (requires pyarrow from [data] extra)
_ArrowSerializer: type | None = None


def _get_arrow_serializer() -> type:
    """Return the ArrowSerializer class, importing it on first use.

    The class is cached in the module-level ``_ArrowSerializer`` slot so the
    deferred import statement only runs until it has succeeded once.

    Returns:
        The ``ArrowSerializer`` class object.

    Raises:
        ImportError: If importing ``.arrow_serializer`` fails (pyarrow, from
            the ``[data]`` extra, is not installed).
    """
    global _ArrowSerializer

    # Fast path: a previous call already resolved the class.
    if _ArrowSerializer is not None:
        return _ArrowSerializer

    # Deferred import keeps pyarrow optional for users who never ask for arrow.
    from .arrow_serializer import ArrowSerializer as _loaded_class

    _ArrowSerializer = _loaded_class
    return _ArrowSerializer


# Validate ByteStorage works correctly
test_storage = ByteStorage("msgpack")
test_data = b"test validation data"
Expand All @@ -36,7 +54,7 @@
"auto": AutoSerializer, # Python-specific types (NumPy, pandas, datetime optimization)
"default": StandardSerializer, # Language-agnostic MessagePack for multi-language caches
"std": StandardSerializer, # Explicit StandardSerializer alias
"arrow": ArrowSerializer,
"arrow": None, # Lazy-loaded: requires pyarrow from [data] extra
"orjson": OrjsonSerializer,
"encrypted": EncryptionWrapper, # AutoSerializer + AES-256-GCM encryption
}
Expand Down Expand Up @@ -96,8 +114,13 @@
f"@cache(serializer=MySerializer())"
)

# Get serializer class (lazy-load arrow if needed)
if name == "arrow":
serializer_class = _get_arrow_serializer()
else:
serializer_class = SERIALIZER_REGISTRY[name]

# Instantiate with integrity checking configuration
serializer_class = SERIALIZER_REGISTRY[name]
if name in ("default", "std", "auto", "arrow", "orjson"):
# All core serializers use enable_integrity_checking parameter
serializer = serializer_class(enable_integrity_checking=enable_integrity_checking)
Expand Down Expand Up @@ -167,9 +190,9 @@
def benchmark_serializers() -> dict[str, Any]:
    """Get instantiated serializers for benchmarking.

    Every name in ``SERIALIZER_REGISTRY`` is instantiated via
    ``get_serializer``. A serializer that cannot be created is logged at
    warning level and left out of the result rather than aborting the run.

    Returns:
        Mapping of registry name to serializer instance, containing only the
        serializers that were instantiated successfully.
    """
    serializers: dict[str, Any] = {}
    for name in SERIALIZER_REGISTRY:
        try:
            # Go through get_serializer() instead of calling the registry value
            # directly: the "arrow" entry is a None placeholder that
            # get_serializer() resolves lazily.
            serializers[name] = get_serializer(name)
        except Exception as e:
            # Best-effort by design: one unavailable serializer must not
            # prevent benchmarking the others.
            logger.warning(f"Failed to instantiate {name} serializer: {e}")
    return serializers
Expand All @@ -178,28 +201,42 @@
def get_serializer_info() -> dict[str, dict[str, Any]]:
    """Get information about available serializers.

    Each name in ``SERIALIZER_REGISTRY`` is instantiated via
    ``get_serializer`` and described. Failures are recorded in the result
    instead of being raised.

    Returns:
        Mapping of registry name to a description dict. Successful entries
        carry ``class``, ``module``, ``available=True`` and ``description``,
        plus whatever the instance's optional ``get_info()`` returns. Failed
        entries carry ``class``, ``module``, ``available=False`` and ``error``.
    """
    info: dict[str, dict[str, Any]] = {}
    for name in SERIALIZER_REGISTRY:
        try:
            instance = get_serializer(name)
            # Describe the concrete class of the instance; the registry value
            # itself may be a None placeholder (lazy-loaded "arrow").
            serializer_cls = type(instance)
            entry: dict[str, Any] = {
                "class": serializer_cls.__name__,
                "module": serializer_cls.__module__,
                "available": True,
                "description": serializer_cls.__doc__ or "No description available",
            }
            # get_info() is optional and not declared on SerializerProtocol, so
            # look it up dynamically; a direct attribute access is rejected by
            # the type checker (reportAttributeAccessIssue).
            get_info = getattr(instance, "get_info", None)
            if callable(get_info):
                entry.update(get_info())
            info[name] = entry
        except ImportError as e:
            # Presumably a missing optional dependency (e.g. pyarrow for
            # "arrow"). Only report the arrow class/module when the failing
            # serializer really is arrow.
            is_arrow = name == "arrow"
            info[name] = {
                "class": "ArrowSerializer" if is_arrow else "Unknown",
                "module": "cachekit.serializers.arrow_serializer" if is_arrow else "unknown",
                "available": False,
                "error": str(e),
            }
        except Exception as e:
            # Any other failure: no instance exists, so no class details can
            # be reported.
            info[name] = {
                "class": "Unknown",
                "module": "unknown",
                "available": False,
                "error": str(e),
            }
    return info


def __getattr__(name: str) -> Any:
    """Module-level attribute hook that resolves ``ArrowSerializer`` lazily.

    Args:
        name: Attribute name being looked up on this module.

    Returns:
        The ArrowSerializer class when ``name`` is ``"ArrowSerializer"``.

    Raises:
        AttributeError: For any other attribute name.
        ImportError: Propagated from ``_get_arrow_serializer()`` when the
            arrow serializer cannot be imported.
    """
    # Guard clause: only ArrowSerializer is provided lazily.
    if name != "ArrowSerializer":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return _get_arrow_serializer()


# Export the main interface
__all__ = [
"ArrowSerializer",
Expand Down
Loading