Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
3 changes: 2 additions & 1 deletion src/cachekit/logging.py
Original file line number Diff line number Diff line change
Expand Up @@ -7,6 +7,7 @@
import json
import logging
import os
import platform
import random
import threading
import time
Expand Down Expand Up @@ -170,7 +171,7 @@ def __init__(self, name: str, mask_sensitive: bool = True):

# Pre-computed values for performance
self._sampling_threshold = int(SAMPLING_RATE * 100)
self._hostname = os.uname().nodename
self._hostname = platform.node()
self._pid = os.getpid()

# PII patterns to mask (pre-compiled for speed)
Expand Down
65 changes: 51 additions & 14 deletions src/cachekit/serializers/__init__.py
Original file line number Diff line number Diff line change
@@ -1,10 +1,11 @@
from __future__ import annotations

import logging
from threading import Lock
from typing import Any
from typing import TYPE_CHECKING, Any

from cachekit._rust_serializer import ByteStorage

from .arrow_serializer import ArrowSerializer
from .auto_serializer import AutoSerializer
from .base import (
SerializationError,
Expand All @@ -16,8 +17,25 @@
from .orjson_serializer import OrjsonSerializer
from .standard_serializer import StandardSerializer

if TYPE_CHECKING:
from .arrow_serializer import ArrowSerializer

logger = logging.getLogger(__name__)

# Cache slot for the optional ArrowSerializer class (requires pyarrow from the
# [data] extra). Holds None until the first successful lazy import.
_ArrowSerializer: type | None = None


def _get_arrow_serializer() -> type:
    """Return the ArrowSerializer class, importing it on first use.

    The import of ``.arrow_serializer`` is deferred to call time and the
    resulting class is memoized in the module-level ``_ArrowSerializer``, so
    later calls return the cached class without re-importing.

    Returns:
        The ``ArrowSerializer`` class object (not an instance).

    Raises:
        ImportError: If pyarrow is not installed.
    """
    global _ArrowSerializer
    if _ArrowSerializer is not None:
        return _ArrowSerializer

    from .arrow_serializer import ArrowSerializer as loaded_cls

    _ArrowSerializer = loaded_cls
    return loaded_cls


# Validate ByteStorage works correctly
test_storage = ByteStorage("msgpack")
test_data = b"test validation data"
Expand All @@ -36,7 +54,7 @@
"auto": AutoSerializer, # Python-specific types (NumPy, pandas, datetime optimization)
"default": StandardSerializer, # Language-agnostic MessagePack for multi-language caches
"std": StandardSerializer, # Explicit StandardSerializer alias
"arrow": ArrowSerializer,
"arrow": None, # Lazy-loaded: requires pyarrow from [data] extra
"orjson": OrjsonSerializer,
"encrypted": EncryptionWrapper, # AutoSerializer + AES-256-GCM encryption
}
Expand Down Expand Up @@ -96,8 +114,13 @@ def get_serializer(name: str, enable_integrity_checking: bool = True) -> Seriali
f"@cache(serializer=MySerializer())"
)

# Get serializer class (lazy-load arrow if needed)
if name == "arrow":
serializer_class = _get_arrow_serializer()
else:
serializer_class = SERIALIZER_REGISTRY[name]

# Instantiate with integrity checking configuration
serializer_class = SERIALIZER_REGISTRY[name]
if name in ("default", "std", "auto", "arrow", "orjson"):
# All core serializers use enable_integrity_checking parameter
serializer = serializer_class(enable_integrity_checking=enable_integrity_checking)
Expand Down Expand Up @@ -167,9 +190,9 @@ def get_available_serializers() -> dict[str, Any]:
def benchmark_serializers() -> dict[str, Any]:
"""Get instantiated serializers for benchmarking."""
serializers = {}
for name, cls in get_available_serializers().items():
for name in SERIALIZER_REGISTRY:
try:
serializers[name] = cls()
serializers[name] = get_serializer(name)
except Exception as e:
logger.warning(f"Failed to instantiate {name} serializer: {e}")
return serializers
Expand All @@ -178,28 +201,42 @@ def benchmark_serializers() -> dict[str, Any]:
def get_serializer_info() -> dict[str, dict[str, Any]]:
"""Get information about available serializers."""
info = {}
for name, cls in get_available_serializers().items():
for name in SERIALIZER_REGISTRY:
try:
instance = cls()
instance = get_serializer(name)
info[name] = {
"class": cls.__name__,
"module": cls.__module__,
"class": type(instance).__name__,
"module": type(instance).__module__,
"available": True,
"description": cls.__doc__ or "No description available",
"description": type(instance).__doc__ or "No description available",
}
# Add method info if available
if hasattr(instance, "get_info"):
info[name].update(instance.get_info())
info[name].update(instance.get_info()) # type: ignore[attr-defined]
except ImportError as e:
info[name] = {
"class": "ArrowSerializer" if name == "arrow" else "Unknown",
"module": "cachekit.serializers.arrow_serializer",
"available": False,
"error": str(e),
}
except Exception as e:
info[name] = {
"class": cls.__name__,
"module": cls.__module__,
"class": "Unknown",
"module": "unknown",
"available": False,
"error": str(e),
}
return info


def __getattr__(name: str) -> Any:
    """Module-level attribute hook serving the optional ArrowSerializer lazily.

    Args:
        name: Attribute name that was not found by normal module lookup.

    Returns:
        The ArrowSerializer class when ``name`` is ``"ArrowSerializer"``.

    Raises:
        AttributeError: For every other name.
        ImportError: Propagated from the lazy loader if pyarrow is missing.
    """
    if name != "ArrowSerializer":
        raise AttributeError(f"module {__name__!r} has no attribute {name!r}")
    return _get_arrow_serializer()


# Export the main interface
__all__ = [
"ArrowSerializer",
Expand Down
20 changes: 20 additions & 0 deletions tests/unit/conftest.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,20 @@
"""Unit test configuration - no Redis required.

Unit tests are fast, in-memory tests that don't require Redis.
This conftest overrides the root conftest's autouse fixture to
skip Redis setup for pure unit tests.
"""

import pytest


@pytest.fixture(autouse=True)
def setup_di_for_redis_isolation(request):
    """Shadow the root conftest's Redis-isolation fixture with a no-op.

    Unit tests exercise in-memory functionality only, so no Redis setup is
    wanted. Because this fixture reuses the name of the parent conftest's
    autouse fixture, it overrides that fixture for tests under this directory.

    ``request`` is accepted but not used.
    """
    # Nothing to set up or tear down: hand control straight to the test.
    yield None
131 changes: 131 additions & 0 deletions tests/unit/test_serializer_lazy_loading.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,131 @@
"""Unit tests for lazy loading of optional serializers.

Tests the lazy import mechanism for ArrowSerializer which requires
the optional [data] extra (pyarrow).
"""

from __future__ import annotations

import pytest

from cachekit.serializers import (
SERIALIZER_REGISTRY,
_get_arrow_serializer,
benchmark_serializers,
get_available_serializers,
get_serializer,
get_serializer_info,
)
from cachekit.serializers.arrow_serializer import ArrowSerializer
from cachekit.serializers.base import SerializerProtocol


class TestLazyArrowSerializerLoading:
    """Exercise the deferred-import path for ArrowSerializer."""

    def test_registry_has_none_for_arrow(self):
        """The registry keeps a ``None`` placeholder under the "arrow" key."""
        assert "arrow" in SERIALIZER_REGISTRY
        assert SERIALIZER_REGISTRY["arrow"] is None

    def test_get_arrow_serializer_returns_class(self):
        """The lazy loader hands back the ArrowSerializer class itself."""
        assert _get_arrow_serializer() is ArrowSerializer

    def test_get_arrow_serializer_caches_result(self):
        """Two consecutive loader calls yield the very same class object."""
        first = _get_arrow_serializer()
        second = _get_arrow_serializer()
        assert first is second

    def test_get_serializer_arrow_returns_instance(self):
        """get_serializer('arrow') builds an ArrowSerializer that satisfies the protocol."""
        arrow = get_serializer("arrow")
        assert isinstance(arrow, ArrowSerializer)
        assert isinstance(arrow, SerializerProtocol)

    def test_get_serializer_arrow_with_integrity_checking(self):
        """The enable_integrity_checking flag is forwarded to the arrow serializer."""
        arrow = get_serializer("arrow", enable_integrity_checking=False)
        assert isinstance(arrow, ArrowSerializer)
        assert arrow.enable_integrity_checking is False

    def test_module_getattr_returns_arrow_serializer(self):
        """Attribute access on the package resolves ArrowSerializer lazily."""
        from cachekit import serializers as serializers_pkg

        # Plain attribute access goes through the module-level __getattr__.
        assert serializers_pkg.ArrowSerializer is ArrowSerializer

    def test_module_getattr_raises_for_unknown(self):
        """Names the package does not provide still raise AttributeError."""
        from cachekit import serializers as serializers_pkg

        with pytest.raises(AttributeError, match="has no attribute"):
            _ = serializers_pkg.NonExistentSerializer


class TestBenchmarkSerializersWithLazyLoading:
    """Check that benchmark_serializers copes with the lazy arrow entry."""

    def test_benchmark_serializers_includes_arrow(self):
        """The arrow serializer is instantiated and returned under its key."""
        instances = benchmark_serializers()
        assert "arrow" in instances
        assert isinstance(instances["arrow"], ArrowSerializer)

    def test_benchmark_serializers_returns_available_serializers(self):
        """Every core serializer that can be instantiated appears in the result."""
        instances = benchmark_serializers()
        # "encrypted" is deliberately not asserted: it needs a master key and
        # may be missing when none is configured.
        for expected in ("auto", "default", "arrow", "orjson"):
            assert expected in instances


class TestGetSerializerInfoWithLazyLoading:
    """Check that get_serializer_info copes with the lazy arrow entry."""

    def test_get_serializer_info_includes_arrow(self):
        """The arrow entry is reported as available with its class name."""
        report = get_serializer_info()
        assert "arrow" in report
        arrow_entry = report["arrow"]
        assert arrow_entry["available"] is True
        assert arrow_entry["class"] == "ArrowSerializer"

    def test_get_serializer_info_returns_all_serializers(self):
        """Each registered name has an entry carrying 'available' and 'class'."""
        report = get_serializer_info()
        for name in SERIALIZER_REGISTRY:
            assert name in report
            entry = report[name]
            assert "available" in entry
            assert "class" in entry

    def test_get_serializer_info_includes_get_info_data(self):
        """The arrow entry is available and exposes a 'module' field."""
        arrow_entry = get_serializer_info()["arrow"]
        assert arrow_entry["available"] is True
        # NOTE(review): get_serializer_info() itself always writes "module",
        # so this assertion alone does not prove that get_info() data was
        # merged in - consider asserting a key that only get_info() supplies.
        assert "module" in arrow_entry


class TestGetAvailableSerializers:
    """Check that get_available_serializers hands out a copy of the registry."""

    def test_returns_registry_copy(self):
        """The result equals the registry but is a distinct dict object."""
        snapshot = get_available_serializers()
        assert snapshot == SERIALIZER_REGISTRY
        assert snapshot is not SERIALIZER_REGISTRY

    def test_arrow_is_none_in_registry(self):
        """The raw registry view keeps the lazy ``None`` placeholder for arrow."""
        assert get_available_serializers()["arrow"] is None
8 changes: 4 additions & 4 deletions uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.

Loading