diff --git a/src/cachekit/serializers/arrow_serializer.py b/src/cachekit/serializers/arrow_serializer.py index 0cd8604..0c0d384 100644 --- a/src/cachekit/serializers/arrow_serializer.py +++ b/src/cachekit/serializers/arrow_serializer.py @@ -162,7 +162,11 @@ def _resolve_compression(compression: str | None) -> str | None: from cachekit.config.singleton import get_settings compression = get_settings().arrow_compression - except Exception: # noqa: BLE001 — settings unavailable: fall back to a sane default + except ImportError: + # Config module genuinely unavailable: fall back to a sane default. + # Invalid CACHEKIT_ARROW_COMPRESSION values surface as a pydantic + # ValidationError from get_settings() and must NOT be masked here — + # let them propagate so misconfiguration fails loud. compression = "zstd" if compression in (None, "none"): return None diff --git a/src/cachekit/serializers/wrapper.py b/src/cachekit/serializers/wrapper.py index 5f440b4..33ace59 100644 --- a/src/cachekit/serializers/wrapper.py +++ b/src/cachekit/serializers/wrapper.py @@ -30,7 +30,7 @@ import base64 import json -from typing import Union +from typing import Any, Union # v3 binary frame constants _MAGIC = b"CK" @@ -70,7 +70,7 @@ class SerializationWrapper: """ @staticmethod - def wrap(data: bytes, metadata: dict, serializer_name: str, version: str = "2.0") -> bytes: + def wrap(data: bytes, metadata: dict[str, Any], serializer_name: str, version: str = "2.0") -> bytes: """Frame serialized data with a metadata header for cache storage. Args: @@ -99,7 +99,7 @@ def wrap(data: bytes, metadata: dict, serializer_name: str, version: str = "2.0" ) @staticmethod - def unwrap(wrapped_data: Union[str, bytes]) -> tuple[bytes, dict, str]: + def unwrap(wrapped_data: Union[str, bytes]) -> tuple[bytes, dict[str, Any], str]: """Unwrap a cache envelope, reading either the v3 frame or the legacy format. Args: @@ -113,11 +113,15 @@ def unwrap(wrapped_data: Union[str, bytes]) -> tuple[bytes, dict, str]: if isinstance(wrapped_data, (bytes, bytearray, memoryview)): mv = memoryview(wrapped_data) if bytes(mv[: len(_MAGIC)]) == _MAGIC: + if mv.nbytes < _PREFIX_LEN: + raise ValueError(f"Truncated cache envelope frame: {mv.nbytes} bytes (minimum {_PREFIX_LEN})") frame_version = mv[len(_MAGIC)] if frame_version != _FRAME_VERSION: raise ValueError(f"Unsupported cache envelope frame version {frame_version} (expected {_FRAME_VERSION})") hdr_len = int.from_bytes(mv[len(_MAGIC) + 1 : _PREFIX_LEN], "big") header_end = _PREFIX_LEN + hdr_len + if header_end > mv.nbytes: + raise ValueError(f"Invalid cache envelope header length {hdr_len}: frame has only {mv.nbytes} bytes") header = json.loads(bytes(mv[_PREFIX_LEN:header_end])) payload = bytes(mv[header_end:]) # single copy of the raw payload return payload, header.get("m", {}), header.get("s", "unknown") diff --git a/tests/unit/test_arrow_serializer.py b/tests/unit/test_arrow_serializer.py index 9e065f2..6aa3ba9 100644 --- a/tests/unit/test_arrow_serializer.py +++ b/tests/unit/test_arrow_serializer.py @@ -440,12 +440,26 @@ def test_auto_resolves_from_settings_env(self, monkeypatch): finally: reset_settings() - def test_default_is_auto_zstd(self): + def test_default_is_auto_zstd(self, monkeypatch): from cachekit.config.singleton import reset_settings + # Env-independent: clear any externally-set override so the default holds. + monkeypatch.delenv("CACHEKIT_ARROW_COMPRESSION", raising=False) reset_settings() # no env override -> default zstd - _, meta = ArrowSerializer().serialize(pd.DataFrame({"a": [1] * 1000})) - assert meta.compressed is True + try: + _, meta = ArrowSerializer().serialize(pd.DataFrame({"a": [1] * 1000})) + assert meta.compressed is True + finally: + reset_settings() + + def test_auto_falls_back_to_zstd_when_settings_module_unavailable(self, monkeypatch): + import sys + + # Simulate the config module being unimportable: auto-resolution must fall + # back to zstd (ImportError path) rather than crash. Setting the module to + # None in sys.modules makes `from cachekit.config.singleton import ...` raise. + monkeypatch.setitem(sys.modules, "cachekit.config.singleton", None) + assert ArrowSerializer._resolve_compression("auto") == "zstd" class TestIntegrityAlwaysOn: diff --git a/tests/unit/test_l1_memory_bounds.py b/tests/unit/test_l1_memory_bounds.py index 1725ea2..deeacd8 100644 --- a/tests/unit/test_l1_memory_bounds.py +++ b/tests/unit/test_l1_memory_bounds.py @@ -35,6 +35,17 @@ def test_rejected_oversized_put_does_not_evict_existing_entries(self): assert cache.get("toobig")[0] is False assert cache._current_memory_bytes <= cache.max_memory_bytes + def test_oversized_update_drops_stale_smaller_entry(self): + """An oversized put for an EXISTING key must drop the stale value, not serve it.""" + cache = L1Cache(max_memory_mb=1) + cache.put("k", b"\x00" * (256 * 1024), redis_ttl=300) # fits + assert cache.get("k")[0] is True + + cache.put("k", b"\x00" * (5 * MB), redis_ttl=300) # same key, now oversized + + assert cache.get("k")[0] is False # stale smaller value evicted, not served + assert cache._current_memory_bytes == 0 + def test_entry_equal_to_budget_is_stored(self): cache = L1Cache(max_memory_mb=1) cache.put("exact", b"\x00" * (1 * MB), redis_ttl=300) diff --git a/tests/unit/test_serialization_wrapper.py b/tests/unit/test_serialization_wrapper.py index b1ae130..485c9bf 100644 --- a/tests/unit/test_serialization_wrapper.py +++ b/tests/unit/test_serialization_wrapper.py @@ -103,6 +103,23 @@ def test_unrecognized_envelope_raises(self): with pytest.raises((ValueError, Exception)): SerializationWrapper.unwrap(b"\x99\x98 not a frame and not json") + def test_truncated_frame_raises_valueerror(self): + # Starts with the CK magic but is shorter than the 7-byte prefix. + with pytest.raises(ValueError, match="Truncated cache envelope frame"): + SerializationWrapper.unwrap(b"CK\x03") + + def test_unsupported_frame_version_raises_valueerror(self): + # Valid prefix length but an unknown frame version byte. + frame = b"CK" + bytes((99,)) + (2).to_bytes(4, "big") + b"{}" + with pytest.raises(ValueError, match="Unsupported cache envelope frame version"): + SerializationWrapper.unwrap(frame) + + def test_header_length_exceeds_frame_raises_valueerror(self): + # Declares a header far larger than the actual frame body. + frame = b"CK" + bytes((3,)) + (9999).to_bytes(4, "big") + b"{}" + with pytest.raises(ValueError, match="Invalid cache envelope header length"): + SerializationWrapper.unwrap(frame) + class TestEncryptionThroughFrame: """The binary frame is on the hot path for @cache.secure too: encrypted payloads and @@ -112,13 +129,13 @@ class TestEncryptionThroughFrame: KEY = "user:42:credentials" @pytest.fixture - def enc_handler(self): - import os - + def enc_handler(self, monkeypatch): from cachekit.config.singleton import reset_settings reset_settings() - os.environ["CACHEKIT_MASTER_KEY"] = "a" * 64 + # monkeypatch.setenv restores any pre-existing CACHEKIT_MASTER_KEY on teardown + # (and unsets it if it was absent), avoiding cross-test process-env leakage. + monkeypatch.setenv("CACHEKIT_MASTER_KEY", "a" * 64) from cachekit.cache_handler import CacheSerializationHandler handler = CacheSerializationHandler( @@ -129,7 +146,6 @@ def enc_handler(self): ) yield handler reset_settings() - os.environ.pop("CACHEKIT_MASTER_KEY", None) def test_encrypted_payload_round_trips_through_frame(self, enc_handler): secret = {"ssn": "123-45-6789", "balance": 99999}