From c9cea012c21bf084087758ff0ea7599668c083b8 Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Sun, 7 Jun 2026 10:49:01 +1000 Subject: [PATCH 1/2] fix: harden cache-envelope framing and compression config resolution MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Addresses review feedback on #152 (merged before these landed): - wrapper.py: validate v3 frame bounds before parsing — truncated/corrupt b"CK" frames now raise a clean ValueError instead of a low-level IndexError/JSON error. Also tighten wrap()/unwrap() public signatures to dict[str, Any]. - arrow_serializer.py: narrow the compression auto-resolution except to ImportError. Invalid CACHEKIT_ARROW_COMPRESSION values now surface as a pydantic ValidationError instead of silently falling back to zstd. - tests: make the default-compression test env-independent (delenv + reset_settings); restore pre-existing CACHEKIT_MASTER_KEY via monkeypatch to stop cross-test env leakage; add same-key oversized-update L1 test. --- src/cachekit/serializers/arrow_serializer.py | 6 +++++- src/cachekit/serializers/wrapper.py | 10 +++++++--- tests/unit/test_arrow_serializer.py | 11 ++++++++--- tests/unit/test_l1_memory_bounds.py | 11 +++++++++++ tests/unit/test_serialization_wrapper.py | 9 ++++----- 5 files changed, 35 insertions(+), 12 deletions(-) diff --git a/src/cachekit/serializers/arrow_serializer.py b/src/cachekit/serializers/arrow_serializer.py index 0cd8604..0c0d384 100644 --- a/src/cachekit/serializers/arrow_serializer.py +++ b/src/cachekit/serializers/arrow_serializer.py @@ -162,7 +162,11 @@ def _resolve_compression(compression: str | None) -> str | None: from cachekit.config.singleton import get_settings compression = get_settings().arrow_compression - except Exception: # noqa: BLE001 — settings unavailable: fall back to a sane default + except ImportError: + # Config module genuinely unavailable: fall back to a sane default. + # Invalid CACHEKIT_ARROW_COMPRESSION values surface as a pydantic + # ValidationError from get_settings() and must NOT be masked here — + # let them propagate so misconfiguration fails loud. compression = "zstd" if compression in (None, "none"): return None diff --git a/src/cachekit/serializers/wrapper.py b/src/cachekit/serializers/wrapper.py index 5f440b4..33ace59 100644 --- a/src/cachekit/serializers/wrapper.py +++ b/src/cachekit/serializers/wrapper.py @@ -30,7 +30,7 @@ import base64 import json -from typing import Union +from typing import Any, Union # v3 binary frame constants _MAGIC = b"CK" @@ -70,7 +70,7 @@ class SerializationWrapper: """ @staticmethod - def wrap(data: bytes, metadata: dict, serializer_name: str, version: str = "2.0") -> bytes: + def wrap(data: bytes, metadata: dict[str, Any], serializer_name: str, version: str = "2.0") -> bytes: """Frame serialized data with a metadata header for cache storage. Args: @@ -99,7 +99,7 @@ def wrap(data: bytes, metadata: dict, serializer_name: str, version: str = "2.0" ) @staticmethod - def unwrap(wrapped_data: Union[str, bytes]) -> tuple[bytes, dict, str]: + def unwrap(wrapped_data: Union[str, bytes]) -> tuple[bytes, dict[str, Any], str]: """Unwrap a cache envelope, reading either the v3 frame or the legacy format. Args: @@ -113,11 +113,15 @@ def unwrap(wrapped_data: Union[str, bytes]) -> tuple[bytes, dict, str]: if isinstance(wrapped_data, (bytes, bytearray, memoryview)): mv = memoryview(wrapped_data) if bytes(mv[: len(_MAGIC)]) == _MAGIC: + if mv.nbytes < _PREFIX_LEN: + raise ValueError(f"Truncated cache envelope frame: {mv.nbytes} bytes (minimum {_PREFIX_LEN})") frame_version = mv[len(_MAGIC)] if frame_version != _FRAME_VERSION: raise ValueError(f"Unsupported cache envelope frame version {frame_version} (expected {_FRAME_VERSION})") hdr_len = int.from_bytes(mv[len(_MAGIC) + 1 : _PREFIX_LEN], "big") header_end = _PREFIX_LEN + hdr_len + if header_end > mv.nbytes: + raise ValueError(f"Invalid cache envelope header length {hdr_len}: frame has only {mv.nbytes} bytes") header = json.loads(bytes(mv[_PREFIX_LEN:header_end])) payload = bytes(mv[header_end:]) # single copy of the raw payload return payload, header.get("m", {}), header.get("s", "unknown") diff --git a/tests/unit/test_arrow_serializer.py b/tests/unit/test_arrow_serializer.py index 9e065f2..4ff6817 100644 --- a/tests/unit/test_arrow_serializer.py +++ b/tests/unit/test_arrow_serializer.py @@ -440,12 +440,17 @@ def test_auto_resolves_from_settings_env(self, monkeypatch): finally: reset_settings() - def test_default_is_auto_zstd(self): + def test_default_is_auto_zstd(self, monkeypatch): from cachekit.config.singleton import reset_settings + # Env-independent: clear any externally-set override so the default holds. + monkeypatch.delenv("CACHEKIT_ARROW_COMPRESSION", raising=False) reset_settings() # no env override -> default zstd - _, meta = ArrowSerializer().serialize(pd.DataFrame({"a": [1] * 1000})) - assert meta.compressed is True + try: + _, meta = ArrowSerializer().serialize(pd.DataFrame({"a": [1] * 1000})) + assert meta.compressed is True + finally: + reset_settings() class TestIntegrityAlwaysOn: diff --git a/tests/unit/test_l1_memory_bounds.py b/tests/unit/test_l1_memory_bounds.py index 1725ea2..deeacd8 100644 --- a/tests/unit/test_l1_memory_bounds.py +++ b/tests/unit/test_l1_memory_bounds.py @@ -35,6 +35,17 @@ def test_rejected_oversized_put_does_not_evict_existing_entries(self): assert cache.get("toobig")[0] is False assert cache._current_memory_bytes <= cache.max_memory_bytes + def test_oversized_update_drops_stale_smaller_entry(self): + """An oversized put for an EXISTING key must drop the stale value, not serve it.""" + cache = L1Cache(max_memory_mb=1) + cache.put("k", b"\x00" * (256 * 1024), redis_ttl=300) # fits + assert cache.get("k")[0] is True + + cache.put("k", b"\x00" * (5 * MB), redis_ttl=300) # same key, now oversized + + assert cache.get("k")[0] is False # stale smaller value evicted, not served + assert cache._current_memory_bytes == 0 + def test_entry_equal_to_budget_is_stored(self): cache = L1Cache(max_memory_mb=1) cache.put("exact", b"\x00" * (1 * MB), redis_ttl=300) diff --git a/tests/unit/test_serialization_wrapper.py b/tests/unit/test_serialization_wrapper.py index b1ae130..edd4a74 100644 --- a/tests/unit/test_serialization_wrapper.py +++ b/tests/unit/test_serialization_wrapper.py @@ -112,13 +112,13 @@ class TestEncryptionThroughFrame: KEY = "user:42:credentials" @pytest.fixture - def enc_handler(self): - import os - + def enc_handler(self, monkeypatch): from cachekit.config.singleton import reset_settings reset_settings() - os.environ["CACHEKIT_MASTER_KEY"] = "a" * 64 + # monkeypatch.setenv restores any pre-existing CACHEKIT_MASTER_KEY on teardown + # (and unsets it if it was absent), avoiding cross-test process-env leakage. + monkeypatch.setenv("CACHEKIT_MASTER_KEY", "a" * 64) from cachekit.cache_handler import CacheSerializationHandler handler = CacheSerializationHandler( @@ -129,7 +129,6 @@ def enc_handler(self): ) yield handler reset_settings() - os.environ.pop("CACHEKIT_MASTER_KEY", None) def test_encrypted_payload_round_trips_through_frame(self, enc_handler): secret = {"ssn": "123-45-6789", "balance": 99999} From 75e2c57aac6955c4dbaa061f58b2effc11973d3f Mon Sep 17 00:00:00 2001 From: Ray Walker Date: Sun, 7 Jun 2026 10:58:45 +1000 Subject: [PATCH 2/2] test: cover frame-bounds guards and compression-config fallback Restores patch coverage for the hardening added in the previous commit: - wrapper.py: tests for truncated frame, unsupported frame version, and oversized header-length (the three ValueError guards) -> 100% coverage. - arrow_serializer.py: test that auto compression falls back to zstd when the settings module is unimportable (the except ImportError branch). --- tests/unit/test_arrow_serializer.py | 9 +++++++++ tests/unit/test_serialization_wrapper.py | 17 +++++++++++++++++ 2 files changed, 26 insertions(+) diff --git a/tests/unit/test_arrow_serializer.py b/tests/unit/test_arrow_serializer.py index 4ff6817..6aa3ba9 100644 --- a/tests/unit/test_arrow_serializer.py +++ b/tests/unit/test_arrow_serializer.py @@ -452,6 +452,15 @@ def test_default_is_auto_zstd(self, monkeypatch): finally: reset_settings() + def test_auto_falls_back_to_zstd_when_settings_module_unavailable(self, monkeypatch): + import sys + + # Simulate the config module being unimportable: auto-resolution must fall + # back to zstd (ImportError path) rather than crash. Setting the module to + # None in sys.modules makes `from cachekit.config.singleton import ...` raise. + monkeypatch.setitem(sys.modules, "cachekit.config.singleton", None) + assert ArrowSerializer._resolve_compression("auto") == "zstd" + class TestIntegrityAlwaysOn: """DATA IS SACRED: corruption is always detected, even with integrity_checking=False.""" diff --git a/tests/unit/test_serialization_wrapper.py b/tests/unit/test_serialization_wrapper.py index edd4a74..485c9bf 100644 --- a/tests/unit/test_serialization_wrapper.py +++ b/tests/unit/test_serialization_wrapper.py @@ -103,6 +103,23 @@ def test_unrecognized_envelope_raises(self): with pytest.raises((ValueError, Exception)): SerializationWrapper.unwrap(b"\x99\x98 not a frame and not json") + def test_truncated_frame_raises_valueerror(self): + # Starts with the CK magic but is shorter than the 7-byte prefix. + with pytest.raises(ValueError, match="Truncated cache envelope frame"): + SerializationWrapper.unwrap(b"CK\x03") + + def test_unsupported_frame_version_raises_valueerror(self): + # Valid prefix length but an unknown frame version byte. + frame = b"CK" + bytes((99,)) + (2).to_bytes(4, "big") + b"{}" + with pytest.raises(ValueError, match="Unsupported cache envelope frame version"): + SerializationWrapper.unwrap(frame) + + def test_header_length_exceeds_frame_raises_valueerror(self): + # Declares a header far larger than the actual frame body. + frame = b"CK" + bytes((3,)) + (9999).to_bytes(4, "big") + b"{}" + with pytest.raises(ValueError, match="Invalid cache envelope header length"): + SerializationWrapper.unwrap(frame) + class TestEncryptionThroughFrame: """The binary frame is on the hot path for @cache.secure too: encrypted payloads and