BonfireAI · Antawari · May 27, 2026
diff --git a/src/bonfire/knowledge/memory.py b/src/bonfire/knowledge/memory.py
@@ -1,10 +1,17 @@
 # SPDX-License-Identifier: Apache-2.0
 # Copyright 2026 BonfireAI
 
-"""In-memory vault backend for testing.
+"""In-memory vault backend.
 
-Stores entries in a list. Query uses substring matching on content.
-No embeddings, no external dependencies. Implements VaultBackend protocol.
+Stores entries in a list with O(1)-indexed dedup and a lowercase-content
+cache for fast substring query. No embeddings, no external dependencies.
+Implements VaultBackend protocol.
+
+Originally documented as "for testing," but the knowledge factory returns
+this backend as the SHIPPING DEFAULT when knowledge is unconfigured
+(``enabled=False`` or ``backend="memory"``). Production users hit this
+class on every ``bonfire scan`` unless they explicitly opt into LanceDB.
+The performance characteristics matter accordingly.
 """
 
 from __future__ import annotations
@@ -18,15 +25,36 @@
 
 
 class InMemoryVaultBackend:
-    """In-memory vault for tests. No embeddings, substring matching."""
+    """In-memory vault. Substring matching, no embeddings.
+
+    Shipping default when knowledge is unconfigured. O(1) ``exists()`` via a
+    side hash-set index; ``query()`` reads pre-computed lowercase content
+    from a side cache so it does NOT allocate ``e.content.lower()`` per
+    entry per call.
+
+    The public ``_entries: list[VaultEntry]`` attribute is preserved as the
+    canonical iteration / inspection surface; ``_hashes`` and
+    ``_lower_content`` are private indexes maintained at ``store()`` time.
+    All three are append-only — ``store()`` does NOT dedup on hash collision
+    (the backend's caller is responsible for the ``exists()``-then-``store()``
+    guard pattern).
+    """
 
     def __init__(self) -> None:
         self._entries: list[VaultEntry] = []
+        # O(1) ``exists()`` — set of all stored content hashes.
+        self._hashes: set[str] = set()
+        # ``query()`` lower-case cache, keyed by ``entry_id``. Populated at
+        # ``store()``; read in the substring-match hot loop so we never call
+        # ``e.content.lower()`` per entry per query.
+        self._lower_content: dict[str, str] = {}
 
     async def store(self, entry: VaultEntry) -> str:
         if not entry.content_hash:
             entry = entry.model_copy(update={"content_hash": compute_hash(entry.content)})
         self._entries.append(entry)
+        self._hashes.add(entry.content_hash)
+        self._lower_content[entry.entry_id] = entry.content.lower()
         return entry.entry_id
 
     async def query(
@@ -39,15 +67,20 @@ async def query(
         candidates = self._entries
         if entry_type is not None:
             candidates = [e for e in candidates if e.entry_type == entry_type]
-        query_lower = query.lower()
-        query_words = query_lower.split()
-        scored = [(e, sum(1 for w in query_words if w in e.content.lower())) for e in candidates]
+        query_words = query.lower().split()
+        # Read from the lower-case cache populated at store() — no per-entry
+        # ``.lower()`` allocation in this hot path.
+        lower_content = self._lower_content
+        scored = [
+            (e, sum(1 for w in query_words if w in lower_content[e.entry_id])) for e in candidates
+        ]
         scored = [(e, s) for e, s in scored if s > 0]
         scored.sort(key=lambda x: x[1], reverse=True)
         return [e for e, _ in scored[:limit]]
 
     async def exists(self, content_hash: str) -> bool:
-        return any(e.content_hash == content_hash for e in self._entries)
+        # O(1) via side index; replaces the prior O(N) ``any(...)`` scan.
+        return content_hash in self._hashes
 
     async def get_by_source(self, source_path: str) -> list[VaultEntry]:
         return [e for e in self._entries if e.source_path == source_path]
diff --git a/tests/unit/test_knowledge_memory_performance.py b/tests/unit/test_knowledge_memory_performance.py
@@ -0,0 +1,194 @@
+# SPDX-License-Identifier: Apache-2.0
+# Copyright 2026 BonfireAI
+
+"""Knight RED tests — InMemoryVaultBackend performance (linear ingest).
+
+The in-memory backend ships as the default when knowledge is unconfigured —
+its docstring says "for testing" but the factory returns it for the bare
+``enabled=False`` and ``backend="memory"`` paths. Production users hit it
+unless they explicitly opt into LanceDB.
+
+The pre-fix implementation:
+
+- ``exists(content_hash)`` is ``any(e.content_hash == hash for e in self._entries)``
+  — linear scan over the entire entries list per call.
+- ``query(...)`` allocates a fresh ``e.content.lower()`` per entry per call —
+  hot-path string allocation that grows linearly with vault size.
+
+The ingest paths (``KnowledgeIngestConsumer._store``, the various
+``knowledge.ingest`` entry points, ``TechScanner.scan_and_store``) ALL call
+``exists()`` before ``store()``. Stacked together: ingesting N entries
+requires N linear-scan exists checks ⇒ **O(N²) ingest** for an "in-memory
+backend marked for testing" that's actually the shipping default.
+
+This Knight pins:
+
+1. **Wall-clock cap on 10,000-entry ingest.** Linear behavior should complete
+   well under 1 second; the pre-fix quadratic shape takes 10s+ on this CPU.
+   The test asserts < 1.0 s with the standard ``exists``-then-``store``
+   guard pattern that production callers use.
+
+2. **Correctness regression guards** ensuring the perf fix (hash-set +
+   lower-cache indexes) doesn't break the existing semantics already pinned
+   by ``test_knowledge_memory.py``. Specifically: the ``_entries`` list MUST
+   still grow on every ``store()`` even when ``content_hash`` collides with
+   a previously-stored entry — the existing
+   ``test_store_appends_to_entries`` contract requires append-on-every-store,
+   not dedup-on-hash.
+
+The fix Warrior is constrained to:
+
+- Add a ``_hashes: set[str]`` index for O(1) ``exists()``.
+- Cache ``content.lower()`` at insert (keyed by ``entry_id``) so ``query()``
+  doesn't re-allocate per call.
+- PRESERVE ``_entries: list[VaultEntry]`` as the public attribute — the
+  innovative-lens conformance test in ``test_knowledge_memory.py`` asserts
+  ``backend._entries == []`` and ``isinstance(backend._entries, list)``
+  directly.
+- Update the class docstring to acknowledge the production-default usage
+  (acceptance criterion #3 in the ticket).
+"""
+
+from __future__ import annotations
+
+import time
+
+from bonfire.knowledge.memory import InMemoryVaultBackend
+from bonfire.protocols import VaultEntry
+
+
+class TestIngestScalesLinearly:
+    """Acceptance criterion #4 — ingest of 10k entries completes in < 1 s.
+
+    Models the production-default ingest path: every entry is ``exists()``-
+    checked before ``store()`` (mirroring ``KnowledgeIngestConsumer._store``,
+    ``knowledge.ingest.ingest_*``, and ``TechScanner.scan_and_store`` — the
+    four callers the ticket enumerates).
+    """
+
+    async def test_10k_entry_ingest_under_one_second(self) -> None:
+        """Pre-fix: ~10s+ (quadratic). Post-fix: well under 1s (linear).
+
+        The wall-clock cap is generous on purpose — the post-fix shape is
+        dominated by Python attribute access + Pydantic instantiation, NOT
+        by the dedup index. A 10x margin over expected wall-clock time
+        keeps the test stable across CPUs and CI runners.
+        """
+        backend = InMemoryVaultBackend()
+        n = 10_000
+
+        # Pre-construct entries so the loop measures *backend operations*, not
+        # Pydantic instantiation. Use distinct content + distinct hash so
+        # exists() always returns False and store() always fires (worst case
+        # for the quadratic shape).
+        entries = [
+            VaultEntry(
+                content=f"entry content body {i}",
+                entry_type="code_chunk",
+                content_hash=f"hash-{i:08d}",
+            )
+            for i in range(n)
+        ]
+
+        start = time.perf_counter()
+        for entry in entries:
+            # Production guard pattern: exists-check then store-on-miss.
+            if not await backend.exists(entry.content_hash):
+                await backend.store(entry)
+        elapsed = time.perf_counter() - start
+
+        # Confirm we actually stored everything (sanity guard against the
+        # exists/store contract drifting).
+        assert len(backend._entries) == n
+
+        # Linear cap: the post-fix shape is O(N); quadratic pre-fix would
+        # take ~10s+ for N=10_000 on this CPU.
+        assert elapsed < 1.0, (
+            f"10k-entry ingest took {elapsed:.3f}s — expected sub-second under "
+            "the linear-time fix; the pre-fix quadratic shape takes 10s+"
+        )
+
+    async def test_query_with_large_vault_does_not_allocate_per_entry_lower(
+        self,
+    ) -> None:
+        """Pre-fix: query() does e.content.lower() per entry per call.
+
+        Post-fix: lowercase is cached at insert time (keyed by entry_id).
+        Verifies the fix by timing 100 queries over a 5k-entry vault — pre-
+        fix this is ~5s+; post-fix it should be sub-second.
+        """
+        backend = InMemoryVaultBackend()
+        n = 5_000
+        for i in range(n):
+            await backend.store(
+                VaultEntry(
+                    content=f"content body number {i} alpha beta gamma",
+                    entry_type="code_chunk",
+                    content_hash=f"hash-{i:08d}",
+                )
+            )
+
+        start = time.perf_counter()
+        for _ in range(100):
+            results = await backend.query("alpha beta gamma", limit=10)
+            assert len(results) == 10
+        elapsed = time.perf_counter() - start
+
+        assert elapsed < 2.0, (
+            f"100 queries over 5k-entry vault took {elapsed:.3f}s — expected "
+            "sub-second with lower-cache; pre-fix per-entry .lower() takes 5s+"
+        )
+
+
+class TestPerfFixPreservesCorrectness:
+    """Regression guards — the perf fix must NOT break existing semantics."""
+
+    async def test_entries_list_grows_on_every_store_even_with_hash_collision(
+        self,
+    ) -> None:
+        """``_entries`` list appends on every store, regardless of hash dedup.
+
+        Test contract from ``test_knowledge_memory.py::test_store_appends_to_entries``:
+        a single store yields ``len(_entries) == 1``. By extension, two stores
+        yield ``len(_entries) == 2`` even when both share a content_hash —
+        the in-memory backend does NOT dedup on store; only ``exists()`` is
+        the contract for "have we seen this hash."
+        """
+        backend = InMemoryVaultBackend()
+        e1 = VaultEntry(content="same body", entry_type="code_chunk", content_hash="dup-hash")
+        e2 = VaultEntry(content="same body", entry_type="code_chunk", content_hash="dup-hash")
+        await backend.store(e1)
+        await backend.store(e2)
+        assert len(backend._entries) == 2, (
+            "perf fix must not silently dedup on store — _entries grows on every store"
+        )
+
+    async def test_exists_returns_true_for_any_stored_hash(self) -> None:
+        """O(1) exists check returns True for hashes from any prior store."""
+        backend = InMemoryVaultBackend()
+        for i in range(50):
+            await backend.store(
+                VaultEntry(
+                    content=f"c{i}",
+                    entry_type="code_chunk",
+                    content_hash=f"h-{i}",
+                )
+            )
+        for i in range(50):
+            assert await backend.exists(f"h-{i}") is True
+        assert await backend.exists("h-not-stored") is False
+
+    async def test_query_lower_cache_handles_uppercase_content(self) -> None:
+        """Cached lowercase must match query.lower() — Unicode-safe."""
+        backend = InMemoryVaultBackend()
+        await backend.store(
+            VaultEntry(
+                content="ALPHA Beta Gamma",
+                entry_type="code_chunk",
+                content_hash="upper-1",
+            )
+        )
+        # Query is lowercase — must still find the uppercase-stored entry.
+        results = await backend.query("alpha beta")
+        assert len(results) == 1
+        assert results[0].content == "ALPHA Beta Gamma"