From 016567de0f9f95e4847587391da7b3a928d5c329 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Mon, 27 Apr 2026 17:19:49 -0600 Subject: [PATCH 01/13] fix: align embedding result metadata Replace n_tokens_truncated with a shared truncated flag across local, remote, and custom embedding results. Keep remote token counts useful by preferring exact and estimated prompt token metadata when present. This avoids carrying truncation semantics that were exact for the local engine but misleading for remote vectors.space responses. --- src/dbmem-embed.h | 2 +- src/dbmem-lembed.c | 6 +++--- src/dbmem-rembed.c | 22 ++++++++++++++++++++-- src/sqlite-memory.c | 6 +++--- src/sqlite-memory.h | 2 +- 5 files changed, 28 insertions(+), 10 deletions(-) diff --git a/src/dbmem-embed.h b/src/dbmem-embed.h index 0ddc66d..15c1c1a 100644 --- a/src/dbmem-embed.h +++ b/src/dbmem-embed.h @@ -17,7 +17,7 @@ typedef struct dbmem_remote_engine_t dbmem_remote_engine_t; // Embedding result structure (always one embedding per call) typedef struct { int n_tokens; // Number of tokens processed - int n_tokens_truncated; // Number of tokens truncated (0 if none) + bool truncated; // True when the input was truncated before embedding int n_embd; // Embedding dimension float *embedding; // Pointer to embedding (points to engine's buffer, do not free) } embedding_result_t; diff --git a/src/dbmem-lembed.c b/src/dbmem-lembed.c index ce0f0e1..e3c842f 100644 --- a/src/dbmem-lembed.c +++ b/src/dbmem-lembed.c @@ -223,9 +223,9 @@ int dbmem_local_compute_embedding (dbmem_local_engine_t *engine, const char *tex } // Handle token overflow: truncate to max context size - int n_tokens_truncated = 0; + bool truncated = false; if (n_tokens > engine->n_ctx) { - n_tokens_truncated = n_tokens - engine->n_ctx; + truncated = true; n_tokens = engine->n_ctx; } @@ -275,7 +275,7 @@ int dbmem_local_compute_embedding (dbmem_local_engine_t *engine, const char *tex // Fill result result->n_tokens = n_tokens; - 
result->n_tokens_truncated = n_tokens_truncated; + result->truncated = truncated; result->n_embd = engine->n_embd; result->embedding = engine->embedding; diff --git a/src/dbmem-rembed.c b/src/dbmem-rembed.c index 0613ed4..bdef5b7 100644 --- a/src/dbmem-rembed.c +++ b/src/dbmem-rembed.c @@ -210,6 +210,13 @@ static int set_json_error_message (dbmem_remote_engine_t *engine) { return -1; } +#if ENABLE_DBMEM_DEBUG +static void dbmem_remote_debug_log_response(dbmem_remote_engine_t *engine, long http_code) { + const char *response = engine->data ? engine->data : ""; + DEBUG_DBMEM_ALWAYS("[dbmem-rembed] vectors.space response (HTTP %ld): %s", http_code, response); +} +#endif + // MARK: - dbmem_remote_engine_t *dbmem_remote_engine_init (void *ctx, const char *provider, const char *model, char err_msg[DBMEM_ERRBUF_SIZE]) { @@ -450,6 +457,10 @@ int dbmem_remote_compute_embedding (dbmem_remote_engine_t *engine, const char *t sqlite3_free(response_data); #endif +#if ENABLE_DBMEM_DEBUG + dbmem_remote_debug_log_response(engine, http_code); +#endif + if (http_code != 200) { return set_json_error_message(engine); } @@ -482,6 +493,8 @@ int dbmem_remote_compute_embedding (dbmem_remote_engine_t *engine, const char *t int n_embd = 0; int prompt_tokens = 0; int estimated_prompt_tokens = 0; + int exact_prompt_tokens = 0; + bool truncated = false; int emb_start = -1; size_t emb_count = 0; @@ -501,8 +514,13 @@ int dbmem_remote_compute_embedding (dbmem_remote_engine_t *engine, const char *t n_embd = atoi(engine->data + tokens[i + 1].start); } else if (klen == 13 && memcmp(key, "prompt_tokens", 13) == 0 && tokens[i + 1].type == JSMN_PRIMITIVE) { prompt_tokens = atoi(engine->data + tokens[i + 1].start); + } else if (klen == 19 && memcmp(key, "exact_prompt_tokens", 19) == 0 && tokens[i + 1].type == JSMN_PRIMITIVE) { + exact_prompt_tokens = atoi(engine->data + tokens[i + 1].start); } else if (klen == 23 && memcmp(key, "estimated_prompt_tokens", 23) == 0) { estimated_prompt_tokens = 
atoi(engine->data + tokens[i + 1].start); + } else if (klen == 9 && memcmp(key, "truncated", 9) == 0 && tokens[i + 1].type == JSMN_PRIMITIVE) { + truncated = (tokens[i + 1].end - tokens[i + 1].start == 4) && + (memcmp(engine->data + tokens[i + 1].start, "true", 4) == 0); } } @@ -529,8 +547,8 @@ int dbmem_remote_compute_embedding (dbmem_remote_engine_t *engine, const char *t // Fill result result->n_embd = n_embd; - result->n_tokens = prompt_tokens; - result->n_tokens_truncated = (estimated_prompt_tokens > prompt_tokens) ? estimated_prompt_tokens - prompt_tokens : 0; + result->n_tokens = exact_prompt_tokens > 0 ? exact_prompt_tokens : (estimated_prompt_tokens > 0 ? estimated_prompt_tokens : prompt_tokens); + result->truncated = truncated; result->embedding = engine->embedding; // Update statistics diff --git a/src/sqlite-memory.c b/src/sqlite-memory.c index d82ca9e..25767dc 100644 --- a/src/sqlite-memory.c +++ b/src/sqlite-memory.c @@ -642,7 +642,7 @@ int dbmem_context_custom_compute (dbmem_context *ctx, const char *text, int text int rc = ctx->custom_provider.compute(ctx->custom_engine, text, text_len, ctx->custom_provider.xdata, &cr); if (rc != 0) return rc; result->n_tokens = cr.n_tokens; - result->n_tokens_truncated = cr.n_tokens_truncated; + result->truncated = cr.truncated; result->n_embd = cr.n_embd; result->embedding = cr.embedding; return 0; @@ -1249,7 +1249,7 @@ static void dbmem_get_option (sqlite3_context *context, int argc, sqlite3_value static void dbmem_dump_embeding (const embedding_result_t *result) { printf("{\n"); printf(" \"n_tokens\": %d,\n", result->n_tokens); - printf(" \"n_tokens_truncated\": %d,\n", result->n_tokens_truncated); + printf(" \"truncated\": %s,\n", result->truncated ? 
"true" : "false"); printf(" \"n_embd\": %d,\n", result->n_embd); printf(" \"embedding\": ["); @@ -1301,7 +1301,7 @@ static bool dbmem_cache_lookup (dbmem_context *ctx, uint64_t text_hash, embeddin result->embedding = ctx->cache_buffer; result->n_embd = dimension; result->n_tokens = 0; - result->n_tokens_truncated = 0; + result->truncated = false; found = true; cleanup: diff --git a/src/sqlite-memory.h b/src/sqlite-memory.h index f4aaefd..b3cb45b 100644 --- a/src/sqlite-memory.h +++ b/src/sqlite-memory.h @@ -39,7 +39,7 @@ typedef struct dbmem_context dbmem_context; typedef struct { int n_tokens; - int n_tokens_truncated; + bool truncated; int n_embd; float *embedding; // Engine-owned buffer, valid until next call or free } dbmem_embedding_result_t; From a79fd96f3ea1001ab0be7d9f145d3353029a5e04 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Mon, 27 Apr 2026 17:54:45 -0600 Subject: [PATCH 02/13] Update dbmem-rembed.c --- src/dbmem-rembed.c | 6 +++--- 1 file changed, 3 insertions(+), 3 deletions(-) diff --git a/src/dbmem-rembed.c b/src/dbmem-rembed.c index bdef5b7..5331b44 100644 --- a/src/dbmem-rembed.c +++ b/src/dbmem-rembed.c @@ -210,7 +210,7 @@ static int set_json_error_message (dbmem_remote_engine_t *engine) { return -1; } -#if ENABLE_DBMEM_DEBUG +#if ENABLE_DBMEM_DEBUG_EMBEDDING static void dbmem_remote_debug_log_response(dbmem_remote_engine_t *engine, long http_code) { const char *response = engine->data ? 
engine->data : ""; DEBUG_DBMEM_ALWAYS("[dbmem-rembed] vectors.space response (HTTP %ld): %s", http_code, response); @@ -457,7 +457,7 @@ int dbmem_remote_compute_embedding (dbmem_remote_engine_t *engine, const char *t sqlite3_free(response_data); #endif -#if ENABLE_DBMEM_DEBUG +#if ENABLE_DBMEM_DEBUG_EMBEDDING dbmem_remote_debug_log_response(engine, http_code); #endif @@ -552,7 +552,7 @@ int dbmem_remote_compute_embedding (dbmem_remote_engine_t *engine, const char *t result->embedding = engine->embedding; // Update statistics - engine->total_tokens_processed += prompt_tokens; + engine->total_tokens_processed += result->n_tokens; engine->total_embeddings_generated++; return 0; From c6f407130b208bc8a652f37b3229ce5afda7538b Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Wed, 29 Apr 2026 16:18:46 -0600 Subject: [PATCH 03/13] fix: parse structured vectors.space responses Read output_dimension, data[0].embedding, data[0].truncated, and nested usage token fields from the documented vectors.space response shape. This makes the remote embedding parser match the current API envelope instead of relying on flat key scans across the whole JSON payload. 
--- src/dbmem-rembed.c | 126 ++++++++++++++++++++++++++++++++++++--------- 1 file changed, 103 insertions(+), 23 deletions(-) diff --git a/src/dbmem-rembed.c b/src/dbmem-rembed.c index 5331b44..2a1a833 100644 --- a/src/dbmem-rembed.c +++ b/src/dbmem-rembed.c @@ -210,6 +210,55 @@ static int set_json_error_message (dbmem_remote_engine_t *engine) { return -1; } +static int dbmem_json_skip_token (const jsmntok_t *tokens, int index) { + int next = index + 1; + + if (tokens[index].type == JSMN_ARRAY) { + for (int i = 0; i < tokens[index].size; i++) { + next = dbmem_json_skip_token(tokens, next); + } + return next; + } + + if (tokens[index].type == JSMN_OBJECT) { + for (int i = 0; i < tokens[index].size; i++) { + next += 1; // skip key token + next = dbmem_json_skip_token(tokens, next); + } + return next; + } + + return next; +} + +static bool dbmem_json_token_equals (const char *json, const jsmntok_t *token, const char *text) { + size_t len = strlen(text); + size_t token_len = (size_t)(token->end - token->start); + return token_len == len && memcmp(json + token->start, text, len) == 0; +} + +static int dbmem_json_object_find (const char *json, const jsmntok_t *tokens, int object_index, const char *key) { + if (object_index < 0 || tokens[object_index].type != JSMN_OBJECT) return -1; + + int index = object_index + 1; + for (int i = 0; i < tokens[object_index].size; i++) { + int key_index = index; + int value_index = key_index + 1; + + if (tokens[key_index].type != JSMN_STRING) return -1; + if (dbmem_json_token_equals(json, &tokens[key_index], key)) return value_index; + + index = dbmem_json_skip_token(tokens, value_index); + } + + return -1; +} + +static bool dbmem_json_parse_bool (const char *json, const jsmntok_t *token) { + size_t len = (size_t)(token->end - token->start); + return token->type == JSMN_PRIMITIVE && len == 4 && memcmp(json + token->start, "true", 4) == 0; +} + #if ENABLE_DBMEM_DEBUG_EMBEDDING static void 
dbmem_remote_debug_log_response(dbmem_remote_engine_t *engine, long http_code) { const char *response = engine->data ? engine->data : ""; @@ -498,29 +547,60 @@ int dbmem_remote_compute_embedding (dbmem_remote_engine_t *engine, const char *t int emb_start = -1; size_t emb_count = 0; - for (int i = 0; i < ntokens - 1; i++) { - if (tokens[i].type != JSMN_STRING) continue; - int klen = tokens[i].end - tokens[i].start; - const char *key = engine->data + tokens[i].start; - - if (klen == 9 && memcmp(key, "embedding", 9) == 0 && tokens[i + 1].type == JSMN_ARRAY) { - if (tokens[i + 1].size <= 0) { - dbmem_context_set_error(engine->context, "Invalid embedding array size in API response"); - return -1; - } - emb_count = (size_t)tokens[i + 1].size; - emb_start = i + 2; - } else if (klen == 16 && memcmp(key, "output_dimension", 16) == 0) { - n_embd = atoi(engine->data + tokens[i + 1].start); - } else if (klen == 13 && memcmp(key, "prompt_tokens", 13) == 0 && tokens[i + 1].type == JSMN_PRIMITIVE) { - prompt_tokens = atoi(engine->data + tokens[i + 1].start); - } else if (klen == 19 && memcmp(key, "exact_prompt_tokens", 19) == 0 && tokens[i + 1].type == JSMN_PRIMITIVE) { - exact_prompt_tokens = atoi(engine->data + tokens[i + 1].start); - } else if (klen == 23 && memcmp(key, "estimated_prompt_tokens", 23) == 0) { - estimated_prompt_tokens = atoi(engine->data + tokens[i + 1].start); - } else if (klen == 9 && memcmp(key, "truncated", 9) == 0 && tokens[i + 1].type == JSMN_PRIMITIVE) { - truncated = (tokens[i + 1].end - tokens[i + 1].start == 4) && - (memcmp(engine->data + tokens[i + 1].start, "true", 4) == 0); + if (tokens[0].type != JSMN_OBJECT) { + dbmem_context_set_error(engine->context, "Invalid API response shape"); + return -1; + } + + int output_dimension_index = dbmem_json_object_find(engine->data, tokens, 0, "output_dimension"); + if (output_dimension_index >= 0 && tokens[output_dimension_index].type == JSMN_PRIMITIVE) { + n_embd = atoi(engine->data + 
tokens[output_dimension_index].start); + } + + int data_index = dbmem_json_object_find(engine->data, tokens, 0, "data"); + if (data_index < 0 || tokens[data_index].type != JSMN_ARRAY || tokens[data_index].size <= 0) { + dbmem_context_set_error(engine->context, "Missing embedding data in API response"); + return -1; + } + + int item_index = data_index + 1; + if (tokens[item_index].type != JSMN_OBJECT) { + dbmem_context_set_error(engine->context, "Invalid embedding item in API response"); + return -1; + } + + int embedding_index = dbmem_json_object_find(engine->data, tokens, item_index, "embedding"); + if (embedding_index < 0 || tokens[embedding_index].type != JSMN_ARRAY) { + dbmem_context_set_error(engine->context, "Missing embedding data in API response"); + return -1; + } + if (tokens[embedding_index].size <= 0) { + dbmem_context_set_error(engine->context, "Invalid embedding array size in API response"); + return -1; + } + emb_count = (size_t)tokens[embedding_index].size; + emb_start = embedding_index + 1; + + int truncated_index = dbmem_json_object_find(engine->data, tokens, item_index, "truncated"); + if (truncated_index >= 0) { + truncated = dbmem_json_parse_bool(engine->data, &tokens[truncated_index]); + } + + int usage_index = dbmem_json_object_find(engine->data, tokens, 0, "usage"); + if (usage_index >= 0 && tokens[usage_index].type == JSMN_OBJECT) { + int prompt_tokens_index = dbmem_json_object_find(engine->data, tokens, usage_index, "prompt_tokens"); + if (prompt_tokens_index >= 0 && tokens[prompt_tokens_index].type == JSMN_PRIMITIVE) { + prompt_tokens = atoi(engine->data + tokens[prompt_tokens_index].start); + } + + int exact_prompt_tokens_index = dbmem_json_object_find(engine->data, tokens, usage_index, "exact_prompt_tokens"); + if (exact_prompt_tokens_index >= 0 && tokens[exact_prompt_tokens_index].type == JSMN_PRIMITIVE) { + exact_prompt_tokens = atoi(engine->data + tokens[exact_prompt_tokens_index].start); + } + + int estimated_prompt_tokens_index = 
dbmem_json_object_find(engine->data, tokens, usage_index, "estimated_prompt_tokens"); + if (estimated_prompt_tokens_index >= 0 && tokens[estimated_prompt_tokens_index].type == JSMN_PRIMITIVE) { + estimated_prompt_tokens = atoi(engine->data + tokens[estimated_prompt_tokens_index].start); } } From 6fa8df467c610fcd90b3d7f0e6a89fe6ceaa6000 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Wed, 29 Apr 2026 16:26:06 -0600 Subject: [PATCH 04/13] docs: update custom provider result metadata Align the public C API reference with the current dbmem_embedding_result_t shape by documenting the truncated boolean flag instead of n_tokens_truncated. This avoids publishing stale custom-provider docs after the shared embedding result metadata was simplified across local, remote, and custom engines. --- API.md | 7 ++++--- 1 file changed, 4 insertions(+), 3 deletions(-) diff --git a/API.md b/API.md index e77760f..bd9636c 100644 --- a/API.md +++ b/API.md @@ -564,8 +564,8 @@ typedef struct { **`dbmem_embedding_result_t` struct:** ```c typedef struct { - int n_tokens; // Number of tokens processed - int n_tokens_truncated; // Tokens that were truncated (0 if none) + int n_tokens; // Number of processed tokens (0 if unknown) + bool truncated; // True when the input was truncated before embedding int n_embd; // Embedding dimension float *embedding; // Embedding vector (engine-owned, valid until next call or free) } dbmem_embedding_result_t; @@ -574,6 +574,7 @@ typedef struct { **Notes:** - Works regardless of `DBMEM_OMIT_LOCAL_ENGINE` / `DBMEM_OMIT_REMOTE_ENGINE` compile flags - The `embedding` buffer in `dbmem_embedding_result_t` must remain valid until the next `compute` call or `free` — it is engine-owned, not copied by the caller +- `n_tokens` is metadata about the processed input when the engine can provide it; `truncated` is a boolean flag, not a truncated-token count - Only one custom provider can be registered per connection at a time; registering again replaces the previous one 
- The provider struct is copied by value; the caller does not need to keep it alive after registration @@ -596,7 +597,7 @@ static int my_compute(void *engine, const char *text, int text_len, void *xdata, // ... fill vec with your embedding ... result->n_embd = e->dimension; result->n_tokens = text_len / 4; - result->n_tokens_truncated = 0; + result->truncated = false; result->embedding = vec; return 0; } From 4571a6172591e1297abd586387afdb1e148f0641 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Thu, 30 Apr 2026 11:23:56 -0600 Subject: [PATCH 05/13] Run SQLite extension unit tests in CI --- .github/workflows/main.yml | 6 +++--- test/unittest.c | 2 +- 2 files changed, 4 insertions(+), 4 deletions(-) diff --git a/.github/workflows/main.yml b/.github/workflows/main.yml index a860452..5a0d025 100644 --- a/.github/workflows/main.yml +++ b/.github/workflows/main.yml @@ -364,7 +364,7 @@ jobs: echo "::endgroup::" echo "::group::build unittest binary for android" - make build/unittest ${{ matrix.make }} SQLITE_AMALGAM=${SQLITE_DIR}/sqlite3.c + make build/unittest ${{ matrix.make }} SQLITE_AMALGAM=${SQLITE_DIR}/sqlite3.c DEFINES="-DTEST_SQLITE_EXTENSION" echo "::endgroup::" echo "::group::build e2e binary for android" @@ -406,12 +406,12 @@ jobs: - name: unix test sqlite-memory if: matrix.skip_test != true && matrix.os != 'windows-2022' && matrix.name != 'android' - run: ${{ matrix.name == 'linux-musl' && matrix.arch == 'arm64' && 'docker exec alpine' || '' }} make test ${{ matrix.make && matrix.make || ''}} + run: ${{ matrix.name == 'linux-musl' && matrix.arch == 'arm64' && 'docker exec alpine' || '' }} make test ${{ matrix.make && matrix.make || ''}} DEFINES="-DTEST_SQLITE_EXTENSION" - name: windows test sqlite-memory if: matrix.skip_test != true && matrix.name == 'windows' shell: msys2 {0} - run: make test ${{ matrix.make && matrix.make || ''}} + run: make test ${{ matrix.make && matrix.make || ''}} DEFINES="-DTEST_SQLITE_EXTENSION" - name: unix e2e sqlite-memory if: 
matrix.skip_test != true && matrix.variant != 'local' && matrix.os != 'windows-2022' && matrix.name != 'android' diff --git a/test/unittest.c b/test/unittest.c index cb0e041..154c04d 100644 --- a/test/unittest.c +++ b/test/unittest.c @@ -2359,7 +2359,7 @@ static int dummy_compute(void *engine, const char *text, int text_len, void *xda dummy_engine_t *e = (dummy_engine_t *)engine; e->compute_count++; result->n_tokens = text_len / 4; - result->n_tokens_truncated = 0; + result->truncated = false; result->n_embd = e->dimension; result->embedding = e->embedding; return 0; From ca542009cfc45467841076aceb5bd017fcff8b1c Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Thu, 30 Apr 2026 12:09:18 -0600 Subject: [PATCH 06/13] Fix extension tests for local-only builds --- test/unittest.c | 36 ++++++++++++++++++++++++++++++++++++ 1 file changed, 36 insertions(+) diff --git a/test/unittest.c b/test/unittest.c index 154c04d..6987174 100644 --- a/test/unittest.c +++ b/test/unittest.c @@ -2610,6 +2610,7 @@ static void tracking_free(void *engine, void *xdata) { free(engine); } +#ifndef DBMEM_OMIT_REMOTE_ENGINE TEST(sqlite_set_model_releases_previous_engine_on_class_switch) { sqlite3 *db = open_test_db(); ASSERT(db != NULL); @@ -2639,6 +2640,37 @@ TEST(sqlite_set_model_releases_previous_engine_on_class_switch) { sqlite3_close(db); ASSERT_EQ(state.free_count, 1); } +#else +TEST(sqlite_set_model_failed_remote_switch_keeps_custom_engine) { + sqlite3 *db = open_test_db(); + ASSERT(db != NULL); + + sqlite3_int64 result = 0; + int rc = exec_get_int(db, "SELECT memory_set_apikey('test-key');", &result); + ASSERT_EQ(rc, SQLITE_OK); + + tracking_free_state_t state = {0}; + dbmem_provider_t prov = { .init = tracking_init, .compute = tracking_compute, .free = tracking_free, .xdata = &state }; + rc = sqlite3_memory_register_provider(db, "tracker", &prov); + ASSERT_EQ(rc, SQLITE_OK); + + rc = exec_get_int(db, "SELECT memory_set_model('tracker', 'm1');", &result); + ASSERT_EQ(rc, SQLITE_OK); + 
ASSERT_EQ(state.free_count, 0); + + sqlite3_stmt *stmt = NULL; + rc = sqlite3_prepare_v2(db, "SELECT memory_set_model('openai', 'text-embedding-3-small');", -1, &stmt, NULL); + ASSERT_EQ(rc, SQLITE_OK); + rc = sqlite3_step(stmt); + ASSERT_EQ(rc, SQLITE_ERROR); + sqlite3_finalize(stmt); + + ASSERT_EQ(state.free_count, 0); + + sqlite3_close(db); + ASSERT_EQ(state.free_count, 1); +} +#endif #endif // TEST_SQLITE_EXTENSION @@ -2779,7 +2811,11 @@ int main(int argc, char *argv[]) { RUN_TEST(sqlite_custom_provider_init_error); RUN_TEST(sqlite_custom_provider_apikey_passed); RUN_TEST(sqlite_set_model_failed_reindex_preserves_existing_rows); +#ifndef DBMEM_OMIT_REMOTE_ENGINE RUN_TEST(sqlite_set_model_releases_previous_engine_on_class_switch); +#else + RUN_TEST(sqlite_set_model_failed_remote_switch_keeps_custom_engine); +#endif #endif printf("\n=== Results ===\n"); From b3c97f30980f8e48488c2fe71c65e5880d28e6d6 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Thu, 30 Apr 2026 13:13:31 -0600 Subject: [PATCH 07/13] Use portable temp paths in sync tests --- test/unittest.c | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/test/unittest.c b/test/unittest.c index 6987174..f06d381 100644 --- a/test/unittest.c +++ b/test/unittest.c @@ -1844,12 +1844,12 @@ TEST(sqlite_sync_directory_removes_deleted) { sqlite3 *db = open_test_db(); ASSERT(db != NULL); - const char *test_dir = "/tmp/dbmem_test_sync_del"; - const char *file_keep = "/tmp/dbmem_test_sync_del/keep.md"; + const char *test_dir = TEST_TMP_DIR "/dbmem_test_sync_del"; + const char *file_keep = TEST_TMP_DIR "/dbmem_test_sync_del/keep.md"; // Clean up remove(file_keep); - remove("/tmp/dbmem_test_sync_del/gone.md"); + remove(TEST_TMP_DIR "/dbmem_test_sync_del/gone.md"); rmdir_p(test_dir); // Create directory with one file @@ -1867,7 +1867,7 @@ TEST(sqlite_sync_directory_removes_deleted) { int rc = insert_fake_content(db, keep_hash, file_keep, NULL, len); ASSERT_EQ(rc, SQLITE_OK); - 
rc = insert_fake_content(db, 99999, "/tmp/dbmem_test_sync_del/gone.md", NULL, 4); + rc = insert_fake_content(db, 99999, TEST_TMP_DIR "/dbmem_test_sync_del/gone.md", NULL, 4); ASSERT_EQ(rc, SQLITE_OK); // Verify 2 entries before sync @@ -1878,7 +1878,7 @@ TEST(sqlite_sync_directory_removes_deleted) { // Sync — should remove the entry for gone.md, skip keep.md (hash match) sqlite3_int64 result; - rc = exec_get_int(db, "SELECT memory_add_directory('/tmp/dbmem_test_sync_del');", &result); + rc = exec_get_int(db, "SELECT memory_add_directory('" TEST_TMP_DIR "/dbmem_test_sync_del');", &result); ASSERT_EQ(rc, SQLITE_OK); // Only keep.md entry should remain @@ -1901,17 +1901,17 @@ TEST(sqlite_sync_directory_removes_all_deleted) { sqlite3 *db = open_test_db(); ASSERT(db != NULL); - const char *test_dir = "/tmp/dbmem_test_sync_allgone"; - remove("/tmp/dbmem_test_sync_allgone/x.md"); + const char *test_dir = TEST_TMP_DIR "/dbmem_test_sync_allgone"; + remove(TEST_TMP_DIR "/dbmem_test_sync_allgone/x.md"); rmdir_p(test_dir); mkdir_p(test_dir); // empty directory // Insert fake entries pointing to files that don't exist - int rc = insert_fake_content(db, 1001, "/tmp/dbmem_test_sync_allgone/a.md", "ctx", 4); + int rc = insert_fake_content(db, 1001, TEST_TMP_DIR "/dbmem_test_sync_allgone/a.md", "ctx", 4); ASSERT_EQ(rc, SQLITE_OK); - rc = insert_fake_content(db, 1002, "/tmp/dbmem_test_sync_allgone/b.md", "ctx", 4); + rc = insert_fake_content(db, 1002, TEST_TMP_DIR "/dbmem_test_sync_allgone/b.md", "ctx", 4); ASSERT_EQ(rc, SQLITE_OK); - rc = insert_fake_content(db, 1003, "/tmp/dbmem_test_sync_allgone/c.md", "ctx", 4); + rc = insert_fake_content(db, 1003, TEST_TMP_DIR "/dbmem_test_sync_allgone/c.md", "ctx", 4); ASSERT_EQ(rc, SQLITE_OK); // Also insert vault entries to verify cascade delete @@ -1930,7 +1930,7 @@ TEST(sqlite_sync_directory_removes_all_deleted) { // Sync — all files gone, all entries should be removed sqlite3_int64 result; - rc = exec_get_int(db, "SELECT 
memory_add_directory('/tmp/dbmem_test_sync_allgone');", &result); + rc = exec_get_int(db, "SELECT memory_add_directory('" TEST_TMP_DIR "/dbmem_test_sync_allgone');", &result); ASSERT_EQ(rc, SQLITE_OK); rc = exec_get_int(db, "SELECT COUNT(*) FROM dbmem_content;", &count); @@ -1951,8 +1951,8 @@ TEST(sqlite_sync_directory_skips_unchanged) { sqlite3 *db = open_test_db(); ASSERT(db != NULL); - const char *test_dir = "/tmp/dbmem_test_sync_skip"; - const char *file = "/tmp/dbmem_test_sync_skip/note.md"; + const char *test_dir = TEST_TMP_DIR "/dbmem_test_sync_skip"; + const char *file = TEST_TMP_DIR "/dbmem_test_sync_skip/note.md"; const char *content = "# My Note\nSome content."; remove(file); @@ -1967,7 +1967,7 @@ TEST(sqlite_sync_directory_skips_unchanged) { // Sync — file exists with matching hash, should be skipped sqlite3_int64 result; - rc = exec_get_int(db, "SELECT memory_add_directory('/tmp/dbmem_test_sync_skip', 'notes');", &result); + rc = exec_get_int(db, "SELECT memory_add_directory('" TEST_TMP_DIR "/dbmem_test_sync_skip', 'notes');", &result); ASSERT_EQ(rc, SQLITE_OK); // Entry still exists unchanged (no duplication) From 1b45e33c6a95be73290efd6f571808f2a5a680fa Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Thu, 30 Apr 2026 13:46:07 -0600 Subject: [PATCH 08/13] Hash sync test fixture from disk --- test/unittest.c | 12 +++++++++--- 1 file changed, 9 insertions(+), 3 deletions(-) diff --git a/test/unittest.c b/test/unittest.c index f06d381..1abfb65 100644 --- a/test/unittest.c +++ b/test/unittest.c @@ -1960,9 +1960,15 @@ TEST(sqlite_sync_directory_skips_unchanged) { mkdir_p(test_dir); create_test_file(file, content); - // Compute the hash and pre-insert the entry - uint64_t hash = dbmem_hash_compute(content, strlen(content)); - int rc = insert_fake_content(db, hash, file, "notes", (sqlite3_int64)strlen(content)); + // Compute the hash from disk so Windows text-mode newline translation + // cannot make the pre-inserted hash differ from 
memory_add_directory(). + int64_t len = 0; + char *buf = dbmem_file_read(file, &len); + ASSERT(buf != NULL); + uint64_t hash = dbmem_hash_compute(buf, (size_t)len); + dbmemory_free(buf); + + int rc = insert_fake_content(db, hash, file, "notes", len); ASSERT_EQ(rc, SQLITE_OK); // Sync — file exists with matching hash, should be skipped From 1e519a2b57db2cf1ab18ee1174e8db1aaa54e476 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Wed, 6 May 2026 11:47:28 -0600 Subject: [PATCH 09/13] feat: persist embedding metadata - add schema versioning and migrate vault/cache metadata columns - store n_tokens and truncated on vault rows and cache entries - assert metadata persistence in unittest coverage - document the remote parser refactor plan for later implementation --- remote-embedding-parser-refactor-plan.md | 170 +++++++++++++++++++++++ src/sqlite-memory.c | 127 ++++++++++++++++- src/sqlite-memory.h | 2 +- test/unittest.c | 160 ++++++++++++++++++++- 4 files changed, 449 insertions(+), 10 deletions(-) create mode 100644 remote-embedding-parser-refactor-plan.md diff --git a/remote-embedding-parser-refactor-plan.md b/remote-embedding-parser-refactor-plan.md new file mode 100644 index 0000000..fbd1abb --- /dev/null +++ b/remote-embedding-parser-refactor-plan.md @@ -0,0 +1,170 @@ +# Remote Embedding Parser Refactor Plan + +This note captures the preferred implementation plan for later work. The goal is +to make vectors.space response parsing directly testable without turning the +parser into public API or giving production logic a test-only name. + +## Goal + +Refactor `src/dbmem-rembed.c` so the JSON response parsing currently embedded in +`dbmem_remote_compute_embedding()` lives in a reusable internal function: + +```c +int dbmem_remote_parse_embedding_response(...); +``` + +The function should be production logic, used by `dbmem_remote_compute_embedding()` +and also callable from `test/unittest.c` via a manual forward declaration. 
+ +Do not expose it in `sqlite-memory.h` or another public header. + +## Preferred Shape + +Use a normal internal production name, not a test-only name: + +```c +int dbmem_remote_parse_embedding_response( + const char *json, + size_t json_len, + float **embedding, + size_t *embedding_capacity, + jsmntok_t **tokens, + int *tokens_capacity, + embedding_result_t *result, + char *err_msg, + size_t err_msg_len +); +``` + +This keeps ownership explicit while avoiding exposure of `dbmem_remote_engine_t` +or a new parser-state struct in test code. + +## Production Usage + +`dbmem_remote_compute_embedding()` keeps responsibility for: + +- request construction +- HTTP transport +- HTTP status handling +- context error propagation +- aggregate remote-engine stats + +After receiving a successful HTTP 200 response, it calls: + +```c +char err_msg[DBMEM_ERRBUF_SIZE] = {0}; +int rc = dbmem_remote_parse_embedding_response( + engine->data, + engine->data_size, + &engine->embedding, + &engine->embedding_capacity, + &engine->tokens, + &engine->tokens_capacity, + result, + err_msg, + sizeof(err_msg) +); + +if (rc != 0) { + dbmem_context_set_error(engine->context, err_msg); + return -1; +} + +engine->total_tokens_processed += result->n_tokens; +engine->total_embeddings_generated++; +return 0; +``` + +## Parser Responsibility + +`dbmem_remote_parse_embedding_response()` should own: + +- parsing JSON with `jsmn` +- allocating/growing the token buffer +- locating top-level `output_dimension` +- locating `data[0].embedding` +- allocating/growing the embedding buffer +- parsing embedding floats +- reading `data[0].truncated` +- reading token metadata from `usage` +- filling `embedding_result_t` + +Token count priority should remain: + +1. `usage.exact_prompt_tokens` +2. `usage.estimated_prompt_tokens` +3. `usage.prompt_tokens` +4. 
`0` if none are present + +## Unit Test Usage + +`test/unittest.c` can manually forward-declare the function under the relevant +test guards: + +```c +#if defined(TEST_SQLITE_EXTENSION) && !defined(DBMEM_OMIT_REMOTE_ENGINE) +int dbmem_remote_parse_embedding_response( + const char *json, + size_t json_len, + float **embedding, + size_t *embedding_capacity, + jsmntok_t **tokens, + int *tokens_capacity, + embedding_result_t *result, + char *err_msg, + size_t err_msg_len +); +#endif +``` + +Tests create local buffers: + +```c +float *embedding = NULL; +size_t embedding_capacity = 0; +jsmntok_t *tokens = NULL; +int tokens_capacity = 0; +embedding_result_t result = {0}; +char err_msg[1024] = {0}; +``` + +Then call the parser with static JSON fixtures and free the buffers afterward: + +```c +dbmemory_free(embedding); +dbmemory_free(tokens); +``` + +## Fixture Tests To Add Later + +Recommended deterministic cases: + +- exact token count is preferred over estimated and prompt token counts +- estimated token count is used when exact token count is absent +- prompt token count is used when exact and estimated token counts are absent +- missing usage object leaves `result.n_tokens == 0` +- `data[0].truncated: false` maps to `result.truncated == false` +- `data[0].truncated: true` maps to `result.truncated == true` +- embedding float array is parsed correctly +- output dimension is parsed correctly +- missing `data` +- missing `embedding` +- empty embedding array +- invalid top-level response shape + +Also decide whether the parser should reject mismatches between +`output_dimension` and the embedding array length. Failing fast is likely safer, +because a dimension mismatch can break later vector initialization/search. 
+ +## Why This Plan + +This approach avoids: + +- live network dependence for parser correctness tests +- exposing parser internals as public API +- duplicating parser behavior in test-only code +- coupling tests to `dbmem_remote_engine_t` +- adding a new internal header before it is needed + +The e2e test discussion can proceed separately, especially around whether token +metadata should become persisted product state or remain parser-only metadata. diff --git a/src/sqlite-memory.c b/src/sqlite-memory.c index f03dd22..5663b97 100644 --- a/src/sqlite-memory.c +++ b/src/sqlite-memory.c @@ -60,6 +60,9 @@ SQLITE_EXTENSION_INIT1 #define DBMEM_SETTINGS_KEY_EMBEDDING_CACHE "embedding_cache" #define DBMEM_SETTINGS_KEY_CACHE_MAX_ENTRIES "cache_max_entries" #define DBMEM_SETTINGS_KEY_SEARCH_OVERSAMPLE "search_oversample" +#define DBMEM_SETTINGS_KEY_SCHEMA_VERSION "schema_version" + +#define DBMEM_SCHEMA_VERSION 2 // default values from https://docs.openclaw.ai/concepts/memory #define DEFAULT_CHARS_PER_TOKEN 4 // Approximate number of characters per token (GPT ≈ 4, Claude ≈ 3.5) @@ -358,6 +361,105 @@ void dbmem_settings_load (sqlite3 *db, dbmem_context *ctx) { // MARK: - Database - +static bool dbmem_database_column_exists (sqlite3 *db, const char *table, const char *column, int *out_rc) { + char sql[256]; + snprintf(sql, sizeof(sql), "PRAGMA table_info(%s);", table); + + sqlite3_stmt *vm = NULL; + int rc = sqlite3_prepare_v2(db, sql, -1, &vm, NULL); + if (rc != SQLITE_OK) { + if (out_rc) *out_rc = rc; + return false; + } + + bool exists = false; + while ((rc = sqlite3_step(vm)) == SQLITE_ROW) { + const char *name = (const char *)sqlite3_column_text(vm, 1); + if (name && strcmp(name, column) == 0) { + exists = true; + break; + } + } + + if (rc == SQLITE_DONE || rc == SQLITE_ROW) rc = SQLITE_OK; + sqlite3_finalize(vm); + if (out_rc) *out_rc = rc; + return exists; +} + +static int dbmem_database_add_column_if_missing (sqlite3 *db, const char *table, const char *column, const 
char *alter_sql) { + int rc = SQLITE_OK; + if (dbmem_database_column_exists(db, table, column, &rc)) return SQLITE_OK; + if (rc != SQLITE_OK) return rc; + return sqlite3_exec(db, alter_sql, NULL, NULL, NULL); +} + +static int dbmem_database_schema_version (sqlite3 *db, int *version) { + static const char *sql = "SELECT value FROM dbmem_settings WHERE key=?1 LIMIT 1;"; + + *version = 0; + + sqlite3_stmt *vm = NULL; + int rc = sqlite3_prepare_v2(db, sql, -1, &vm, NULL); + if (rc != SQLITE_OK) goto cleanup; + + rc = sqlite3_bind_text(vm, 1, DBMEM_SETTINGS_KEY_SCHEMA_VERSION, -1, SQLITE_STATIC); + if (rc != SQLITE_OK) goto cleanup; + + rc = sqlite3_step(vm); + if (rc == SQLITE_ROW) { + *version = sqlite3_column_int(vm, 0); + rc = SQLITE_OK; + } else if (rc == SQLITE_DONE) { + rc = SQLITE_OK; + } + +cleanup: + if (vm) sqlite3_finalize(vm); + return rc; +} + +static int dbmem_database_set_schema_version (sqlite3 *db, int version) { + return dbmem_settings_write_int(db, DBMEM_SETTINGS_KEY_SCHEMA_VERSION, version); +} + +static int dbmem_database_migrate_v1_to_v2 (sqlite3 *db) { + int rc = dbmem_database_add_column_if_missing(db, "dbmem_vault", "n_tokens", + "ALTER TABLE dbmem_vault ADD COLUMN n_tokens INTEGER NOT NULL DEFAULT 0;"); + if (rc != SQLITE_OK) return rc; + + rc = dbmem_database_add_column_if_missing(db, "dbmem_vault", "truncated", + "ALTER TABLE dbmem_vault ADD COLUMN truncated INTEGER NOT NULL DEFAULT 0;"); + if (rc != SQLITE_OK) return rc; + + rc = dbmem_database_add_column_if_missing(db, "dbmem_cache", "n_tokens", + "ALTER TABLE dbmem_cache ADD COLUMN n_tokens INTEGER NOT NULL DEFAULT 0;"); + if (rc != SQLITE_OK) return rc; + + return dbmem_database_add_column_if_missing(db, "dbmem_cache", "truncated", + "ALTER TABLE dbmem_cache ADD COLUMN truncated INTEGER NOT NULL DEFAULT 0;"); +} + +static int dbmem_database_migrate (sqlite3 *db) { + int version = 0; + int rc = dbmem_database_schema_version(db, &version); + if (rc != SQLITE_OK) return rc; + + if (version 
> DBMEM_SCHEMA_VERSION) return SQLITE_MISMATCH; + if (version <= 0) version = 1; + + if (version < 2) { + rc = dbmem_database_migrate_v1_to_v2(db); + if (rc != SQLITE_OK) return rc; + version = 2; + rc = dbmem_database_set_schema_version(db, version); + if (rc != SQLITE_OK) return rc; + } + + if (version != DBMEM_SCHEMA_VERSION) return SQLITE_MISMATCH; + return SQLITE_OK; +} + static int dbmem_database_init (sqlite3 *db) { const char *sql = "CREATE TABLE IF NOT EXISTS dbmem_settings (key TEXT PRIMARY KEY, value TEXT);"; int rc = sqlite3_exec(db, sql, NULL, NULL, NULL); @@ -367,14 +469,17 @@ static int dbmem_database_init (sqlite3 *db) { rc = sqlite3_exec(db, sql, NULL, NULL, NULL); if (rc != SQLITE_OK) return rc; - sql = "CREATE TABLE IF NOT EXISTS dbmem_vault (hash TEXT NOT NULL, seq INTEGER NOT NULL, embedding BLOB NOT NULL, offset INTEGER NOT NULL, length INTEGER NOT NULL, PRIMARY KEY (hash, seq));"; + sql = "CREATE TABLE IF NOT EXISTS dbmem_vault (hash TEXT NOT NULL, seq INTEGER NOT NULL, embedding BLOB NOT NULL, offset INTEGER NOT NULL, length INTEGER NOT NULL, n_tokens INTEGER NOT NULL DEFAULT 0, truncated INTEGER NOT NULL DEFAULT 0, PRIMARY KEY (hash, seq));"; rc = sqlite3_exec(db, sql, NULL, NULL, NULL); if (rc != SQLITE_OK) return rc; - sql = "CREATE TABLE IF NOT EXISTS dbmem_cache (text_hash TEXT NOT NULL, provider TEXT NOT NULL, model TEXT NOT NULL, embedding BLOB NOT NULL, dimension INTEGER NOT NULL, PRIMARY KEY (text_hash, provider, model));"; + sql = "CREATE TABLE IF NOT EXISTS dbmem_cache (text_hash TEXT NOT NULL, provider TEXT NOT NULL, model TEXT NOT NULL, embedding BLOB NOT NULL, dimension INTEGER NOT NULL, n_tokens INTEGER NOT NULL DEFAULT 0, truncated INTEGER NOT NULL DEFAULT 0, PRIMARY KEY (text_hash, provider, model));"; rc = sqlite3_exec(db, sql, NULL, NULL, NULL); if (rc != SQLITE_OK) return rc; + rc = dbmem_database_migrate(db); + if (rc != SQLITE_OK) return rc; + sql = "CREATE VIRTUAL TABLE IF NOT EXISTS dbmem_vault_fts USING fts5 
(content, hash UNINDEXED, seq UNINDEXED, context UNINDEXED);"; rc = sqlite3_exec(db, sql, NULL, NULL, NULL); if (rc != SQLITE_OK) { @@ -495,7 +600,7 @@ static int dbmem_database_add_entry (dbmem_context *ctx, sqlite3 *db, uint64_t h } static int dbmem_database_add_chunk (dbmem_context *ctx, embedding_result_t *result, size_t offset, size_t length, size_t index) { - static const char *sql = "INSERT INTO dbmem_vault (hash, seq, embedding, offset, length) VALUES (?1, ?2, ?3, ?4, ?5);"; + static const char *sql = "INSERT INTO dbmem_vault (hash, seq, embedding, offset, length, n_tokens, truncated) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7);"; sqlite3_stmt *vm = NULL; int rc = sqlite3_prepare_v2(ctx->db, sql, -1, &vm, NULL); @@ -515,6 +620,12 @@ static int dbmem_database_add_chunk (dbmem_context *ctx, embedding_result_t *res rc = sqlite3_bind_int64(vm, 5, (sqlite3_int64)length); if (rc != SQLITE_OK) goto cleanup; + + rc = sqlite3_bind_int(vm, 6, result->n_tokens); + if (rc != SQLITE_OK) goto cleanup; + + rc = sqlite3_bind_int(vm, 7, result->truncated ? 
1 : 0); + if (rc != SQLITE_OK) goto cleanup; rc = sqlite3_step(vm); if (rc == SQLITE_DONE) rc = SQLITE_OK; @@ -1267,7 +1378,7 @@ static void dbmem_dump_embeding (const embedding_result_t *result) { // MARK: - Embedding Cache - static bool dbmem_cache_lookup (dbmem_context *ctx, uint64_t text_hash, embedding_result_t *result) { - static const char *sql = "SELECT embedding, dimension FROM dbmem_cache WHERE text_hash=?1 AND provider=?2 AND model=?3 LIMIT 1;"; + static const char *sql = "SELECT embedding, dimension, n_tokens, truncated FROM dbmem_cache WHERE text_hash=?1 AND provider=?2 AND model=?3 LIMIT 1;"; if (!ctx->provider || !ctx->model) return false; @@ -1300,8 +1411,8 @@ static bool dbmem_cache_lookup (dbmem_context *ctx, uint64_t text_hash, embeddin memcpy(ctx->cache_buffer, blob, blob_bytes); result->embedding = ctx->cache_buffer; result->n_embd = dimension; - result->n_tokens = 0; - result->truncated = false; + result->n_tokens = sqlite3_column_int(vm, 2); + result->truncated = sqlite3_column_int(vm, 3) != 0; found = true; cleanup: @@ -1337,7 +1448,7 @@ static void dbmem_cache_evict (dbmem_context *ctx) { } static void dbmem_cache_store (dbmem_context *ctx, uint64_t text_hash, const embedding_result_t *result) { - static const char *sql = "INSERT OR REPLACE INTO dbmem_cache (text_hash, provider, model, embedding, dimension) VALUES (?1, ?2, ?3, ?4, ?5);"; + static const char *sql = "INSERT OR REPLACE INTO dbmem_cache (text_hash, provider, model, embedding, dimension, n_tokens, truncated) VALUES (?1, ?2, ?3, ?4, ?5, ?6, ?7);"; if (!ctx->provider || !ctx->model) return; @@ -1350,6 +1461,8 @@ static void dbmem_cache_store (dbmem_context *ctx, uint64_t text_hash, const emb sqlite3_bind_text(vm, 3, ctx->model, -1, SQLITE_STATIC); sqlite3_bind_blob(vm, 4, result->embedding, result->n_embd * (int)sizeof(float), SQLITE_STATIC); sqlite3_bind_int(vm, 5, result->n_embd); + sqlite3_bind_int(vm, 6, result->n_tokens); + sqlite3_bind_int(vm, 7, result->truncated ? 
1 : 0); sqlite3_step(vm); diff --git a/src/sqlite-memory.h b/src/sqlite-memory.h index 07628af..77e17e9 100644 --- a/src/sqlite-memory.h +++ b/src/sqlite-memory.h @@ -26,7 +26,7 @@ extern "C" { #endif -#define SQLITE_DBMEMORY_VERSION "1.1.0" +#define SQLITE_DBMEMORY_VERSION "1.2.0" // public API SQLITE_DBMEMORY_API int sqlite3_memory_init (sqlite3 *db, char **pzErrMsg, const sqlite3_api_routines *pApi); diff --git a/test/unittest.c b/test/unittest.c index ac746cb..87a5b2d 100644 --- a/test/unittest.c +++ b/test/unittest.c @@ -1568,7 +1568,7 @@ TEST(sqlite_schema_has_timestamps) { ASSERT(db != NULL); // Check that schema includes created_at column - char sql[256]; + char sql[512]; int rc = exec_get_text(db, "SELECT sql FROM sqlite_master WHERE name='dbmem_content';", sql, sizeof(sql)); @@ -1582,12 +1582,75 @@ TEST(sqlite_schema_has_timestamps) { sql, sizeof(sql)); ASSERT_EQ(rc, SQLITE_OK); ASSERT(strstr(sql, "hash TEXT NOT NULL") != NULL); + ASSERT(strstr(sql, "n_tokens") != NULL); + ASSERT(strstr(sql, "truncated") != NULL); rc = exec_get_text(db, "SELECT sql FROM sqlite_master WHERE name='dbmem_cache';", sql, sizeof(sql)); ASSERT_EQ(rc, SQLITE_OK); ASSERT(strstr(sql, "text_hash TEXT NOT NULL") != NULL); + ASSERT(strstr(sql, "n_tokens") != NULL); + ASSERT(strstr(sql, "truncated") != NULL); + + sqlite3_int64 schema_version = 0; + rc = exec_get_int(db, "SELECT value FROM dbmem_settings WHERE key = 'schema_version';", &schema_version); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(schema_version, 2); + + sqlite3_close(db); +} + +TEST(sqlite_schema_migrates_embedding_metadata) { + sqlite3 *db = NULL; + int rc = sqlite3_open(":memory:", &db); + ASSERT_EQ(rc, SQLITE_OK); + + rc = sqlite3_exec(db, + "CREATE TABLE dbmem_settings (key TEXT PRIMARY KEY, value TEXT);" + "INSERT INTO dbmem_settings (key, value) VALUES ('schema_version', '1');" + "CREATE TABLE dbmem_vault (hash TEXT NOT NULL, seq INTEGER NOT NULL, embedding BLOB NOT NULL, offset INTEGER NOT NULL, length INTEGER NOT 
NULL, PRIMARY KEY (hash, seq));" + "CREATE TABLE dbmem_cache (text_hash TEXT NOT NULL, provider TEXT NOT NULL, model TEXT NOT NULL, embedding BLOB NOT NULL, dimension INTEGER NOT NULL, PRIMARY KEY (text_hash, provider, model));", + NULL, NULL, NULL); + ASSERT_EQ(rc, SQLITE_OK); + + rc = sqlite3_memory_init(db, NULL, NULL); + ASSERT_EQ(rc, SQLITE_OK); + + sqlite3_int64 count = 0; + rc = exec_get_int(db, "SELECT COUNT(*) FROM pragma_table_info('dbmem_vault') WHERE name = 'n_tokens';", &count); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(count, 1); + + rc = exec_get_int(db, "SELECT COUNT(*) FROM pragma_table_info('dbmem_vault') WHERE name = 'truncated';", &count); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(count, 1); + + rc = exec_get_int(db, "SELECT COUNT(*) FROM pragma_table_info('dbmem_cache') WHERE name = 'n_tokens';", &count); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(count, 1); + + rc = exec_get_int(db, "SELECT COUNT(*) FROM pragma_table_info('dbmem_cache') WHERE name = 'truncated';", &count); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(count, 1); + + rc = sqlite3_exec(db, + "INSERT INTO dbmem_vault (hash, seq, embedding, offset, length) VALUES (printf('%016x', 900), 0, X'00000000', 0, 4);" + "INSERT INTO dbmem_cache (text_hash, provider, model, embedding, dimension) VALUES (printf('%016x', 901), 'dummy', 'model', X'00000000', 1);", + NULL, NULL, NULL); + ASSERT_EQ(rc, SQLITE_OK); + + rc = exec_get_int(db, "SELECT n_tokens FROM dbmem_vault WHERE hash = printf('%016x', 900);", &count); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(count, 0); + + rc = exec_get_int(db, "SELECT truncated FROM dbmem_cache WHERE text_hash = printf('%016x', 901);", &count); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(count, 0); + + rc = exec_get_int(db, "SELECT value FROM dbmem_settings WHERE key = 'schema_version';", &count); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(count, 2); sqlite3_close(db); } @@ -2158,7 +2221,7 @@ TEST(sqlite_cache_table_exists) { ASSERT(db != NULL); // Check that dbmem_cache table 
exists - char sql[256]; + char sql[512]; int rc = exec_get_text(db, "SELECT sql FROM sqlite_master WHERE name='dbmem_cache';", sql, sizeof(sql)); @@ -2169,6 +2232,8 @@ TEST(sqlite_cache_table_exists) { ASSERT(strstr(sql, "model") != NULL); ASSERT(strstr(sql, "embedding") != NULL); ASSERT(strstr(sql, "dimension") != NULL); + ASSERT(strstr(sql, "n_tokens") != NULL); + ASSERT(strstr(sql, "truncated") != NULL); sqlite3_close(db); } @@ -2500,6 +2565,14 @@ static int dummy_compute(void *engine, const char *text, int text_len, void *xda return 0; } +static int truncated_dummy_compute(void *engine, const char *text, int text_len, void *xdata, dbmem_embedding_result_t *result) { + int rc = dummy_compute(engine, text, text_len, xdata, result); + if (rc != 0) return rc; + result->n_tokens = 3; + result->truncated = true; + return 0; +} + static void dummy_free(void *engine, void *xdata) { UNUSED_PARAM(xdata); free(engine); @@ -2646,6 +2719,87 @@ TEST(sqlite_custom_provider_add_text) { ASSERT_EQ(rc, SQLITE_OK); ASSERT(result >= 1); + rc = exec_get_int(db, "SELECT n_tokens FROM dbmem_vault LIMIT 1;", &result); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(result, 7); + + rc = exec_get_int(db, "SELECT truncated FROM dbmem_vault LIMIT 1;", &result); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(result, 0); + + rc = exec_get_int(db, "SELECT n_tokens FROM dbmem_cache LIMIT 1;", &result); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(result, 7); + + rc = exec_get_int(db, "SELECT truncated FROM dbmem_cache LIMIT 1;", &result); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(result, 0); + + rc = exec_get_int(db, "SELECT memory_clear();", &result); + ASSERT_EQ(rc, SQLITE_OK); + + rc = exec_get_int(db, "SELECT memory_add_text('Hello world, this is a test.');", &result); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT(result >= 1); + + rc = exec_get_int(db, "SELECT n_tokens FROM dbmem_vault LIMIT 1;", &result); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(result, 7); + + rc = exec_get_int(db, "SELECT truncated FROM dbmem_vault 
LIMIT 1;", &result); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(result, 0); + + sqlite3_close(db); +} + +TEST(sqlite_custom_provider_persists_truncated_metadata) { + sqlite3 *db = open_test_db(); + ASSERT(db != NULL); + + dbmem_provider_t prov = { .init = dummy_init, .compute = truncated_dummy_compute, .free = dummy_free }; + int rc = sqlite3_memory_register_provider(db, "truncdummy", &prov); + ASSERT_EQ(rc, SQLITE_OK); + + sqlite3_int64 result = 0; + rc = exec_get_int(db, "SELECT memory_set_model('truncdummy', 'test-model');", &result); + ASSERT_EQ(rc, SQLITE_OK); + + rc = exec_get_int(db, "SELECT memory_add_text('This custom provider reports truncation.');", &result); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT(result >= 1); + + rc = exec_get_int(db, "SELECT n_tokens FROM dbmem_vault LIMIT 1;", &result); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(result, 3); + + rc = exec_get_int(db, "SELECT truncated FROM dbmem_vault LIMIT 1;", &result); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(result, 1); + + rc = exec_get_int(db, "SELECT n_tokens FROM dbmem_cache LIMIT 1;", &result); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(result, 3); + + rc = exec_get_int(db, "SELECT truncated FROM dbmem_cache LIMIT 1;", &result); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(result, 1); + + rc = exec_get_int(db, "SELECT memory_clear();", &result); + ASSERT_EQ(rc, SQLITE_OK); + + rc = exec_get_int(db, "SELECT memory_add_text('This custom provider reports truncation.');", &result); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT(result >= 1); + + rc = exec_get_int(db, "SELECT n_tokens FROM dbmem_vault LIMIT 1;", &result); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(result, 3); + + rc = exec_get_int(db, "SELECT truncated FROM dbmem_vault LIMIT 1;", &result); + ASSERT_EQ(rc, SQLITE_OK); + ASSERT_EQ(result, 1); + sqlite3_close(db); } @@ -2969,6 +3123,7 @@ int main(int argc, char *argv[]) { RUN_TEST(sqlite_memory_delete_nonexistent); RUN_TEST(sqlite_memory_delete_context_nonexistent); RUN_TEST(sqlite_schema_has_timestamps); + 
RUN_TEST(sqlite_schema_migrates_embedding_metadata); RUN_TEST(sqlite_direct_insert_with_timestamp); RUN_TEST(sqlite_memory_delete_direct); RUN_TEST(sqlite_memory_delete_context_direct); @@ -3009,6 +3164,7 @@ int main(int argc, char *argv[]) { RUN_TEST(sqlite_custom_provider_register); RUN_TEST(sqlite_custom_provider_set_model); RUN_TEST(sqlite_custom_provider_add_text); + RUN_TEST(sqlite_custom_provider_persists_truncated_metadata); RUN_TEST(sqlite_mdx_preprocessing_applies_only_to_mdx_files); RUN_TEST(sqlite_custom_provider_null_callbacks); RUN_TEST(sqlite_custom_provider_init_error); From d9e86ff74988a7390c50b02a12317cf29eea38ce Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Wed, 6 May 2026 12:13:29 -0600 Subject: [PATCH 10/13] test(e2e): add long-text and provider-limit tests; isolate curl configure Add five e2e tests covering long-text behaviour and provider boundaries: - memory_add_long_text_chunking + memory_search_long_text_sections: structural and retrieval coverage for multi-chunk inputs. - memory_search_under_token_limit (~5KB, single chunk; passes) and memory_search_truncation_signature / _near_model_context (~10KB / ~19.5KB, rejected by provider): document vectors.space's 1024-token batch ceiling. Also: RUN_TEST no longer prints PASSED after a test fails, and Makefile strips LDFLAGS/CPPFLAGS/CFLAGS/LIBS from curl's ./configure so rebuilds work in shells with Homebrew-style env exports. Co-Authored-By: Claude Opus 4.7 (1M context) --- Makefile | 2 +- test/e2e.c | 759 ++++++++++++++++++++++++++++++++++++++++++++++++++++- 2 files changed, 759 insertions(+), 2 deletions(-) diff --git a/Makefile b/Makefile index c3fb299..1e5ad22 100644 --- a/Makefile +++ b/Makefile @@ -561,7 +561,7 @@ ifeq ($(PLATFORM),windows) else unzip -o $(CURL_ZIP) -d $(CURL_DIR)/src/. 
endif - cd $(CURL_SRC) && ./configure \ + cd $(CURL_SRC) && env -u LDFLAGS -u CPPFLAGS -u CFLAGS -u LIBS ./configure \ --without-libpsl \ --disable-alt-svc \ --disable-ares \ diff --git a/test/e2e.c b/test/e2e.c index 41d97ed..d745b8c 100644 --- a/test/e2e.c +++ b/test/e2e.c @@ -58,12 +58,13 @@ static int tests_failed = 0; #define TEST(name) static void test_##name(void) #define RUN_TEST(name) do { \ + int _failed_before = tests_failed; \ printf(" Running %s... ", #name); \ fflush(stdout); \ test_##name(); \ tests_run++; \ tests_passed++; \ - printf("PASSED\n"); \ + if (tests_failed == _failed_before) printf("PASSED\n"); \ } while(0) #define ASSERT(cond) do { \ @@ -120,6 +121,33 @@ static void create_test_file(const char *path, const char *content) { } } +static int get_vault_metadata(const char *hash, int *chunk_count, int *min_tokens, int *min_truncated, int *max_truncated) { + sqlite3_stmt *stmt = NULL; + int rc = sqlite3_prepare_v2(db, + "SELECT COUNT(*), COALESCE(MIN(n_tokens), 0), COALESCE(MIN(truncated), 0), COALESCE(MAX(truncated), 0) " + "FROM dbmem_vault WHERE hash = ?1;", + -1, &stmt, NULL); + if (rc != SQLITE_OK) return rc; + + rc = sqlite3_bind_text(stmt, 1, hash, -1, SQLITE_STATIC); + if (rc != SQLITE_OK) { + sqlite3_finalize(stmt); + return rc; + } + + rc = sqlite3_step(stmt); + if (rc == SQLITE_ROW) { + if (chunk_count) *chunk_count = sqlite3_column_int(stmt, 0); + if (min_tokens) *min_tokens = sqlite3_column_int(stmt, 1); + if (min_truncated) *min_truncated = sqlite3_column_int(stmt, 2); + if (max_truncated) *max_truncated = sqlite3_column_int(stmt, 3); + rc = SQLITE_OK; + } + + sqlite3_finalize(stmt); + return rc; +} + // ============================================================================ // Phase 1: Setup // ============================================================================ @@ -242,6 +270,24 @@ TEST(verify_embedding) { sqlite3_finalize(stmt); } +// Verify remote embedding metadata is persisted on the stored chunk. 
+TEST(verify_embedding_metadata) { + sqlite3_stmt *stmt = NULL; + int rc = sqlite3_prepare_v2(db, + "SELECT n_tokens, truncated FROM dbmem_vault LIMIT 1;", + -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK); + ASSERT(sqlite3_step(stmt) == SQLITE_ROW); + + int n_tokens = sqlite3_column_int(stmt, 0); + int truncated = sqlite3_column_int(stmt, 1); + sqlite3_finalize(stmt); + + ASSERT(n_tokens > 0); + ASSERT(truncated == 0); + printf("(n_tokens=%d, truncated=%d) ", n_tokens, truncated); +} + // memory_add_text with context (triggers remote embedding) TEST(memory_add_text_context) { ASSERT_SQL_OK(db, "SELECT memory_add_text('SQLite is a C-language library that implements a small, fast, self-contained SQL database engine.', 'test-context');"); @@ -424,6 +470,703 @@ TEST(memory_search_statement_reuse) { sqlite3_finalize(stmt); } +// ============================================================================ +// Phase 4b: Long-text chunking + multi-section retrieval +// ============================================================================ + +// A long text with 4 clearly distinct sections, each tagged with a unique +// anchor token so we can verify both (a) the chunker covers the whole text +// and (b) section-specific queries retrieve the matching chunk. +#define LONG_TEXT_ANCHOR_COOKING "ZANZIBAR-PASTA" +#define LONG_TEXT_ANCHOR_KERNEL "QUOKKA-SCHEDULER" +#define LONG_TEXT_ANCHOR_VIOLIN "TARANTELLA-BRIDGE" +#define LONG_TEXT_ANCHOR_ASTRO "BETELGEUSE-PARALLAX" + +static const char *LONG_TEXT = + // Section 1 - cooking + "Cooking pasta well begins with abundant salted water at a rolling boil. " + "The " LONG_TEXT_ANCHOR_COOKING " technique calls for finishing the noodles " + "directly in the sauce, ladling in starchy cooking water until the emulsion " + "clings to each strand. Timing matters more than the package suggests: pull " + "the pasta a minute early and let the residual heat do the rest. " + "Salt aggressively. Stir often. Reserve water before draining. 
Toss vigorously. " + "Salt aggressively. Stir often. Reserve water before draining. Toss vigorously. " + "\n\n" + // Section 2 - kernel scheduling + "Operating system schedulers balance throughput against latency under load. " + "The " LONG_TEXT_ANCHOR_KERNEL " design favors short interactive tasks by " + "boosting their effective priority for a brief window after a wakeup event, " + "then decaying that boost as CPU time accumulates. This avoids starving " + "background batch work while keeping UI threads responsive. " + "Run queues, vruntime, and load balancing across cores all interact here. " + "Run queues, vruntime, and load balancing across cores all interact here. " + "\n\n" + // Section 3 - violin + "A violin's tone depends as much on setup as on the maker. The " + LONG_TEXT_ANCHOR_VIOLIN " is shaped from well-aged maple and positioned to " + "transmit string vibration to the top plate without damping the upper " + "partials. Soundpost placement, tailgut tension, and bow rosin all subtly " + "shift the instrument's voice. " + "Maple, spruce, varnish, and time. Maple, spruce, varnish, and time. " + "\n\n" + // Section 4 - astronomy + "Measuring stellar distances requires careful baseline geometry. The " + LONG_TEXT_ANCHOR_ASTRO " measurement is challenging because the star is a " + "pulsating red supergiant whose photosphere is not well defined. Modern " + "interferometry combined with Gaia astrometry has narrowed the uncertainty " + "but not eliminated it. " + "Parallax, redshift, standard candles, distance ladder. " + "Parallax, redshift, standard candles, distance ladder. "; + +// Structural: long text produces multiple chunks that fully cover the input, +// every chunk has a valid embedding, and chunk offsets are well-formed. +TEST(memory_add_long_text_chunking) { + // Force raw-text chunking so the chunk count is determined by + // max_tokens/overlay_tokens, not by markdown structure. 
+ ASSERT_SQL_OK(db, "SELECT memory_set_option('skip_semantic', 1);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('max_tokens', 80);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('overlay_tokens', 16);"); + + sqlite3_stmt *stmt = NULL; + int rc = sqlite3_prepare_v2(db, "SELECT memory_add_text(?1, 'long-text');", -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK); + rc = sqlite3_bind_text(stmt, 1, LONG_TEXT, -1, SQLITE_STATIC); + ASSERT(rc == SQLITE_OK); + ASSERT(sqlite3_step(stmt) == SQLITE_ROW); + sqlite3_finalize(stmt); + + char hash[DBMEM_HASH_STR_MAXLEN] = {0}; + rc = sqlite3_prepare_v2(db, + "SELECT hash FROM dbmem_content WHERE context = 'long-text' LIMIT 1;", + -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK && sqlite3_step(stmt) == SQLITE_ROW); + snprintf(hash, sizeof(hash), "%s", (const char *)sqlite3_column_text(stmt, 0)); + sqlite3_finalize(stmt); + ASSERT(strlen(hash) == DBMEM_HASH_HEX_LEN); + + char sql[256]; + snprintf(sql, sizeof(sql), + "SELECT COUNT(*) FROM dbmem_vault WHERE hash = '%s';", hash); + result_int = 0; + sqlite3_exec(db, sql, capture_int, NULL, NULL); + int chunk_count = result_int; + ASSERT(chunk_count >= 3); + + snprintf(sql, sizeof(sql), + "SELECT seq, offset, length, embedding, n_tokens, truncated FROM dbmem_vault " + "WHERE hash = '%s' ORDER BY seq;", hash); + rc = sqlite3_prepare_v2(db, sql, -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK); + + int prev_seq = -1; + int prev_offset = -1; + int last_offset = 0, last_length = 0; + int seen = 0; + while (sqlite3_step(stmt) == SQLITE_ROW) { + int seq = sqlite3_column_int(stmt, 0); + int offset = sqlite3_column_int(stmt, 1); + int length = sqlite3_column_int(stmt, 2); + int bytes = sqlite3_column_bytes(stmt, 3); + int n_tokens = sqlite3_column_int(stmt, 4); + int truncated = sqlite3_column_int(stmt, 5); + + ASSERT(seq == prev_seq + 1); + ASSERT(offset >= prev_offset); + ASSERT(length > 0); + ASSERT(bytes == EXPECTED_DIMENSION * (int)sizeof(float)); + ASSERT(n_tokens > 0); + ASSERT(truncated == 0); 
+ + prev_seq = seq; + prev_offset = offset; + last_offset = offset; + last_length = length; + seen++; + } + sqlite3_finalize(stmt); + ASSERT(seen == chunk_count); + + int total = (int)strlen(LONG_TEXT); + // Allow small tail slack for trailing-whitespace trimming by the parser. + ASSERT(last_offset + last_length >= total - 8); + + printf("(%d chunks covering %d bytes) ", chunk_count, total); + + // Restore defaults for downstream tests. + ASSERT_SQL_OK(db, "SELECT memory_set_option('skip_semantic', 0);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('max_tokens', 400);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('overlay_tokens', 80);"); +} + +// Retrieval: each section is reachable by a query phrase from that section. +// Asserts on anchor-token presence in the top-3 snippets, not absolute +// ranking, so minor embedding drift will not flake the test. +TEST(memory_search_long_text_sections) { + struct { const char *query; const char *anchor; } cases[] = { + { "finishing pasta in the sauce with starchy water", LONG_TEXT_ANCHOR_COOKING }, + { "boosting interactive task priority after wakeup", LONG_TEXT_ANCHOR_KERNEL }, + { "soundpost placement and string vibration", LONG_TEXT_ANCHOR_VIOLIN }, + { "measuring stellar distance with parallax", LONG_TEXT_ANCHOR_ASTRO }, + }; + int n_cases = (int)(sizeof(cases) / sizeof(cases[0])); + + ASSERT_SQL_OK(db, "SELECT memory_set_option('min_score', 0.0);"); + + int matched = 0; + for (int i = 0; i < n_cases; i++) { + sqlite3_stmt *stmt = NULL; + int rc = sqlite3_prepare_v2(db, + "SELECT snippet FROM memory_search(?1, 3);", -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK); + rc = sqlite3_bind_text(stmt, 1, cases[i].query, -1, SQLITE_STATIC); + ASSERT(rc == SQLITE_OK); + + int found = 0; + while (sqlite3_step(stmt) == SQLITE_ROW) { + const char *snippet = (const char *)sqlite3_column_text(stmt, 0); + if (snippet && strstr(snippet, cases[i].anchor)) { found = 1; break; } + } + sqlite3_finalize(stmt); + + if (!found) { + 
printf("FAILED\n Query '%s' did not retrieve anchor '%s' in top 3\n", + cases[i].query, cases[i].anchor); + tests_failed++; + tests_passed--; + return; + } + matched++; + } + + printf("(%d/%d sections retrieved) ", matched, n_cases); + + ASSERT_SQL_OK(db, "SELECT memory_set_option('min_score', 0.7);"); +} + +// ============================================================================ +// Phase 4c: Single-chunk near the provider token ceiling +// ============================================================================ + +// Control test for memory_search_truncation_signature below: same setup +// (single-chunk-everything, pure-vector ranking, leading-mosaics + tail- +// vents text alongside a short vent reference) but the long text is sized +// to land *under* vectors.space's 1024-token batch ceiling. Expectations: +// +// 1) The long chunk embeds successfully (no provider rejection). +// 2) Stored as exactly one chunk in dbmem_vault. +// 3) A tail-topic query retrieves both the short reference and the long +// chunk in the top-10 — confirming the tail was included in the +// embedding when the input fit in one batch. +// +// Sized at ~5200 bytes. Empirical calibration: 7159 / 9346 / 10075 bytes +// all rejected with the same "input (1026 tokens)" template (so "1026" is +// not a real count — just an "exceeded" sentinel). 7159 / 1024 ≈ 7.0 +// chars-per-token actual ratio for this filler, so 5200 bytes ≈ ~740 +// tokens — clear of the 1024 ceiling. 
+TEST(memory_search_under_token_limit) { + ASSERT_SQL_OK(db, "SELECT memory_set_option('skip_semantic', 1);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('max_tokens', 2048);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('overlay_tokens', 0);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('vector_weight', 1.0);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('text_weight', 0.0);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('min_score', 0.0);"); + + static const char *SHORT_REF = + "Hydrothermal vents on the deep ocean floor sustain chemosynthetic " + "microbial ecosystems independent of sunlight. Tubeworms and " + "thermophilic archaea metabolize sulfur compounds emitted by the " + "vent fluids in total darkness."; + + sqlite3_stmt *stmt = NULL; + int rc = sqlite3_prepare_v2(db, + "SELECT memory_add_text(?1, 'under-limit-short');", -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK); + rc = sqlite3_bind_text(stmt, 1, SHORT_REF, -1, SQLITE_STATIC); + ASSERT(rc == SQLITE_OK); + ASSERT(sqlite3_step(stmt) == SQLITE_ROW); + sqlite3_finalize(stmt); + + static const char *MOSAIC_LEAD = + "Andalusian zellige mosaics from medieval Granada and Cordoba feature " + "interlocking geometric tiles arranged in repeating decagonal motifs " + "of cobalt and ochre glaze. "; + static const char *MOSAIC_FILLER = + "Master craftsmen historically cut tesserae from glazed terracotta " + "and fit them into intricate patterns whose mathematical foundations " + "anticipate aperiodic tilings by centuries; pigments include lapis " + "lazuli, copper carbonate, and iron oxides. 
"; + static const char *VENT_TAIL = + " And entirely separately, deep ocean hydrothermal vents host " + "chemosynthetic communities of microbial mats, tubeworms, and " + "thermophilic archaea metabolizing sulfur compounds in total darkness."; + + size_t cap = 16 * 1024; + char *long_text = (char *)malloc(cap); + ASSERT(long_text != NULL); + size_t pos = 0; + int n = snprintf(long_text + pos, cap - pos, "%s", MOSAIC_LEAD); + pos += (size_t)n; + while (pos < 5000 + && pos + strlen(MOSAIC_FILLER) + strlen(VENT_TAIL) + 4 < cap) { + n = snprintf(long_text + pos, cap - pos, "%s", MOSAIC_FILLER); + if (n <= 0) break; + pos += (size_t)n; + } + n = snprintf(long_text + pos, cap - pos, "%s", VENT_TAIL); + pos += (size_t)n; + int long_text_len = (int)pos; + + rc = sqlite3_prepare_v2(db, + "SELECT memory_add_text(?1, 'under-limit-long');", -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK); + rc = sqlite3_bind_text(stmt, 1, long_text, long_text_len, SQLITE_STATIC); + ASSERT(rc == SQLITE_OK); + rc = sqlite3_step(stmt); + if (rc != SQLITE_ROW) { + printf("FAILED\n memory_add_text(%d bytes) returned rc=%d\n sqlite error: %s\n", + long_text_len, rc, sqlite3_errmsg(db)); + sqlite3_finalize(stmt); + free(long_text); + tests_failed++; + tests_passed--; + return; + } + sqlite3_finalize(stmt); + free(long_text); + + char short_hash[DBMEM_HASH_STR_MAXLEN] = {0}; + char long_hash[DBMEM_HASH_STR_MAXLEN] = {0}; + rc = sqlite3_prepare_v2(db, + "SELECT hash FROM dbmem_content WHERE context = 'under-limit-short' LIMIT 1;", + -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK && sqlite3_step(stmt) == SQLITE_ROW); + snprintf(short_hash, sizeof(short_hash), "%s", (const char *)sqlite3_column_text(stmt, 0)); + sqlite3_finalize(stmt); + + rc = sqlite3_prepare_v2(db, + "SELECT hash FROM dbmem_content WHERE context = 'under-limit-long' LIMIT 1;", + -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK && sqlite3_step(stmt) == SQLITE_ROW); + snprintf(long_hash, sizeof(long_hash), "%s", (const char *)sqlite3_column_text(stmt, 
0)); + sqlite3_finalize(stmt); + + // Single chunk, length around ~5KB but under the rejection threshold. + char sql[256]; + snprintf(sql, sizeof(sql), + "SELECT COUNT(*) FROM dbmem_vault WHERE hash = '%s';", long_hash); + result_int = 0; + sqlite3_exec(db, sql, capture_int, NULL, NULL); + ASSERT(result_int == 1); + + snprintf(sql, sizeof(sql), + "SELECT length FROM dbmem_vault WHERE hash = '%s' LIMIT 1;", long_hash); + result_int = 0; + sqlite3_exec(db, sql, capture_int, NULL, NULL); + int long_chunk_bytes = result_int; + ASSERT(long_chunk_bytes > 4500); + + int chunk_count = 0, min_tokens = 0, min_truncated = 0, max_truncated = 0; + rc = get_vault_metadata(short_hash, &chunk_count, &min_tokens, &min_truncated, &max_truncated); + ASSERT(rc == SQLITE_OK); + ASSERT(chunk_count == 1); + ASSERT(min_tokens > 0); + ASSERT(min_truncated == 0 && max_truncated == 0); + + rc = get_vault_metadata(long_hash, &chunk_count, &min_tokens, &min_truncated, &max_truncated); + ASSERT(rc == SQLITE_OK); + ASSERT(chunk_count == 1); + ASSERT(min_tokens > 0); + ASSERT(min_truncated == 0 && max_truncated == 0); + + // Same query as the truncation test; with the full chunk embedded we + // expect both the short ref and the long chunk to surface in top-10. 
+ rc = sqlite3_prepare_v2(db, + "SELECT hash, ranking FROM memory_search(" + " 'chemosynthesis around deep-sea volcanic vents', 10);", + -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK); + + int short_rank = -1, long_rank = -1; + double short_score = 0.0, long_score = 0.0; + int row = 0; + while (sqlite3_step(stmt) == SQLITE_ROW) { + const char *hash = (const char *)sqlite3_column_text(stmt, 0); + double rank = sqlite3_column_double(stmt, 1); + if (hash && strcmp(hash, short_hash) == 0) { + short_rank = row; short_score = rank; + } + if (hash && strcmp(hash, long_hash) == 0) { + long_rank = row; long_score = rank; + } + row++; + } + sqlite3_finalize(stmt); + + ASSERT(short_rank >= 0); + ASSERT(long_rank >= 0); + + printf("(%d bytes; short rank=%d score=%.3f, long rank=%d score=%.3f) ", + long_chunk_bytes, short_rank, short_score, long_rank, long_score); + + ASSERT_SQL_OK(db, "SELECT memory_set_option('skip_semantic', 0);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('max_tokens', 400);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('overlay_tokens', 80);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('vector_weight', 0.6);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('text_weight', 0.4);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('min_score', 0.7);"); +} + +// ============================================================================ +// Phase 4d: Model-level truncation behavioral signature +// ============================================================================ + +// When a single chunk exceeds the embedding model's input context window +// (embeddinggemma-300m: ~2048 tokens), the service truncates and returns an +// embedding that only represents the leading portion. The truncated flag is +// persisted on dbmem_vault, and this test also checks the observable search +// behavior: +// +// 1) Store a SHORT reference (fully embedded) entirely about topic T. 
+// 2) Store a LONG single-chunk document whose LEADING ~10KB is about an +// unrelated topic and whose final ~250 bytes (well past the 2048-token +// window) introduce topic T. +// 3) Search for topic T with pure-vector ranking. +// +// If the long chunk's embedding includes the tail, both should rank in the +// same neighborhood. If truncated, the long chunk's embedding only encodes +// the unrelated leading topic and ranks far below the short reference (or +// drops out of the top-K entirely). +TEST(memory_search_truncation_signature) { + ASSERT_SQL_OK(db, "SELECT memory_set_option('skip_semantic', 1);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('max_tokens', 3000);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('overlay_tokens', 0);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('vector_weight', 1.0);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('text_weight', 0.0);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('min_score', 0.0);"); + + // Short reference (~50 tokens), fully embedded, entirely about the topic. + static const char *SHORT_REF = + "Hydrothermal vents on the deep ocean floor sustain chemosynthetic " + "microbial ecosystems independent of sunlight. Tubeworms and " + "thermophilic archaea metabolize sulfur compounds emitted by the " + "vent fluids in total darkness."; + + sqlite3_stmt *stmt = NULL; + int rc = sqlite3_prepare_v2(db, + "SELECT memory_add_text(?1, 'trunc-short');", -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK); + rc = sqlite3_bind_text(stmt, 1, SHORT_REF, -1, SQLITE_STATIC); + ASSERT(rc == SQLITE_OK); + ASSERT(sqlite3_step(stmt) == SQLITE_ROW); + sqlite3_finalize(stmt); + + // Build ~10KB single-chunk text: leading + filler about Andalusian + // mosaics, then a final ~250-byte tail introducing hydrothermal vents. + // ~10KB / ~4 chars-per-token ≈ 2500 tokens — past gemma's 2048 window. 
+ static const char *MOSAIC_LEAD = + "Andalusian zellige mosaics from medieval Granada and Cordoba feature " + "interlocking geometric tiles arranged in repeating decagonal motifs " + "of cobalt and ochre glaze. "; + static const char *MOSAIC_FILLER = + "Master craftsmen historically cut tesserae from glazed terracotta " + "and fit them into intricate patterns whose mathematical foundations " + "anticipate aperiodic tilings by centuries; pigments include lapis " + "lazuli, copper carbonate, and iron oxides. "; + static const char *VENT_TAIL = + " And entirely separately, deep ocean hydrothermal vents host " + "chemosynthetic communities of microbial mats, tubeworms, and " + "thermophilic archaea metabolizing sulfur compounds in total darkness."; + + size_t cap = 16 * 1024; + char *long_text = (char *)malloc(cap); + ASSERT(long_text != NULL); + size_t pos = 0; + int n = snprintf(long_text + pos, cap - pos, "%s", MOSAIC_LEAD); + pos += (size_t)n; + while (pos < 9800 + && pos + strlen(MOSAIC_FILLER) + strlen(VENT_TAIL) + 4 < cap) { + n = snprintf(long_text + pos, cap - pos, "%s", MOSAIC_FILLER); + if (n <= 0) break; + pos += (size_t)n; + } + n = snprintf(long_text + pos, cap - pos, "%s", VENT_TAIL); + pos += (size_t)n; + int long_text_len = (int)pos; + + rc = sqlite3_prepare_v2(db, + "SELECT memory_add_text(?1, 'trunc-long');", -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK); + rc = sqlite3_bind_text(stmt, 1, long_text, long_text_len, SQLITE_STATIC); + ASSERT(rc == SQLITE_OK); + rc = sqlite3_step(stmt); + if (rc != SQLITE_ROW) { + printf("FAILED\n memory_add_text(%d bytes) returned rc=%d\n sqlite error: %s\n", + long_text_len, rc, sqlite3_errmsg(db)); + sqlite3_finalize(stmt); + free(long_text); + tests_failed++; + tests_passed--; + return; + } + sqlite3_finalize(stmt); + free(long_text); + + // Capture both hashes. 
+ char short_hash[DBMEM_HASH_STR_MAXLEN] = {0}; + char long_hash[DBMEM_HASH_STR_MAXLEN] = {0}; + rc = sqlite3_prepare_v2(db, + "SELECT hash FROM dbmem_content WHERE context = 'trunc-short' LIMIT 1;", + -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK && sqlite3_step(stmt) == SQLITE_ROW); + snprintf(short_hash, sizeof(short_hash), "%s", (const char *)sqlite3_column_text(stmt, 0)); + sqlite3_finalize(stmt); + + rc = sqlite3_prepare_v2(db, + "SELECT hash FROM dbmem_content WHERE context = 'trunc-long' LIMIT 1;", + -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK && sqlite3_step(stmt) == SQLITE_ROW); + snprintf(long_hash, sizeof(long_hash), "%s", (const char *)sqlite3_column_text(stmt, 0)); + sqlite3_finalize(stmt); + + // Confirm the long content stored as one chunk past gemma's window. + char sql[256]; + snprintf(sql, sizeof(sql), + "SELECT COUNT(*) FROM dbmem_vault WHERE hash = '%s';", long_hash); + result_int = 0; + sqlite3_exec(db, sql, capture_int, NULL, NULL); + ASSERT(result_int == 1); + + snprintf(sql, sizeof(sql), + "SELECT length FROM dbmem_vault WHERE hash = '%s' LIMIT 1;", long_hash); + result_int = 0; + sqlite3_exec(db, sql, capture_int, NULL, NULL); + int long_chunk_bytes = result_int; + // ~2048 tokens × ~4 chars/token = ~8192 chars; chunk must clearly exceed. + ASSERT(long_chunk_bytes > 9000); + + int chunk_count = 0, min_tokens = 0, min_truncated = 0, max_truncated = 0; + rc = get_vault_metadata(short_hash, &chunk_count, &min_tokens, &min_truncated, &max_truncated); + ASSERT(rc == SQLITE_OK); + ASSERT(chunk_count == 1); + ASSERT(min_tokens > 0); + ASSERT(min_truncated == 0 && max_truncated == 0); + + rc = get_vault_metadata(long_hash, &chunk_count, &min_tokens, &min_truncated, &max_truncated); + ASSERT(rc == SQLITE_OK); + ASSERT(chunk_count == 1); + ASSERT(min_tokens > 0); + ASSERT(min_truncated == 1 && max_truncated == 1); + + // Query for the topic that appears throughout the short reference and + // only in the *tail* of the long chunk. 
Paraphrased so any residual FTS + // contribution would match both texts roughly equally. + rc = sqlite3_prepare_v2(db, + "SELECT hash, ranking FROM memory_search(" + " 'chemosynthesis around deep-sea volcanic vents', 10);", + -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK); + + int short_rank = -1, long_rank = -1; + double short_score = 0.0, long_score = 0.0; + int row = 0; + while (sqlite3_step(stmt) == SQLITE_ROW) { + const char *hash = (const char *)sqlite3_column_text(stmt, 0); + double rank = sqlite3_column_double(stmt, 1); + if (hash && strcmp(hash, short_hash) == 0) { + short_rank = row; short_score = rank; + } + if (hash && strcmp(hash, long_hash) == 0) { + long_rank = row; long_score = rank; + } + row++; + } + sqlite3_finalize(stmt); + + ASSERT(short_rank >= 0); + if (long_rank == -1) { + printf("(short rank=%d score=%.3f, long absent from top-10) ", + short_rank, short_score); + } else { + // With a fully-embedded long chunk we'd expect comparable rankings; + // truncation pushes the long chunk strictly below the short ref. 
+ ASSERT(short_rank < long_rank); + printf("(short rank=%d score=%.3f, long rank=%d score=%.3f) ", + short_rank, short_score, long_rank, long_score); + } + + ASSERT_SQL_OK(db, "SELECT memory_set_option('skip_semantic', 0);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('max_tokens', 400);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('overlay_tokens', 80);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('vector_weight', 0.6);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('text_weight', 0.4);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('min_score', 0.7);"); +} + +// ============================================================================ +// Phase 4e: Truncation signature near the model context window (~2000 tok) +// ============================================================================ + +// Same shape as memory_search_truncation_signature, but with a long text +// sized at ~19500 bytes / ~9.8 chars-per-token ≈ ~1990 tokens — close to +// embeddinggemma-300m's documented 2048-token context window. Useful for +// observing how the provider behaves further past the 1024-token batch +// ceiling: same rejection error, a different message, or (if the batch +// size is raised on the server) a successful embed where truncation +// actually occurs. +TEST(memory_search_truncation_near_model_context) { + ASSERT_SQL_OK(db, "SELECT memory_set_option('skip_semantic', 1);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('max_tokens', 6000);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('overlay_tokens', 0);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('vector_weight', 1.0);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('text_weight', 0.0);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('min_score', 0.0);"); + + static const char *SHORT_REF = + "Hydrothermal vents on the deep ocean floor sustain chemosynthetic " + "microbial ecosystems independent of sunlight. 
Tubeworms and " + "thermophilic archaea metabolize sulfur compounds emitted by the " + "vent fluids in total darkness."; + + sqlite3_stmt *stmt = NULL; + int rc = sqlite3_prepare_v2(db, + "SELECT memory_add_text(?1, 'trunc-large-short');", -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK); + rc = sqlite3_bind_text(stmt, 1, SHORT_REF, -1, SQLITE_STATIC); + ASSERT(rc == SQLITE_OK); + ASSERT(sqlite3_step(stmt) == SQLITE_ROW); + sqlite3_finalize(stmt); + + static const char *MOSAIC_LEAD = + "Andalusian zellige mosaics from medieval Granada and Cordoba feature " + "interlocking geometric tiles arranged in repeating decagonal motifs " + "of cobalt and ochre glaze. "; + static const char *MOSAIC_FILLER = + "Master craftsmen historically cut tesserae from glazed terracotta " + "and fit them into intricate patterns whose mathematical foundations " + "anticipate aperiodic tilings by centuries; pigments include lapis " + "lazuli, copper carbonate, and iron oxides. "; + static const char *VENT_TAIL = + " And entirely separately, deep ocean hydrothermal vents host " + "chemosynthetic communities of microbial mats, tubeworms, and " + "thermophilic archaea metabolizing sulfur compounds in total darkness."; + + size_t cap = 32 * 1024; + char *long_text = (char *)malloc(cap); + ASSERT(long_text != NULL); + size_t pos = 0; + int n = snprintf(long_text + pos, cap - pos, "%s", MOSAIC_LEAD); + pos += (size_t)n; + while (pos < 19300 + && pos + strlen(MOSAIC_FILLER) + strlen(VENT_TAIL) + 4 < cap) { + n = snprintf(long_text + pos, cap - pos, "%s", MOSAIC_FILLER); + if (n <= 0) break; + pos += (size_t)n; + } + n = snprintf(long_text + pos, cap - pos, "%s", VENT_TAIL); + pos += (size_t)n; + int long_text_len = (int)pos; + + rc = sqlite3_prepare_v2(db, + "SELECT memory_add_text(?1, 'trunc-large-long');", -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK); + rc = sqlite3_bind_text(stmt, 1, long_text, long_text_len, SQLITE_STATIC); + ASSERT(rc == SQLITE_OK); + rc = sqlite3_step(stmt); + if (rc != 
SQLITE_ROW) { + printf("FAILED\n memory_add_text(%d bytes) returned rc=%d\n sqlite error: %s\n", + long_text_len, rc, sqlite3_errmsg(db)); + sqlite3_finalize(stmt); + free(long_text); + tests_failed++; + tests_passed--; + return; + } + sqlite3_finalize(stmt); + free(long_text); + + char short_hash[DBMEM_HASH_STR_MAXLEN] = {0}; + char long_hash[DBMEM_HASH_STR_MAXLEN] = {0}; + rc = sqlite3_prepare_v2(db, + "SELECT hash FROM dbmem_content WHERE context = 'trunc-large-short' LIMIT 1;", + -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK && sqlite3_step(stmt) == SQLITE_ROW); + snprintf(short_hash, sizeof(short_hash), "%s", (const char *)sqlite3_column_text(stmt, 0)); + sqlite3_finalize(stmt); + + rc = sqlite3_prepare_v2(db, + "SELECT hash FROM dbmem_content WHERE context = 'trunc-large-long' LIMIT 1;", + -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK && sqlite3_step(stmt) == SQLITE_ROW); + snprintf(long_hash, sizeof(long_hash), "%s", (const char *)sqlite3_column_text(stmt, 0)); + sqlite3_finalize(stmt); + + char sql[256]; + snprintf(sql, sizeof(sql), + "SELECT COUNT(*) FROM dbmem_vault WHERE hash = '%s';", long_hash); + result_int = 0; + sqlite3_exec(db, sql, capture_int, NULL, NULL); + ASSERT(result_int == 1); + + snprintf(sql, sizeof(sql), + "SELECT length FROM dbmem_vault WHERE hash = '%s' LIMIT 1;", long_hash); + result_int = 0; + sqlite3_exec(db, sql, capture_int, NULL, NULL); + int long_chunk_bytes = result_int; + ASSERT(long_chunk_bytes > 18000); + + int chunk_count = 0, min_tokens = 0, min_truncated = 0, max_truncated = 0; + rc = get_vault_metadata(short_hash, &chunk_count, &min_tokens, &min_truncated, &max_truncated); + ASSERT(rc == SQLITE_OK); + ASSERT(chunk_count == 1); + ASSERT(min_tokens > 0); + ASSERT(min_truncated == 0 && max_truncated == 0); + + rc = get_vault_metadata(long_hash, &chunk_count, &min_tokens, &min_truncated, &max_truncated); + ASSERT(rc == SQLITE_OK); + ASSERT(chunk_count == 1); + ASSERT(min_tokens > 0); + ASSERT(min_truncated == 1 && max_truncated == 
1); + + rc = sqlite3_prepare_v2(db, + "SELECT hash, ranking FROM memory_search(" + " 'chemosynthesis around deep-sea volcanic vents', 10);", + -1, &stmt, NULL); + ASSERT(rc == SQLITE_OK); + + int short_rank = -1, long_rank = -1; + double short_score = 0.0, long_score = 0.0; + int row = 0; + while (sqlite3_step(stmt) == SQLITE_ROW) { + const char *hash = (const char *)sqlite3_column_text(stmt, 0); + double rank = sqlite3_column_double(stmt, 1); + if (hash && strcmp(hash, short_hash) == 0) { + short_rank = row; short_score = rank; + } + if (hash && strcmp(hash, long_hash) == 0) { + long_rank = row; long_score = rank; + } + row++; + } + sqlite3_finalize(stmt); + + ASSERT(short_rank >= 0); + if (long_rank == -1) { + printf("(%d bytes; short rank=%d score=%.3f, long absent from top-10) ", + long_chunk_bytes, short_rank, short_score); + } else { + ASSERT(short_rank < long_rank); + printf("(%d bytes; short rank=%d score=%.3f, long rank=%d score=%.3f) ", + long_chunk_bytes, short_rank, short_score, long_rank, long_score); + } + + ASSERT_SQL_OK(db, "SELECT memory_set_option('skip_semantic', 0);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('max_tokens', 400);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('overlay_tokens', 80);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('vector_weight', 0.6);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('text_weight', 0.4);"); + ASSERT_SQL_OK(db, "SELECT memory_set_option('min_score', 0.7);"); +} + // ============================================================================ // Phase 5: Deletion // ============================================================================ @@ -531,6 +1274,7 @@ int main(void) { // Phase 3: Content Management (network calls) RUN_TEST(memory_add_text); RUN_TEST(verify_embedding); + RUN_TEST(verify_embedding_metadata); RUN_TEST(memory_add_text_context); RUN_TEST(memory_add_text_idempotent); #ifndef DBMEM_OMIT_IO @@ -543,6 +1287,19 @@ int main(void) { RUN_TEST(memory_search_ranking); 
RUN_TEST(memory_search_statement_reuse); + // Phase 4b: Long-text chunking + multi-section retrieval + RUN_TEST(memory_add_long_text_chunking); + RUN_TEST(memory_search_long_text_sections); + + // Phase 4c: Single-chunk near (under) the provider token ceiling + RUN_TEST(memory_search_under_token_limit); + + // Phase 4d: Model-level truncation behavioral signature + RUN_TEST(memory_search_truncation_signature); + + // Phase 4e: Same shape, but text size pushed near the model context window + RUN_TEST(memory_search_truncation_near_model_context); + // Phase 5: Deletion RUN_TEST(memory_delete); RUN_TEST(memory_delete_context); From bbfa514d80eb551b1a2df3b3d8feeb726172579f Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Thu, 7 May 2026 09:30:52 -0600 Subject: [PATCH 11/13] test(e2e): print embedding metadata; differentiate SHORT_REFs Surface n_tokens and truncated from dbmem_vault in the test output for the four search tests: - memory_search_long_text_sections: aggregate (chunk count, min n_tokens, any-truncated) for the chunked corpus. - memory_search_under_token_limit, _truncation_signature, _truncation_near_model_context: per-chunk values for both the short reference and the long chunk alongside ranking/score. Also append a per-test trailing sentence to the SHORT_REF in the two truncation tests so memory_add_text's content-hash idempotency doesn't collapse them into no-ops of the under-limit test's identical SHORT_REF. Co-Authored-By: Claude Opus 4.7 (1M context) --- test/e2e.c | 76 ++++++++++++++++++++++++++++++++++++++++++++---------- 1 file changed, 63 insertions(+), 13 deletions(-) diff --git a/test/e2e.c b/test/e2e.c index d745b8c..cffbbe6 100644 --- a/test/e2e.c +++ b/test/e2e.c @@ -638,7 +638,26 @@ TEST(memory_search_long_text_sections) { matched++; } - printf("(%d/%d sections retrieved) ", matched, n_cases); + // Surface aggregate per-chunk metadata for the underlying long-text + // corpus (one row in dbmem_content, multiple chunks in dbmem_vault). 
+ char long_text_hash[DBMEM_HASH_STR_MAXLEN] = {0}; + sqlite3_stmt *hstmt = NULL; + int hrc = sqlite3_prepare_v2(db, + "SELECT hash FROM dbmem_content WHERE context = 'long-text' LIMIT 1;", + -1, &hstmt, NULL); + int chunk_count = 0, min_tokens = 0, min_truncated = 0, max_truncated = 0; + if (hrc == SQLITE_OK && sqlite3_step(hstmt) == SQLITE_ROW) { + snprintf(long_text_hash, sizeof(long_text_hash), "%s", + (const char *)sqlite3_column_text(hstmt, 0)); + sqlite3_finalize(hstmt); + get_vault_metadata(long_text_hash, &chunk_count, &min_tokens, + &min_truncated, &max_truncated); + } else { + if (hstmt) sqlite3_finalize(hstmt); + } + + printf("(%d/%d sections retrieved; %d chunks min_n_tok=%d any_trunc=%d) ", + matched, n_cases, chunk_count, min_tokens, max_truncated); ASSERT_SQL_OK(db, "SELECT memory_set_option('min_score', 0.7);"); } @@ -766,17 +785,24 @@ TEST(memory_search_under_token_limit) { ASSERT(long_chunk_bytes > 4500); int chunk_count = 0, min_tokens = 0, min_truncated = 0, max_truncated = 0; + int short_n_tokens = 0, short_truncated = 0; + int long_n_tokens = 0, long_truncated = 0; + rc = get_vault_metadata(short_hash, &chunk_count, &min_tokens, &min_truncated, &max_truncated); ASSERT(rc == SQLITE_OK); ASSERT(chunk_count == 1); ASSERT(min_tokens > 0); ASSERT(min_truncated == 0 && max_truncated == 0); + short_n_tokens = min_tokens; + short_truncated = max_truncated; rc = get_vault_metadata(long_hash, &chunk_count, &min_tokens, &min_truncated, &max_truncated); ASSERT(rc == SQLITE_OK); ASSERT(chunk_count == 1); ASSERT(min_tokens > 0); ASSERT(min_truncated == 0 && max_truncated == 0); + long_n_tokens = min_tokens; + long_truncated = max_truncated; // Same query as the truncation test; with the full chunk embedded we // expect both the short ref and the long chunk to surface in top-10. 
@@ -805,8 +831,9 @@ TEST(memory_search_under_token_limit) { ASSERT(short_rank >= 0); ASSERT(long_rank >= 0); - printf("(%d bytes; short rank=%d score=%.3f, long rank=%d score=%.3f) ", - long_chunk_bytes, short_rank, short_score, long_rank, long_score); + printf("(short: n_tok=%d trunc=%d rank=%d score=%.3f; long: %d bytes n_tok=%d trunc=%d rank=%d score=%.3f) ", + short_n_tokens, short_truncated, short_rank, short_score, + long_chunk_bytes, long_n_tokens, long_truncated, long_rank, long_score); ASSERT_SQL_OK(db, "SELECT memory_set_option('skip_semantic', 0);"); ASSERT_SQL_OK(db, "SELECT memory_set_option('max_tokens', 400);"); @@ -845,11 +872,14 @@ TEST(memory_search_truncation_signature) { ASSERT_SQL_OK(db, "SELECT memory_set_option('min_score', 0.0);"); // Short reference (~50 tokens), fully embedded, entirely about the topic. + // Trailing sentence differs per test so memory_add_text's content-hash + // idempotency doesn't collapse this insert into a no-op of an earlier + // test's identical SHORT_REF. static const char *SHORT_REF = "Hydrothermal vents on the deep ocean floor sustain chemosynthetic " "microbial ecosystems independent of sunlight. Tubeworms and " "thermophilic archaea metabolize sulfur compounds emitted by the " - "vent fluids in total darkness."; + "vent fluids in total darkness. 
Truncation-signature reference."; sqlite3_stmt *stmt = NULL; int rc = sqlite3_prepare_v2(db, @@ -945,17 +975,24 @@ TEST(memory_search_truncation_signature) { ASSERT(long_chunk_bytes > 9000); int chunk_count = 0, min_tokens = 0, min_truncated = 0, max_truncated = 0; + int short_n_tokens = 0, short_truncated = 0; + int long_n_tokens = 0, long_truncated = 0; + rc = get_vault_metadata(short_hash, &chunk_count, &min_tokens, &min_truncated, &max_truncated); ASSERT(rc == SQLITE_OK); ASSERT(chunk_count == 1); ASSERT(min_tokens > 0); ASSERT(min_truncated == 0 && max_truncated == 0); + short_n_tokens = min_tokens; + short_truncated = max_truncated; rc = get_vault_metadata(long_hash, &chunk_count, &min_tokens, &min_truncated, &max_truncated); ASSERT(rc == SQLITE_OK); ASSERT(chunk_count == 1); ASSERT(min_tokens > 0); ASSERT(min_truncated == 1 && max_truncated == 1); + long_n_tokens = min_tokens; + long_truncated = max_truncated; // Query for the topic that appears throughout the short reference and // only in the *tail* of the long chunk. Paraphrased so any residual FTS @@ -984,14 +1021,16 @@ TEST(memory_search_truncation_signature) { ASSERT(short_rank >= 0); if (long_rank == -1) { - printf("(short rank=%d score=%.3f, long absent from top-10) ", - short_rank, short_score); + printf("(short: n_tok=%d trunc=%d rank=%d score=%.3f; long: %d bytes n_tok=%d trunc=%d absent from top-10) ", + short_n_tokens, short_truncated, short_rank, short_score, + long_chunk_bytes, long_n_tokens, long_truncated); } else { // With a fully-embedded long chunk we'd expect comparable rankings; // truncation pushes the long chunk strictly below the short ref. 
ASSERT(short_rank < long_rank); - printf("(short rank=%d score=%.3f, long rank=%d score=%.3f) ", - short_rank, short_score, long_rank, long_score); + printf("(short: n_tok=%d trunc=%d rank=%d score=%.3f; long: %d bytes n_tok=%d trunc=%d rank=%d score=%.3f) ", + short_n_tokens, short_truncated, short_rank, short_score, + long_chunk_bytes, long_n_tokens, long_truncated, long_rank, long_score); } ASSERT_SQL_OK(db, "SELECT memory_set_option('skip_semantic', 0);"); @@ -1021,11 +1060,13 @@ TEST(memory_search_truncation_near_model_context) { ASSERT_SQL_OK(db, "SELECT memory_set_option('text_weight', 0.0);"); ASSERT_SQL_OK(db, "SELECT memory_set_option('min_score', 0.0);"); + // Trailing sentence differs from the other tests' SHORT_REFs so the + // content-hash idempotency in memory_add_text doesn't collapse the insert. static const char *SHORT_REF = "Hydrothermal vents on the deep ocean floor sustain chemosynthetic " "microbial ecosystems independent of sunlight. Tubeworms and " "thermophilic archaea metabolize sulfur compounds emitted by the " - "vent fluids in total darkness."; + "vent fluids in total darkness. 
Near-context reference."; sqlite3_stmt *stmt = NULL; int rc = sqlite3_prepare_v2(db, @@ -1115,17 +1156,24 @@ TEST(memory_search_truncation_near_model_context) { ASSERT(long_chunk_bytes > 18000); int chunk_count = 0, min_tokens = 0, min_truncated = 0, max_truncated = 0; + int short_n_tokens = 0, short_truncated = 0; + int long_n_tokens = 0, long_truncated = 0; + rc = get_vault_metadata(short_hash, &chunk_count, &min_tokens, &min_truncated, &max_truncated); ASSERT(rc == SQLITE_OK); ASSERT(chunk_count == 1); ASSERT(min_tokens > 0); ASSERT(min_truncated == 0 && max_truncated == 0); + short_n_tokens = min_tokens; + short_truncated = max_truncated; rc = get_vault_metadata(long_hash, &chunk_count, &min_tokens, &min_truncated, &max_truncated); ASSERT(rc == SQLITE_OK); ASSERT(chunk_count == 1); ASSERT(min_tokens > 0); ASSERT(min_truncated == 1 && max_truncated == 1); + long_n_tokens = min_tokens; + long_truncated = max_truncated; rc = sqlite3_prepare_v2(db, "SELECT hash, ranking FROM memory_search(" @@ -1151,12 +1199,14 @@ TEST(memory_search_truncation_near_model_context) { ASSERT(short_rank >= 0); if (long_rank == -1) { - printf("(%d bytes; short rank=%d score=%.3f, long absent from top-10) ", - long_chunk_bytes, short_rank, short_score); + printf("(short: n_tok=%d trunc=%d rank=%d score=%.3f; long: %d bytes n_tok=%d trunc=%d absent from top-10) ", + short_n_tokens, short_truncated, short_rank, short_score, + long_chunk_bytes, long_n_tokens, long_truncated); } else { ASSERT(short_rank < long_rank); - printf("(%d bytes; short rank=%d score=%.3f, long rank=%d score=%.3f) ", - long_chunk_bytes, short_rank, short_score, long_rank, long_score); + printf("(short: n_tok=%d trunc=%d rank=%d score=%.3f; long: %d bytes n_tok=%d trunc=%d rank=%d score=%.3f) ", + short_n_tokens, short_truncated, short_rank, short_score, + long_chunk_bytes, long_n_tokens, long_truncated, long_rank, long_score); } ASSERT_SQL_OK(db, "SELECT memory_set_option('skip_semantic', 0);"); From 
9d9494a1a5d9fc878fc9547ec13fedf8082450f3 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Thu, 7 May 2026 11:05:24 -0600 Subject: [PATCH 12/13] chore: untrack local refactor plan file MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit remote-embedding-parser-refactor-plan.md was committed by mistake — it was a local-only planning note. Remove it from tracking via git rm --cached so the working copy is preserved for the author while the file is dropped from the published tree. Co-Authored-By: Claude Opus 4.7 (1M context) --- remote-embedding-parser-refactor-plan.md | 170 ----------------------- 1 file changed, 170 deletions(-) delete mode 100644 remote-embedding-parser-refactor-plan.md diff --git a/remote-embedding-parser-refactor-plan.md b/remote-embedding-parser-refactor-plan.md deleted file mode 100644 index fbd1abb..0000000 --- a/remote-embedding-parser-refactor-plan.md +++ /dev/null @@ -1,170 +0,0 @@ -# Remote Embedding Parser Refactor Plan - -This note captures the preferred implementation plan for later work. The goal is -to make vectors.space response parsing directly testable without turning the -parser into public API or giving production logic a test-only name. - -## Goal - -Refactor `src/dbmem-rembed.c` so the JSON response parsing currently embedded in -`dbmem_remote_compute_embedding()` lives in a reusable internal function: - -```c -int dbmem_remote_parse_embedding_response(...); -``` - -The function should be production logic, used by `dbmem_remote_compute_embedding()` -and also callable from `test/unittest.c` via a manual forward declaration. - -Do not expose it in `sqlite-memory.h` or another public header. 
- -## Preferred Shape - -Use a normal internal production name, not a test-only name: - -```c -int dbmem_remote_parse_embedding_response( - const char *json, - size_t json_len, - float **embedding, - size_t *embedding_capacity, - jsmntok_t **tokens, - int *tokens_capacity, - embedding_result_t *result, - char *err_msg, - size_t err_msg_len -); -``` - -This keeps ownership explicit while avoiding exposure of `dbmem_remote_engine_t` -or a new parser-state struct in test code. - -## Production Usage - -`dbmem_remote_compute_embedding()` keeps responsibility for: - -- request construction -- HTTP transport -- HTTP status handling -- context error propagation -- aggregate remote-engine stats - -After receiving a successful HTTP 200 response, it calls: - -```c -char err_msg[DBMEM_ERRBUF_SIZE] = {0}; -int rc = dbmem_remote_parse_embedding_response( - engine->data, - engine->data_size, - &engine->embedding, - &engine->embedding_capacity, - &engine->tokens, - &engine->tokens_capacity, - result, - err_msg, - sizeof(err_msg) -); - -if (rc != 0) { - dbmem_context_set_error(engine->context, err_msg); - return -1; -} - -engine->total_tokens_processed += result->n_tokens; -engine->total_embeddings_generated++; -return 0; -``` - -## Parser Responsibility - -`dbmem_remote_parse_embedding_response()` should own: - -- parsing JSON with `jsmn` -- allocating/growing the token buffer -- locating top-level `output_dimension` -- locating `data[0].embedding` -- allocating/growing the embedding buffer -- parsing embedding floats -- reading `data[0].truncated` -- reading token metadata from `usage` -- filling `embedding_result_t` - -Token count priority should remain: - -1. `usage.exact_prompt_tokens` -2. `usage.estimated_prompt_tokens` -3. `usage.prompt_tokens` -4. 
`0` if none are present - -## Unit Test Usage - -`test/unittest.c` can manually forward-declare the function under the relevant -test guards: - -```c -#if defined(TEST_SQLITE_EXTENSION) && !defined(DBMEM_OMIT_REMOTE_ENGINE) -int dbmem_remote_parse_embedding_response( - const char *json, - size_t json_len, - float **embedding, - size_t *embedding_capacity, - jsmntok_t **tokens, - int *tokens_capacity, - embedding_result_t *result, - char *err_msg, - size_t err_msg_len -); -#endif -``` - -Tests create local buffers: - -```c -float *embedding = NULL; -size_t embedding_capacity = 0; -jsmntok_t *tokens = NULL; -int tokens_capacity = 0; -embedding_result_t result = {0}; -char err_msg[1024] = {0}; -``` - -Then call the parser with static JSON fixtures and free the buffers afterward: - -```c -dbmemory_free(embedding); -dbmemory_free(tokens); -``` - -## Fixture Tests To Add Later - -Recommended deterministic cases: - -- exact token count is preferred over estimated and prompt token counts -- estimated token count is used when exact token count is absent -- prompt token count is used when exact and estimated token counts are absent -- missing usage object leaves `result.n_tokens == 0` -- `data[0].truncated: false` maps to `result.truncated == false` -- `data[0].truncated: true` maps to `result.truncated == true` -- embedding float array is parsed correctly -- output dimension is parsed correctly -- missing `data` -- missing `embedding` -- empty embedding array -- invalid top-level response shape - -Also decide whether the parser should reject mismatches between -`output_dimension` and the embedding array length. Failing fast is likely safer, -because a dimension mismatch can break later vector initialization/search. 
- -## Why This Plan - -This approach avoids: - -- live network dependence for parser correctness tests -- exposing parser internals as public API -- duplicating parser behavior in test-only code -- coupling tests to `dbmem_remote_engine_t` -- adding a new internal header before it is needed - -The e2e test discussion can proceed separately, especially around whether token -metadata should become persisted product state or remain parser-only metadata. From b9339effe794e31b405c824157c5e3da0fc08605 Mon Sep 17 00:00:00 2001 From: Andrea Donetti Date: Thu, 7 May 2026 11:21:37 -0600 Subject: [PATCH 13/13] docs: surface n_tokens/truncated columns in API.md statistics Add example queries for the schema-version-2 columns on dbmem_vault so readers can discover token-usage aggregation and truncation diagnostics without having to read the source. Co-Authored-By: Claude Opus 4.7 (1M context) --- API.md | 15 +++++++++++++++ 1 file changed, 15 insertions(+) diff --git a/API.md b/API.md index bd9636c..694e16d 100644 --- a/API.md +++ b/API.md @@ -770,6 +770,21 @@ FROM dbmem_content WHERE last_accessed > 0 ORDER BY last_accessed DESC LIMIT 10; + +-- Tokens consumed and truncation per context +-- (n_tokens / truncated were added in schema version 2) +SELECT + COALESCE(c.context, '(none)') as context, + SUM(v.n_tokens) as tokens_processed, + SUM(v.truncated) as truncated_chunks +FROM dbmem_vault v +JOIN dbmem_content c ON c.hash = v.hash +GROUP BY c.context; + +-- Chunks that the embedding model truncated on input +SELECT hash, seq, length, n_tokens +FROM dbmem_vault +WHERE truncated = 1; ``` ---