Skip to content
Open
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
6 changes: 3 additions & 3 deletions .github/workflows/main.yml
Original file line number Diff line number Diff line change
Expand Up @@ -364,7 +364,7 @@ jobs:
echo "::endgroup::"

echo "::group::build unittest binary for android"
make build/unittest ${{ matrix.make }} SQLITE_AMALGAM=${SQLITE_DIR}/sqlite3.c
make build/unittest ${{ matrix.make }} SQLITE_AMALGAM=${SQLITE_DIR}/sqlite3.c DEFINES="-DTEST_SQLITE_EXTENSION"
echo "::endgroup::"

echo "::group::build e2e binary for android"
Expand Down Expand Up @@ -406,12 +406,12 @@ jobs:

- name: unix test sqlite-memory
if: matrix.skip_test != true && matrix.os != 'windows-2022' && matrix.name != 'android'
run: ${{ matrix.name == 'linux-musl' && matrix.arch == 'arm64' && 'docker exec alpine' || '' }} make test ${{ matrix.make && matrix.make || ''}}
run: ${{ matrix.name == 'linux-musl' && matrix.arch == 'arm64' && 'docker exec alpine' || '' }} make test ${{ matrix.make && matrix.make || ''}} DEFINES="-DTEST_SQLITE_EXTENSION"

- name: windows test sqlite-memory
if: matrix.skip_test != true && matrix.name == 'windows'
shell: msys2 {0}
run: make test ${{ matrix.make && matrix.make || ''}}
run: make test ${{ matrix.make && matrix.make || ''}} DEFINES="-DTEST_SQLITE_EXTENSION"

- name: unix e2e sqlite-memory
if: matrix.skip_test != true && matrix.variant != 'local' && matrix.os != 'windows-2022' && matrix.name != 'android'
Expand Down
22 changes: 19 additions & 3 deletions API.md
Original file line number Diff line number Diff line change
Expand Up @@ -564,8 +564,8 @@ typedef struct {
**`dbmem_embedding_result_t` struct:**
```c
typedef struct {
int n_tokens; // Number of tokens processed
int n_tokens_truncated; // Tokens that were truncated (0 if none)
int n_tokens; // Number of processed tokens (0 if unknown)
bool truncated; // True when the input was truncated before embedding
int n_embd; // Embedding dimension
float *embedding; // Embedding vector (engine-owned, valid until next call or free)
} dbmem_embedding_result_t;
Expand All @@ -574,6 +574,7 @@ typedef struct {
**Notes:**
- Works regardless of `DBMEM_OMIT_LOCAL_ENGINE` / `DBMEM_OMIT_REMOTE_ENGINE` compile flags
- The `embedding` buffer in `dbmem_embedding_result_t` must remain valid until the next `compute` call or `free` — it is engine-owned, not copied by the caller
- `n_tokens` is metadata about the processed input when the engine can provide it; `truncated` is a boolean flag, not a truncated-token count
- Only one custom provider can be registered per connection at a time; registering again replaces the previous one
- The provider struct is copied by value; the caller does not need to keep it alive after registration

Expand All @@ -596,7 +597,7 @@ static int my_compute(void *engine, const char *text, int text_len, void *xdata,
// ... fill vec with your embedding ...
result->n_embd = e->dimension;
result->n_tokens = text_len / 4;
result->n_tokens_truncated = 0;
result->truncated = false;
result->embedding = vec;
return 0;
}
Expand Down Expand Up @@ -769,6 +770,21 @@ FROM dbmem_content
WHERE last_accessed > 0
ORDER BY last_accessed DESC
LIMIT 10;

-- Tokens consumed and truncation per context
-- (n_tokens / truncated were added in schema version 2)
SELECT
COALESCE(c.context, '(none)') as context,
SUM(v.n_tokens) as tokens_processed,
SUM(v.truncated) as truncated_chunks
FROM dbmem_vault v
JOIN dbmem_content c ON c.hash = v.hash
GROUP BY c.context;

-- Chunks that the embedding model truncated on input
SELECT hash, seq, length, n_tokens
FROM dbmem_vault
WHERE truncated = 1;
```

---
Expand Down
2 changes: 1 addition & 1 deletion Makefile
Original file line number Diff line number Diff line change
Expand Up @@ -561,7 +561,7 @@ ifeq ($(PLATFORM),windows)
else
unzip -o $(CURL_ZIP) -d $(CURL_DIR)/src/.
endif
cd $(CURL_SRC) && ./configure \
cd $(CURL_SRC) && env -u LDFLAGS -u CPPFLAGS -u CFLAGS -u LIBS ./configure \
--without-libpsl \
--disable-alt-svc \
--disable-ares \
Expand Down
2 changes: 1 addition & 1 deletion src/dbmem-embed.h
Original file line number Diff line number Diff line change
Expand Up @@ -17,7 +17,7 @@ typedef struct dbmem_remote_engine_t dbmem_remote_engine_t;
// Embedding result structure (always one embedding per call)
typedef struct {
int n_tokens; // Number of tokens processed
int n_tokens_truncated; // Number of tokens truncated (0 if none)
bool truncated; // True when the input was truncated before embedding
int n_embd; // Embedding dimension
float *embedding; // Pointer to embedding (points to engine's buffer, do not free)
} embedding_result_t;
Expand Down
6 changes: 3 additions & 3 deletions src/dbmem-lembed.c
Original file line number Diff line number Diff line change
Expand Up @@ -223,9 +223,9 @@ int dbmem_local_compute_embedding (dbmem_local_engine_t *engine, const char *tex
}

// Handle token overflow: truncate to max context size
int n_tokens_truncated = 0;
bool truncated = false;
if (n_tokens > engine->n_ctx) {
n_tokens_truncated = n_tokens - engine->n_ctx;
truncated = true;
n_tokens = engine->n_ctx;
}

Expand Down Expand Up @@ -275,7 +275,7 @@ int dbmem_local_compute_embedding (dbmem_local_engine_t *engine, const char *tex

// Fill result
result->n_tokens = n_tokens;
result->n_tokens_truncated = n_tokens_truncated;
result->truncated = truncated;
result->n_embd = engine->n_embd;
result->embedding = engine->embedding;

Expand Down
140 changes: 119 additions & 21 deletions src/dbmem-rembed.c
Original file line number Diff line number Diff line change
Expand Up @@ -210,6 +210,62 @@ static int set_json_error_message (dbmem_remote_engine_t *engine) {
return -1;
}

// Return the index of the first token *after* the subtree rooted at `index`.
// Arrays and objects are traversed recursively; every other token type
// occupies exactly one slot in the jsmn token stream.
static int dbmem_json_skip_token (const jsmntok_t *tokens, int index) {
    int next = index + 1;

    switch (tokens[index].type) {
        case JSMN_ARRAY:
            // size = number of elements; each element is a full subtree
            for (int child = 0; child < tokens[index].size; child++) {
                next = dbmem_json_skip_token(tokens, next);
            }
            break;

        case JSMN_OBJECT:
            // size = number of key/value pairs; the key is a single string
            // token, so step over it (+1) before skipping the value subtree
            for (int pair = 0; pair < tokens[index].size; pair++) {
                next = dbmem_json_skip_token(tokens, next + 1);
            }
            break;

        default:
            // strings and primitives have no children
            break;
    }

    return next;
}

// True when `token` spans exactly the bytes of the NUL-terminated `text`
// within `json` (length must match before the byte comparison runs).
static bool dbmem_json_token_equals (const char *json, const jsmntok_t *token, const char *text) {
    size_t want = strlen(text);
    if ((size_t)(token->end - token->start) != want) return false;
    return memcmp(json + token->start, text, want) == 0;
}

// Look up `key` in the JSMN_OBJECT token at `object_index` and return the
// index of its value token. Returns -1 when the key is absent, the token is
// not an object, or a malformed (non-string) key is encountered.
static int dbmem_json_object_find (const char *json, const jsmntok_t *tokens, int object_index, const char *key) {
    if (object_index < 0 || tokens[object_index].type != JSMN_OBJECT) return -1;

    int remaining = tokens[object_index].size;  // number of key/value pairs
    int cursor = object_index + 1;              // first key token of the object

    while (remaining-- > 0) {
        const jsmntok_t *key_token = &tokens[cursor];

        // Defensive: in well-formed JSON every object key is a string.
        if (key_token->type != JSMN_STRING) return -1;
        if (dbmem_json_token_equals(json, key_token, key)) return cursor + 1;

        // Jump past the value subtree to the next key token.
        cursor = dbmem_json_skip_token(tokens, cursor + 1);
    }

    return -1;
}

// Return true only for the bare JSON primitive `true`; `false`, `null`,
// numbers, and strings all yield false.
static bool dbmem_json_parse_bool (const char *json, const jsmntok_t *token) {
    if (token->type != JSMN_PRIMITIVE) return false;
    size_t span = (size_t)(token->end - token->start);
    return span == 4 && memcmp(json + token->start, "true", 4) == 0;
}

#if ENABLE_DBMEM_DEBUG_EMBEDDING
// Debug-only: dump the raw HTTP status and response body returned by the
// remote embedding endpoint. Guards against a NULL buffer so the log line
// is always well-formed.
static void dbmem_remote_debug_log_response(dbmem_remote_engine_t *engine, long http_code) {
    const char *body = "";
    if (engine->data) body = engine->data;
    DEBUG_DBMEM_ALWAYS("[dbmem-rembed] vectors.space response (HTTP %ld): %s", http_code, body);
}
#endif

// MARK: -

dbmem_remote_engine_t *dbmem_remote_engine_init (void *ctx, const char *provider, const char *model, char err_msg[DBMEM_ERRBUF_SIZE]) {
Expand Down Expand Up @@ -450,6 +506,10 @@ int dbmem_remote_compute_embedding (dbmem_remote_engine_t *engine, const char *t
sqlite3_free(response_data);
#endif

#if ENABLE_DBMEM_DEBUG_EMBEDDING
dbmem_remote_debug_log_response(engine, http_code);
#endif

if (http_code != 200) {
return set_json_error_message(engine);
}
Expand Down Expand Up @@ -482,27 +542,65 @@ int dbmem_remote_compute_embedding (dbmem_remote_engine_t *engine, const char *t
int n_embd = 0;
int prompt_tokens = 0;
int estimated_prompt_tokens = 0;
int exact_prompt_tokens = 0;
bool truncated = false;
int emb_start = -1;
size_t emb_count = 0;

for (int i = 0; i < ntokens - 1; i++) {
if (tokens[i].type != JSMN_STRING) continue;
int klen = tokens[i].end - tokens[i].start;
const char *key = engine->data + tokens[i].start;

if (klen == 9 && memcmp(key, "embedding", 9) == 0 && tokens[i + 1].type == JSMN_ARRAY) {
if (tokens[i + 1].size <= 0) {
dbmem_context_set_error(engine->context, "Invalid embedding array size in API response");
return -1;
}
emb_count = (size_t)tokens[i + 1].size;
emb_start = i + 2;
} else if (klen == 16 && memcmp(key, "output_dimension", 16) == 0) {
n_embd = atoi(engine->data + tokens[i + 1].start);
} else if (klen == 13 && memcmp(key, "prompt_tokens", 13) == 0 && tokens[i + 1].type == JSMN_PRIMITIVE) {
prompt_tokens = atoi(engine->data + tokens[i + 1].start);
} else if (klen == 23 && memcmp(key, "estimated_prompt_tokens", 23) == 0) {
estimated_prompt_tokens = atoi(engine->data + tokens[i + 1].start);
if (tokens[0].type != JSMN_OBJECT) {
dbmem_context_set_error(engine->context, "Invalid API response shape");
return -1;
}

int output_dimension_index = dbmem_json_object_find(engine->data, tokens, 0, "output_dimension");
if (output_dimension_index >= 0 && tokens[output_dimension_index].type == JSMN_PRIMITIVE) {
n_embd = atoi(engine->data + tokens[output_dimension_index].start);
}

int data_index = dbmem_json_object_find(engine->data, tokens, 0, "data");
if (data_index < 0 || tokens[data_index].type != JSMN_ARRAY || tokens[data_index].size <= 0) {
dbmem_context_set_error(engine->context, "Missing embedding data in API response");
return -1;
}

int item_index = data_index + 1;
if (tokens[item_index].type != JSMN_OBJECT) {
dbmem_context_set_error(engine->context, "Invalid embedding item in API response");
return -1;
}

int embedding_index = dbmem_json_object_find(engine->data, tokens, item_index, "embedding");
if (embedding_index < 0 || tokens[embedding_index].type != JSMN_ARRAY) {
dbmem_context_set_error(engine->context, "Missing embedding data in API response");
return -1;
}
if (tokens[embedding_index].size <= 0) {
dbmem_context_set_error(engine->context, "Invalid embedding array size in API response");
return -1;
}
emb_count = (size_t)tokens[embedding_index].size;
emb_start = embedding_index + 1;

int truncated_index = dbmem_json_object_find(engine->data, tokens, item_index, "truncated");
if (truncated_index >= 0) {
truncated = dbmem_json_parse_bool(engine->data, &tokens[truncated_index]);
}

int usage_index = dbmem_json_object_find(engine->data, tokens, 0, "usage");
if (usage_index >= 0 && tokens[usage_index].type == JSMN_OBJECT) {
int prompt_tokens_index = dbmem_json_object_find(engine->data, tokens, usage_index, "prompt_tokens");
if (prompt_tokens_index >= 0 && tokens[prompt_tokens_index].type == JSMN_PRIMITIVE) {
prompt_tokens = atoi(engine->data + tokens[prompt_tokens_index].start);
}

int exact_prompt_tokens_index = dbmem_json_object_find(engine->data, tokens, usage_index, "exact_prompt_tokens");
if (exact_prompt_tokens_index >= 0 && tokens[exact_prompt_tokens_index].type == JSMN_PRIMITIVE) {
exact_prompt_tokens = atoi(engine->data + tokens[exact_prompt_tokens_index].start);
}

int estimated_prompt_tokens_index = dbmem_json_object_find(engine->data, tokens, usage_index, "estimated_prompt_tokens");
if (estimated_prompt_tokens_index >= 0 && tokens[estimated_prompt_tokens_index].type == JSMN_PRIMITIVE) {
estimated_prompt_tokens = atoi(engine->data + tokens[estimated_prompt_tokens_index].start);
}
}

Expand Down Expand Up @@ -534,12 +632,12 @@ int dbmem_remote_compute_embedding (dbmem_remote_engine_t *engine, const char *t

// Fill result
result->n_embd = n_embd;
result->n_tokens = prompt_tokens;
result->n_tokens_truncated = (estimated_prompt_tokens > prompt_tokens) ? estimated_prompt_tokens - prompt_tokens : 0;
result->n_tokens = exact_prompt_tokens > 0 ? exact_prompt_tokens : (estimated_prompt_tokens > 0 ? estimated_prompt_tokens : prompt_tokens);
result->truncated = truncated;
result->embedding = engine->embedding;

// Update statistics
engine->total_tokens_processed += prompt_tokens;
engine->total_tokens_processed += result->n_tokens;
engine->total_embeddings_generated++;

return 0;
Expand Down
Loading
Loading