From 01c98c7d8882681a68f400556aa7d673d1cc2d47 Mon Sep 17 00:00:00 2001
From: Dan Lynch <pyramation@gmail.com>
Date: Thu, 21 May 2026 07:06:38 +0000
Subject: [PATCH] feat: replace request_type with service + operation in
 inference log inserts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- InferenceLogEntry: requestType → service + operation fields
- Add cacheReadTokens, cacheWriteTokens, rawUsage fields
- Update logInferenceUsage SQL to match new inference_log schema
- Update llm-api.ts logInference function + both callers
- Embedding calls: service='embedding', operation='create'
- Chat calls: service='llm', operation='chat'

Coordinates with constructive-db PR #1281 (merged).
---
 graphile/graphile-llm/src/metering.ts    | 50 ++++++++++++++++++------
 graphql/server/src/middleware/llm-api.ts | 16 +++++---
 2 files changed, 47 insertions(+), 19 deletions(-)

diff --git a/graphile/graphile-llm/src/metering.ts b/graphile/graphile-llm/src/metering.ts
index 15e82800b..b775284ec 100644
--- a/graphile/graphile-llm/src/metering.ts
+++ b/graphile/graphile-llm/src/metering.ts
@@ -133,10 +133,13 @@ export interface InferenceLogEntry {
   actorId: string | null;
   model: string;
   provider: string | null;
-  requestType: 'embedding' | 'chat' | 'rag';
+  service: 'llm' | 'embedding' | 'tts' | 'stt' | 'ocr' | 'image_gen' | 'search' | 'compute';
+  operation: string;
   inputTokens: number;
   outputTokens: number;
   totalTokens: number;
+  cacheReadTokens: number | null;
+  cacheWriteTokens: number | null;
   latencyMs: number;
   ragEnabled: boolean;
   chunksRetrieved: number | null;
@@ -144,6 +147,7 @@ export interface InferenceLogEntry {
   embeddingLatencyMs: number | null;
   status: 'success' | 'quota_exceeded' | 'provider_error' | 'timeout';
   errorType: string | null;
+  rawUsage: Record<string, unknown> | null;
 }
 
 /**
@@ -161,29 +165,33 @@ export async function logInferenceUsage(
   const { schema, tableName } = ctx.inferenceLog;
   const sql = `INSERT INTO "${schema}"."${tableName}" (
     database_id, entity_id, actor_id,
-    model, provider, request_type,
+    model, provider, service, operation,
     input_tokens, output_tokens, total_tokens,
+    cache_read_tokens, cache_write_tokens,
     latency_ms, rag_enabled, chunks_retrieved,
     embedding_model, embedding_latency_ms,
-    status, error_type
+    status, error_type, raw_usage
   ) VALUES (
     $1, $2, $3,
-    $4, $5, $6,
-    $7, $8, $9,
-    $10, $11, $12,
-    $13, $14,
-    $15, $16
+    $4, $5, $6, $7,
+    $8, $9, $10,
+    $11, $12,
+    $13, $14, $15,
+    $16, $17,
+    $18, $19, $20
   )`;
 
   try {
     await ctx.withPgClient(ctx.pgSettings, async (pgClient) => {
       await pgClient.query(sql, [
         entry.databaseId, entry.entityId, entry.actorId,
-        entry.model, entry.provider, entry.requestType,
+        entry.model, entry.provider, entry.service, entry.operation,
         entry.inputTokens, entry.outputTokens, entry.totalTokens,
+        entry.cacheReadTokens, entry.cacheWriteTokens,
         entry.latencyMs, entry.ragEnabled, entry.chunksRetrieved,
         entry.embeddingModel, entry.embeddingLatencyMs,
         entry.status, entry.errorType,
+        entry.rawUsage ? JSON.stringify(entry.rawUsage) : null,
       ]);
     });
   } catch (e: unknown) {
@@ -259,10 +267,13 @@ export async function meteredEmbed(
       actorId: ctx.actorId,
       model: options.embeddingModel ?? meterSlug,
       provider: options.provider ?? null,
-      requestType: 'embedding',
+      service: 'embedding',
+      operation: 'create',
       inputTokens: placeholderAmountTokens,
       outputTokens: 0,
       totalTokens: placeholderAmountTokens,
+      cacheReadTokens: null,
+      cacheWriteTokens: null,
       latencyMs: Date.now() - startTime,
       ragEnabled: false,
       chunksRetrieved: null,
@@ -270,6 +281,7 @@ export async function meteredEmbed(
       embeddingLatencyMs: null,
       status: 'quota_exceeded',
       errorType: null,
+      rawUsage: null,
     }).catch(() => {});
 
     return {
@@ -302,10 +314,13 @@ export async function meteredEmbed(
     actorId: ctx.actorId,
     model: options.embeddingModel ?? meterSlug,
     provider: options.provider ?? null,
-    requestType: 'embedding',
+    service: 'embedding',
+    operation: 'create',
     inputTokens: placeholderAmountTokens,
     outputTokens: 0,
     totalTokens: placeholderAmountTokens,
+    cacheReadTokens: null,
+    cacheWriteTokens: null,
     latencyMs,
     ragEnabled: false,
     chunksRetrieved: null,
@@ -313,6 +328,7 @@ export async function meteredEmbed(
     embeddingLatencyMs: latencyMs,
     status: 'success',
     errorType: null,
+    rawUsage: null,
   }).catch(() => {});
 
   return {
@@ -387,10 +403,13 @@ export async function meteredChat(
       actorId: ctx.actorId,
       model: meteringOptions.chatModel ?? meterSlug,
       provider: meteringOptions.provider ?? null,
-      requestType: 'chat',
+      service: 'llm',
+      operation: 'chat',
       inputTokens: placeholderInputTokens,
       outputTokens: 0,
       totalTokens: placeholderInputTokens,
+      cacheReadTokens: null,
+      cacheWriteTokens: null,
       latencyMs: Date.now() - startTime,
       ragEnabled: false,
       chunksRetrieved: null,
@@ -398,6 +417,7 @@ export async function meteredChat(
       embeddingLatencyMs: null,
       status: 'quota_exceeded',
       errorType: null,
+      rawUsage: null,
     }).catch(() => {});
 
     return {
@@ -434,10 +454,13 @@ export async function meteredChat(
     actorId: ctx.actorId,
     model: meteringOptions.chatModel ?? meterSlug,
     provider: meteringOptions.provider ?? null,
-    requestType: 'chat',
+    service: 'llm',
+    operation: 'chat',
     inputTokens: placeholderInputTokens,
     outputTokens: placeholderOutputTokens,
     totalTokens: placeholderTotalTokens,
+    cacheReadTokens: null,
+    cacheWriteTokens: null,
     latencyMs,
     ragEnabled: false,
     chunksRetrieved: null,
@@ -445,6 +468,7 @@ export async function meteredChat(
     embeddingLatencyMs: null,
     status: 'success',
     errorType: null,
+    rawUsage: null,
   }).catch(() => {});
 
   return {
diff --git a/graphql/server/src/middleware/llm-api.ts b/graphql/server/src/middleware/llm-api.ts
index 355a8f2b7..2d98c4acd 100644
--- a/graphql/server/src/middleware/llm-api.ts
+++ b/graphql/server/src/middleware/llm-api.ts
@@ -264,7 +264,8 @@ async function logInference(
     actorId: string;
     model: string;
     provider: string;
-    requestType: string;
+    service: string;
+    operation: string;
     inputTokens: number;
     outputTokens: number;
     totalTokens: number;
@@ -276,14 +277,15 @@ async function logInference(
     await withRlsClient(pool, pgSettings, async (client) => {
       await client.query(
         `INSERT INTO "${logInfo.schemaName}"."${logInfo.tableName}"
-         (entity_id, actor_id, model, provider, request_type, input_tokens, output_tokens, total_tokens, latency_ms, status)
-         VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10)`,
+         (entity_id, actor_id, model, provider, service, operation, input_tokens, output_tokens, total_tokens, latency_ms, status)
+         VALUES ($1, $2, $3, $4, $5, $6, $7, $8, $9, $10, $11)`,
         [
           data.entityId,
           data.actorId,
           data.model,
           data.provider,
-          data.requestType,
+          data.service,
+          data.operation,
           data.inputTokens,
           data.outputTokens,
           data.totalTokens,
@@ -572,7 +574,8 @@ async function handleSendMessage(
           actorId: userId,
           model,
           provider: 'ollama',
-          requestType: 'chat',
+          service: 'llm',
+          operation: 'chat',
           inputTokens,
           outputTokens,
           totalTokens,
@@ -635,7 +638,8 @@ async function handleSendMessage(
         actorId: userId,
         model,
         provider: 'ollama',
-        requestType: 'chat',
+        service: 'llm',
+        operation: 'chat',
         inputTokens,
         outputTokens,
         totalTokens,