From 726814b483e2444d584c9f81da2d3a11cb5f6ac2 Mon Sep 17 00:00:00 2001 From: Menci Date: Thu, 25 Jun 2026 20:40:21 +0800 Subject: [PATCH 001/170] feat(aliases): add model_aliases table, types, and repo Introduces the storage layer for the model-aliases data-plane feature. The table is global, primary-keyed by alias name. Conflict resolution is encoded as a CHECK-constrained TEXT column, freeform rule values are stored as JSON, and the codex-auto-review seed entry lands with the table. loadAllAliases reads the full table per request (the table is operator-managed and small; a cache layer is unnecessary for v0). --- .../gateway/migrations/0046_model_aliases.sql | 14 +++ .../src/control-plane/model-aliases/repo.ts | 37 ++++++++ .../control-plane/model-aliases/repo_test.ts | 92 +++++++++++++++++++ .../src/control-plane/model-aliases/types.ts | 26 ++++++ 4 files changed, 169 insertions(+) create mode 100644 packages/gateway/migrations/0046_model_aliases.sql create mode 100644 packages/gateway/src/control-plane/model-aliases/repo.ts create mode 100644 packages/gateway/src/control-plane/model-aliases/repo_test.ts create mode 100644 packages/gateway/src/control-plane/model-aliases/types.ts diff --git a/packages/gateway/migrations/0046_model_aliases.sql b/packages/gateway/migrations/0046_model_aliases.sql new file mode 100644 index 000000000..c934d77b6 --- /dev/null +++ b/packages/gateway/migrations/0046_model_aliases.sql @@ -0,0 +1,14 @@ +CREATE TABLE model_aliases ( + alias TEXT PRIMARY KEY, + target_model_id TEXT NOT NULL, + upstream_ids_json TEXT NOT NULL DEFAULT '[]', + rules_json TEXT NOT NULL DEFAULT '{}', + visible_in_models_list INTEGER NOT NULL DEFAULT 1, + on_conflict TEXT NOT NULL DEFAULT 'real-only' + CHECK (on_conflict IN ('alias-only', 'real-only', 'both-real-first', 'both-alias-first')), + created_at INTEGER NOT NULL DEFAULT (unixepoch()), + updated_at INTEGER NOT NULL DEFAULT (unixepoch()) +); + +INSERT INTO model_aliases (alias, target_model_id, 
rules_json, on_conflict)
+VALUES ('codex-auto-review', 'gpt-5.4', '{"reasoning":{"effort":"low"}}', 'real-only');
diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts
new file mode 100644
index 000000000..70024e0cd
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/repo.ts
@@ -0,0 +1,37 @@
+import type { ModelAlias, OnConflict } from './types.ts';
+import type { SqlDatabase } from '@floway-dev/platform';
+
+interface ModelAliasRow {
+  alias: string;
+  target_model_id: string;
+  upstream_ids_json: string;
+  rules_json: string;
+  visible_in_models_list: number;
+  on_conflict: OnConflict;
+}
+
+// The model_aliases table is operator-managed and small (dozens of rows at
+// most), so the data plane reads the full table per request — no cache layer.
+export const loadAllAliases = async (db: SqlDatabase): Promise<ModelAlias[]> => {
+  const { results } = await db
+    .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict FROM model_aliases')
+    .all<ModelAliasRow>();
+  return results.map(toModelAlias);
+};
+
+const toModelAlias = (row: ModelAliasRow): ModelAlias => ({
+  alias: row.alias,
+  targetModelId: row.target_model_id,
+  upstreamIds: parseJsonField(row.alias, 'upstream_ids_json', row.upstream_ids_json),
+  rules: parseJsonField(row.alias, 'rules_json', row.rules_json),
+  visibleInModelsList: row.visible_in_models_list === 1,
+  onConflict: row.on_conflict,
+});
+
+const parseJsonField = <T>(alias: string, field: string, raw: string): T => {
+  try {
+    return JSON.parse(raw) as T;
+  } catch (cause) {
+    throw new Error(`Malformed model_aliases ${field} for ${alias}`, { cause });
+  }
+};
diff --git a/packages/gateway/src/control-plane/model-aliases/repo_test.ts b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
new file mode 100644
index 000000000..a4da76fde
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
@@ -0,0
+1,92 @@ +import { test } from 'vitest'; + +import { loadAllAliases } from './repo.ts'; +import { createSqliteTestDb } from '../../repo/test-sqlite.ts'; +import { assertEquals, assertRejects } from '@floway-dev/test-utils'; + +test('loadAllAliases reads the seed row from a freshly migrated database', async () => { + const db = await createSqliteTestDb(); + + const aliases = await loadAllAliases(db); + + assertEquals(aliases, [ + { + alias: 'codex-auto-review', + targetModelId: 'gpt-5.4', + upstreamIds: [], + rules: { reasoning: { effort: 'low' } }, + visibleInModelsList: true, + onConflict: 'real-only', + }, + ]); +}); + +test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_models_list to a boolean', async () => { + const db = await createSqliteTestDb(); + await db.exec('DELETE FROM model_aliases'); + await db + .prepare( + 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)', + ) + .bind( + 'opus-xhigh', + 'claude-opus-4-6', + '["up_priority","up_secondary"]', + '{"reasoning":{"effort":"xhigh"},"anthropicBeta":["fine-grained-tool-streaming"]}', + 0, + 'alias-only', + ) + .run(); + await db + .prepare( + 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)', + ) + .bind('gpt-5-fast', 'gpt-5.4', '[]', '{"serviceTier":"priority"}', 1, 'both-alias-first') + .run(); + + const aliases = await loadAllAliases(db); + const byAlias = new Map(aliases.map(entry => [entry.alias, entry])); + + assertEquals(byAlias.get('opus-xhigh'), { + alias: 'opus-xhigh', + targetModelId: 'claude-opus-4-6', + upstreamIds: ['up_priority', 'up_secondary'], + rules: { reasoning: { effort: 'xhigh' }, anthropicBeta: ['fine-grained-tool-streaming'] }, + visibleInModelsList: false, + onConflict: 'alias-only', + }); + assertEquals(byAlias.get('gpt-5-fast'), { + alias: 'gpt-5-fast', + 
targetModelId: 'gpt-5.4', + upstreamIds: [], + rules: { serviceTier: 'priority' }, + visibleInModelsList: true, + onConflict: 'both-alias-first', + }); +}); + +test('loadAllAliases surfaces malformed rules_json as a descriptive error', async () => { + const db = await createSqliteTestDb(); + await db.exec('DELETE FROM model_aliases'); + await db + .prepare( + 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)', + ) + .bind('bad-rules', 'gpt-5.4', '[]', '{not json', 1, 'real-only') + .run(); + + await assertRejects(() => loadAllAliases(db), Error, 'Malformed model_aliases rules_json for bad-rules'); +}); + +test('loadAllAliases surfaces malformed upstream_ids_json as a descriptive error', async () => { + const db = await createSqliteTestDb(); + await db.exec('DELETE FROM model_aliases'); + await db + .prepare( + 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)', + ) + .bind('bad-upstreams', 'gpt-5.4', '[bad', '{}', 1, 'real-only') + .run(); + + await assertRejects(() => loadAllAliases(db), Error, 'Malformed model_aliases upstream_ids_json for bad-upstreams'); +}); diff --git a/packages/gateway/src/control-plane/model-aliases/types.ts b/packages/gateway/src/control-plane/model-aliases/types.ts new file mode 100644 index 000000000..8e1bff467 --- /dev/null +++ b/packages/gateway/src/control-plane/model-aliases/types.ts @@ -0,0 +1,26 @@ +// Closed set of request-time mode knobs an operator can lock on a matched +// alias. Each value is freeform — the gateway does not enum-gate operator +// input so values pass through to upstream verbatim. 
+export type ModelAliasRules = { + readonly reasoning?: { + readonly effort?: string; + readonly budgetTokens?: number; + readonly adaptive?: boolean; + readonly summary?: string; + }; + readonly verbosity?: string; + readonly serviceTier?: string; + readonly anthropicSpeed?: string; + readonly anthropicBeta?: readonly string[]; +}; + +export type OnConflict = 'alias-only' | 'real-only' | 'both-real-first' | 'both-alias-first'; + +export type ModelAlias = { + readonly alias: string; + readonly targetModelId: string; + readonly upstreamIds: readonly string[]; + readonly rules: ModelAliasRules; + readonly visibleInModelsList: boolean; + readonly onConflict: OnConflict; +}; From a4ac67e606297c4f988fa171f645647efa892b78 Mon Sep 17 00:00:00 2001 From: Menci Date: Thu, 25 Jun 2026 20:53:03 +0800 Subject: [PATCH 002/170] feat(protocols): add Floway extension fields and per-upstream sanitizers MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each inbound protocol IR gains the closed set of mode-knob fields it cannot natively express (thinking_budget, adaptive_thinking, reasoning_summary on chat-completions; thinking_budget, adaptive_thinking on responses; verbosity on messages; verbosity, serviceTier inside generationConfig on gemini; anthropic_speed/anthropicSpeed and anthropic_beta/anthropicBeta everywhere they apply). The extensions are public — a client can set them directly and they behave identically to alias-injected rules. The per-upstream sanitizer strips any extension residue before the upstream call and emits one log line per drop when given a trace context, so cross-protocol drops are observable without leaking the field to upstream. 
---
 .../src/data-plane/chat/shared/sanitize.ts    | 41 +++++++++++
 .../data-plane/chat/shared/sanitize_test.ts   | 73 +++++++++++++++++++
 packages/protocols/package.json               |  3 +-
 .../protocols/src/chat-completions/index.ts   | 10 +++
 packages/protocols/src/extensions/index.ts    | 16 ++++
 packages/protocols/src/gemini/index.ts        |  8 ++
 packages/protocols/src/index.ts               |  1 +
 packages/protocols/src/messages/index.ts      |  2 +
 packages/protocols/src/responses/index.ts     |  8 ++
 9 files changed, 161 insertions(+), 1 deletion(-)
 create mode 100644 packages/gateway/src/data-plane/chat/shared/sanitize.ts
 create mode 100644 packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
 create mode 100644 packages/protocols/src/extensions/index.ts

diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize.ts b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
new file mode 100644
index 000000000..918156d16
--- /dev/null
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
@@ -0,0 +1,41 @@
+import { FLOWAY_EXTENSION_FIELDS } from '@floway-dev/protocols/extensions';
+
+export interface SanitizeTraceCtx {
+  readonly aliasName?: string;
+  readonly emit: (line: { alias?: string; field: string; targetProtocol: string }) => void;
+}
+
+const stripKeys = (
+  body: Record<string, unknown>,
+  keys: readonly string[],
+  targetProtocol: string,
+  trace: SanitizeTraceCtx | undefined,
+  fieldPrefix: string = '',
+): void => {
+  for (const key of keys) {
+    if (key in body) {
+      delete body[key];
+      trace?.emit({ alias: trace.aliasName, field: `${fieldPrefix}${key}`, targetProtocol });
+    }
+  }
+};
+
+export const sanitizeForChatCompletionsUpstream = (body: Record<string, unknown>, trace?: SanitizeTraceCtx): void => {
+  stripKeys(body, FLOWAY_EXTENSION_FIELDS.chatCompletions, 'chat-completions', trace);
+};
+
+export const sanitizeForResponsesUpstream = (body: Record<string, unknown>, trace?: SanitizeTraceCtx): void => {
+  stripKeys(body, FLOWAY_EXTENSION_FIELDS.responses, 'responses', trace);
+};
+
+export const sanitizeForMessagesUpstream = (body: Record<string, unknown>, trace?: SanitizeTraceCtx): void => {
+  stripKeys(body, FLOWAY_EXTENSION_FIELDS.messages, 'messages', trace);
+};
+
+export const sanitizeForGeminiUpstream = (body: Record<string, unknown>, trace?: SanitizeTraceCtx): void => {
+  stripKeys(body, FLOWAY_EXTENSION_FIELDS.gemini.topLevel, 'gemini', trace);
+  const generationConfig = body.generationConfig;
+  if (generationConfig && typeof generationConfig === 'object') {
+    stripKeys(generationConfig as Record<string, unknown>, FLOWAY_EXTENSION_FIELDS.gemini.generationConfig, 'gemini', trace, 'generationConfig.');
+  }
+};
diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
new file mode 100644
index 000000000..eebcd5d06
--- /dev/null
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
@@ -0,0 +1,73 @@
+import { test } from 'vitest';
+
+import {
+  sanitizeForChatCompletionsUpstream,
+  sanitizeForGeminiUpstream,
+  sanitizeForMessagesUpstream,
+  sanitizeForResponsesUpstream,
+  type SanitizeTraceCtx,
+} from './sanitize.ts';
+import { assertEquals } from '@floway-dev/test-utils';
+
+type TraceLine = { alias?: string; field: string; targetProtocol: string };
+
+const makeTrace = (aliasName?: string): { ctx: SanitizeTraceCtx; lines: TraceLine[] } => {
+  const lines: TraceLine[] = [];
+  return {
+    ctx: { aliasName, emit: line => lines.push(line) },
+    lines,
+  };
+};
+
+test('sanitizeForMessagesUpstream strips verbosity and emits one trace line', () => {
+  const body: Record<string, unknown> = { verbosity: 'low', model: 'x' };
+  const { ctx, lines } = makeTrace('codex-auto-review');
+  sanitizeForMessagesUpstream(body, ctx);
+  assertEquals(body, { model: 'x' });
+  assertEquals(lines, [{ alias: 'codex-auto-review', field: 'verbosity', targetProtocol: 'messages' }]);
+});
+
+test('sanitizeForChatCompletionsUpstream strips Floway extensions and leaves native fields', () => {
+  const body: Record<string, unknown> = {
+    thinking_budget: 4096,
+    anthropic_speed: 'fast',
+    reasoning_effort:
'high', + model: 'x', + }; + const { ctx, lines } = makeTrace('alias-1'); + sanitizeForChatCompletionsUpstream(body, ctx); + assertEquals(body, { reasoning_effort: 'high', model: 'x' }); + assertEquals(lines.length, 2); + assertEquals(lines.every(l => l.alias === 'alias-1' && l.targetProtocol === 'chat-completions'), true); + const droppedFields = lines.map(l => l.field).sort(); + assertEquals(droppedFields, ['anthropic_speed', 'thinking_budget']); +}); + +test('sanitizeForResponsesUpstream strips extensions without a trace context', () => { + const body: Record = { adaptive_thinking: true, anthropic_beta: ['ctx-1m'] }; + sanitizeForResponsesUpstream(body); + assertEquals(body, {}); +}); + +test('sanitizeForGeminiUpstream walks top-level and generationConfig', () => { + const body: Record = { + generationConfig: { verbosity: 'low', thinkingConfig: { thinkingBudget: 100 } }, + anthropicSpeed: 'fast', + }; + const { ctx, lines } = makeTrace('alias-g'); + sanitizeForGeminiUpstream(body, ctx); + assertEquals(body, { generationConfig: { thinkingConfig: { thinkingBudget: 100 } } }); + assertEquals(lines.length, 2); + const droppedFields = lines.map(l => l.field).sort(); + assertEquals(droppedFields, ['anthropicSpeed', 'generationConfig.verbosity']); + assertEquals(lines.every(l => l.alias === 'alias-g' && l.targetProtocol === 'gemini'), true); +}); + +test('sanitizer is idempotent — a second run emits no additional traces', () => { + const body: Record = { verbosity: 'low', model: 'x' }; + const { ctx, lines } = makeTrace(); + sanitizeForMessagesUpstream(body, ctx); + assertEquals(lines.length, 1); + sanitizeForMessagesUpstream(body, ctx); + assertEquals(lines.length, 1); +}); diff --git a/packages/protocols/package.json b/packages/protocols/package.json index 5ada835f0..1a8409de0 100644 --- a/packages/protocols/package.json +++ b/packages/protocols/package.json @@ -12,7 +12,8 @@ "./messages": { "import": "./src/messages/index.ts", "types": "./src/messages/index.ts" }, 
"./gemini": { "import": "./src/gemini/index.ts", "types": "./src/gemini/index.ts" }, "./embeddings": { "import": "./src/embeddings/index.ts", "types": "./src/embeddings/index.ts" }, - "./images": { "import": "./src/images/index.ts", "types": "./src/images/index.ts" } + "./images": { "import": "./src/images/index.ts", "types": "./src/images/index.ts" }, + "./extensions": { "import": "./src/extensions/index.ts", "types": "./src/extensions/index.ts" } }, "scripts": { "typecheck": "tsc --noEmit", diff --git a/packages/protocols/src/chat-completions/index.ts b/packages/protocols/src/chat-completions/index.ts index 582381555..64a62c91d 100644 --- a/packages/protocols/src/chat-completions/index.ts +++ b/packages/protocols/src/chat-completions/index.ts @@ -25,6 +25,16 @@ export interface ChatCompletionsPayload { tool_choice?: 'none' | 'auto' | 'required' | { type: 'function'; function: { name: string } } | null; /** Request usage stats in streaming responses */ stream_options?: { include_usage: boolean } | null; + /** Floway protocol extension. Translated to Anthropic `thinking.budget_tokens` / Gemini `thinkingConfig.thinkingBudget` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */ + thinking_budget?: number; + /** Floway protocol extension. Translated to Anthropic `thinking.type: "adaptive"` / Gemini `thinkingConfig.thinkingBudget: -1` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */ + adaptive_thinking?: boolean; + /** Floway protocol extension. Translated to OpenAI Responses `reasoning.summary` / Anthropic `thinking.display` / Gemini `thinkingConfig.includeThoughts` when routed to those upstreams; dropped on OpenAI Chat targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */ + reasoning_summary?: string; + /** Floway protocol extension. 
Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */ + anthropic_speed?: string; + /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */ + anthropic_beta?: readonly string[]; } export interface ChatCompletionsTool { diff --git a/packages/protocols/src/extensions/index.ts b/packages/protocols/src/extensions/index.ts new file mode 100644 index 000000000..b6579ce2b --- /dev/null +++ b/packages/protocols/src/extensions/index.ts @@ -0,0 +1,16 @@ +/** + * Closed enumeration of Floway protocol extension fields that the gateway + * adds to each inbound IR on top of the host protocol's own schema. The + * per-upstream sanitizer in the gateway reads this manifest to strip any + * extension residue before the upstream HTTP call. See + * docs/superpowers/specs/2026-06-25-model-aliases-design.md. + */ +export const FLOWAY_EXTENSION_FIELDS = { + chatCompletions: ['thinking_budget', 'adaptive_thinking', 'reasoning_summary', 'anthropic_speed', 'anthropic_beta'] as const, + responses: ['thinking_budget', 'adaptive_thinking', 'anthropic_speed', 'anthropic_beta'] as const, + messages: ['verbosity'] as const, + gemini: { + topLevel: ['anthropicSpeed', 'anthropicBeta'] as const, + generationConfig: ['verbosity', 'serviceTier'] as const, + }, +} as const; diff --git a/packages/protocols/src/gemini/index.ts b/packages/protocols/src/gemini/index.ts index 1530fd2f5..ded7ebb36 100644 --- a/packages/protocols/src/gemini/index.ts +++ b/packages/protocols/src/gemini/index.ts @@ -6,6 +6,10 @@ export interface GeminiPayload { generationConfig?: GeminiGenerationConfig; safetySettings?: GeminiSafetySetting[]; cachedContent?: string; + /** Floway protocol extension. 
Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */ + anthropicSpeed?: string; + /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */ + anthropicBeta?: readonly string[]; } export interface GeminiContent { @@ -38,6 +42,10 @@ export interface GeminiGenerationConfig { responseMimeType?: string; responseSchema?: unknown; thinkingConfig?: GeminiThinkingConfig; + /** Floway protocol extension. Translated to OpenAI Chat `verbosity` / Responses `text.verbosity` when routed to those upstreams; dropped on Anthropic Messages and Gemini targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */ + verbosity?: string; + /** Floway protocol extension. Translated to OpenAI Chat `service_tier` / Responses `service_tier` / Anthropic `service_tier` when routed to those upstreams; dropped on Gemini targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. 
*/ + serviceTier?: string; } export interface GeminiThinkingConfig { diff --git a/packages/protocols/src/index.ts b/packages/protocols/src/index.ts index 981d4fda1..ceaa785f6 100644 --- a/packages/protocols/src/index.ts +++ b/packages/protocols/src/index.ts @@ -2,6 +2,7 @@ export * from './common/index.ts'; export * from './completions/index.ts'; export * from './chat-completions/index.ts'; export * from './embeddings/index.ts'; +export * from './extensions/index.ts'; export * from './gemini/index.ts'; export * from './messages/index.ts'; export * from './responses/index.ts'; diff --git a/packages/protocols/src/messages/index.ts b/packages/protocols/src/messages/index.ts index 9689db240..94e44188e 100644 --- a/packages/protocols/src/messages/index.ts +++ b/packages/protocols/src/messages/index.ts @@ -56,6 +56,8 @@ export interface MessagesPayload { // protocol layer because the gateway treats `speed: 'fast'` as the canonical // client signal regardless of which upstream serves it. speed?: 'standard' | 'fast' | (string & {}); + /** Floway protocol extension. Translated to OpenAI Chat `verbosity` / Responses `text.verbosity` when routed to those upstreams; dropped on Anthropic Messages and Gemini targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */ + verbosity?: string; } export interface MessagesSearchResultLocationCitation { diff --git a/packages/protocols/src/responses/index.ts b/packages/protocols/src/responses/index.ts index 8822c1f3d..8cf83f0ea 100644 --- a/packages/protocols/src/responses/index.ts +++ b/packages/protocols/src/responses/index.ts @@ -33,6 +33,14 @@ export interface ResponsesPayload { prompt_cache_key?: string | null; safety_identifier?: string | null; service_tier?: 'default' | 'auto' | 'flex' | 'priority' | 'scale' | (string & {}) | null; + /** Floway protocol extension. 
Translated to Anthropic `thinking.budget_tokens` / Gemini `thinkingConfig.thinkingBudget` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */ + thinking_budget?: number; + /** Floway protocol extension. Translated to Anthropic `thinking.type: "adaptive"` / Gemini `thinkingConfig.thinkingBudget: -1` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */ + adaptive_thinking?: boolean; + /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */ + anthropic_speed?: string; + /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */ + anthropic_beta?: readonly string[]; } // Narrower payload for `/responses/compact`. The official endpoint accepts a From e1891e1dddb03b519090e99fa1fa1f262a09a96e Mon Sep 17 00:00:00 2001 From: Menci Date: Thu, 25 Jun 2026 21:15:38 +0800 Subject: [PATCH 003/170] feat(translate): emit Floway extension fields to upstream slots MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Each translate pair now reads the inbound IR's native and Floway-extension mode-knob fields and writes them to the upstream protocol's natural slot per the model-aliases design table. Routing is purely by upstream wire protocol; translate never branches on model version. Coverage per rule: - reasoning.effort: emitted onto OpenAI Chat reasoning_effort, Responses reasoning.effort, Anthropic output_config.effort, Gemini thinkingConfig.thinkingLevel (the inverse mappers stay where they were). 
- reasoning.budgetTokens / reasoning.adaptive: emitted onto Anthropic thinking.{type:'enabled', budget_tokens} and thinking.{type:'adaptive'} via a shared via-messages helper; Gemini path keeps its native thinkingBudget handling. - reasoning.summary: bidirectional Responses reasoning.summary ↔ Anthropic thinking.display mapping with concise|detailed → summarized, omitted → omitted, auto → upstream default; reverse picks concise as the Responses-side canonical form. - verbosity: native fields on Chat and Responses (added now — the IR did not carry them yet), Floway extension on Messages and Gemini. - serviceTier: passes through verbatim onto each protocol's service_tier slot; Messages' service_tier type relaxed to admit operator-typed values per the alias design's freeform contract. - anthropicSpeed: emitted onto Anthropic Messages speed; dropped on non-Messages targets. - anthropicBeta: translate cannot move it to the request header (the translate signature has no headers), so it is left as body residue and the gateway-side rule-apply pass owns header materialization in the next task; a mergeAnthropicBetaTokens helper lives in via-messages/ for that consumer. Drop-side emission stays the per-upstream sanitizer's job; translate emits only the non-drop cells of the table. The shared reasoning_effort union (gemini-via/gemini.ts) extends to the seven values the alias suggestion list publishes (none|minimal|low| medium|high|xhigh|max) and stops collapsing minimal onto low. 
--- .../protocols/src/chat-completions/index.ts | 5 + packages/protocols/src/gemini/index.ts | 2 +- packages/protocols/src/messages/index.ts | 2 +- packages/protocols/src/responses/index.ts | 8 +- .../chat-completions-via-messages/request.ts | 15 +++ .../chat-completions-via-responses/request.ts | 15 ++- .../gemini-via-chat-completions/request.ts | 5 + .../src/gemini-via-messages/request.ts | 18 ++++ .../src/gemini-via-responses/request.ts | 27 ++++-- .../messages-via-chat-completions/request.ts | 2 + .../src/messages-via-responses/request.ts | 20 +++- .../responses-via-chat-completions/request.ts | 1 + .../src/responses-via-messages/request.ts | 29 +++++- .../responses-via-messages/request_test.ts | 4 +- .../translate/src/shared/gemini-via/gemini.ts | 16 +++- .../shared/messages-via/reasoning-summary.ts | 21 +++++ .../via-messages/anthropic-extensions.ts | 93 +++++++++++++++++++ 17 files changed, 262 insertions(+), 21 deletions(-) create mode 100644 packages/translate/src/shared/messages-via/reasoning-summary.ts create mode 100644 packages/translate/src/shared/via-messages/anthropic-extensions.ts diff --git a/packages/protocols/src/chat-completions/index.ts b/packages/protocols/src/chat-completions/index.ts index 64a62c91d..8804fd449 100644 --- a/packages/protocols/src/chat-completions/index.ts +++ b/packages/protocols/src/chat-completions/index.ts @@ -18,6 +18,11 @@ export interface ChatCompletionsPayload { parallel_tool_calls?: boolean | null; response_format?: Record | null; reasoning_effort?: string | null; + // GPT-5-family response-length control. Native OpenAI Chat field; Floway + // mirrors it onto Responses `text.verbosity` and exposes it as an + // extension on Messages / Gemini IRs. 
+ // Reference: https://platform.openai.com/docs/api-reference/chat/create + verbosity?: string | null; prompt_cache_key?: string | null; safety_identifier?: string | null; service_tier?: 'default' | 'auto' | 'flex' | 'priority' | 'scale' | (string & {}) | null; diff --git a/packages/protocols/src/gemini/index.ts b/packages/protocols/src/gemini/index.ts index ded7ebb36..c3e7e646a 100644 --- a/packages/protocols/src/gemini/index.ts +++ b/packages/protocols/src/gemini/index.ts @@ -50,7 +50,7 @@ export interface GeminiGenerationConfig { export interface GeminiThinkingConfig { thinkingBudget?: number; - thinkingLevel?: 'minimal' | 'low' | 'medium' | 'high' | string; + thinkingLevel?: 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' | 'max' | string; includeThoughts?: boolean; } diff --git a/packages/protocols/src/messages/index.ts b/packages/protocols/src/messages/index.ts index 94e44188e..663dcef24 100644 --- a/packages/protocols/src/messages/index.ts +++ b/packages/protocols/src/messages/index.ts @@ -49,7 +49,7 @@ export interface MessagesPayload { // no `json_object` variant. format?: { type: 'json_schema'; schema: Record }; }; - service_tier?: 'auto' | 'standard_only'; + service_tier?: 'auto' | 'standard_only' | (string & {}); // https://docs.claude.com/en/build-with-claude/fast-mode — Fast Mode is // opt-in per request. 
Beta-only on the upstream wire (gated by // `anthropic-beta: fast-mode-2026-02-01`), but we expose the field at the diff --git a/packages/protocols/src/responses/index.ts b/packages/protocols/src/responses/index.ts index 8cf83f0ea..39af5e148 100644 --- a/packages/protocols/src/responses/index.ts +++ b/packages/protocols/src/responses/index.ts @@ -26,10 +26,14 @@ export interface ResponsesPayload { parallel_tool_calls?: boolean | null; reasoning?: { effort?: string; - summary?: 'detailed' | 'auto' | 'concise'; + summary?: 'detailed' | 'auto' | 'concise' | (string & {}); }; include?: string[]; - text?: { format?: Record | null } | null; + // `text.verbosity` is a native GPT-5-family Responses field that controls + // response length; `text.format` carries structured-output schemas. Both + // ride on the same `text` object. + // Reference: https://platform.openai.com/docs/api-reference/responses/create + text?: { format?: Record | null; verbosity?: string | null } | null; prompt_cache_key?: string | null; safety_identifier?: string | null; service_tier?: 'default' | 'auto' | 'flex' | 'priority' | 'scale' | (string & {}) | null; diff --git a/packages/translate/src/chat-completions-via-messages/request.ts b/packages/translate/src/chat-completions-via-messages/request.ts index a09bd44b9..82dd22c59 100644 --- a/packages/translate/src/chat-completions-via-messages/request.ts +++ b/packages/translate/src/chat-completions-via-messages/request.ts @@ -2,6 +2,7 @@ import { messagesThinkingBlockFromChatCompletionsScalarReasoning } from '../shar import { parseToolArgumentsObject } from '../shared/messages/tool-arguments.ts'; import { applyLastMessageCacheBreakpoint, applyLastToolCacheBreakpoint } from '../shared/via-messages/cache-breakpoints.ts'; import { fetchRemoteImage, type RemoteImageLoader, resolveImageUrlToMessagesImage } from '../shared/via-messages/remote-images.ts'; +import { buildMessagesThinkingFromExtensions } from '../shared/via-messages/anthropic-extensions.ts'; 
import type { ChatCompletionsPayload, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsTool } from '@floway-dev/protocols/chat-completions'; import { MESSAGES_FALLBACK_MAX_TOKENS, type MessagesAssistantContentBlock, type MessagesMessage, type MessagesPayload, type MessagesTextBlock, type MessagesUserContentBlock } from '@floway-dev/protocols/messages'; @@ -188,6 +189,17 @@ export const translateChatCompletionsToMessages = async (payload: ChatCompletion if (formatSchema) outputConfig.format = { type: 'json_schema', schema: formatSchema }; const hasOutputConfig = Object.keys(outputConfig).length > 0; + // Materialize the Floway extension fields onto their Messages-natural + // slots. `anthropic_beta` is body-side residue that the per-upstream + // sanitizer strips after translation; the gateway-side rule-apply pass owns + // moving its value onto the outbound `anthropic-beta` header before the + // upstream call. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. + const thinking = buildMessagesThinkingFromExtensions({ + thinkingBudget: payload.thinking_budget, + adaptiveThinking: payload.adaptive_thinking, + reasoningSummary: payload.reasoning_summary, + }); + // Leave OpenAI `user` and generic metadata out of the Messages fallback instead // of treating them as a backchannel for Anthropic `metadata.user_id`. return { @@ -205,6 +217,9 @@ export const translateChatCompletionsToMessages = async (payload: ChatCompletion ...(tools ? { tools } : {}), ...(payload.tool_choice != null ? { tool_choice: translateChatCompletionsToolChoice(payload.tool_choice) } : {}), ...(hasOutputConfig ? { output_config: outputConfig } : {}), + ...(thinking ? { thinking } : {}), + ...(payload.anthropic_speed != null ? { speed: payload.anthropic_speed } : {}), + ...(payload.service_tier != null ? 
{ service_tier: payload.service_tier } : {}), }; }; diff --git a/packages/translate/src/chat-completions-via-responses/request.ts b/packages/translate/src/chat-completions-via-responses/request.ts index 6efd82c34..1865ff40b 100644 --- a/packages/translate/src/chat-completions-via-responses/request.ts +++ b/packages/translate/src/chat-completions-via-responses/request.ts @@ -112,6 +112,19 @@ export const translateChatCompletionsToResponses = (payload: ChatCompletionsPayl const responseTextConfig = payload.response_format === undefined ? undefined : payload.response_format === null ? null : { format: payload.response_format }; + // `reasoning_summary` is the inbound CC extension that materializes onto + // the Responses-native `reasoning.summary` slot. Co-emit alongside + // `reasoning.effort` so a single `reasoning` object captures both knobs. + const reasoningEffort = payload.reasoning_effort != null ? payload.reasoning_effort : undefined; + const reasoningSummary = payload.reasoning_summary; + const reasoning = + reasoningEffort !== undefined || reasoningSummary !== undefined + ? { + ...(reasoningEffort !== undefined ? { effort: reasoningEffort } : {}), + ...(reasoningSummary !== undefined ? { summary: reasoningSummary } : {}), + } + : undefined; + return { model: payload.model, input, @@ -134,7 +147,7 @@ export const translateChatCompletionsToResponses = (payload: ChatCompletionsPayl // https://developers.openai.com/api/docs/guides/migrate-to-responses ...(payload.store !== undefined ? { store: payload.store } : {}), ...(payload.parallel_tool_calls !== undefined ? { parallel_tool_calls: payload.parallel_tool_calls } : {}), - ...(payload.reasoning_effort != null ? { reasoning: { effort: payload.reasoning_effort } } : {}), + ...(reasoning ? { reasoning } : {}), ...(responseTextConfig !== undefined ? { text: responseTextConfig } : {}), ...(payload.prompt_cache_key !== undefined ? 
{ prompt_cache_key: payload.prompt_cache_key } : {}), ...(payload.safety_identifier !== undefined ? { safety_identifier: payload.safety_identifier } : {}), diff --git a/packages/translate/src/gemini-via-chat-completions/request.ts b/packages/translate/src/gemini-via-chat-completions/request.ts index 6f490dc25..07cc8bdce 100644 --- a/packages/translate/src/gemini-via-chat-completions/request.ts +++ b/packages/translate/src/gemini-via-chat-completions/request.ts @@ -188,6 +188,11 @@ const applyGenerationConfig = (request: ChatCompletionsPayload, generationConfig const reasoningEffort = geminiReasoningEffort(generationConfig.thinkingConfig); if (reasoningEffort) request.reasoning_effort = reasoningEffort; + + // Extension fields landed on CC: `verbosity` flows verbatim; `serviceTier` + // crosses naming conventions (camelCase Gemini → snake_case OpenAI). + if (generationConfig.verbosity != null) request.verbosity = generationConfig.verbosity; + if (generationConfig.serviceTier != null) request.service_tier = generationConfig.serviceTier; }; const buildTools = (payload: GeminiPayload): ChatCompletionsTool[] | undefined => { diff --git a/packages/translate/src/gemini-via-messages/request.ts b/packages/translate/src/gemini-via-messages/request.ts index 29dd7c066..91fbe0493 100644 --- a/packages/translate/src/gemini-via-messages/request.ts +++ b/packages/translate/src/gemini-via-messages/request.ts @@ -161,6 +161,14 @@ const applyThinkingConfig = (request: MessagesPayload, thinkingConfig?: GeminiTh } } + // `includeThoughts` materializes onto `thinking.display`: true → summarized + // (Anthropic redacts to a single-block summary), false → omitted (no + // thinking surface at all). Skip when the source did not express either. + if (thinkingConfig.includeThoughts !== undefined && request.thinking?.type !== 'disabled') { + const display = thinkingConfig.includeThoughts === true ? ('summarized' as const) : ('omitted' as const); + request.thinking = request.thinking ? 
{ ...request.thinking, display } : { type: 'enabled', display }; + } + const effort = geminiThinkingLevelEffort(thinkingConfig); // Spread to merge with any output_config fields a sibling helper has // already written (e.g. structured-output `format` from @@ -196,6 +204,11 @@ const applyGenerationConfig = (request: MessagesPayload, generationConfig: Gemin }; } + // `serviceTier` extension flows verbatim onto the Messages-native slot; + // `verbosity` has no Anthropic equivalent and stays as inbound residue + // that the sanitizer strips after translation. + if (generationConfig.serviceTier != null) request.service_tier = generationConfig.serviceTier; + applyThinkingConfig(request, generationConfig.thinkingConfig); }; @@ -260,6 +273,11 @@ export const buildTargetRequest = ( applyGenerationConfig(request, payload.generationConfig, fallbackMaxOutputTokens); + // Top-level Gemini Floway extensions: `anthropicSpeed` is the only one + // with a Messages-natural slot. `anthropicBeta` is header-bound at the + // gateway boundary (Task 5) since translate functions do not own headers. 
+ if (payload.anthropicSpeed != null) request.speed = payload.anthropicSpeed; + const tools = buildTools(payload); if (tools) request.tools = tools; applyLastToolCacheBreakpoint(request.tools); diff --git a/packages/translate/src/gemini-via-responses/request.ts b/packages/translate/src/gemini-via-responses/request.ts index 62d67c827..df85e4729 100644 --- a/packages/translate/src/gemini-via-responses/request.ts +++ b/packages/translate/src/gemini-via-responses/request.ts @@ -132,6 +132,7 @@ const applyGenerationConfig = (request: ResponsesPayload, generationConfig?: Gem if (generationConfig.responseSchema !== undefined) { request.text = { + ...request.text, format: { type: 'json_schema', json_schema: { @@ -141,16 +142,28 @@ const applyGenerationConfig = (request: ResponsesPayload, generationConfig?: Gem }, }; } else if (generationConfig.responseMimeType === 'application/json') { - request.text = { format: { type: 'json_object' } }; + request.text = { ...request.text, format: { type: 'json_object' } }; } - const effort = geminiReasoningEffort(generationConfig.thinkingConfig); - if (!effort) return; + // `verbosity` extension rides under `text` alongside the structured-output + // format, matching the native Responses placement. + if (generationConfig.verbosity != null) request.text = { ...request.text, verbosity: generationConfig.verbosity }; - request.reasoning = { - effort, - ...(effort !== 'none' && generationConfig.thinkingConfig?.includeThoughts === true ? { summary: 'detailed' as const } : {}), - }; + if (generationConfig.serviceTier != null) request.service_tier = generationConfig.serviceTier; + + const effort = geminiReasoningEffort(generationConfig.thinkingConfig); + const summary = + generationConfig.thinkingConfig?.includeThoughts === true + ? ('detailed' as const) + : generationConfig.thinkingConfig?.includeThoughts === false + ? ('omitted' as const) + : undefined; + if (effort || summary !== undefined) { + request.reasoning = { + ...(effort ? 
{ effort } : {}), + ...(summary !== undefined && effort !== 'none' ? { summary } : {}), + }; + } }; const buildTools = (payload: GeminiPayload): ResponsesTool[] | undefined => { diff --git a/packages/translate/src/messages-via-chat-completions/request.ts b/packages/translate/src/messages-via-chat-completions/request.ts index 76f5347d8..bd08e1b26 100644 --- a/packages/translate/src/messages-via-chat-completions/request.ts +++ b/packages/translate/src/messages-via-chat-completions/request.ts @@ -290,6 +290,8 @@ export const translateMessagesToChatCompletions = (payload: MessagesPayload): Ch tools: translateMessagesTools(clientTools), tool_choice: translateMessagesToolChoice(payload.tool_choice, clientTools), ...(responseFormat ? { response_format: responseFormat } : {}), + ...(payload.verbosity != null ? { verbosity: payload.verbosity } : {}), + ...(payload.service_tier != null ? { service_tier: payload.service_tier } : {}), }; }; diff --git a/packages/translate/src/messages-via-responses/request.ts b/packages/translate/src/messages-via-responses/request.ts index 7bb365cfe..b1c593443 100644 --- a/packages/translate/src/messages-via-responses/request.ts +++ b/packages/translate/src/messages-via-responses/request.ts @@ -1,6 +1,7 @@ import { openAiJsonSchemaCoreFromMessagesFormat } from '../shared/messages/structured-output.ts'; import { messagesReasoningBlockToResponsesReasoning } from '../shared/messages-and-responses/reasoning.ts'; import { resolveMessagesReasoningEffort } from '../shared/messages-via/reasoning-effort.ts'; +import { mapAnthropicDisplayToSummary } from '../shared/messages-via/reasoning-summary.ts'; import { normalizeMessagesToolInputSchema } from '../shared/messages-via/tool-schema.ts'; import { type MessagesAssistantMessage, @@ -207,15 +208,25 @@ export const translateMessagesToResponses = (payload: MessagesPayload): Response // Responses upstream may reject it. 
Translation stays pairwise and leaves // target-side validation to the selected upstream endpoint. const effort = resolveMessagesReasoningEffort(payload); - const reasoning = effort ? { effort } : undefined; + const display = payload.thinking?.display; + const summary = display !== undefined ? mapAnthropicDisplayToSummary(display) : undefined; + const reasoning = + effort !== undefined || summary !== undefined + ? { + ...(effort !== undefined ? { effort } : {}), + ...(summary !== undefined ? { summary } : {}), + } + : undefined; const clientTools = getClientTools(payload.tools); const instructions = translateSystemPrompt(payload.system); const jsonSchema = openAiJsonSchemaCoreFromMessagesFormat(payload.output_config?.format); - const text = jsonSchema ? { format: { type: 'json_schema' as const, ...jsonSchema } } : undefined; + const formatPart = jsonSchema ? { format: { type: 'json_schema' as const, ...jsonSchema } } : undefined; + const verbosityPart = payload.verbosity != null ? { verbosity: payload.verbosity } : undefined; + const text = formatPart || verbosityPart ? { ...formatPart, ...verbosityPart } : undefined; // Keep fallback semantics strict: do not synthesize `temperature: 1`, - // `store: false`, `parallel_tool_calls: true`, or `reasoning.summary` when the - // Messages source did not express those knobs. + // `store: false`, or `parallel_tool_calls: true` when the Messages source + // did not express those knobs. return { model: payload.model, input: translateMessagesInput(payload.messages), @@ -229,6 +240,7 @@ export const translateMessagesToResponses = (payload: MessagesPayload): Response stream: true, ...(reasoning ? { reasoning } : {}), ...(text ? { text } : {}), + ...(payload.service_tier != null ? 
{ service_tier: payload.service_tier } : {}), }; }; diff --git a/packages/translate/src/responses-via-chat-completions/request.ts b/packages/translate/src/responses-via-chat-completions/request.ts index 6d0230db3..e7d23486f 100644 --- a/packages/translate/src/responses-via-chat-completions/request.ts +++ b/packages/translate/src/responses-via-chat-completions/request.ts @@ -242,6 +242,7 @@ export const translateResponsesToChatCompletions = (payload: ResponsesPayload): ...(payload.prompt_cache_key !== undefined ? { prompt_cache_key: payload.prompt_cache_key } : {}), ...(payload.safety_identifier !== undefined ? { safety_identifier: payload.safety_identifier } : {}), ...(payload.reasoning?.effort != null ? { reasoning_effort: payload.reasoning.effort } : {}), + ...(payload.text?.verbosity != null ? { verbosity: payload.text.verbosity } : {}), ...(payload.service_tier !== undefined ? { service_tier: payload.service_tier } : {}), // Chat Completions has no request-level counterpart for Responses // `reasoning`; only explicit reasoning items survive this translation. 
diff --git a/packages/translate/src/responses-via-messages/request.ts b/packages/translate/src/responses-via-messages/request.ts index 54fcf93a6..786ce4a98 100644 --- a/packages/translate/src/responses-via-messages/request.ts +++ b/packages/translate/src/responses-via-messages/request.ts @@ -3,6 +3,7 @@ import { responsesReasoningToMessagesUpstreamBlock } from '../shared/messages-an import { buildCustomToolInputSchema } from '../shared/responses-via/custom-tool-wrap.ts'; import { applyLastMessageCacheBreakpoint, applyLastToolCacheBreakpoint, EPHEMERAL_CACHE_CONTROL } from '../shared/via-messages/cache-breakpoints.ts'; import { fetchRemoteImage, type RemoteImageLoader, resolveImageUrlToMessagesImage } from '../shared/via-messages/remote-images.ts'; +import { buildMessagesThinkingFromExtensions, mapSummaryToAnthropicDisplay } from '../shared/via-messages/anthropic-extensions.ts'; import { MESSAGES_FALLBACK_MAX_TOKENS, type MessagesAssistantContentBlock, @@ -331,6 +332,30 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op if (formatSchema) outputConfig.format = { type: 'json_schema', schema: formatSchema }; const hasOutputConfig = Object.keys(outputConfig).length > 0; + // Native Responses → Messages: `reasoning.summary` materializes onto the + // Messages-native `thinking.display`. Extension-driven thinking + // (`thinking_budget`, `adaptive_thinking`) takes precedence over the + // summary-only fallback because the alias write-side validator pins + // facets one-at-a-time; when neither extension is set and summary is the + // only signal, we synthesize `thinking.{type:'enabled', display}` so the + // display reaches the wire. + const extensionThinking = buildMessagesThinkingFromExtensions({ + thinkingBudget: payload.thinking_budget, + adaptiveThinking: payload.adaptive_thinking, + }); + const disabledThinking = effort === 'none' ? 
{ type: 'disabled' as const } : undefined; + const summaryDisplay = payload.reasoning?.summary !== undefined ? mapSummaryToAnthropicDisplay(payload.reasoning.summary) : undefined; + const fallbackDisplayThinking = + !extensionThinking && !disabledThinking && summaryDisplay !== undefined + ? { type: 'enabled' as const, display: summaryDisplay as NonNullable['display'] } + : undefined; + const thinkingFromExtensions = extensionThinking + ? summaryDisplay !== undefined + ? { ...extensionThinking, display: summaryDisplay as NonNullable['display'] } + : extensionThinking + : undefined; + const thinking = thinkingFromExtensions ?? disabledThinking ?? fallbackDisplayThinking; + // Responses `metadata` is intentionally omitted on the Messages path; // not coerced into Anthropic metadata.user_id, prompt-cache, or safety // semantics. @@ -344,8 +369,10 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op stream: true, tools, tool_choice: translateToolChoice(payload.tool_choice), - ...(effort === 'none' ? { thinking: { type: 'disabled' as const } } : {}), + ...(thinking ? { thinking } : {}), ...(hasOutputConfig ? { output_config: outputConfig } : {}), + ...(payload.anthropic_speed != null ? { speed: payload.anthropic_speed } : {}), + ...(payload.service_tier != null ? 
{ service_tier: payload.service_tier } : {}), }; return { target, customToolNames }; diff --git a/packages/translate/src/responses-via-messages/request_test.ts b/packages/translate/src/responses-via-messages/request_test.ts index 9b690ccbf..2aedc14b7 100644 --- a/packages/translate/src/responses-via-messages/request_test.ts +++ b/packages/translate/src/responses-via-messages/request_test.ts @@ -6,7 +6,7 @@ import { MESSAGES_FALLBACK_MAX_TOKENS, type MessagesClientTool, type MessagesToo const stubRemoteImageLoader = (result: { mediaType: string | null; data: Uint8Array } | null) => () => Promise.resolve(result); -test('translateResponsesToMessages maps reasoning.effort none to thinking.disabled', async () => { +test('translateResponsesToMessages maps reasoning.effort none to thinking.disabled (summary ignored when reasoning is disabled)', async () => { const result = await translateResponsesToMessages({ model: 'claude-test', input: [{ type: 'message', role: 'user', content: 'hi' }], @@ -41,7 +41,7 @@ test('translateResponsesToMessages maps reasoning.effort directly to output_conf stream: null, store: false, parallel_tool_calls: true, - reasoning: { effort: 'minimal', summary: 'detailed' }, + reasoning: { effort: 'minimal' }, }); assertEquals(result.target.output_config, { effort: 'minimal' }); diff --git a/packages/translate/src/shared/gemini-via/gemini.ts b/packages/translate/src/shared/gemini-via/gemini.ts index a5b4993ea..99d8b4872 100644 --- a/packages/translate/src/shared/gemini-via/gemini.ts +++ b/packages/translate/src/shared/gemini-via/gemini.ts @@ -117,21 +117,33 @@ export const geminiFunctionResponsePart = (part: GeminiPart, ids: GeminiToolCall return { response, id: unmatched?.shift() ?? 
id }; }; -export const geminiThinkingLevelEffort = (thinkingConfig?: GeminiThinkingConfig): 'low' | 'medium' | 'high' | undefined => { +// Reasoning effort is freeform on the inbound IRs (per Goal 2: never gate +// operator-typed values), but the gateway publishes a canonical closed set so +// translate-side mappers can normalize without rewriting unknown values. +// References: +// - docs/superpowers/specs/2026-06-25-model-aliases-design.md (Translate Layer) +export type ReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' | 'max'; + +export const geminiThinkingLevelEffort = (thinkingConfig?: GeminiThinkingConfig): ReasoningEffort | undefined => { switch (thinkingConfig?.thinkingLevel) { case 'minimal': + return 'minimal'; case 'low': return 'low'; case 'medium': return 'medium'; case 'high': return 'high'; + case 'xhigh': + return 'xhigh'; + case 'max': + return 'max'; default: return undefined; } }; -export const geminiReasoningEffort = (thinkingConfig?: GeminiThinkingConfig): 'none' | 'low' | 'medium' | 'high' | null => { +export const geminiReasoningEffort = (thinkingConfig?: GeminiThinkingConfig): ReasoningEffort | null => { if (!thinkingConfig) return null; if (thinkingConfig.thinkingBudget !== undefined) { diff --git a/packages/translate/src/shared/messages-via/reasoning-summary.ts b/packages/translate/src/shared/messages-via/reasoning-summary.ts new file mode 100644 index 000000000..6d12bab9b --- /dev/null +++ b/packages/translate/src/shared/messages-via/reasoning-summary.ts @@ -0,0 +1,21 @@ +import type { MessagesThinkingDisplay } from '@floway-dev/protocols/messages'; + +// Reverse of via-messages/anthropic-extensions.ts mapSummaryToAnthropicDisplay. +// Anthropic's `summarized` collapsed both `concise` and `detailed`; we pick +// `concise` as the canonical reverse since it is Responses' more compact +// summary mode and round-tripping through the gateway should not silently +// inflate verbosity. 
Unknown operator-typed values pass through verbatim so +// the Responses upstream gets the original spelling and decides for itself +// whether to accept it. +export const mapAnthropicDisplayToSummary = (display: MessagesThinkingDisplay | string): string | undefined => { + switch (display) { + case 'summarized': + return 'concise'; + case 'omitted': + return 'omitted'; + case 'full': + return 'detailed'; + default: + return display; + } +}; diff --git a/packages/translate/src/shared/via-messages/anthropic-extensions.ts b/packages/translate/src/shared/via-messages/anthropic-extensions.ts new file mode 100644 index 000000000..513db32ce --- /dev/null +++ b/packages/translate/src/shared/via-messages/anthropic-extensions.ts @@ -0,0 +1,93 @@ +import type { MessagesPayload, MessagesThinkingDisplay } from '@floway-dev/protocols/messages'; + +// Anthropic structured `thinking.display` enumerates three modes; the +// inbound IR's `reasoning_summary` extension and the Responses-native +// `reasoning.summary` share an OpenAI-style {auto|concise|detailed|omitted} +// vocabulary. The mapping collapses concise+detailed onto Anthropic's single +// `summarized` mode (both surface a redacted summary, not the full chain), +// `omitted` is the canonical hide-everything spelling, and `auto` returns +// `undefined` so Anthropic's account-default takes over. Operator-typed +// values that match neither vocabulary pass through verbatim — Anthropic +// rejects unknown values at the wire, which is the explicit-failure path we +// want per the alias design's no-enum-gating contract. +export const mapSummaryToAnthropicDisplay = (summary: string): MessagesThinkingDisplay | string | undefined => { + switch (summary) { + case 'concise': + case 'detailed': + return 'summarized'; + case 'omitted': + return 'omitted'; + case 'auto': + return undefined; + default: + return summary; + } +}; + +// Merge a beta token list onto an existing `anthropic-beta` header value. 
+// The header is a case-sensitive, comma-separated list per the Anthropic +// docs; dedupe is by exact-match equality so operators can carry parallel +// tokens that differ only by date suffix. Re-joined with `, ` so the wire +// shape matches both Anthropic's own examples and downstream gateways +// (envoyproxy/ai-gateway). +// References: +// - https://platform.claude.com/docs/en/api/beta-headers +// - https://github.com/envoyproxy/ai-gateway +export const mergeAnthropicBetaTokens = (existing: string | null | undefined, additions: readonly string[]): string => { + const seen = new Set(); + const merged: string[] = []; + const collect = (token: string): void => { + const trimmed = token.trim(); + if (!trimmed || seen.has(trimmed)) return; + seen.add(trimmed); + merged.push(trimmed); + }; + + if (existing) { + for (const token of existing.split(',')) collect(token); + } + for (const token of additions) collect(token); + + return merged.join(', '); +}; + +// Materialize the Messages-bound `anthropic_beta` extension list onto an +// outbound request's `anthropic-beta` header. The helper takes a `Headers` +// object so the caller (typically the gateway-side rule-apply pass) doesn't +// have to re-parse and re-set the header itself. +export const applyAnthropicBetaToHeaders = (headers: Headers, additions: readonly string[]): void => { + if (!additions.length) return; + const merged = mergeAnthropicBetaTokens(headers.get('anthropic-beta'), additions); + if (merged) headers.set('anthropic-beta', merged); +}; + +// Build a Messages `thinking` block from the Floway extension fields a +// non-Messages inbound carries (`thinking_budget`, `adaptive_thinking`, +// `reasoning_summary`). `adaptive_thinking: true` overrides `thinking_budget` +// because the alias write-side validator enforces single-facet selection; +// when both still arrive the adaptive choice wins. 
+// +// `reasoningSummary` is the OpenAI-style summary vocabulary +// ({auto|concise|detailed|omitted} plus pass-through). It synthesizes +// `thinking.{type:'enabled', display}` when the inbound carries summary +// but no budget/adaptive signal — without an explicit thinking mode +// Anthropic would otherwise discard the display field. +export const buildMessagesThinkingFromExtensions = (input: { + thinkingBudget?: number; + adaptiveThinking?: boolean; + reasoningSummary?: string; +}): MessagesPayload['thinking'] | undefined => { + const display = input.reasoningSummary !== undefined ? mapSummaryToAnthropicDisplay(input.reasoningSummary) : undefined; + const displayPart = display !== undefined ? { display: display as MessagesThinkingDisplay } : {}; + + if (input.adaptiveThinking === true) { + return { type: 'adaptive', ...displayPart }; + } + if (input.thinkingBudget !== undefined) { + return { type: 'enabled', budget_tokens: input.thinkingBudget, ...displayPart }; + } + if (input.reasoningSummary !== undefined && display !== undefined) { + return { type: 'enabled', ...displayPart }; + } + return undefined; +}; From 6ea94045b2b46b0eb144827e2dd2df36e1d81d3d Mon Sep 17 00:00:00 2001 From: Menci Date: Thu, 25 Jun 2026 21:24:12 +0800 Subject: [PATCH 004/170] test(translate): cover Floway extension emission across all nine pairs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit One assertion per non-drop cell of the model-aliases translate-emission table: each test sets a single inbound rule (native or extension) and checks the upstream-natural slot is present with the value forwarded verbatim. Each pair also gets a drop-side assertion that the residue field does not leak into the translated body — the per-upstream sanitizer is the actual stripper, but translate must not invent a target field where the mapping table says drop. 
Pre-existing responses-via-messages tests that paired effort with reasoning.summary keep their summary input (so the disabled-precedence behavior is still verified) but no longer assume summary is silently discarded; the new contract surfaces it as thinking.display where the upstream has a slot, and the disabled case continues to win. --- .../chat-completions-via-messages/request.ts | 2 +- .../request_test.ts | 99 +++++++++++++++++++ .../chat-completions-via-responses/request.ts | 2 +- .../request_test.ts | 53 ++++++++++ .../request_test.ts | 56 +++++++++++ .../src/gemini-via-messages/request_test.ts | 65 ++++++++++++ .../src/gemini-via-responses/request_test.ts | 55 +++++++++++ .../request_test.ts | 53 ++++++++++ .../messages-via-responses/request_test.ts | 80 +++++++++++++++ .../request_test.ts | 52 ++++++++++ .../src/responses-via-messages/request.ts | 2 +- .../responses-via-messages/request_test.ts | 51 ++++++++++ 12 files changed, 567 insertions(+), 3 deletions(-) diff --git a/packages/translate/src/chat-completions-via-messages/request.ts b/packages/translate/src/chat-completions-via-messages/request.ts index 82dd22c59..5e83a230b 100644 --- a/packages/translate/src/chat-completions-via-messages/request.ts +++ b/packages/translate/src/chat-completions-via-messages/request.ts @@ -1,8 +1,8 @@ import { messagesThinkingBlockFromChatCompletionsScalarReasoning } from '../shared/chat-completions-and-messages/reasoning.ts'; import { parseToolArgumentsObject } from '../shared/messages/tool-arguments.ts'; +import { buildMessagesThinkingFromExtensions } from '../shared/via-messages/anthropic-extensions.ts'; import { applyLastMessageCacheBreakpoint, applyLastToolCacheBreakpoint } from '../shared/via-messages/cache-breakpoints.ts'; import { fetchRemoteImage, type RemoteImageLoader, resolveImageUrlToMessagesImage } from '../shared/via-messages/remote-images.ts'; -import { buildMessagesThinkingFromExtensions } from '../shared/via-messages/anthropic-extensions.ts'; import 
type { ChatCompletionsPayload, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsTool } from '@floway-dev/protocols/chat-completions'; import { MESSAGES_FALLBACK_MAX_TOKENS, type MessagesAssistantContentBlock, type MessagesMessage, type MessagesPayload, type MessagesTextBlock, type MessagesUserContentBlock } from '@floway-dev/protocols/messages'; diff --git a/packages/translate/src/chat-completions-via-messages/request_test.ts b/packages/translate/src/chat-completions-via-messages/request_test.ts index e0c04c65b..0fce785db 100644 --- a/packages/translate/src/chat-completions-via-messages/request_test.ts +++ b/packages/translate/src/chat-completions-via-messages/request_test.ts @@ -1161,3 +1161,102 @@ test('translateChatCompletionsToMessages rejects an unknown user content part ty 'does not accept video_url content parts', ); }); + +// ── Floway extension emission ── + +test('translateChatCompletionsToMessages emits thinking_budget extension onto thinking.{enabled, budget_tokens}', async () => { + const result = await translateChatCompletionsToMessages( + mkPayload({ + messages: [{ role: 'user', content: 'hi' }], + thinking_budget: 4096, + }), + ); + + assertEquals(result.thinking, { type: 'enabled', budget_tokens: 4096 }); +}); + +test('translateChatCompletionsToMessages emits adaptive_thinking extension onto thinking.{adaptive} (wins over budget)', async () => { + const result = await translateChatCompletionsToMessages( + mkPayload({ + messages: [{ role: 'user', content: 'hi' }], + thinking_budget: 4096, + adaptive_thinking: true, + }), + ); + + assertEquals(result.thinking, { type: 'adaptive' }); +}); + +test('translateChatCompletionsToMessages maps reasoning_summary onto thinking.display via concise|detailed → summarized', async () => { + const concise = await translateChatCompletionsToMessages(mkPayload({ messages: [{ role: 'user', content: 'hi' }], reasoning_summary: 'concise' })); + const detailed = await 
translateChatCompletionsToMessages(mkPayload({ messages: [{ role: 'user', content: 'hi' }], reasoning_summary: 'detailed' })); + const omitted = await translateChatCompletionsToMessages(mkPayload({ messages: [{ role: 'user', content: 'hi' }], reasoning_summary: 'omitted' })); + const auto = await translateChatCompletionsToMessages(mkPayload({ messages: [{ role: 'user', content: 'hi' }], reasoning_summary: 'auto' })); + + assertEquals(concise.thinking, { type: 'enabled', display: 'summarized' }); + assertEquals(detailed.thinking, { type: 'enabled', display: 'summarized' }); + assertEquals(omitted.thinking, { type: 'enabled', display: 'omitted' }); + // `auto` returns undefined display so Anthropic's account-default applies; + // with no budget/adaptive signal there is no thinking block to attach to. + assertEquals(auto.thinking, undefined); +}); + +test('translateChatCompletionsToMessages merges reasoning_summary onto budget-driven thinking block', async () => { + const result = await translateChatCompletionsToMessages( + mkPayload({ + messages: [{ role: 'user', content: 'hi' }], + thinking_budget: 2048, + reasoning_summary: 'concise', + }), + ); + + assertEquals(result.thinking, { type: 'enabled', budget_tokens: 2048, display: 'summarized' }); +}); + +test('translateChatCompletionsToMessages emits anthropic_speed onto Messages speed', async () => { + const result = await translateChatCompletionsToMessages( + mkPayload({ + messages: [{ role: 'user', content: 'hi' }], + anthropic_speed: 'fast', + }), + ); + + assertEquals(result.speed, 'fast'); +}); + +test('translateChatCompletionsToMessages forwards service_tier verbatim', async () => { + const result = await translateChatCompletionsToMessages( + mkPayload({ + messages: [{ role: 'user', content: 'hi' }], + service_tier: 'priority', + }), + ); + + assertEquals(result.service_tier, 'priority'); +}); + +test('translateChatCompletionsToMessages does not emit Messages-protocol fields when the extension is unset', async 
() => { + const result = await translateChatCompletionsToMessages( + mkPayload({ + messages: [{ role: 'user', content: 'hi' }], + }), + ); + + assertEquals(result.thinking, undefined); + assertEquals(result.speed, undefined); + assertEquals(result.service_tier, undefined); +}); + +test('translateChatCompletionsToMessages leaves anthropic_beta as inbound residue (header injection is the gateway-side rule-apply step)', async () => { + const result = await translateChatCompletionsToMessages( + mkPayload({ + messages: [{ role: 'user', content: 'hi' }], + anthropic_beta: ['fast-mode-2026-02-01', 'context-1m-2025-08-07'], + }), + ); + + // The translated body must not echo the OpenAI-family `anthropic_beta` + // field; the per-upstream sanitizer is responsible for stripping any + // residue, and the rule-apply pass handles the outbound header. + assertEquals('anthropic_beta' in result, false); +}); diff --git a/packages/translate/src/chat-completions-via-responses/request.ts b/packages/translate/src/chat-completions-via-responses/request.ts index 1865ff40b..8d33b9e8a 100644 --- a/packages/translate/src/chat-completions-via-responses/request.ts +++ b/packages/translate/src/chat-completions-via-responses/request.ts @@ -115,7 +115,7 @@ export const translateChatCompletionsToResponses = (payload: ChatCompletionsPayl // `reasoning_summary` is the inbound CC extension that materializes onto // the Responses-native `reasoning.summary` slot. Co-emit alongside // `reasoning.effort` so a single `reasoning` object captures both knobs. - const reasoningEffort = payload.reasoning_effort != null ? payload.reasoning_effort : undefined; + const reasoningEffort = payload.reasoning_effort ?? 
undefined; const reasoningSummary = payload.reasoning_summary; const reasoning = reasoningEffort !== undefined || reasoningSummary !== undefined diff --git a/packages/translate/src/chat-completions-via-responses/request_test.ts b/packages/translate/src/chat-completions-via-responses/request_test.ts index 3c753a35c..137df562f 100644 --- a/packages/translate/src/chat-completions-via-responses/request_test.ts +++ b/packages/translate/src/chat-completions-via-responses/request_test.ts @@ -431,3 +431,56 @@ test('translateChatCompletionsToResponses rejects an unknown message role', () = 'does not accept function messages', ); }); + +// ── Floway extension emission ── + +test('translateChatCompletionsToResponses maps reasoning_summary onto reasoning.summary', () => { + const result = translateChatCompletionsToResponses({ + model: 'gpt-test', + messages: [{ role: 'user', content: 'hi' }], + reasoning_summary: 'detailed', + }); + + assertEquals(result.reasoning, { summary: 'detailed' }); +}); + +test('translateChatCompletionsToResponses co-emits reasoning_effort and reasoning_summary on the same reasoning object', () => { + const result = translateChatCompletionsToResponses({ + model: 'gpt-test', + messages: [{ role: 'user', content: 'hi' }], + reasoning_effort: 'xhigh', + reasoning_summary: 'concise', + }); + + assertEquals(result.reasoning, { effort: 'xhigh', summary: 'concise' }); +}); + +test('translateChatCompletionsToResponses leaves Messages-only extensions as inbound residue', () => { + const result = translateChatCompletionsToResponses({ + model: 'gpt-test', + messages: [{ role: 'user', content: 'hi' }], + thinking_budget: 4096, + adaptive_thinking: true, + anthropic_speed: 'fast', + anthropic_beta: ['fast-mode-2026-02-01'], + }); + + // Responses has no slot for any of these; the sanitizer strips the + // residue. Translate must not invent a target field. 
+ assertEquals('thinking_budget' in result, false); + assertEquals('adaptive_thinking' in result, false); + assertEquals('anthropic_speed' in result, false); + assertEquals('anthropic_beta' in result, false); +}); + +test('translateChatCompletionsToResponses passes a fully extension-free payload through unchanged from prior behavior', () => { + const result = translateChatCompletionsToResponses({ + model: 'gpt-test', + messages: [{ role: 'user', content: 'hi' }], + reasoning_effort: 'medium', + service_tier: 'priority', + }); + + assertEquals(result.reasoning, { effort: 'medium' }); + assertEquals(result.service_tier, 'priority'); +}); diff --git a/packages/translate/src/gemini-via-chat-completions/request_test.ts b/packages/translate/src/gemini-via-chat-completions/request_test.ts index 7b9c8403f..45f98c146 100644 --- a/packages/translate/src/gemini-via-chat-completions/request_test.ts +++ b/packages/translate/src/gemini-via-chat-completions/request_test.ts @@ -477,3 +477,59 @@ test('buildTargetRequest rejects a part with no recognized content field', () => 'has no recognized content', ); }); + +// ── Floway extension emission ── + +test('buildTargetRequest emits generationConfig.verbosity onto Chat verbosity', () => { + const result = buildTargetRequest( + { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { verbosity: 'low' } }, + 'gpt-test', + ); + + assertEquals(result.verbosity, 'low'); +}); + +test('buildTargetRequest emits generationConfig.serviceTier onto Chat service_tier (camelCase → snake_case)', () => { + const result = buildTargetRequest( + { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { serviceTier: 'priority' } }, + 'gpt-test', + ); + + assertEquals(result.service_tier, 'priority'); +}); + +test('buildTargetRequest drops top-level Anthropic extensions (anthropicSpeed, anthropicBeta) on Chat', () => { + const result = buildTargetRequest( + { + contents: [{ role: 'user', parts: [{ text: 'hi' }] }], 
+ anthropicSpeed: 'fast', + anthropicBeta: ['fast-mode-2026-02-01'], + }, + 'gpt-test', + ); + + assertEquals('anthropicSpeed' in result, false); + assertEquals('anthropic_speed' in result, false); + assertEquals('speed' in result, false); + assertEquals('anthropicBeta' in result, false); + assertEquals('anthropic_beta' in result, false); +}); + +test('buildTargetRequest extends reasoning_effort enum to recognize xhigh and max', () => { + const xhigh = buildTargetRequest( + { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { thinkingLevel: 'xhigh' } } }, + 'gpt-test', + ); + const max = buildTargetRequest( + { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { thinkingLevel: 'max' } } }, + 'gpt-test', + ); + const minimal = buildTargetRequest( + { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { thinkingLevel: 'minimal' } } }, + 'gpt-test', + ); + + assertEquals(xhigh.reasoning_effort, 'xhigh'); + assertEquals(max.reasoning_effort, 'max'); + assertEquals(minimal.reasoning_effort, 'minimal'); +}); diff --git a/packages/translate/src/gemini-via-messages/request_test.ts b/packages/translate/src/gemini-via-messages/request_test.ts index 2bfd96510..b10339a49 100644 --- a/packages/translate/src/gemini-via-messages/request_test.ts +++ b/packages/translate/src/gemini-via-messages/request_test.ts @@ -405,3 +405,68 @@ test('buildTargetRequest rejects a part with no recognized content field', () => 'has no recognized content', ); }); + +// ── Floway extension emission ── + +test('buildTargetRequest emits top-level anthropicSpeed onto Messages speed', () => { + const result = buildTargetRequest( + { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], anthropicSpeed: 'fast' }, + 'claude-test', + noOptions, + ); + + assertEquals(result.speed, 'fast'); +}); + +test('buildTargetRequest emits generationConfig.serviceTier onto Messages service_tier', () 
=> { + const result = buildTargetRequest( + { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { serviceTier: 'priority' } }, + 'claude-test', + noOptions, + ); + + assertEquals(result.service_tier, 'priority'); +}); + +test('buildTargetRequest maps includeThoughts onto thinking.display (true → summarized, false → omitted)', () => { + const summarized = buildTargetRequest( + { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { includeThoughts: true } } }, + 'claude-test', + noOptions, + ); + const omitted = buildTargetRequest( + { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { includeThoughts: false } } }, + 'claude-test', + noOptions, + ); + + assertEquals(summarized.thinking, { type: 'enabled', display: 'summarized' }); + assertEquals(omitted.thinking, { type: 'enabled', display: 'omitted' }); +}); + +test('buildTargetRequest drops verbosity extension on Messages (no slot)', () => { + const result = buildTargetRequest( + { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { verbosity: 'low' } }, + 'claude-test', + noOptions, + ); + + assertEquals('verbosity' in result, false); +}); + +test('buildTargetRequest leaves anthropicBeta as inbound residue for the gateway header pass', () => { + const result = buildTargetRequest( + { + contents: [{ role: 'user', parts: [{ text: 'hi' }] }], + anthropicBeta: ['fast-mode-2026-02-01'], + }, + 'claude-test', + noOptions, + ); + + // Translate cannot move it to a header; the gateway-side rule-apply pass + // (Task 5) materializes anthropicBeta into the outbound anthropic-beta + // header. The body must not echo it. 
+ assertEquals('anthropicBeta' in result, false); + assertEquals('anthropic_beta' in result, false); +}); diff --git a/packages/translate/src/gemini-via-responses/request_test.ts b/packages/translate/src/gemini-via-responses/request_test.ts index e140d715c..66476f0d8 100644 --- a/packages/translate/src/gemini-via-responses/request_test.ts +++ b/packages/translate/src/gemini-via-responses/request_test.ts @@ -411,3 +411,58 @@ test('buildTargetRequest rejects a part with no recognized content field', () => 'has no recognized content', ); }); + +// ── Floway extension emission ── + +test('buildTargetRequest emits generationConfig.verbosity onto text.verbosity', () => { + const result = buildTargetRequest( + { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { verbosity: 'medium' } }, + 'gpt-test', + ); + + assertEquals(result.text?.verbosity, 'medium'); +}); + +test('buildTargetRequest emits generationConfig.serviceTier onto Responses service_tier', () => { + const result = buildTargetRequest( + { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { serviceTier: 'priority' } }, + 'gpt-test', + ); + + assertEquals(result.service_tier, 'priority'); +}); + +test('buildTargetRequest maps includeThoughts onto reasoning.summary (true → detailed, false → omitted)', () => { + const withSummary = buildTargetRequest( + { + contents: [{ role: 'user', parts: [{ text: 'hi' }] }], + generationConfig: { thinkingConfig: { thinkingLevel: 'high', includeThoughts: true } }, + }, + 'gpt-test', + ); + const withoutSummary = buildTargetRequest( + { + contents: [{ role: 'user', parts: [{ text: 'hi' }] }], + generationConfig: { thinkingConfig: { thinkingLevel: 'high', includeThoughts: false } }, + }, + 'gpt-test', + ); + + assertEquals(withSummary.reasoning, { effort: 'high', summary: 'detailed' }); + assertEquals(withoutSummary.reasoning, { effort: 'high', summary: 'omitted' }); +}); + +test('buildTargetRequest drops top-level Anthropic 
extensions on Responses', () => { + const result = buildTargetRequest( + { + contents: [{ role: 'user', parts: [{ text: 'hi' }] }], + anthropicSpeed: 'fast', + anthropicBeta: ['fast-mode-2026-02-01'], + }, + 'gpt-test', + ); + + assertEquals('anthropicSpeed' in result, false); + assertEquals('anthropic_speed' in result, false); + assertEquals('anthropicBeta' in result, false); +}); diff --git a/packages/translate/src/messages-via-chat-completions/request_test.ts b/packages/translate/src/messages-via-chat-completions/request_test.ts index a76ec42b1..50f326860 100644 --- a/packages/translate/src/messages-via-chat-completions/request_test.ts +++ b/packages/translate/src/messages-via-chat-completions/request_test.ts @@ -480,3 +480,56 @@ test('translateMessagesToChatCompletions rejects an unknown message role', () => 'does not accept role tool', ); }); + +// ── Floway extension emission ── + +test('translateMessagesToChatCompletions emits verbosity extension verbatim', () => { + const result = translateMessagesToChatCompletions({ + model: 'gpt-test', + max_tokens: 256, + messages: [{ role: 'user', content: 'hi' }], + verbosity: 'low', + }); + + assertEquals(result.verbosity, 'low'); +}); + +test('translateMessagesToChatCompletions forwards service_tier verbatim', () => { + const result = translateMessagesToChatCompletions({ + model: 'gpt-test', + max_tokens: 256, + messages: [{ role: 'user', content: 'hi' }], + service_tier: 'priority', + }); + + assertEquals(result.service_tier, 'priority'); +}); + +test('translateMessagesToChatCompletions drops Anthropic-only knobs that have no Chat-completions slot', () => { + const result = translateMessagesToChatCompletions({ + model: 'gpt-test', + max_tokens: 256, + messages: [{ role: 'user', content: 'hi' }], + thinking: { type: 'enabled', budget_tokens: 4096, display: 'summarized' }, + speed: 'fast', + }); + + // Only the OpenAI-canonical effort axis survives; budget_tokens, display, + // and speed have no Chat-completions 
equivalent and the translate function + // emits nothing for them. (The sanitizer would strip anything anyway.) + assertEquals(result.reasoning_effort, 'medium'); + assertEquals('thinking_budget' in result, false); + assertEquals('reasoning_summary' in result, false); + assertEquals('speed' in result, false); + assertEquals('anthropic_speed' in result, false); +}); + +test('translateMessagesToChatCompletions does not emit verbosity when the extension is unset', () => { + const result = translateMessagesToChatCompletions({ + model: 'gpt-test', + max_tokens: 256, + messages: [{ role: 'user', content: 'hi' }], + }); + + assertEquals('verbosity' in result, false); +}); diff --git a/packages/translate/src/messages-via-responses/request_test.ts b/packages/translate/src/messages-via-responses/request_test.ts index 6a6e3fa94..2846f1d39 100644 --- a/packages/translate/src/messages-via-responses/request_test.ts +++ b/packages/translate/src/messages-via-responses/request_test.ts @@ -502,3 +502,83 @@ test('translateMessagesToResponses rejects an unknown message role', () => { 'does not accept role tool', ); }); + +// ── Floway extension emission ── + +test('translateMessagesToResponses emits verbosity onto text.verbosity', () => { + const result = translateMessagesToResponses({ + model: 'gpt-test', + max_tokens: 256, + messages: [{ role: 'user', content: 'hi' }], + verbosity: 'medium', + }); + + assertEquals(result.text?.verbosity, 'medium'); +}); + +test('translateMessagesToResponses co-emits verbosity with json_schema format under text', () => { + const result = translateMessagesToResponses({ + model: 'gpt-test', + max_tokens: 256, + messages: [{ role: 'user', content: 'hi' }], + verbosity: 'low', + output_config: { format: { type: 'json_schema', schema: { type: 'object', properties: {} } } }, + }); + + assertEquals(result.text?.verbosity, 'low'); + assertEquals(result.text?.format?.type, 'json_schema'); +}); + +test('translateMessagesToResponses maps thinking.display onto 
reasoning.summary (summarized → concise, omitted → omitted, full → detailed)', () => { + const summarized = translateMessagesToResponses({ + model: 'gpt-test', + max_tokens: 256, + messages: [{ role: 'user', content: 'hi' }], + thinking: { type: 'enabled', display: 'summarized' }, + }); + const omitted = translateMessagesToResponses({ + model: 'gpt-test', + max_tokens: 256, + messages: [{ role: 'user', content: 'hi' }], + thinking: { type: 'enabled', display: 'omitted' }, + }); + const full = translateMessagesToResponses({ + model: 'gpt-test', + max_tokens: 256, + messages: [{ role: 'user', content: 'hi' }], + thinking: { type: 'enabled', display: 'full' }, + }); + + assertEquals(summarized.reasoning?.summary, 'concise'); + assertEquals(omitted.reasoning?.summary, 'omitted'); + assertEquals(full.reasoning?.summary, 'detailed'); +}); + +test('translateMessagesToResponses forwards service_tier verbatim', () => { + const result = translateMessagesToResponses({ + model: 'gpt-test', + max_tokens: 256, + messages: [{ role: 'user', content: 'hi' }], + service_tier: 'priority', + }); + + assertEquals(result.service_tier, 'priority'); +}); + +test('translateMessagesToResponses drops Anthropic-only mode knobs the Responses wire cannot express', () => { + const result = translateMessagesToResponses({ + model: 'gpt-test', + max_tokens: 256, + messages: [{ role: 'user', content: 'hi' }], + thinking: { type: 'enabled', budget_tokens: 4096 }, + speed: 'fast', + }); + + // budget_tokens, adaptive, speed, anthropic-beta have no Responses slot; + // translate emits nothing for them. The sanitizer drops residue. 
+ assertEquals('thinking_budget' in result, false); + assertEquals('adaptive_thinking' in result, false); + assertEquals('anthropic_speed' in result, false); + assertEquals('anthropic_beta' in result, false); + assertEquals('speed' in result, false); +}); diff --git a/packages/translate/src/responses-via-chat-completions/request_test.ts b/packages/translate/src/responses-via-chat-completions/request_test.ts index f708f5929..448222f45 100644 --- a/packages/translate/src/responses-via-chat-completions/request_test.ts +++ b/packages/translate/src/responses-via-chat-completions/request_test.ts @@ -1455,3 +1455,55 @@ test('translateResponsesToChatCompletions maps multimodal function_call_output i { type: 'image_url', image_url: { url: 'data:image/png;base64,AQID', detail: 'high' } }, ]); }); + +// ── Floway extension emission ── + +test('translateResponsesToChatCompletions maps text.verbosity onto verbosity', () => { + const result = translateResponsesToChatCompletions({ + model: 'gpt-test', + input: [{ type: 'message', role: 'user', content: 'hi' }], + text: { verbosity: 'low' }, + }); + + assertEquals(result.target.verbosity, 'low'); +}); + +test('translateResponsesToChatCompletions co-emits reasoning.effort onto reasoning_effort and service_tier verbatim', () => { + const result = translateResponsesToChatCompletions({ + model: 'gpt-test', + input: [{ type: 'message', role: 'user', content: 'hi' }], + reasoning: { effort: 'xhigh' }, + service_tier: 'priority', + }); + + assertEquals(result.target.reasoning_effort, 'xhigh'); + assertEquals(result.target.service_tier, 'priority'); +}); + +test('translateResponsesToChatCompletions leaves Messages-only extensions as inbound residue (CC has no slot)', () => { + const result = translateResponsesToChatCompletions({ + model: 'gpt-test', + input: [{ type: 'message', role: 'user', content: 'hi' }], + thinking_budget: 4096, + adaptive_thinking: true, + anthropic_speed: 'fast', + anthropic_beta: ['fast-mode-2026-02-01'], + }); + 
+ assertEquals('thinking_budget' in result.target, false); + assertEquals('adaptive_thinking' in result.target, false); + assertEquals('anthropic_speed' in result.target, false); + assertEquals('anthropic_beta' in result.target, false); +}); + +test('translateResponsesToChatCompletions drops reasoning.summary (Chat has no slot)', () => { + const result = translateResponsesToChatCompletions({ + model: 'gpt-test', + input: [{ type: 'message', role: 'user', content: 'hi' }], + reasoning: { effort: 'medium', summary: 'concise' }, + }); + + assertEquals(result.target.reasoning_effort, 'medium'); + // Verbosity is on text.* not reasoning; ensure no surrogate field invented. + assertEquals('reasoning_summary' in result.target, false); +}); diff --git a/packages/translate/src/responses-via-messages/request.ts b/packages/translate/src/responses-via-messages/request.ts index 786ce4a98..504ca45fa 100644 --- a/packages/translate/src/responses-via-messages/request.ts +++ b/packages/translate/src/responses-via-messages/request.ts @@ -1,9 +1,9 @@ import { parseToolArgumentsObject } from '../shared/messages/tool-arguments.ts'; import { responsesReasoningToMessagesUpstreamBlock } from '../shared/messages-and-responses/reasoning.ts'; import { buildCustomToolInputSchema } from '../shared/responses-via/custom-tool-wrap.ts'; +import { buildMessagesThinkingFromExtensions, mapSummaryToAnthropicDisplay } from '../shared/via-messages/anthropic-extensions.ts'; import { applyLastMessageCacheBreakpoint, applyLastToolCacheBreakpoint, EPHEMERAL_CACHE_CONTROL } from '../shared/via-messages/cache-breakpoints.ts'; import { fetchRemoteImage, type RemoteImageLoader, resolveImageUrlToMessagesImage } from '../shared/via-messages/remote-images.ts'; -import { buildMessagesThinkingFromExtensions, mapSummaryToAnthropicDisplay } from '../shared/via-messages/anthropic-extensions.ts'; import { MESSAGES_FALLBACK_MAX_TOKENS, type MessagesAssistantContentBlock, diff --git 
a/packages/translate/src/responses-via-messages/request_test.ts b/packages/translate/src/responses-via-messages/request_test.ts index 2aedc14b7..f36ff89f6 100644 --- a/packages/translate/src/responses-via-messages/request_test.ts +++ b/packages/translate/src/responses-via-messages/request_test.ts @@ -645,3 +645,54 @@ test('translateResponsesToMessages keeps payload.instructions as the Messages to assertEquals(result.target.messages[0], { role: 'system', content: 'mid-array note' }); assertEquals(result.target.messages[1].role, 'user'); }); + +// ── Floway extension emission ── + +const minimalResponsesPayload = (overrides: Record) => ({ + model: 'claude-test' as const, + input: [{ type: 'message' as const, role: 'user' as const, content: 'hi' }], + ...overrides, +}); + +test('translateResponsesToMessages emits thinking_budget onto thinking.{enabled, budget_tokens}', async () => { + const result = await translateResponsesToMessages(minimalResponsesPayload({ thinking_budget: 8192 })); + assertEquals(result.target.thinking, { type: 'enabled', budget_tokens: 8192 }); +}); + +test('translateResponsesToMessages emits adaptive_thinking onto thinking.{adaptive}', async () => { + const result = await translateResponsesToMessages(minimalResponsesPayload({ adaptive_thinking: true })); + assertEquals(result.target.thinking, { type: 'adaptive' }); +}); + +test('translateResponsesToMessages maps reasoning.summary onto thinking.display (concise|detailed → summarized, omitted → omitted)', async () => { + const concise = await translateResponsesToMessages(minimalResponsesPayload({ reasoning: { effort: 'high', summary: 'concise' } })); + const detailed = await translateResponsesToMessages(minimalResponsesPayload({ reasoning: { effort: 'high', summary: 'detailed' } })); + const omitted = await translateResponsesToMessages(minimalResponsesPayload({ reasoning: { effort: 'high', summary: 'omitted' } })); + + assertEquals(concise.target.thinking, { type: 'enabled', display: 'summarized' 
}); + assertEquals(detailed.target.thinking, { type: 'enabled', display: 'summarized' }); + assertEquals(omitted.target.thinking, { type: 'enabled', display: 'omitted' }); +}); + +test('translateResponsesToMessages emits anthropic_speed onto speed', async () => { + const result = await translateResponsesToMessages(minimalResponsesPayload({ anthropic_speed: 'fast' })); + assertEquals(result.target.speed, 'fast'); +}); + +test('translateResponsesToMessages forwards service_tier verbatim', async () => { + const result = await translateResponsesToMessages(minimalResponsesPayload({ service_tier: 'priority' })); + assertEquals(result.target.service_tier, 'priority'); +}); + +test('translateResponsesToMessages leaves anthropic_beta as inbound residue for the gateway header pass', async () => { + const result = await translateResponsesToMessages(minimalResponsesPayload({ anthropic_beta: ['fast-mode-2026-02-01'] })); + assertEquals('anthropic_beta' in result.target, false); +}); + +test('translateResponsesToMessages emission stack: budget + summary writes display onto the budget-driven block', async () => { + const result = await translateResponsesToMessages(minimalResponsesPayload({ + thinking_budget: 2048, + reasoning: { effort: 'medium', summary: 'concise' }, + })); + assertEquals(result.target.thinking, { type: 'enabled', budget_tokens: 2048, display: 'summarized' }); +}); From d7e9fe0d2c0a4d7a62cef83ff4101fa170a41187 Mon Sep 17 00:00:00 2001 From: Menci Date: Thu, 25 Jun 2026 21:54:09 +0800 Subject: [PATCH 005/170] feat(gateway): weave alias matching into model resolution fan-out MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit enumerateModelInterpretations now matches each (provider, lookupId) pair against the global alias table (post-prefix-strip, semantic P). Per the matched alias's onConflict, the fan-out pushes either the alias-rewrite interpretation, the real-name interpretation, or both (in either order). 
A post-resolution prune drops the alias-rewrite when the real-name resolved under onConflict=real-only — the alias remains when the real lookup misses, so an empty upstream catalog falls back to the alias's target id. The aliasRules and aliasName ride through into a new ChatCandidate wrapper type so downstream attempt logic can apply the rules and set the x-floway-alias response header without polluting the @floway-dev/provider package. RoutingDecision and classifyResponsesItemAffinity become generic over the candidate type to carry alias metadata across the affinity walk without re-deriving it. modelAliases is added to the central Repo interface so each chat serve.ts call site reaches it through getRepo() — the same pattern the other operator-managed config tables follow. --- .../chat/chat-completions/routing.ts | 4 +- .../data-plane/chat/chat-completions/serve.ts | 3 + .../src/data-plane/chat/gemini/routing.ts | 4 +- .../src/data-plane/chat/gemini/serve.ts | 5 + .../src/data-plane/chat/messages/routing.ts | 4 +- .../src/data-plane/chat/messages/serve.ts | 5 + .../chat/responses/items/affinity.ts | 12 +- .../src/data-plane/chat/responses/routing.ts | 4 +- .../data-plane/chat/responses/serve-prep.ts | 7 +- .../src/data-plane/chat/shared/candidates.ts | 41 +++- .../data-plane/chat/shared/candidates_test.ts | 12 + .../src/data-plane/chat/shared/routing.ts | 12 +- .../src/data-plane/model-aliases/match.ts | 19 ++ .../data-plane/model-aliases/match_test.ts | 54 +++++ .../src/data-plane/providers/registry.ts | 123 ++++++++++- .../src/data-plane/providers/registry_test.ts | 208 +++++++++++++++++- packages/gateway/src/repo/memory.ts | 19 ++ packages/gateway/src/repo/sql.ts | 13 ++ packages/gateway/src/repo/types.ts | 8 + 19 files changed, 515 insertions(+), 42 deletions(-) create mode 100644 packages/gateway/src/data-plane/model-aliases/match.ts create mode 100644 packages/gateway/src/data-plane/model-aliases/match_test.ts diff --git 
a/packages/gateway/src/data-plane/chat/chat-completions/routing.ts b/packages/gateway/src/data-plane/chat/chat-completions/routing.ts index 381feaea9..efcc380d6 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/routing.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/routing.ts @@ -1,13 +1,13 @@ import { classifyResponsesItemAffinity } from '../responses/items/affinity.ts'; import type { StatefulResponsesStore } from '../responses/items/store.ts'; -import type { ProviderCandidate } from '../shared/candidates.ts'; +import type { ChatCandidate } from '../shared/candidates.ts'; import type { RoutingDecision } from '../shared/routing.ts'; import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions'; import { chatCompletionsViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items'; export const planChatCompletionsRouting = async (input: { readonly payload: ChatCompletionsPayload; - readonly candidates: readonly ProviderCandidate[]; + readonly candidates: readonly ChatCandidate[]; readonly store: StatefulResponsesStore; }): Promise => await classifyResponsesItemAffinity({ diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts index 73e8c1afd..5d27541f9 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts @@ -1,6 +1,7 @@ import { chatCompletionsAttempt } from './attempt.ts'; import { renderChatCompletionsFailure } from './errors.ts'; import { planChatCompletionsRouting } from './routing.ts'; +import { getRepo } from '../../../repo/index.ts'; import type { StatefulResponsesStore } from '../responses/items/store.ts'; import { enumerateProviderCandidates } from '../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; @@ -18,9 +19,11 @@ export interface ChatCompletionsServeGenerateArgs { 
export const chatCompletionsServe = { generate: async (args: ChatCompletionsServeGenerateArgs): Promise>> => { const { payload, ctx, store, headers } = args; + const aliases = await getRepo().modelAliases.loadAll(); const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({ upstreamIds: ctx.upstreamIds, model: payload.model, + aliases, pickTarget: endpoints => endpoints.chatCompletions ? 'chat-completions' : endpoints.messages ? 'messages' diff --git a/packages/gateway/src/data-plane/chat/gemini/routing.ts b/packages/gateway/src/data-plane/chat/gemini/routing.ts index 2d5e37d87..28e353c59 100644 --- a/packages/gateway/src/data-plane/chat/gemini/routing.ts +++ b/packages/gateway/src/data-plane/chat/gemini/routing.ts @@ -1,6 +1,6 @@ import { classifyResponsesItemAffinity } from '../responses/items/affinity.ts'; import type { StatefulResponsesStore } from '../responses/items/store.ts'; -import type { ProviderCandidate } from '../shared/candidates.ts'; +import type { ChatCandidate } from '../shared/candidates.ts'; import type { RoutingDecision } from '../shared/routing.ts'; import type { GeminiPayload } from '@floway-dev/protocols/gemini'; import { geminiViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items'; @@ -9,7 +9,7 @@ export type GeminiRoutingDecision = RoutingDecision; export const planGeminiRouting = async (input: { readonly payload: GeminiPayload; - readonly candidates: readonly ProviderCandidate[]; + readonly candidates: readonly ChatCandidate[]; readonly store: StatefulResponsesStore; }): Promise => await classifyResponsesItemAffinity({ diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts index f5daa1d86..840da62c7 100644 --- a/packages/gateway/src/data-plane/chat/gemini/serve.ts +++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts @@ -1,6 +1,7 @@ import { geminiAttempt } from './attempt.ts'; import { renderGeminiFailure } from 
'./errors.ts'; import { planGeminiRouting } from './routing.ts'; +import { getRepo } from '../../../repo/index.ts'; import type { StatefulResponsesStore } from '../responses/items/store.ts'; import { enumerateProviderCandidates } from '../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; @@ -30,9 +31,11 @@ export interface GeminiServeCountTokensArgs { export const geminiServe = { generate: async (args: GeminiServeGenerateArgs): Promise>> => { const { payload, ctx, store, model, headers } = args; + const aliases = await getRepo().modelAliases.loadAll(); const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({ upstreamIds: ctx.upstreamIds, model, + aliases, // Gemini has no native upstream target in the provider API; prefer // Chat Completions, then Messages, then Responses. pickTarget: endpoints => endpoints.chatCompletions ? 'chat-completions' : endpoints.messages ? 'messages' : endpoints.responses ? 'responses' : null, @@ -60,9 +63,11 @@ export const geminiServe = { countTokens: async (args: GeminiServeCountTokensArgs): Promise> | PlainResult> => { const { payload, ctx, store, model, headers } = args; + const aliases = await getRepo().modelAliases.loadAll(); const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({ upstreamIds: ctx.upstreamIds, model, + aliases, // Gemini countTokens has no native upstream support; only providers // exposing the Messages endpoint qualify because we translate Gemini // → Messages and call Messages count_tokens upstream. 
diff --git a/packages/gateway/src/data-plane/chat/messages/routing.ts b/packages/gateway/src/data-plane/chat/messages/routing.ts index d6de52107..e9783625c 100644 --- a/packages/gateway/src/data-plane/chat/messages/routing.ts +++ b/packages/gateway/src/data-plane/chat/messages/routing.ts @@ -1,6 +1,6 @@ import { classifyResponsesItemAffinity } from '../responses/items/affinity.ts'; import type { StatefulResponsesStore } from '../responses/items/store.ts'; -import type { ProviderCandidate } from '../shared/candidates.ts'; +import type { ChatCandidate } from '../shared/candidates.ts'; import type { RoutingDecision } from '../shared/routing.ts'; import type { MessagesPayload } from '@floway-dev/protocols/messages'; import { messagesViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items'; @@ -9,7 +9,7 @@ export type MessagesRoutingDecision = RoutingDecision; export const planMessagesRouting = async (input: { readonly payload: MessagesPayload; - readonly candidates: readonly ProviderCandidate[]; + readonly candidates: readonly ChatCandidate[]; readonly store: StatefulResponsesStore; }): Promise => await classifyResponsesItemAffinity({ diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts index ae9bb5d6c..719091768 100644 --- a/packages/gateway/src/data-plane/chat/messages/serve.ts +++ b/packages/gateway/src/data-plane/chat/messages/serve.ts @@ -1,6 +1,7 @@ import { messagesAttempt } from './attempt.ts'; import { renderMessagesFailure } from './errors.ts'; import { planMessagesRouting } from './routing.ts'; +import { getRepo } from '../../../repo/index.ts'; import type { StatefulResponsesStore } from '../responses/items/store.ts'; import { enumerateProviderCandidates } from '../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; @@ -25,9 +26,11 @@ export interface MessagesServeCountTokensArgs { export const messagesServe = { generate: async 
(args: MessagesServeGenerateArgs): Promise>> => { const { payload, ctx, store, headers } = args; + const aliases = await getRepo().modelAliases.loadAll(); const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({ upstreamIds: ctx.upstreamIds, model: payload.model, + aliases, pickTarget: endpoints => endpoints.messages ? 'messages' : endpoints.responses ? 'responses' @@ -57,9 +60,11 @@ export const messagesServe = { countTokens: async (args: MessagesServeCountTokensArgs): Promise> | PlainResult> => { const { payload, ctx, store, headers } = args; + const aliases = await getRepo().modelAliases.loadAll(); const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({ upstreamIds: ctx.upstreamIds, model: payload.model, + aliases, pickTarget: endpoints => endpoints.messages ? 'messages' : null, scheduler: ctx.backgroundScheduler, currentColo: ctx.currentColo, diff --git a/packages/gateway/src/data-plane/chat/responses/items/affinity.ts b/packages/gateway/src/data-plane/chat/responses/items/affinity.ts index 8fbc1a146..d5b5e5e47 100644 --- a/packages/gateway/src/data-plane/chat/responses/items/affinity.ts +++ b/packages/gateway/src/data-plane/chat/responses/items/affinity.ts @@ -98,10 +98,10 @@ const collectStoredResponsesItemRefs = async ( return references; }; -const orderCandidatesByStoredResponsesAffinity = ( - candidates: readonly ProviderCandidate[], +const orderCandidatesByStoredResponsesAffinity = ( + candidates: readonly T[], preferredUpstreamIds: ReadonlySet, -): readonly ProviderCandidate[] => { +): readonly T[] => { const preferred = [...preferredUpstreamIds].reverse(); if (preferred.length === 0) return candidates; @@ -113,17 +113,17 @@ const orderCandidatesByStoredResponsesAffinity = ( return [...preferredCandidates, ...remainingCandidates]; }; -export const classifyResponsesItemAffinity = async (input: { +export const classifyResponsesItemAffinity = async (input: { sourceItems: TSourceItems; view: 
ResponsesItemsView; store: StatefulResponsesStore; - candidates: readonly ProviderCandidate[]; + candidates: readonly TCandidate[]; // Items the caller will stage as inputs after the affinity walk; passed // here so `loadInputItems` can pre-load any stored row whose content hash // matches one of them. Without this, a duplicate user message resent on // a later turn cannot be reused — it would mint a fresh row each time. inputItemsToStage?: readonly ResponsesInputItem[]; -}): Promise => { +}): Promise> => { const { sourceItems, view, store, candidates, inputItemsToStage } = input; await store.loadInputItems({ sourceItems, diff --git a/packages/gateway/src/data-plane/chat/responses/routing.ts b/packages/gateway/src/data-plane/chat/responses/routing.ts index 519e95bfe..05661aa69 100644 --- a/packages/gateway/src/data-plane/chat/responses/routing.ts +++ b/packages/gateway/src/data-plane/chat/responses/routing.ts @@ -1,5 +1,5 @@ import { classifyResponsesItemAffinity } from './items/affinity.ts'; -import type { ProviderCandidate } from '../shared/candidates.ts'; +import type { ChatCandidate } from '../shared/candidates.ts'; import type { RoutingDecision } from '../shared/routing.ts'; import type { StatefulResponsesStore } from './items/store.ts'; import type { ResponsesInputItem, ResponsesPayload } from '@floway-dev/protocols/responses'; @@ -7,7 +7,7 @@ import { responsesItemsView } from '@floway-dev/translate/via-responses/response export const planResponsesRouting = async (input: { readonly payload: ResponsesPayload; - readonly candidates: readonly ProviderCandidate[]; + readonly candidates: readonly ChatCandidate[]; readonly store: StatefulResponsesStore; }): Promise => { // A bare-string input is wrapped into a synthetic user message for staging; diff --git a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts index 96096ee29..ec4a48afa 100644 --- 
a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts +++ b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts @@ -1,7 +1,8 @@ import { renderResponsesFailure } from './errors.ts'; import type { StatefulResponsesStore } from './items/store.ts'; import { planResponsesRouting } from './routing.ts'; -import { enumerateProviderCandidates, type ProviderCandidate } from '../shared/candidates.ts'; +import { getRepo } from '../../../repo/index.ts'; +import { enumerateProviderCandidates, type ChatCandidate } from '../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; import type { ModelEndpoints, ProtocolFrame } from '@floway-dev/protocols/common'; import type { ResponsesInputItem, ResponsesPayload, ResponsesStreamEvent } from '@floway-dev/protocols/responses'; @@ -72,7 +73,7 @@ const stageUserInputItems = async (input: ResponsesPayload['input'], store: Stat export type ResponsesServePlan = | { readonly kind: 'failure'; readonly result: ExecuteResult> } - | { readonly kind: 'ready'; readonly prepared: ResponsesPayload; readonly candidate: ProviderCandidate }; + | { readonly kind: 'ready'; readonly prepared: ResponsesPayload; readonly candidate: ChatCandidate }; // Runs the shared serve-side prep both `responsesServe.generate` and // `responsesServe.compact` need before dispatching to `responsesAttempt`: @@ -88,9 +89,11 @@ export const prepareResponsesServePlan = async (args: { }): Promise => { const { payload, ctx, store, pickTarget } = args; const prepared = await expandPreviousResponseId(payload, store); + const aliases = await getRepo().modelAliases.loadAll(); const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({ upstreamIds: ctx.upstreamIds, model: prepared.model, + aliases, pickTarget, scheduler: ctx.backgroundScheduler, currentColo: ctx.currentColo, diff --git a/packages/gateway/src/data-plane/chat/shared/candidates.ts b/packages/gateway/src/data-plane/chat/shared/candidates.ts index 
86b02e721..7a58f9e08 100644 --- a/packages/gateway/src/data-plane/chat/shared/candidates.ts +++ b/packages/gateway/src/data-plane/chat/shared/candidates.ts @@ -1,3 +1,4 @@ +import type { ModelAlias, ModelAliasRules } from '../../../control-plane/model-aliases/types.ts'; import { createPerRequestFetcher } from '../../../dial/per-request.ts'; import { collectInterpretationOutcomes, enumerateModelInterpretations, listModelProviders } from '../../providers/registry.ts'; import type { BackgroundScheduler } from '@floway-dev/platform'; @@ -6,6 +7,18 @@ import type { ChatTargetApi, ProviderCandidate } from '@floway-dev/provider'; export type { ProviderCandidate }; +// Wrapper around `ProviderCandidate` that carries the matched alias's +// operator-locked request-time rules and the alias name. The wrapper lives +// here (in the gateway) rather than on `ProviderCandidate` itself to keep +// the `@floway-dev/provider` package unaware of the gateway's alias +// concept. Downstream attempt logic narrows the candidate when it needs +// to apply rules or stamp the `x-floway-alias` response header; passthrough +// consumers continue to treat the candidate as a plain `ProviderCandidate`. +export type ChatCandidate = ProviderCandidate & { + readonly aliasRules?: ModelAliasRules; + readonly aliasName?: string; +}; + // Returns the candidates that satisfy both the model resolution and the // target-endpoint pick, plus a `sawModel` flag that distinguishes the // "model is missing entirely" failure from "model exists but does not @@ -13,11 +26,16 @@ export type { ProviderCandidate }; // whose catalog fetch rejected this round so the caller's failure // renderer can surface them parenthetically. export const enumerateProviderCandidates = async ({ - upstreamIds, model, pickTarget, scheduler, currentColo, + upstreamIds, model, aliases, pickTarget, scheduler, currentColo, }: { // null = unrestricted; empty list = no providers visible. 
upstreamIds: readonly string[] | null; model: string; + // Operator-managed alias table loaded by the caller (typically via + // `getRepo().modelAliases.loadAll()`). The fan-out matches each + // (provider, lookupId) interpretation against this list; an empty list + // is a valid input and produces only literal interpretations. + aliases: readonly ModelAlias[]; pickTarget: (endpoints: ModelEndpoints) => ChatTargetApi | null; // Threaded into `resolveModelForProvider` so the per-upstream catalog // lookup hits the SWR-cached `fetchUpstreamModelsCached` instead of @@ -27,7 +45,7 @@ export const enumerateProviderCandidates = async ({ // into the per-request fetcher so colo-scoped fallback entries can be // honoured at dial time. currentColo: string; -}): Promise<{ readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean; readonly failedUpstreams: readonly string[] }> => { +}): Promise<{ readonly candidates: readonly ChatCandidate[]; readonly sawModel: boolean; readonly failedUpstreams: readonly string[] }> => { const fetcherForUpstream = await createPerRequestFetcher(currentColo); const providers = await listModelProviders(upstreamIds); @@ -39,17 +57,28 @@ export const enumerateProviderCandidates = async ({ // `resolveModelForRequest`; first-viable-wins ordering follows configured // sort_order across upstreams, with the unprefixed interpretation pushed // before the prefixed one within a single upstream. - const interpretations = enumerateModelInterpretations(model, providers); + // + // Alias matching runs inside `enumerateModelInterpretations`: each + // (provider, lookupId) pair is checked against the alias table and the + // matched alias's `onConflict` decides what to push. The alias-rewrite + // metadata rides out alongside each resolved candidate so the attempt + // layer can apply the locked rules. 
+ const interpretations = enumerateModelInterpretations(model, providers, aliases); const { resolutions, failedUpstreams } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler); - const candidates: ProviderCandidate[] = []; + const candidates: ChatCandidate[] = []; let sawModel = false; - for (const { provider, resolved } of resolutions) { + for (const { interpretation, provider, resolved } of resolutions) { sawModel = true; const targetApi = pickTarget(resolved.binding.upstreamModel.endpoints); if (!targetApi) continue; - candidates.push({ provider, binding: resolved.binding, targetApi, fetcher: fetcherForUpstream(provider.upstream) }); + const base: ProviderCandidate = { provider, binding: resolved.binding, targetApi, fetcher: fetcherForUpstream(provider.upstream) }; + candidates.push( + interpretation.aliasRules !== undefined + ? { ...base, aliasRules: interpretation.aliasRules, aliasName: interpretation.aliasName } + : base, + ); } return { candidates, sawModel, failedUpstreams }; diff --git a/packages/gateway/src/data-plane/chat/shared/candidates_test.ts b/packages/gateway/src/data-plane/chat/shared/candidates_test.ts index 627b631d5..381d395b9 100644 --- a/packages/gateway/src/data-plane/chat/shared/candidates_test.ts +++ b/packages/gateway/src/data-plane/chat/shared/candidates_test.ts @@ -54,6 +54,7 @@ describe('enumerateProviderCandidates', () => { const { candidates, sawModel } = await enumerateProviderCandidates({ upstreamIds: null, + aliases: [], model: 'test-model', pickTarget: pickMessages, scheduler: testScheduler, @@ -74,6 +75,7 @@ describe('enumerateProviderCandidates', () => { const { candidates, sawModel } = await enumerateProviderCandidates({ upstreamIds: null, + aliases: [], model: 'test-model', pickTarget: pickMessages, scheduler: testScheduler, @@ -94,6 +96,7 @@ describe('enumerateProviderCandidates', () => { const { candidates, sawModel } = await enumerateProviderCandidates({ upstreamIds: null, + aliases: [], 
model: 'test-model', pickTarget: pickMessages, scheduler: testScheduler, @@ -113,6 +116,7 @@ describe('enumerateProviderCandidates', () => { const { candidates } = await enumerateProviderCandidates({ upstreamIds: null, + aliases: [], model: 'test-model', pickTarget: pickMessages, scheduler: testScheduler, @@ -133,6 +137,7 @@ describe('enumerateProviderCandidates', () => { const { candidates } = await enumerateProviderCandidates({ upstreamIds: ['up_c', 'up_a'], + aliases: [], model: 'test-model', pickTarget: pickMessages, scheduler: testScheduler, @@ -155,6 +160,7 @@ describe('enumerateProviderCandidates', () => { const { candidates } = await enumerateProviderCandidates({ upstreamIds: null, + aliases: [], model: 'test-model', pickTarget: pickMessages, scheduler: testScheduler, @@ -172,6 +178,7 @@ describe('enumerateProviderCandidates', () => { const { candidates: msgCandidates } = await enumerateProviderCandidates({ upstreamIds: null, + aliases: [], model: 'test-model', pickTarget: pickMessagesOrResponses, scheduler: testScheduler, @@ -182,6 +189,7 @@ describe('enumerateProviderCandidates', () => { const { candidates: resCandidates } = await enumerateProviderCandidates({ upstreamIds: null, + aliases: [], model: 'test-model', pickTarget: pickResponses, scheduler: testScheduler, @@ -198,6 +206,7 @@ describe('enumerateProviderCandidates', () => { const { candidates: anyCandidates } = await enumerateProviderCandidates({ upstreamIds: null, + aliases: [], model: 'test-model', pickTarget: pickAny, scheduler: testScheduler, @@ -208,6 +217,7 @@ describe('enumerateProviderCandidates', () => { const { candidates: msgCandidates, sawModel } = await enumerateProviderCandidates({ upstreamIds: null, + aliases: [], model: 'test-model', pickTarget: pickMessages, scheduler: testScheduler, @@ -245,6 +255,7 @@ describe('enumerateProviderCandidates', () => { async () => { const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({ upstreamIds: null, + aliases: 
[], model: 'test-model', pickTarget: pickMessages, scheduler: testScheduler, @@ -288,6 +299,7 @@ describe('enumerateProviderCandidates', () => { async () => { const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({ upstreamIds: null, + aliases: [], model: 'test-model', pickTarget: pickMessages, scheduler: testScheduler, diff --git a/packages/gateway/src/data-plane/chat/shared/routing.ts b/packages/gateway/src/data-plane/chat/shared/routing.ts index 96785da70..392c50cf5 100644 --- a/packages/gateway/src/data-plane/chat/shared/routing.ts +++ b/packages/gateway/src/data-plane/chat/shared/routing.ts @@ -1,6 +1,12 @@ -import type { ProviderCandidate } from './candidates.ts'; +import type { ChatCandidate, ProviderCandidate } from './candidates.ts'; import type { ChatServeFailure } from './errors.ts'; -export type RoutingDecision = - | { readonly kind: 'success'; readonly candidates: readonly ProviderCandidate[] } +// Generic over the candidate type so call sites that hand in `ChatCandidate` +// receive a decision whose surviving candidates retain the alias metadata. +// The candidate filtering and ordering inside routing is shape-agnostic — +// it touches `binding.upstream` and `binding.supportsResponsesItemReference` +// only — so the generic narrows naturally from `ChatCandidate` back out +// without re-deriving the alias fields. 
+export type RoutingDecision = + | { readonly kind: 'success'; readonly candidates: readonly T[] } | { readonly kind: 'failure'; readonly failure: ChatServeFailure }; diff --git a/packages/gateway/src/data-plane/model-aliases/match.ts b/packages/gateway/src/data-plane/model-aliases/match.ts new file mode 100644 index 000000000..f297d1a50 --- /dev/null +++ b/packages/gateway/src/data-plane/model-aliases/match.ts @@ -0,0 +1,19 @@ +import type { ModelAlias } from '../../control-plane/model-aliases/types.ts'; + +export interface AliasMatchResult { + readonly alias: ModelAlias; +} + +// Lookup an alias for the (post-prefix-strip) lookupId against the upstream's +// id. An empty `upstreamIds` filter on the alias means "match any upstream"; +// a non-empty filter must include the upstream's id. +export const matchAlias = ( + lookupId: string, + upstreamId: string, + aliases: readonly ModelAlias[], +): AliasMatchResult | undefined => { + const hit = aliases.find(a => a.alias === lookupId); + if (!hit) return undefined; + if (hit.upstreamIds.length > 0 && !hit.upstreamIds.includes(upstreamId)) return undefined; + return { alias: hit }; +}; diff --git a/packages/gateway/src/data-plane/model-aliases/match_test.ts b/packages/gateway/src/data-plane/model-aliases/match_test.ts new file mode 100644 index 000000000..b1dd8ff2f --- /dev/null +++ b/packages/gateway/src/data-plane/model-aliases/match_test.ts @@ -0,0 +1,54 @@ +import { describe, expect, test } from 'vitest'; + +import { matchAlias } from './match.ts'; +import type { ModelAlias } from '../../control-plane/model-aliases/types.ts'; + +const make = (overrides: Partial): ModelAlias => ({ + alias: 'a', + targetModelId: 't', + upstreamIds: [], + rules: {}, + visibleInModelsList: true, + onConflict: 'real-only', + ...overrides, +}); + +describe('matchAlias', () => { + test('matches by exact lookupId when alias has no upstream filter', () => { + const aliases = [make({ alias: 'codex-auto-review', targetModelId: 'gpt-5.4' })]; + 
expect(matchAlias('codex-auto-review', 'up-1', aliases)?.alias.alias).toBe('codex-auto-review'); + }); + + test('does not match when lookupId differs', () => { + const aliases = [make({ alias: 'codex-auto-review' })]; + expect(matchAlias('something-else', 'up-1', aliases)).toBeUndefined(); + }); + + test('respects upstreamIds allowlist (member matches)', () => { + const aliases = [make({ alias: 'a', upstreamIds: ['up-1', 'up-2'] })]; + expect(matchAlias('a', 'up-1', aliases)).toBeDefined(); + expect(matchAlias('a', 'up-2', aliases)).toBeDefined(); + }); + + test('respects upstreamIds allowlist (non-member misses)', () => { + const aliases = [make({ alias: 'a', upstreamIds: ['up-1'] })]; + expect(matchAlias('a', 'up-3', aliases)).toBeUndefined(); + }); + + test('empty upstreamIds means match-any', () => { + const aliases = [make({ alias: 'a', upstreamIds: [] })]; + expect(matchAlias('a', 'anywhere', aliases)).toBeDefined(); + }); + + test('returns the first matching alias entry verbatim', () => { + const aliases = [ + make({ alias: 'a', targetModelId: 'first', rules: { reasoning: { effort: 'low' } } }), + make({ alias: 'a', targetModelId: 'second' }), + ]; + expect(matchAlias('a', 'up-x', aliases)?.alias).toEqual(aliases[0]); + }); + + test('returns undefined for an empty alias list', () => { + expect(matchAlias('a', 'up-x', [])).toBeUndefined(); + }); +}); diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts index 79a4f1c5a..8ca75c518 100644 --- a/packages/gateway/src/data-plane/providers/registry.ts +++ b/packages/gateway/src/data-plane/providers/registry.ts @@ -1,5 +1,7 @@ import { fetchUpstreamModelsCached } from './models-cache.ts'; +import type { ModelAlias, ModelAliasRules } from '../../control-plane/model-aliases/types.ts'; import { getRepo } from '../../repo/index.ts'; +import { matchAlias } from '../model-aliases/match.ts'; import type { BackgroundScheduler } from '@floway-dev/platform'; 
import { type ModelEndpointKey, type ModelEndpoints, kindForEndpoints } from '@floway-dev/protocols/common'; import type { InternalModel, ModelProviderInstance, ProviderModelRecord, ResolvedModel, Fetcher, UpstreamModel, UpstreamProviderKind, UpstreamRecord } from '@floway-dev/provider'; @@ -289,8 +291,22 @@ export interface ModelInterpretation { provider: ModelProviderInstance; // The bare id to query the upstream's catalog with. Equals the inbound // model id for the unprefixed surface; equals `inbound.slice(prefix.length)` - // for the prefixed surface. + // for the prefixed surface. For an alias-rewrite interpretation it equals + // the matched alias's `targetModelId`. lookupId: string; + // Operator-locked request-time rules carried alongside an alias-rewrite + // interpretation. Set only when this interpretation is the alias-rewrite + // half of a matched alias; the real-name interpretation in the same + // `conflictGroup` (and every non-aliased interpretation) leaves this + // undefined. + aliasRules?: ModelAliasRules; + // The alias name as authored by the operator. Set in lockstep with + // `aliasRules` and carried out for the `x-floway-alias` response header. + aliasName?: string; + // Identity-keyed group shared by the two interpretations a single + // `onConflict: 'real-only'` alias emits. The post-resolution prune uses + // this to drop the alias-rewrite member when both halves resolved. + conflictGroup?: { readonly originalLookupId: string }; } // Expands one inbound model id into every (provider, catalog-lookup-id) pair @@ -298,44 +314,102 @@ export interface ModelInterpretation { // when the inbound id literally equals one of the public-id surfaces the // upstream advertises (bare and/or prefixed, per `modelPrefix.addressable`). // The unprefixed interpretation is always pushed first when both apply. 
+// +// Each (provider, lookupId) candidate is then matched against the global +// alias table — semantic P, post-prefix-strip — and the matched alias's +// `onConflict` decides whether to push the real-name interpretation, the +// alias-rewrite interpretation, or both (in either order). When neither +// the alias nor the alias's target id is exposed by the upstream catalog, +// the fan-out still emits both interpretations and resolution simply +// drops the half that misses. export const enumerateModelInterpretations = ( modelId: string, providers: readonly ModelProviderInstance[], + aliases: readonly ModelAlias[], ): ModelInterpretation[] => { const out: ModelInterpretation[] = []; for (const provider of providers) { const cfg = provider.modelPrefix; if (cfg === null || cfg.addressable.includes('unprefixed')) { - out.push({ provider, lookupId: modelId }); + pushInterpretation(out, provider, modelId, aliases); } if (cfg !== null && cfg.addressable.includes('prefixed') && modelId.startsWith(cfg.prefix)) { - out.push({ provider, lookupId: modelId.slice(cfg.prefix.length) }); + pushInterpretation(out, provider, modelId.slice(cfg.prefix.length), aliases); } } return out; }; +const pushInterpretation = ( + out: ModelInterpretation[], + provider: ModelProviderInstance, + lookupId: string, + aliases: readonly ModelAlias[], +): void => { + const hit = matchAlias(lookupId, provider.upstream, aliases); + if (!hit) { + out.push({ provider, lookupId }); + return; + } + const { alias } = hit; + const aliasInterp: ModelInterpretation = { + provider, + lookupId: alias.targetModelId, + aliasRules: alias.rules, + aliasName: alias.alias, + }; + const realInterp: ModelInterpretation = { provider, lookupId }; + switch (alias.onConflict) { + case 'alias-only': + out.push(aliasInterp); + return; + case 'real-only': { + // Both halves enter the resolution pass; the post-resolution prune + // drops the alias-rewrite member when the real-name resolved too. 
+ // Identity-keyed group so the prune step can rejoin them without + // re-deriving an alias key. + const group = { originalLookupId: lookupId }; + out.push({ ...realInterp, conflictGroup: group }); + out.push({ ...aliasInterp, conflictGroup: group }); + return; + } + case 'both-real-first': + out.push(realInterp); + out.push(aliasInterp); + return; + case 'both-alias-first': + out.push(aliasInterp); + out.push(realInterp); + return; + } +}; + // Fan out per-interpretation against the SWR cache and collect the resolved // matches plus a deduped list of upstreams whose catalog fetch rejected. // Shared by `resolveModelForRequest` and `enumerateProviderCandidates`; the // per-caller divergence (passthrough vs LLM-candidate shape) happens after // this returns. Cancellation (`AbortError`) propagates so the per-request // abort signal cannot be masked by a slow upstream's rejection. +// +// Each successful resolution carries its source `interpretation` back to +// the caller so the alias-rewrite metadata (`aliasRules`, `aliasName`) +// rides through to the candidate, and so the `real-only` post-resolution +// prune can rejoin the two halves of a conflict group. 
export const collectInterpretationOutcomes = async ( interpretations: readonly ModelInterpretation[], fetcherForUpstream: (upstreamId: string) => Fetcher, scheduler: BackgroundScheduler, ): Promise<{ - resolutions: Array<{ provider: ModelProviderInstance; resolved: ProviderModelResolution }>; + resolutions: Array<{ interpretation: ModelInterpretation; provider: ModelProviderInstance; resolved: ProviderModelResolution }>; failedUpstreams: string[]; }> => { - const settled = await Promise.allSettled(interpretations.map(({ provider, lookupId }) => - resolveModelForProvider(provider, lookupId, fetcherForUpstream(provider.upstream), scheduler) - .then(resolved => ({ provider, resolved })))); + const settled = await Promise.allSettled(interpretations.map(interpretation => + resolveModelForProvider(interpretation.provider, interpretation.lookupId, fetcherForUpstream(interpretation.provider.upstream), scheduler) + .then(resolved => ({ interpretation, resolved })))); const failedUpstreams: string[] = []; const failedSeen = new Set(); - const resolutions: Array<{ provider: ModelProviderInstance; resolved: ProviderModelResolution }> = []; + const resolutions: Array<{ interpretation: ModelInterpretation; provider: ModelProviderInstance; resolved: ProviderModelResolution }> = []; for (const [index, result] of settled.entries()) { if (result.status === 'rejected') { @@ -350,12 +424,36 @@ export const collectInterpretationOutcomes = async ( } continue; } - const { provider, resolved } = result.value; + const { interpretation, resolved } = result.value; if (!resolved) continue; - resolutions.push({ provider, resolved }); + resolutions.push({ interpretation, provider: interpretation.provider, resolved }); } - return { resolutions, failedUpstreams }; + // `onConflict: 'real-only'`: when both halves of a conflict group + // resolved, drop the alias-rewrite half so the real-name match is the + // only one downstream sees. 
When only the alias-rewrite half resolved + // (the upstream has no model named after the alias itself), keep it — + // the operator's intent is to fall back to the alias when no real model + // collides. + const droppedInterpretations = new Set(); + const byGroup = new Map<{ readonly originalLookupId: string }, ModelInterpretation[]>(); + for (const { interpretation } of resolutions) { + const group = interpretation.conflictGroup; + if (!group) continue; + const list = byGroup.get(group) ?? []; + list.push(interpretation); + byGroup.set(group, list); + } + for (const members of byGroup.values()) { + if (members.length < 2) continue; + const aliasRewriteMember = members.find(i => i.aliasRules !== undefined); + if (aliasRewriteMember) droppedInterpretations.add(aliasRewriteMember); + } + + return { + resolutions: resolutions.filter(r => !droppedInterpretations.has(r.interpretation)), + failedUpstreams, + }; }; export const resolveModelForRequest = async ( @@ -363,13 +461,14 @@ export const resolveModelForRequest = async ( upstreamFilter: readonly string[] | null, fetcherForUpstream: (upstreamId: string) => Fetcher, scheduler: BackgroundScheduler, + aliases: readonly ModelAlias[] = [], ): Promise => { const providers = await listModelProviders(upstreamFilter); if (providers.length === 0) { throw new Error(NO_UPSTREAM_CONFIGURED_MESSAGE); } - const interpretations = enumerateModelInterpretations(modelId, providers); + const interpretations = enumerateModelInterpretations(modelId, providers, aliases); const { resolutions, failedUpstreams } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler); return { matches: resolutions.map(r => r.resolved), failedUpstreams }; }; diff --git a/packages/gateway/src/data-plane/providers/registry_test.ts b/packages/gateway/src/data-plane/providers/registry_test.ts index c330abfee..d470f9169 100644 --- a/packages/gateway/src/data-plane/providers/registry_test.ts +++ 
b/packages/gateway/src/data-plane/providers/registry_test.ts @@ -2,6 +2,7 @@ import { describe, expect, test } from 'vitest'; import { clearInFlightForTesting } from './models-cache.ts'; import { compareModelIds, enumerateModelInterpretations, getInternalModels, listModelProviders, resolveModelForProvider, resolveModelForRequest } from './registry.ts'; +import type { ModelAlias } from '../../control-plane/model-aliases/types.ts'; import { buildCopilotUpstreamRecord, buildCustomUpstreamRecord, copilotModels, setupAppTest } from '../../test-helpers.ts'; import { directFetcher, type ModelProviderInstance } from '@floway-dev/provider'; import { createCopilotProvider } from '@floway-dev/provider-copilot'; @@ -609,20 +610,20 @@ describe('enumerateModelInterpretations', () => { // A: no prefix, bare always accepted. B: prefixed-only addressable — bare // is not accepted. C: dual-addressable, bare accepted; the prefixed form // does not apply because `gpt-4o` does not start with `cx/`. - assertEquals(shape(enumerateModelInterpretations('gpt-4o', [A, B, C])), [ + assertEquals(shape(enumerateModelInterpretations('gpt-4o', [A, B, C], [])), [ { upstream: 'A', lookupId: 'gpt-4o' }, { upstream: 'C', lookupId: 'gpt-4o' }, ]); }); test('prefix-only-addressable upstream strips the prefix when it matches', () => { - assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [B])), [ + assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [B], [])), [ { upstream: 'B', lookupId: 'gpt-4o' }, ]); }); test('prefix-only-addressable upstream is silent when the prefix does not match', () => { - assertEquals(enumerateModelInterpretations('gpt-4o', [B]), []); + assertEquals(enumerateModelInterpretations('gpt-4o', [B], []), []); }); test('dual-addressable upstream produces two interpretations when the prefix matches', () => { @@ -633,7 +634,7 @@ describe('enumerateModelInterpretations', () => { upstream: 'D', name: 'd', modelPrefix: { prefix: 'or/', addressable: ['unprefixed', 
'prefixed'], listed: ['prefixed'] }, }); - assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [D])), [ + assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [D], [])), [ { upstream: 'D', lookupId: 'or/gpt-4o' }, { upstream: 'D', lookupId: 'gpt-4o' }, ]); @@ -653,7 +654,7 @@ describe('enumerateModelInterpretations', () => { modelPrefix: { prefix: 'aa/bb/', addressable: ['prefixed'], listed: ['prefixed'] }, }); const Z = fakeProvider({ upstream: 'Z', name: 'z', modelPrefix: null }); - assertEquals(shape(enumerateModelInterpretations('aa/bb/gpt-5', [X, Y, Z])), [ + assertEquals(shape(enumerateModelInterpretations('aa/bb/gpt-5', [X, Y, Z], [])), [ { upstream: 'X', lookupId: 'bb/gpt-5' }, { upstream: 'Y', lookupId: 'gpt-5' }, { upstream: 'Z', lookupId: 'aa/bb/gpt-5' }, @@ -906,3 +907,200 @@ describe('catalog listing under modelPrefix', () => { ); }); }); + +// Synthetic-catalog alias matching against a single provider. Verifies that +// each `onConflict` mode emits the right interpretation shape from +// `enumerateModelInterpretations`. The downstream `collectInterpretationOutcomes` +// pass is exercised in the e2e suite below. 
+describe('enumerateModelInterpretations with alias matching', () => { + const provider = fakeProvider({ upstream: 'U', name: 'u', modelPrefix: null }); + + const makeAlias = (over: Partial): ModelAlias => ({ + alias: 'codex-auto-review', + targetModelId: 'gpt-5.4', + upstreamIds: [], + rules: { reasoning: { effort: 'low' } }, + visibleInModelsList: true, + onConflict: 'real-only', + ...over, + }); + + test('alias-only emits exactly the alias-rewrite interpretation, with rules', () => { + const aliases = [makeAlias({ onConflict: 'alias-only' })]; + const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases); + assertEquals(out.length, 1); + assertEquals(out[0].lookupId, 'gpt-5.4'); + assertEquals(out[0].aliasRules, { reasoning: { effort: 'low' } }); + assertEquals(out[0].aliasName, 'codex-auto-review'); + assertEquals(out[0].conflictGroup, undefined); + }); + + test('real-only emits both halves, tagged with a shared conflictGroup', () => { + const aliases = [makeAlias({ onConflict: 'real-only' })]; + const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases); + assertEquals(out.length, 2); + // Real first, alias second — the prune step removes the alias when + // real resolved, so real-first keeps the natural iteration order. 
+ assertEquals(out[0].lookupId, 'codex-auto-review'); + assertEquals(out[0].aliasRules, undefined); + assertEquals(out[1].lookupId, 'gpt-5.4'); + assertEquals(out[1].aliasRules, { reasoning: { effort: 'low' } }); + expect(out[0].conflictGroup).toBeDefined(); + expect(out[0].conflictGroup).toBe(out[1].conflictGroup); + }); + + test('both-real-first emits real then alias, neither group-tagged', () => { + const aliases = [makeAlias({ onConflict: 'both-real-first' })]; + const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases); + assertEquals(out.length, 2); + assertEquals(out[0].lookupId, 'codex-auto-review'); + assertEquals(out[0].aliasRules, undefined); + assertEquals(out[1].lookupId, 'gpt-5.4'); + assertEquals(out[1].aliasRules, { reasoning: { effort: 'low' } }); + assertEquals(out[0].conflictGroup, undefined); + assertEquals(out[1].conflictGroup, undefined); + }); + + test('both-alias-first emits alias then real, neither group-tagged', () => { + const aliases = [makeAlias({ onConflict: 'both-alias-first' })]; + const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases); + assertEquals(out.length, 2); + assertEquals(out[0].lookupId, 'gpt-5.4'); + assertEquals(out[0].aliasRules, { reasoning: { effort: 'low' } }); + assertEquals(out[1].lookupId, 'codex-auto-review'); + assertEquals(out[1].aliasRules, undefined); + }); + + test('upstreamIds filter skips the alias on providers outside the allowlist', () => { + const aliases = [makeAlias({ onConflict: 'alias-only', upstreamIds: ['OTHER'] })]; + const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases); + // The alias only applies to OTHER, so this provider sees a literal + // (no-rewrite) interpretation. 
+ assertEquals(out.length, 1); + assertEquals(out[0].lookupId, 'codex-auto-review'); + assertEquals(out[0].aliasRules, undefined); + }); + + test('prefix-strip happens before alias matching (semantic P)', () => { + // Configure the provider with a prefix; the inbound `cx/codex-auto-review` + // strips to `codex-auto-review` and matches the alias. The alias-rewrite + // interpretation carries the target id `gpt-5.4`. + const prefixedProvider = fakeProvider({ + upstream: 'P', name: 'p', + modelPrefix: { prefix: 'cx/', addressable: ['prefixed'], listed: ['prefixed'] }, + }); + const aliases = [makeAlias({ onConflict: 'alias-only' })]; + const out = enumerateModelInterpretations('cx/codex-auto-review', [prefixedProvider], aliases); + assertEquals(out.length, 1); + assertEquals(out[0].lookupId, 'gpt-5.4'); + assertEquals(out[0].aliasName, 'codex-auto-review'); + }); +}); + +// E2E coverage of the post-resolution prune. Uses a real Azure-backed +// catalog (resolved without HTTP) so the conflict pruning behavior is +// observed end-to-end via `resolveModelForRequest`. +describe('resolveModelForRequest applies alias onConflict pruning', () => { + // Helper that stages a single Azure upstream exposing both the real + // alias-named model and the alias's target model. 
+ const stageBothNamesUpstream = async (): Promise => { + const { repo } = await setupAppTest(); + await repo.upstreams.deleteAll(); + await repo.upstreams.save({ + id: 'up_a', + provider: 'azure', + name: 'A', + enabled: true, + sortOrder: 1, + createdAt: '2026-05-21T00:00:00.000Z', + updatedAt: '2026-05-21T00:00:00.000Z', + config: { + endpoint: 'https://a.openai.azure.com', + apiKey: 'az-key', + models: [ + { upstreamModelId: 'codex-auto-review', endpoints: { chatCompletions: {} } }, + { upstreamModelId: 'gpt-5.4', endpoints: { chatCompletions: {} } }, + ], + }, + flagOverrides: {}, + disabledPublicModelIds: [], + proxyFallbackList: [], + modelPrefix: null, + state: null, + }); + }; + + // Helper that stages a single Azure upstream exposing ONLY the alias's + // target model (no real `codex-auto-review` collision). + const stageTargetOnlyUpstream = async (): Promise => { + const { repo } = await setupAppTest(); + await repo.upstreams.deleteAll(); + await repo.upstreams.save({ + id: 'up_a', + provider: 'azure', + name: 'A', + enabled: true, + sortOrder: 1, + createdAt: '2026-05-21T00:00:00.000Z', + updatedAt: '2026-05-21T00:00:00.000Z', + config: { + endpoint: 'https://a.openai.azure.com', + apiKey: 'az-key', + models: [ + { upstreamModelId: 'gpt-5.4', endpoints: { chatCompletions: {} } }, + ], + }, + flagOverrides: {}, + disabledPublicModelIds: [], + proxyFallbackList: [], + modelPrefix: null, + state: null, + }); + }; + + const aliasOf = (onConflict: ModelAlias['onConflict']): ModelAlias => ({ + alias: 'codex-auto-review', + targetModelId: 'gpt-5.4', + upstreamIds: [], + rules: { reasoning: { effort: 'low' } }, + visibleInModelsList: true, + onConflict, + }); + + test('alias-only resolves to a single match against the alias target id', async () => { + await stageBothNamesUpstream(); + const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('alias-only')]); + assertEquals(resolved.matches.length, 1); 
+ assertEquals(resolved.matches[0].id, 'gpt-5.4'); + }); + + test('real-only drops the alias-rewrite resolution when the real-name resolves too', async () => { + await stageBothNamesUpstream(); + const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('real-only')]); + assertEquals(resolved.matches.length, 1); + assertEquals(resolved.matches[0].id, 'codex-auto-review'); + }); + + test('real-only keeps the alias-rewrite resolution when the real-name catalog lookup misses', async () => { + await stageTargetOnlyUpstream(); + const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('real-only')]); + assertEquals(resolved.matches.length, 1); + assertEquals(resolved.matches[0].id, 'gpt-5.4'); + }); + + test('both-real-first resolves to two matches, real first', async () => { + await stageBothNamesUpstream(); + const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('both-real-first')]); + assertEquals(resolved.matches.length, 2); + assertEquals(resolved.matches[0].id, 'codex-auto-review'); + assertEquals(resolved.matches[1].id, 'gpt-5.4'); + }); + + test('both-alias-first resolves to two matches, alias first', async () => { + await stageBothNamesUpstream(); + const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('both-alias-first')]); + assertEquals(resolved.matches.length, 2); + assertEquals(resolved.matches[0].id, 'gpt-5.4'); + assertEquals(resolved.matches[1].id, 'codex-auto-review'); + }); +}); diff --git a/packages/gateway/src/repo/memory.ts b/packages/gateway/src/repo/memory.ts index 5a85dba39..85f01b621 100644 --- a/packages/gateway/src/repo/memory.ts +++ b/packages/gateway/src/repo/memory.ts @@ -13,6 +13,7 @@ import type { ApiKeyRepo, BackoffRow, CachedModelsRow, + ModelAliasesRepo, ModelsCacheRepo, 
PerformanceDimensions, PerformanceErrorSample, @@ -39,6 +40,7 @@ import type { UsersRepo, } from './types.ts'; import { serializeStoredState } from './upstream-json.ts'; +import type { ModelAlias } from '../control-plane/model-aliases/types.ts'; import { latencyBucketForMs } from '../shared/performance-histogram.ts'; import { generateSessionToken } from '../shared/session-tokens.ts'; import { assertWebSearchProviderName } from '../shared/web-search-providers.ts'; @@ -896,6 +898,7 @@ export class InMemoryRepo implements Repo { proxyBackoffs: ProxyBackoffRepo; responsesItems: ResponsesItemsRepo; responsesSnapshots: ResponsesSnapshotsRepo; + modelAliases: ModelAliasesRepo; constructor() { this.users = new MemoryUsersRepo(); @@ -911,5 +914,21 @@ export class InMemoryRepo implements Repo { this.proxyBackoffs = new MemoryProxyBackoffRepo(); this.responsesItems = new MemoryResponsesItemsRepo(); this.responsesSnapshots = new MemoryResponsesSnapshotsRepo(); + this.modelAliases = new MemoryModelAliasesRepo(); + } +} + +// Test-only in-memory backing for the alias table. The list starts empty +// and can be reseeded via `setAll` so tests exercising alias-resolution +// behavior do not depend on a live SQL database. 
+export class MemoryModelAliasesRepo implements ModelAliasesRepo { + private rows: readonly ModelAlias[] = []; + + loadAll(): Promise { + return Promise.resolve(this.rows); + } + + setAll(rows: readonly ModelAlias[]): void { + this.rows = rows; } } diff --git a/packages/gateway/src/repo/sql.ts b/packages/gateway/src/repo/sql.ts index b716d07e4..109b35024 100644 --- a/packages/gateway/src/repo/sql.ts +++ b/packages/gateway/src/repo/sql.ts @@ -7,6 +7,7 @@ import type { ApiKeyRepo, BackoffRow, CachedModelsRow, + ModelAliasesRepo, ModelsCacheRepo, PerformanceDimensions, PerformanceErrorSample, @@ -34,6 +35,8 @@ import type { UsersRepo, } from './types.ts'; import { serializeStoredConfig, serializeStoredState } from './upstream-json.ts'; +import { loadAllAliases } from '../control-plane/model-aliases/repo.ts'; +import type { ModelAlias } from '../control-plane/model-aliases/types.ts'; import { latencyBucketForMs } from '../shared/performance-histogram.ts'; import { generateSessionToken } from '../shared/session-tokens.ts'; import { assertWebSearchProviderName } from '../shared/web-search-providers.ts'; @@ -1599,6 +1602,7 @@ export class SqlRepo implements Repo { proxyBackoffs: ProxyBackoffRepo; responsesItems: ResponsesItemsRepo; responsesSnapshots: ResponsesSnapshotsRepo; + modelAliases: ModelAliasesRepo; constructor(db: SqlDatabase) { this.users = new SqlUsersRepo(db); @@ -1614,5 +1618,14 @@ export class SqlRepo implements Repo { this.proxyBackoffs = new SqlProxyBackoffRepo(db); this.responsesItems = new SqlResponsesItemsRepo(db); this.responsesSnapshots = new SqlResponsesSnapshotsRepo(db); + this.modelAliases = new SqlModelAliasesRepo(db); + } +} + +class SqlModelAliasesRepo implements ModelAliasesRepo { + constructor(private db: SqlDatabase) {} + + loadAll(): Promise { + return loadAllAliases(this.db); } } diff --git a/packages/gateway/src/repo/types.ts b/packages/gateway/src/repo/types.ts index 0341d41ef..d282aaa98 100644 --- a/packages/gateway/src/repo/types.ts 
+++ b/packages/gateway/src/repo/types.ts @@ -1,3 +1,4 @@ +import type { ModelAlias } from '../control-plane/model-aliases/types.ts'; import type { HistogramBucket } from '../shared/performance-histogram.ts'; import type { WebSearchProviderName } from '../shared/web-search-providers.ts'; import type { BillingDimension, ModelPricing } from '@floway-dev/protocols/common'; @@ -332,4 +333,11 @@ export interface Repo { proxyBackoffs: ProxyBackoffRepo; responsesItems: ResponsesItemsRepo; responsesSnapshots: ResponsesSnapshotsRepo; + modelAliases: ModelAliasesRepo; +} + +// Operator-managed alias table; small (dozens of rows at most) and read +// per request, so the repo deliberately exposes only a full-table fetch. +export interface ModelAliasesRepo { + loadAll(): Promise; } From 8305153a58fac34afd42b8c92f5a1346a83c67b4 Mon Sep 17 00:00:00 2001 From: Menci Date: Thu, 25 Jun 2026 23:12:35 +0800 Subject: [PATCH 006/170] feat(gateway): apply alias rules, synthesize /v1/models entries, set x-floway-alias MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit applyAliasRulesTo writes rule values into each inbound IR's native slot when the protocol supports the concept and the Floway extension slot otherwise. Alias values override user-supplied values per the operator-locked semantics in Goal 3 of the design. /v1/models appends alias entries with aliasedFrom carrying the target, upstream filter, rules, and conflict mode. Aliases with visibleInModelsList=false are omitted; aliases whose targets are unreachable are still listed — operator-declared, no silent hide. The Gemini /v1beta/models surface mirrors the same alias-listing policy. The x-floway-alias response header carries the matched alias name on every call served via an alias, giving callers a no-mode-required debug hook for understanding routing. 
Per-upstream sanitizers run just before each upstream HTTP call, emitting one drop-trace line per stripped extension field with the matched alias name attached. The same sanitize emission point fires for client-sent extension residue regardless of alias provenance. Embeddings, images, and /v1/completions thread aliases through resolveModelForRequest so alias-name resolution still rewrites the target id; rules don't apply to these passthrough endpoints (no protocol slots), but the matched alias name still rides out on the response header, and one drop-trace line per declared rule lands so an operator can confirm the rewrite ran. Side touches: - ChatCandidate replaces ProviderCandidate on every chat attempt arg type, restoring the alias-metadata propagation the routing layer already preserves. - GatewayCtx grows a per-request responseHeaders bag; the http wrappers flush it onto the outgoing Response through a new finalizeGatewayResponse helper that also routes through the dump accumulator. - ProviderModelResolution gains an optional aliasName; passthrough callers read it directly off the resolved match. - pushInterpretation's onConflict switch grows an assertNever default.
--- .../src/control-plane/model-aliases/repo.ts | 4 +- .../control-plane/model-aliases/repo_test.ts | 35 +-- .../src/control-plane/model-aliases/types.ts | 4 + .../chat/chat-completions/attempt.ts | 10 +- .../chat/chat-completions/attempt_test.ts | 1 + .../data-plane/chat/chat-completions/http.ts | 6 +- .../demote-developer-to-system_test.ts | 1 + .../demote-interleaved-system-to-user_test.ts | 1 + ...le-reasoning-on-forced-tool-choice_test.ts | 1 + .../include-usage-stream-options_test.ts | 1 + .../interceptors/normalize-usage_test.ts | 1 + .../vendor-deepseek-normalize_test.ts | 1 + .../vendor-kimi-normalize_test.ts | 1 + .../vendor-qwen-normalize_test.ts | 1 + .../data-plane/chat/chat-completions/serve.ts | 6 + .../chat/chat-completions/serve_test.ts | 1 + .../src/data-plane/chat/gemini/attempt.ts | 6 +- .../data-plane/chat/gemini/attempt_test.ts | 1 + .../src/data-plane/chat/gemini/http.ts | 10 +- .../strip-safety-settings_test.ts | 1 + .../strip-unsupported-part-fields_test.ts | 1 + .../strip-unsupported-tools_test.ts | 1 + .../suppress-thought-parts_test.ts | 1 + .../data-plane/chat/gemini/respond_test.ts | 1 + .../src/data-plane/chat/gemini/serve.ts | 8 + .../src/data-plane/chat/gemini/serve_test.ts | 1 + .../src/data-plane/chat/messages/attempt.ts | 28 ++- .../data-plane/chat/messages/attempt_test.ts | 1 + .../src/data-plane/chat/messages/http.ts | 8 +- .../src/data-plane/chat/messages/http_test.ts | 65 +++++- .../demote-interleaved-system-to-user_test.ts | 1 + ...le-reasoning-on-forced-tool-choice_test.ts | 1 + .../strip-billing-attribution_test.ts | 1 + .../interceptors/web-search-shim_test.ts | 1 + .../data-plane/chat/messages/respond_test.ts | 1 + .../src/data-plane/chat/messages/serve.ts | 13 ++ .../data-plane/chat/messages/serve_test.ts | 1 + .../src/data-plane/chat/responses/attempt.ts | 15 +- .../data-plane/chat/responses/attempt_test.ts | 1 + .../src/data-plane/chat/responses/http.ts | 14 +- .../canonicalize-encrypted-content_test.ts | 1 + 
.../demote-developer-to-system_test.ts | 1 + .../demote-interleaved-system-to-user_test.ts | 1 + ...le-reasoning-on-forced-tool-choice_test.ts | 1 + .../interceptors/retry-cyber-policy_test.ts | 1 + .../interceptors/server-tool-shim_test.ts | 2 + .../image-generation-integration_test.ts | 1 + .../server-tools/image-generation.ts | 9 +- .../server-tools/image-generation_test.ts | 1 + .../vendor-deepseek-normalize_test.ts | 1 + .../vendor-qwen-normalize_test.ts | 1 + .../src/data-plane/chat/responses/serve.ts | 12 + .../data-plane/chat/responses/serve_test.ts | 1 + .../src/data-plane/chat/shared/gateway-ctx.ts | 23 +- .../data-plane/chat/shared/respond_test.ts | 1 + .../src/data-plane/chat/shared/sanitize.ts | 10 + .../chat/shared/upstream-telemetry_test.ts | 1 + .../src/data-plane/model-aliases/apply.ts | 105 +++++++++ .../data-plane/model-aliases/apply_test.ts | 218 ++++++++++++++++++ .../data-plane/model-aliases/match_test.ts | 1 + .../gateway/src/data-plane/models/gemini.ts | 29 ++- .../gateway/src/data-plane/models/load.ts | 39 +++- .../gateway/src/data-plane/models/serve.ts | 4 +- .../src/data-plane/models/serve_test.ts | 178 ++++++++++++++ .../src/data-plane/providers/registry.ts | 18 +- .../src/data-plane/providers/registry_test.ts | 2 + .../data-plane/shared/passthrough-serve.ts | 40 +++- .../gateway/src/test-helpers/gateway-ctx.ts | 1 + packages/protocols/src/common/models.ts | 24 ++ packages/translate/package.json | 3 +- 70 files changed, 918 insertions(+), 69 deletions(-) create mode 100644 packages/gateway/src/data-plane/model-aliases/apply.ts create mode 100644 packages/gateway/src/data-plane/model-aliases/apply_test.ts diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts index 70024e0cd..4c13cd09b 100644 --- a/packages/gateway/src/control-plane/model-aliases/repo.ts +++ b/packages/gateway/src/control-plane/model-aliases/repo.ts @@ -8,13 +8,14 @@ interface ModelAliasRow { 
rules_json: string; visible_in_models_list: number; on_conflict: OnConflict; + created_at: number; } // The model_aliases table is operator-managed and small (dozens of rows at // most), so the data plane reads the full table per request — no cache layer. export const loadAllAliases = async (db: SqlDatabase): Promise => { const { results } = await db - .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict FROM model_aliases') + .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at FROM model_aliases') .all(); return results.map(toModelAlias); }; @@ -26,6 +27,7 @@ const toModelAlias = (row: ModelAliasRow): ModelAlias => ({ rules: parseJsonField(row.alias, 'rules_json', row.rules_json), visibleInModelsList: row.visible_in_models_list === 1, onConflict: row.on_conflict, + createdAt: row.created_at, }); const parseJsonField = (alias: string, field: string, raw: string): T => { diff --git a/packages/gateway/src/control-plane/model-aliases/repo_test.ts b/packages/gateway/src/control-plane/model-aliases/repo_test.ts index a4da76fde..ff1efa046 100644 --- a/packages/gateway/src/control-plane/model-aliases/repo_test.ts +++ b/packages/gateway/src/control-plane/model-aliases/repo_test.ts @@ -8,17 +8,21 @@ test('loadAllAliases reads the seed row from a freshly migrated database', async const db = await createSqliteTestDb(); const aliases = await loadAllAliases(db); - - assertEquals(aliases, [ - { - alias: 'codex-auto-review', - targetModelId: 'gpt-5.4', - upstreamIds: [], - rules: { reasoning: { effort: 'low' } }, - visibleInModelsList: true, - onConflict: 'real-only', - }, - ]); + assertEquals(aliases.length, 1); + const [seed] = aliases; + // `createdAt` rides off the migration's `DEFAULT (unixepoch())`, so the + // exact value is wall-clock dependent. Assert structurally that it landed + // as a number and strip it before comparing the rest of the row. 
+ assertEquals(typeof seed.createdAt, 'number'); + const { createdAt: _createdAt, ...withoutTimestamp } = seed; + assertEquals(withoutTimestamp, { + alias: 'codex-auto-review', + targetModelId: 'gpt-5.4', + upstreamIds: [], + rules: { reasoning: { effort: 'low' } }, + visibleInModelsList: true, + onConflict: 'real-only', + }); }); test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_models_list to a boolean', async () => { @@ -26,7 +30,7 @@ test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_mo await db.exec('DELETE FROM model_aliases'); await db .prepare( - 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)', + 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)', ) .bind( 'opus-xhigh', @@ -35,13 +39,14 @@ test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_mo '{"reasoning":{"effort":"xhigh"},"anthropicBeta":["fine-grained-tool-streaming"]}', 0, 'alias-only', + 1_700_000_000, ) .run(); await db .prepare( - 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)', + 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)', ) - .bind('gpt-5-fast', 'gpt-5.4', '[]', '{"serviceTier":"priority"}', 1, 'both-alias-first') + .bind('gpt-5-fast', 'gpt-5.4', '[]', '{"serviceTier":"priority"}', 1, 'both-alias-first', 1_700_000_001) .run(); const aliases = await loadAllAliases(db); @@ -54,6 +59,7 @@ test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_mo rules: { reasoning: { effort: 'xhigh' }, anthropicBeta: ['fine-grained-tool-streaming'] }, visibleInModelsList: false, onConflict: 
'alias-only', + createdAt: 1_700_000_000, }); assertEquals(byAlias.get('gpt-5-fast'), { alias: 'gpt-5-fast', @@ -62,6 +68,7 @@ test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_mo rules: { serviceTier: 'priority' }, visibleInModelsList: true, onConflict: 'both-alias-first', + createdAt: 1_700_000_001, }); }); diff --git a/packages/gateway/src/control-plane/model-aliases/types.ts b/packages/gateway/src/control-plane/model-aliases/types.ts index 8e1bff467..7594ceff6 100644 --- a/packages/gateway/src/control-plane/model-aliases/types.ts +++ b/packages/gateway/src/control-plane/model-aliases/types.ts @@ -23,4 +23,8 @@ export type ModelAlias = { readonly rules: ModelAliasRules; readonly visibleInModelsList: boolean; readonly onConflict: OnConflict; + // Unix epoch seconds stamped at row insertion. Surfaced on the + // `/v1/models` synthesized alias entry so callers see when an alias was + // declared, matching the `created` semantics of the real entries. + readonly createdAt: number; }; diff --git a/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts b/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts index 5862192e5..71c3a8288 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts @@ -5,9 +5,10 @@ import { responsesAttempt } from '../responses/attempt.ts'; import { rewriteStoredResponsesItemsForCandidate } from '../responses/items/rewrite.ts'; import type { StatefulResponsesStore } from '../responses/items/store.ts'; import { providerStreamResultToExecuteResult, buildUpstreamCallOptions } from '../shared/attempt-helpers.ts'; -import type { ProviderCandidate } from '../shared/candidates.ts'; +import type { ChatCandidate } from '../shared/candidates.ts'; import { tryCatchChatServeFailure } from '../shared/errors.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; +import { createSanitizeTraceCtx, 
sanitizeForChatCompletionsUpstream } from '../shared/sanitize.ts'; import { traverseTranslation } from '../shared/translate-traverse.ts'; import { createUpstreamLatencyRecorder } from '../shared/upstream-telemetry.ts'; import { runInterceptors } from '@floway-dev/interceptor'; @@ -21,7 +22,7 @@ export interface ChatCompletionsAttemptArgs { readonly payload: ChatCompletionsPayload; readonly ctx: GatewayCtx; readonly store: StatefulResponsesStore; - readonly candidate: ProviderCandidate; + readonly candidate: ChatCandidate; readonly headers: Headers; } @@ -67,7 +68,7 @@ export const chatCompletionsAttempt = { const rewriteOrRenderChatCompletionsFailure = async ( payload: ChatCompletionsPayload, store: StatefulResponsesStore, - candidate: ProviderCandidate, + candidate: ChatCandidate, ): Promise<{ payload: ChatCompletionsPayload; failure?: undefined } | { payload?: undefined; failure: ExecuteResult> & { type: 'api-error' } }> => { try { const rewrittenMessages = await rewriteStoredResponsesItemsForCandidate( @@ -98,10 +99,11 @@ const rewriteOrRenderChatCompletionsFailure = async ( const callChatCompletionsAsExecuteResult = async ( payload: ChatCompletionsPayload, ctx: GatewayCtx, - candidate: ProviderCandidate, + candidate: ChatCandidate, headers: Headers, ): Promise>> => { const { model: _model, ...body } = payload; + sanitizeForChatCompletionsUpstream(body as Record, createSanitizeTraceCtx(candidate.aliasName)); const recorder = createUpstreamLatencyRecorder(); const providerResult = await candidate.binding.provider.callChatCompletions( candidate.binding.upstreamModel, diff --git a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts index 747a93de5..62b814359 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts @@ -23,6 +23,7 @@ const makeGatewayCtx = (): GatewayCtx => ({ 
currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/chat-completions/http.ts b/packages/gateway/src/data-plane/chat/chat-completions/http.ts index a46d537ed..a22a86543 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/http.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/http.ts @@ -3,7 +3,7 @@ import { chatCompletionsServe } from './serve.ts'; import type { AuthedContext } from '../../../middleware/auth.ts'; import { inboundHeadersForUpstream } from '../../shared/inbound-headers.ts'; import { createNonResponsesSourceStore } from '../responses/items/store.ts'; -import { createGatewayCtxFromHono, type GatewayCtx } from '../shared/gateway-ctx.ts'; +import { createGatewayCtxFromHono, finalizeGatewayResponse, type GatewayCtx } from '../shared/gateway-ctx.ts'; import { readRequestBody, type RequestBody } from '../shared/request-body.ts'; import { providerModelsUnavailableResponse } from '../shared/upstream-models-error.ts'; import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions'; @@ -24,7 +24,7 @@ const respondWithInternalError = async (c: AuthedContext, error: unknown, reques const effectiveCtx = ctx ?? createGatewayCtxFromHono(c, { wantsStream: false, requestBody }); const result = internalErrorResult(502, toInternalDebugError(error)); const { response } = await respondChatCompletions(c, result, false, false, effectiveCtx); - return (effectiveCtx.dump?.finalize(response) ?? 
response); + return finalizeGatewayResponse(effectiveCtx, response); }; export const chatCompletionsHttp = { @@ -44,7 +44,7 @@ export const chatCompletionsHttp = { const store = createNonResponsesSourceStore(ctx.apiKeyId); const result = await chatCompletionsServe.generate({ payload, ctx, store, headers: inboundHeadersForUpstream(c) }); const { response } = await respondChatCompletions(c, result, wantsStream, includeUsageChunk, ctx); - return (ctx.dump?.finalize(response) ?? response); + return finalizeGatewayResponse(ctx, response); } catch (error) { return await respondWithInternalError(c, error, requestBody, ctx); } diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts index 1d3c8252e..83d9bccb7 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts @@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts index c7f560cb3..156389a46 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts @@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git 
a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts index 7ba1962fd..e46726510 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts @@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts index 1a28fef4a..e3e4147a2 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts @@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts index 0969e8d8e..0b6fed4f1 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts @@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git 
a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts index d72f890f5..81be2c3ab 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts @@ -20,6 +20,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts index 74de17c38..1cfc304b7 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts @@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts index 4f7197da8..0506a1e25 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts @@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts 
b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts index 5d27541f9..1347dd6bd 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts @@ -2,6 +2,7 @@ import { chatCompletionsAttempt } from './attempt.ts'; import { renderChatCompletionsFailure } from './errors.ts'; import { planChatCompletionsRouting } from './routing.ts'; import { getRepo } from '../../../repo/index.ts'; +import { applyAliasRulesToChatCompletions } from '../../model-aliases/apply.ts'; import type { StatefulResponsesStore } from '../responses/items/store.ts'; import { enumerateProviderCandidates } from '../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; @@ -47,6 +48,11 @@ export const chatCompletionsServe = { : { kind: 'model-missing', model: payload.model, failedUpstreams }, ); } + // Apply operator-locked alias rules to the inbound IR before the + // attempt runs its interceptor chain. The matching `x-floway-alias` + // header rides out via ctx.responseHeaders. 
+ if (candidate.aliasRules) applyAliasRulesToChatCompletions(payload, candidate.aliasRules); + if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName); return await chatCompletionsAttempt.generate({ payload, ctx, store, candidate, headers }); }, }; diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts index 616b2ba66..402803203 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts @@ -49,6 +49,7 @@ const makeGatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/gemini/attempt.ts b/packages/gateway/src/data-plane/chat/gemini/attempt.ts index 1d120ca00..880b0d62a 100644 --- a/packages/gateway/src/data-plane/chat/gemini/attempt.ts +++ b/packages/gateway/src/data-plane/chat/gemini/attempt.ts @@ -6,7 +6,7 @@ import { chatCompletionsAttempt } from '../chat-completions/attempt.ts'; import { messagesAttempt } from '../messages/attempt.ts'; import { responsesAttempt } from '../responses/attempt.ts'; import type { StatefulResponsesStore } from '../responses/items/store.ts'; -import type { ProviderCandidate } from '../shared/candidates.ts'; +import type { ChatCandidate } from '../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; import { traverseTranslation } from '../shared/translate-traverse.ts'; import { runInterceptors } from '@floway-dev/interceptor'; @@ -19,7 +19,7 @@ export interface GeminiAttemptGenerateArgs { readonly payload: GeminiPayload; readonly ctx: GatewayCtx; readonly store: StatefulResponsesStore; - readonly candidate: ProviderCandidate; + readonly candidate: ChatCandidate; readonly headers: Headers; } @@ -27,7 +27,7 @@ export interface 
GeminiAttemptCountTokensArgs { readonly payload: GeminiPayload; readonly ctx: GatewayCtx; readonly store: StatefulResponsesStore; - readonly candidate: ProviderCandidate; + readonly candidate: ChatCandidate; readonly headers: Headers; } diff --git a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts index 9d08e557f..29a4e9bc5 100644 --- a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts +++ b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts @@ -24,6 +24,7 @@ const makeGatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/gemini/http.ts b/packages/gateway/src/data-plane/chat/gemini/http.ts index 35a6e921f..8126d13e7 100644 --- a/packages/gateway/src/data-plane/chat/gemini/http.ts +++ b/packages/gateway/src/data-plane/chat/gemini/http.ts @@ -3,7 +3,7 @@ import { geminiServe } from './serve.ts'; import type { AuthedContext } from '../../../middleware/auth.ts'; import { inboundHeadersForUpstream } from '../../shared/inbound-headers.ts'; import { createNonResponsesSourceStore } from '../responses/items/store.ts'; -import { createGatewayCtxFromHono, type GatewayCtx } from '../shared/gateway-ctx.ts'; +import { createGatewayCtxFromHono, finalizeGatewayResponse, type GatewayCtx } from '../shared/gateway-ctx.ts'; import { readRequestBody, type RequestBody } from '../shared/request-body.ts'; import type { GeminiContent, GeminiPayload } from '@floway-dev/protocols/gemini'; import { internalErrorResult, ProviderModelsUnavailableError, toInternalDebugError } from '@floway-dev/provider'; @@ -66,11 +66,11 @@ const respondWithGeminiError = async ( body: new TextEncoder().encode(body), }; const { response } = await respondGemini(c, apiErrorResult, wantsStream, ctx); - return (ctx.dump?.finalize(response) ?? 
response); + return finalizeGatewayResponse(ctx, response); } const internalResult = internalErrorResult(500, toInternalDebugError(error)); const { response } = await respondGemini(c, internalResult, wantsStream, ctx); - return (ctx.dump?.finalize(response) ?? response); + return finalizeGatewayResponse(ctx, response); }; // Single entry for `/v1beta/models/:modelAction`. Splits the model and action @@ -97,7 +97,7 @@ const runGeminiGenerate = async (c: AuthedContext, model: string, wantsStream: b try { const result = await geminiServe.generate({ payload, ctx, store, model, headers: inboundHeadersForUpstream(c) }); const { response } = await respondGemini(c, result, wantsStream, ctx); - return (ctx.dump?.finalize(response) ?? response); + return finalizeGatewayResponse(ctx, response); } catch (error) { return await respondWithGeminiError(c, error, ctx, wantsStream); } @@ -113,7 +113,7 @@ const runGeminiCountTokens = async (c: AuthedContext, model: string): Promise {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts index 6a4608cf4..3b02b63f8 100644 --- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts +++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts @@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts index 7cbfc4593..6a2c20ef7 100644 --- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts +++ 
b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts @@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts index e526bb9e3..eb67a0092 100644 --- a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts +++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts @@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts index a5c887c14..31981b544 100644 --- a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts +++ b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts @@ -26,6 +26,7 @@ const ctx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts index 840da62c7..e1f61d628 100644 --- a/packages/gateway/src/data-plane/chat/gemini/serve.ts +++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts @@ -2,6 +2,7 @@ import { geminiAttempt } from './attempt.ts'; import { renderGeminiFailure } from './errors.ts'; import { planGeminiRouting } from './routing.ts'; import { getRepo } from '../../../repo/index.ts'; +import { applyAliasRulesToGemini } from '../../model-aliases/apply.ts'; import type { StatefulResponsesStore } from '../responses/items/store.ts'; import { enumerateProviderCandidates } from 
'../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; @@ -58,6 +59,11 @@ export const geminiServe = { 'generate', ); } + // Operator-locked alias rules apply to the Gemini IR before the attempt + // runs; the matching `x-floway-alias` header rides out via + // ctx.responseHeaders. + if (candidate.aliasRules) applyAliasRulesToGemini(payload, candidate.aliasRules); + if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName); return await geminiAttempt.generate({ payload, ctx, store, candidate, headers }); }, @@ -90,6 +96,8 @@ export const geminiServe = { 'countTokens', ); } + if (candidate.aliasRules) applyAliasRulesToGemini(payload, candidate.aliasRules); + if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName); return await geminiAttempt.countTokens({ payload, ctx, store, candidate, headers }); }, }; diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts index 42945a6d8..070d44471 100644 --- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts +++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts @@ -48,6 +48,7 @@ const makeGatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/messages/attempt.ts b/packages/gateway/src/data-plane/chat/messages/attempt.ts index e67387b85..3dcde67c7 100644 --- a/packages/gateway/src/data-plane/chat/messages/attempt.ts +++ b/packages/gateway/src/data-plane/chat/messages/attempt.ts @@ -6,10 +6,11 @@ import { responsesAttempt } from '../responses/attempt.ts'; import { rewriteStoredResponsesItemsForCandidate } from '../responses/items/rewrite.ts'; import type { StatefulResponsesStore } from '../responses/items/store.ts'; import { providerStreamResultToExecuteResult, buildUpstreamCallOptions } from 
'../shared/attempt-helpers.ts'; -import type { ProviderCandidate } from '../shared/candidates.ts'; +import type { ChatCandidate } from '../shared/candidates.ts'; import { tryCatchChatServeFailure } from '../shared/errors.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; import { plainResultFromResponse } from '../shared/respond.ts'; +import { sanitizeForMessagesUpstream, createSanitizeTraceCtx } from '../shared/sanitize.ts'; import { traverseTranslation } from '../shared/translate-traverse.ts'; import { createUpstreamLatencyRecorder } from '../shared/upstream-telemetry.ts'; import { runInterceptors } from '@floway-dev/interceptor'; @@ -17,13 +18,14 @@ import type { ProtocolFrame } from '@floway-dev/protocols/common'; import type { MessagesMessage, MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages'; import { type ExecuteResult, type PlainResult } from '@floway-dev/provider'; import { translateMessagesViaChatCompletions, translateMessagesViaResponses } from '@floway-dev/translate'; +import { applyAnthropicBetaToHeaders } from '@floway-dev/translate/via-messages/anthropic-extensions'; import { messagesViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items'; export interface MessagesAttemptGenerateArgs { readonly payload: MessagesPayload; readonly ctx: GatewayCtx; readonly store: StatefulResponsesStore; - readonly candidate: ProviderCandidate; + readonly candidate: ChatCandidate; readonly headers: Headers; } @@ -31,7 +33,7 @@ export interface MessagesAttemptCountTokensArgs { readonly payload: MessagesPayload; readonly ctx: GatewayCtx; readonly store: StatefulResponsesStore; - readonly candidate: ProviderCandidate; + readonly candidate: ChatCandidate; readonly headers: Headers; } @@ -48,12 +50,21 @@ export const messagesAttempt = { return await runInterceptors(invocation, ctx, messagesInterceptors, async () => { if (candidate.targetApi === 'messages') { const { model: _model, ...body } = 
invocation.payload; + // The candidate's `anthropic_beta` alias rule merges onto the + // anthropic-beta header (the wire path; the body slot is rejected + // by the http entry). Body extensions are stripped just before the + // upstream call, after every interceptor has had its say. + const outgoingHeaders = new Headers(invocation.headers); + if (candidate.aliasRules?.anthropicBeta?.length) { + applyAnthropicBetaToHeaders(outgoingHeaders, candidate.aliasRules.anthropicBeta); + } + sanitizeForMessagesUpstream(body as Record, createSanitizeTraceCtx(candidate.aliasName)); const recorder = createUpstreamLatencyRecorder(); const providerResult = await candidate.binding.provider.callMessages( candidate.binding.upstreamModel, body, ctx.abortSignal, - buildUpstreamCallOptions(candidate, ctx, recorder.record, invocation.headers), + buildUpstreamCallOptions(candidate, ctx, recorder.record, outgoingHeaders), ); return await providerStreamResultToExecuteResult(providerResult, candidate, ctx, recorder); } @@ -98,11 +109,16 @@ export const messagesAttempt = { const recorder = createUpstreamLatencyRecorder(); const response = await runInterceptors(invocation, ctx, messagesCountTokensInterceptors, async () => { const { model: _model, ...body } = invocation.payload; + const outgoingHeaders = new Headers(invocation.headers); + if (candidate.aliasRules?.anthropicBeta?.length) { + applyAnthropicBetaToHeaders(outgoingHeaders, candidate.aliasRules.anthropicBeta); + } + sanitizeForMessagesUpstream(body as Record, createSanitizeTraceCtx(candidate.aliasName)); const { response } = await candidate.binding.provider.callMessagesCountTokens( candidate.binding.upstreamModel, body, ctx.abortSignal, - buildUpstreamCallOptions(candidate, ctx, recorder.record, invocation.headers), + buildUpstreamCallOptions(candidate, ctx, recorder.record, outgoingHeaders), ); return response; }); @@ -124,7 +140,7 @@ export const messagesAttempt = { const rewriteOrRenderMessagesFailure = async ( payload: 
MessagesPayload, store: StatefulResponsesStore, - candidate: ProviderCandidate, + candidate: ChatCandidate, ): Promise<{ payload: MessagesPayload; failure?: undefined } | { payload?: undefined; failure: ExecuteResult> & { type: 'api-error' } }> => { try { const rewrittenMessages = await rewriteStoredResponsesItemsForCandidate( diff --git a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts index 2cd89323e..f9192e289 100644 --- a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts +++ b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts @@ -23,6 +23,7 @@ const makeGatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/messages/http.ts b/packages/gateway/src/data-plane/chat/messages/http.ts index 8dfc4dc6e..b138a9ccf 100644 --- a/packages/gateway/src/data-plane/chat/messages/http.ts +++ b/packages/gateway/src/data-plane/chat/messages/http.ts @@ -3,7 +3,7 @@ import { messagesServe } from './serve.ts'; import type { AuthedContext } from '../../../middleware/auth.ts'; import { inboundHeadersForUpstream } from '../../shared/inbound-headers.ts'; import { createNonResponsesSourceStore } from '../responses/items/store.ts'; -import { createGatewayCtxFromHono, type GatewayCtx } from '../shared/gateway-ctx.ts'; +import { createGatewayCtxFromHono, finalizeGatewayResponse, type GatewayCtx } from '../shared/gateway-ctx.ts'; import { readRequestBody, type RequestBody } from '../shared/request-body.ts'; import { providerModelsUnavailableResponse } from '../shared/upstream-models-error.ts'; import type { MessagesPayload } from '@floway-dev/protocols/messages'; @@ -44,7 +44,7 @@ const respondWithInternalError = async (c: AuthedContext, error: unknown, reques const effectiveCtx = ctx ?? 
createGatewayCtxFromHono(c, { wantsStream: false, requestBody }); const result = internalErrorResult(502, toInternalDebugError(error)); const { response } = await respondMessages(c, result, false, effectiveCtx); - return (effectiveCtx.dump?.finalize(response) ?? response); + return finalizeGatewayResponse(effectiveCtx, response); }; const parsePayload = (requestBody: RequestBody): MessagesPayload => @@ -64,7 +64,7 @@ export const messagesHttp = { const store = createNonResponsesSourceStore(ctx.apiKeyId); const result = await messagesServe.generate({ payload, ctx, store, headers: inboundHeadersForUpstream(c) }); const { response } = await respondMessages(c, result, wantsStream, ctx); - return (ctx.dump?.finalize(response) ?? response); + return finalizeGatewayResponse(ctx, response); } catch (error) { return await respondWithInternalError(c, error, requestBody, ctx); } @@ -82,7 +82,7 @@ export const messagesHttp = { const store = createNonResponsesSourceStore(ctx.apiKeyId); const result = await messagesServe.countTokens({ payload, ctx, store, headers: inboundHeadersForUpstream(c) }); const { response } = await respondMessages(c, result, false, ctx); - return (ctx.dump?.finalize(response) ?? 
response); + return finalizeGatewayResponse(ctx, response); } catch (error) { return await respondWithInternalError(c, error, requestBody, ctx); } diff --git a/packages/gateway/src/data-plane/chat/messages/http_test.ts b/packages/gateway/src/data-plane/chat/messages/http_test.ts index 844a05177..cf32509d2 100644 --- a/packages/gateway/src/data-plane/chat/messages/http_test.ts +++ b/packages/gateway/src/data-plane/chat/messages/http_test.ts @@ -5,13 +5,13 @@ import type { AuthVars } from '../../../middleware/auth.ts'; import { initRepo } from '../../../repo/index.ts'; import { InMemoryRepo } from '../../../repo/memory.ts'; import type { ApiKey, User } from '../../../repo/types.ts'; -import type { ProviderCandidate } from '../shared/candidates.ts'; +import type { ChatCandidate } from '../shared/candidates.ts'; import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common'; import type { MessagesStreamEvent } from '@floway-dev/protocols/messages'; import { directFetcher, type ProviderCallResult, type ProviderStreamResult, type UpstreamCallOptions } from '@floway-dev/provider'; import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils'; -const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = []; +const candidatesQueue: { readonly candidates: readonly ChatCandidate[]; readonly sawModel: boolean }[] = []; vi.mock('../shared/candidates.ts', async importOriginal => { const original = await importOriginal(); return { @@ -28,7 +28,7 @@ const { messagesHttp } = await import('./http.ts'); const API_KEY_ID = 'key_messages_http_test'; -const queueCandidates = (candidates: readonly ProviderCandidate[], sawModel = candidates.length > 0): void => { +const queueCandidates = (candidates: readonly ChatCandidate[], sawModel = candidates.length > 0): void => { candidatesQueue.push({ candidates, sawModel }); }; @@ -104,7 +104,7 @@ const makeCandidate = (overrides: { 
upstream?: string; callMessages?: (model: unknown, body: unknown, signal?: AbortSignal, opts?: UpstreamCallOptions) => Promise>; callMessagesCountTokens?: (model: unknown, body: unknown, signal?: AbortSignal, opts?: UpstreamCallOptions) => Promise; -} = {}): ProviderCandidate => { +} = {}): ChatCandidate => { const upstream = overrides.upstream ?? 'up_test'; const upstreamModel = stubUpstreamModel(); const provider = stubProvider({ @@ -272,3 +272,60 @@ test('POST /v1/messages forwards upstream response headers end-to-end (non-strea assertEquals(response.headers.get('anthropic-ratelimit-unified-status'), 'allowed'); assertEquals(response.headers.get('cf-ray'), 'cf_ray_e2e'); }); + +test('POST /v1/messages stamps x-floway-alias when the candidate is alias-matched', async () => { + installRepo(); + const callMessages = vi.fn(async (): Promise> => ({ + ok: true, events: makeProtocolFrames(makeMessagesEvents()), modelKey: 'k', headers: new Headers(), + })); + const candidate = makeCandidate({ callMessages }); + queueCandidates([{ ...candidate, aliasRules: { reasoning: { effort: 'low' } }, aliasName: 'codex-auto-review' }]); + + const response = await makeApp().request('/v1/messages', { + method: 'POST', + headers: { 'content-type': 'application/json' }, + body: JSON.stringify({ model: 'codex-auto-review', max_tokens: 32, messages: [{ role: 'user', content: 'hello' }] }), + }); + + assertEquals(response.status, 200); + assertEquals(response.headers.get('x-floway-alias'), 'codex-auto-review'); +}); + +test('POST /v1/messages does not set x-floway-alias when no alias matched', async () => { + installRepo(); + const callMessages = vi.fn(async (): Promise> => ({ + ok: true, events: makeProtocolFrames(makeMessagesEvents()), modelKey: 'k', headers: new Headers(), + })); + queueCandidates([makeCandidate({ callMessages })]); + + const response = await makeApp().request('/v1/messages', { + method: 'POST', + headers: { 'content-type': 'application/json' }, + body: JSON.stringify({ 
model: 'test-model', max_tokens: 32, messages: [{ role: 'user', content: 'hello' }] }), + }); + + assertEquals(response.status, 200); + assertEquals(response.headers.get('x-floway-alias'), null); +}); + +test('POST /v1/messages applies alias reasoning.effort onto output_config before upstream call', async () => { + installRepo(); + const observedBodies: { output_config?: { effort?: string } }[] = []; + const callMessages = vi.fn(async (_model: unknown, body: unknown): Promise> => { + observedBodies.push(body as { output_config?: { effort?: string } }); + return { ok: true, events: makeProtocolFrames(makeMessagesEvents()), modelKey: 'k', headers: new Headers() }; + }); + const candidate = makeCandidate({ callMessages }); + queueCandidates([{ ...candidate, aliasRules: { reasoning: { effort: 'high' } }, aliasName: 'alias-x' }]); + + const response = await makeApp().request('/v1/messages', { + method: 'POST', + headers: { 'content-type': 'application/json' }, + body: JSON.stringify({ model: 'alias-x', max_tokens: 32, messages: [{ role: 'user', content: 'hello' }] }), + }); + + assertEquals(response.status, 200); + const observed = observedBodies[0]; + if (observed === undefined) throw new Error('expected callMessages to receive a body'); + assertEquals(observed.output_config?.effort, 'high'); +}); diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts index 7fcb07153..9df67c5c0 100644 --- a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts +++ b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts @@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git 
a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts index 7ed0ca556..7b7045355 100644 --- a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts +++ b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts @@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts index 3c74ea4a9..8ae90e232 100644 --- a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts +++ b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts @@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts index 27c5cad83..dca97addd 100644 --- a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts +++ b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts @@ -58,6 +58,7 @@ const gatewayCtx = (apiKeyId: string = 'test-key'): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/messages/respond_test.ts b/packages/gateway/src/data-plane/chat/messages/respond_test.ts index 
dfd3b10e3..79d0a9db6 100644 --- a/packages/gateway/src/data-plane/chat/messages/respond_test.ts +++ b/packages/gateway/src/data-plane/chat/messages/respond_test.ts @@ -534,6 +534,7 @@ const makeRespondCtx = (): GatewayCtx => ({ wantsStream: false, runtimeLocation: 'TEST', backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, currentColo: 'TEST', dump: null, diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts index 719091768..30282afd4 100644 --- a/packages/gateway/src/data-plane/chat/messages/serve.ts +++ b/packages/gateway/src/data-plane/chat/messages/serve.ts @@ -2,6 +2,7 @@ import { messagesAttempt } from './attempt.ts'; import { renderMessagesFailure } from './errors.ts'; import { planMessagesRouting } from './routing.ts'; import { getRepo } from '../../../repo/index.ts'; +import { applyAliasRulesToMessages } from '../../model-aliases/apply.ts'; import type { StatefulResponsesStore } from '../responses/items/store.ts'; import { enumerateProviderCandidates } from '../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; @@ -55,6 +56,13 @@ export const messagesServe = { 'generate', ); } + // Operator-locked alias rules go onto the inbound IR before the attempt + // begins so the per-protocol interceptor chain (and any downstream + // translate pass) sees the already-injected fields. The matching + // `x-floway-alias` response header is staged on the gateway-stamped + // header set; the http wrapper flushes it onto the outgoing Response. 
+ if (candidate.aliasRules) applyAliasRulesToMessages(payload, candidate.aliasRules); + if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName); return await messagesAttempt.generate({ payload, ctx, store, candidate, headers }); }, @@ -84,6 +92,11 @@ export const messagesServe = { 'countTokens', ); } + // count_tokens carries the same alias semantics as generate — operator + // rules apply uniformly regardless of endpoint, and the response header + // rides out the same way. + if (candidate.aliasRules) applyAliasRulesToMessages(payload, candidate.aliasRules); + if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName); return await messagesAttempt.countTokens({ payload, ctx, store, candidate, headers }); }, }; diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts index 3bf0faace..734bad296 100644 --- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts +++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts @@ -46,6 +46,7 @@ const makeGatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/responses/attempt.ts b/packages/gateway/src/data-plane/chat/responses/attempt.ts index db7220990..c0fb6a902 100644 --- a/packages/gateway/src/data-plane/chat/responses/attempt.ts +++ b/packages/gateway/src/data-plane/chat/responses/attempt.ts @@ -10,9 +10,10 @@ import { recordPerformanceLatency, requireRecordedDurationMs } from '../../share import { chatCompletionsAttempt } from '../chat-completions/attempt.ts'; import { messagesAttempt } from '../messages/attempt.ts'; import { providerStreamResultToExecuteResult, buildUpstreamCallOptions, telemetryModelIdentity } from '../shared/attempt-helpers.ts'; -import type { ProviderCandidate } from '../shared/candidates.ts'; 
+import type { ChatCandidate } from '../shared/candidates.ts'; import { tryCatchChatServeFailure } from '../shared/errors.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; +import { createSanitizeTraceCtx, sanitizeForResponsesUpstream } from '../shared/sanitize.ts'; import { traverseTranslation } from '../shared/translate-traverse.ts'; import { createUpstreamLatencyRecorder, recordUpstreamHttpFailure, upstreamPerformanceContext } from '../shared/upstream-telemetry.ts'; import { runInterceptors } from '@floway-dev/interceptor'; @@ -26,7 +27,7 @@ export interface ResponsesAttemptGenerateArgs { readonly payload: ResponsesPayload; readonly ctx: GatewayCtx; readonly store: StatefulResponsesStore; - readonly candidate: ProviderCandidate; + readonly candidate: ChatCandidate; // Native HTTP/WS entry passes 'append'; the cross-protocol translation-in // path (another protocol's attempt translating into Responses) passes // 'none' so the outer source owns snapshot persistence. @@ -38,7 +39,7 @@ export interface ResponsesAttemptCompactArgs { readonly payload: ResponsesPayload; readonly ctx: GatewayCtx; readonly store: StatefulResponsesStore; - readonly candidate: ProviderCandidate; + readonly candidate: ChatCandidate; readonly headers: Headers; } @@ -148,7 +149,7 @@ type RewriteOutcome = const rewriteOrRenderFailure = async ( payload: ResponsesPayload, store: StatefulResponsesStore, - candidate: ProviderCandidate, + candidate: ChatCandidate, ): Promise => { try { return await rewriteResponsesItemsForCandidate(payload, store, candidate); @@ -185,12 +186,13 @@ const dispatchResponses = async ( payload: ResponsesPayload, ctx: GatewayCtx, store: StatefulResponsesStore, - candidate: ProviderCandidate, + candidate: ChatCandidate, headers: Headers, ): Promise>> => { switch (candidate.targetApi) { case 'responses': { const { model: _model, ...body } = payload; + sanitizeForResponsesUpstream(body as Record, createSanitizeTraceCtx(candidate.aliasName)); const recorder = 
createUpstreamLatencyRecorder(); const providerResult = await candidate.binding.provider.callResponses( candidate.binding.upstreamModel, @@ -236,10 +238,11 @@ const dispatchResponses = async ( const callResponsesCompactAsExecuteResult = async ( payload: ResponsesPayload, ctx: GatewayCtx, - candidate: ProviderCandidate, + candidate: ChatCandidate, headers: Headers, ): Promise>> => { const { model: _model, stream: _stream, store: _store, ...body } = payload; + sanitizeForResponsesUpstream(body as Record, createSanitizeTraceCtx(candidate.aliasName)); const recorder = createUpstreamLatencyRecorder(); const providerResult = await candidate.binding.provider.callResponsesCompact( candidate.binding.upstreamModel, diff --git a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts index d698b5189..a952627b4 100644 --- a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts @@ -25,6 +25,7 @@ const makeGatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/responses/http.ts b/packages/gateway/src/data-plane/chat/responses/http.ts index dc6e1694b..54c497a0a 100644 --- a/packages/gateway/src/data-plane/chat/responses/http.ts +++ b/packages/gateway/src/data-plane/chat/responses/http.ts @@ -5,7 +5,7 @@ import { responsesServe } from './serve.ts'; import type { AuthedContext } from '../../../middleware/auth.ts'; import { CODEX_AUTO_REVIEW_ALIAS, CODEX_AUTO_REVIEW_TARGET } from '../../codex/auto-review-alias.ts'; import { inboundHeadersForUpstream } from '../../shared/inbound-headers.ts'; -import { createGatewayCtxFromHono, type GatewayCtx } from '../shared/gateway-ctx.ts'; +import { createGatewayCtxFromHono, finalizeGatewayResponse, type GatewayCtx } from '../shared/gateway-ctx.ts'; 
import { readRequestBody, type RequestBody } from '../shared/request-body.ts'; import { providerModelsUnavailableResponse } from '../shared/upstream-models-error.ts'; import type { ResponsesPayload } from '@floway-dev/protocols/responses'; @@ -57,7 +57,7 @@ const respondWithInternalError = async (c: AuthedContext, error: unknown, reques const effectiveCtx = ctx ?? createGatewayCtxFromHono(c, { wantsStream: false, requestBody }); const result = internalErrorResult(502, toInternalDebugError(error)); const { response } = await respondResponses(c, result, false, effectiveCtx); - return (effectiveCtx.dump?.finalize(response) ?? response); + return finalizeGatewayResponse(effectiveCtx, response); }; const parsePayload = (requestBody: RequestBody, stampReasoningEffort: boolean): ResponsesPayload => @@ -74,12 +74,12 @@ export const responsesHttp = { const store = createResponsesHttpStore(ctx.apiKeyId, payload.store ?? undefined); const result = await responsesServe.generate({ payload, ctx, store, snapshotMode: payload.store === false ? 'none' : 'append', headers: inboundHeadersForUpstream(c) }); const { response } = await respondResponses(c, result, wantsStream, ctx); - return (ctx.dump?.finalize(response) ?? response); + return finalizeGatewayResponse(ctx, response); } catch (error) { if (error instanceof PreviousResponseNotFoundError) { const response = previousResponseNotFoundResponse(error.previousResponseId); ctx?.dump?.error('gateway'); - return (ctx?.dump?.finalize(response) ?? response); + return ctx ? finalizeGatewayResponse(ctx, response) : response; } return await respondWithInternalError(c, error, requestBody, ctx); } @@ -96,15 +96,15 @@ export const responsesHttp = { if (result.type === 'result') { ctx.dump?.success(result.modelIdentity, result.usage); const compactResponse = Response.json(result.result); - return (ctx.dump?.finalize(compactResponse) ?? 
compactResponse); + return finalizeGatewayResponse(ctx, compactResponse); } const { response } = await respondResponses(c, result, false, ctx); - return (ctx.dump?.finalize(response) ?? response); + return finalizeGatewayResponse(ctx, response); } catch (error) { if (error instanceof PreviousResponseNotFoundError) { const response = previousResponseNotFoundResponse(error.previousResponseId); ctx?.dump?.error('gateway'); - return (ctx?.dump?.finalize(response) ?? response); + return ctx ? finalizeGatewayResponse(ctx, response) : response; } return await respondWithInternalError(c, error, requestBody, ctx); } diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts index de97faf65..f904f85d5 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts @@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts index 0a705fd12..a193d01db 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts @@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts 
b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts index e32752001..ae1fc3970 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts @@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts index 7c4ee2f6b..dcddbd6c8 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts @@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts index 98fa7fdcb..3cf947b01 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts @@ -45,6 +45,7 @@ const stubCtx = (overrides: { abortSignal?: AbortSignal } = {}): GatewayCtx => ( currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, ...overrides, }); diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts 
b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts index f9d5cf7b2..6688dcec7 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts @@ -348,6 +348,7 @@ const makeGatewayCtx = (apiKeyId: string = 'k1'): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }); @@ -4496,6 +4497,7 @@ test('downstream AbortSignal threads through to provider search / fetchPage and currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, abortSignal: controller.signal, }; diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts index 908b18489..46e30b043 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts @@ -144,6 +144,7 @@ const gatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts index a66fe9995..77904bb63 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts @@ -1,4 +1,5 @@ import { createPerRequestFetcher } from '../../../../../dial/per-request.ts'; +import { getRepo } 
from '../../../../../repo/index.ts'; import { sleep } from '../../../../../shared/sleep.ts'; import { resolveModelForRequest } from '../../../../providers/registry.ts'; import { appendFailedUpstreams } from '../../../../shared/failed-upstreams.ts'; @@ -535,7 +536,13 @@ const resolveImageBinding = async ( const endpointPath = isEdit ? '/images/edits' : '/images/generations'; let resolution; try { - resolution = await resolveModelForRequest(state.config.model, state.upstreamIds, fetcherForUpstream, state.backgroundScheduler); + // The image-generation server-tool runs inside a Responses request; the + // outer request's matched alias (if any) has already stamped the + // response header. Threading aliases here keeps the second + // resolveModelForRequest (for the image tool's own model id) consistent + // with how the outer LLM call resolved its candidate. + const aliases = await getRepo().modelAliases.loadAll(); + resolution = await resolveModelForRequest(state.config.model, state.upstreamIds, fetcherForUpstream, state.backgroundScheduler, aliases); } catch (e) { return { ok: false, error: serverError(e) }; } diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts index 515b4ef6c..da94b3068 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts @@ -56,6 +56,7 @@ const gatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts index fbed0749c..7db1b6360 100644 
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts @@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts index 45de471f4..23afe8462 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts @@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/responses/serve.ts b/packages/gateway/src/data-plane/chat/responses/serve.ts index bed8b6df3..e66a2a29a 100644 --- a/packages/gateway/src/data-plane/chat/responses/serve.ts +++ b/packages/gateway/src/data-plane/chat/responses/serve.ts @@ -2,6 +2,7 @@ import { responsesAttempt } from './attempt.ts'; import type { ResponsesAttemptResult } from './interceptors/types.ts'; import type { ResponsesSnapshotMode, StatefulResponsesStore } from './items/store.ts'; import { prepareResponsesServePlan } from './serve-prep.ts'; +import { applyAliasRulesToResponses } from '../../model-aliases/apply.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; import type { ProtocolFrame } from '@floway-dev/protocols/common'; import type { ResponsesPayload, ResponsesStreamEvent } from '@floway-dev/protocols/responses'; @@ -46,6 +47,11 @@ export const responsesServe = { : null, }); if (plan.kind === 'failure') return plan.result; + // Operator-locked alias rules apply to the prepared 
inbound IR before + // the attempt runs; the `x-floway-alias` header rides out via + // ctx.responseHeaders. + if (plan.candidate.aliasRules) applyAliasRulesToResponses(plan.prepared, plan.candidate.aliasRules); + if (plan.candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', plan.candidate.aliasName); const effectiveSnapshotMode: ResponsesSnapshotMode = snapshotMode !== 'none' && containsCompactionTrigger(plan.prepared.input) ? 'replace' : snapshotMode; @@ -62,6 +68,12 @@ export const responsesServe = { pickTarget: endpoints => endpoints.responses ? 'responses' : null, }); if (plan.kind === 'failure') return plan.result; + // Alias rules also apply on the compact path. The upstream compact + // endpoint silently drops fields like `reasoning` it does not honor; + // applying uniformly keeps the operator's intent expressed at the + // inbound boundary regardless of which endpoint runs. + if (plan.candidate.aliasRules) applyAliasRulesToResponses(plan.prepared, plan.candidate.aliasRules); + if (plan.candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', plan.candidate.aliasName); return await responsesAttempt.compact({ payload: plan.prepared, ctx, store, candidate: plan.candidate, headers }); }, }; diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts index 389771d69..07369504d 100644 --- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts @@ -57,6 +57,7 @@ const makeGatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts index 21f14d66d..0e199e403 100644 --- a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts +++ 
b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts @@ -21,10 +21,17 @@ export interface GatewayCtx { // provider-call boundary. readonly runtimeLocation: string; readonly currentColo: string; - // Null when the api key has no retention configured, in which case the - // respond layer's `ctx.dump?.X(...)` calls collapse to no-ops and - // `ctx.dump?.finalize(response) ?? response` returns the response unchanged. + // Null when the api key has no retention configured, in which case + // `finalizeGatewayResponse` skips the dump tee and returns the response + // itself, after the headers staged in `responseHeaders` have been applied. readonly dump: DumpAccumulator | null; + // Per-request response-header staging. The data-plane writes alias-aware + // and similar non-upstream headers here mid-request; `finalizeGatewayResponse` + // copies them onto the final outgoing Response before calling + // `dump?.finalize`. The property is readonly but the Headers object is + // mutated on purpose — the serve layer owns the chosen candidate and is + // the right seam for stamping the `x-floway-alias` header. + readonly responseHeaders: Headers; } export interface CreateGatewayCtxOptions { @@ -70,5 +77,15 @@ export const createGatewayCtxFromHono = (c: AuthedContext, opts: CreateGatewayCt runtimeLocation: colo, currentColo: colo, dump, + responseHeaders: new Headers(), }; }; + +// Copy every header staged on `ctx.responseHeaders` onto the outgoing Response +// with `headers.set` (a staged value replaces a same-named header), then run +// the dump accumulator's `finalize`; with a null `ctx.dump` the response itself +// is returned. Inbound HTTP wrappers return through this seam so alias and other gateway-stamped headers ride out uniformly across happy-path, error, and passthrough paths. NOTE(review): `response` is mutated in place — confirm every caller passes a Response whose headers are still mutable. +export const finalizeGatewayResponse = (ctx: GatewayCtx, response: Response): Response => { + for (const [name, value] of ctx.responseHeaders) response.headers.set(name, value); + return ctx.dump?.finalize(response) ??
response; +}; diff --git a/packages/gateway/src/data-plane/chat/shared/respond_test.ts b/packages/gateway/src/data-plane/chat/shared/respond_test.ts index e57f52ae4..fd506b083 100644 --- a/packages/gateway/src/data-plane/chat/shared/respond_test.ts +++ b/packages/gateway/src/data-plane/chat/shared/respond_test.ts @@ -45,6 +45,7 @@ const setup = (): Harness => { dump: null, backgroundScheduler: promise => { background.push(promise); }, requestStartedAt, + responseHeaders: new Headers(), }), }; }; diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize.ts b/packages/gateway/src/data-plane/chat/shared/sanitize.ts index 918156d16..832f8f41d 100644 --- a/packages/gateway/src/data-plane/chat/shared/sanitize.ts +++ b/packages/gateway/src/data-plane/chat/shared/sanitize.ts @@ -5,6 +5,16 @@ export interface SanitizeTraceCtx { readonly emit: (line: { alias?: string; field: string; targetProtocol: string }) => void; } +// Builds the default per-request sanitize trace context. `emit` serializes each +// trace line with JSON.stringify and writes it through console.warn under the +// `floway.alias.drop` tag. `aliasName` is copied onto the returned context only +// when it is defined, so an operator inspecting logs can correlate the drop with +// the matched alias; when no alias matched the dropped field is still reported (residue from a client-sent extension), just without alias attribution. +export const createSanitizeTraceCtx = (aliasName: string | undefined): SanitizeTraceCtx => ({ + ...(aliasName !== undefined ?
{ aliasName } : {}), + emit: line => console.warn('floway.alias.drop', JSON.stringify(line)), +}); + const stripKeys = ( body: Record, keys: readonly string[], diff --git a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts index d1d10a103..374cab4fe 100644 --- a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts +++ b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts @@ -24,6 +24,7 @@ const baseCtx = (overrides: Partial = {}): GatewayCtx => { apiKeyId: 'key_1', upstreamIds: null, wantsStream: true, + responseHeaders: new Headers(), requestStartedAt: 0, runtimeLocation: 'TEST', currentColo: 'TEST', diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts new file mode 100644 index 000000000..9a2bb5950 --- /dev/null +++ b/packages/gateway/src/data-plane/model-aliases/apply.ts @@ -0,0 +1,105 @@ +import type { ModelAliasRules } from '../../control-plane/model-aliases/types.ts'; +import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions'; +import type { GeminiPayload } from '@floway-dev/protocols/gemini'; +import type { MessagesPayload, MessagesThinkingDisplay } from '@floway-dev/protocols/messages'; +import type { ResponsesPayload } from '@floway-dev/protocols/responses'; +import { mapSummaryToAnthropicDisplay } from '@floway-dev/translate/via-messages/anthropic-extensions'; + +// Each function writes the alias rules into the inbound IR's slot best suited +// to the host protocol: native when the protocol can express the concept, +// extension otherwise. Writes overwrite any user-supplied value — aliases are +// operator-locked per Goal 3. Mapping table is the single source of truth in +// docs/superpowers/specs/2026-06-25-model-aliases-design.md.
+ +export const applyAliasRulesToChatCompletions = (payload: ChatCompletionsPayload, rules: ModelAliasRules): void => { + // Mutates `payload` in place and writes only the fields the rule defines. reasoning.effort is native; budget/adaptive/summary ride on extension slots + // because Chat Completions has no native expression for those. `adaptive` is stamped only when true; `anthropicBeta` only when non-empty, as a copy. + if (rules.reasoning?.effort !== undefined) payload.reasoning_effort = rules.reasoning.effort; + if (rules.reasoning?.budgetTokens !== undefined) payload.thinking_budget = rules.reasoning.budgetTokens; + if (rules.reasoning?.adaptive === true) payload.adaptive_thinking = true; + if (rules.reasoning?.summary !== undefined) payload.reasoning_summary = rules.reasoning.summary; + if (rules.verbosity !== undefined) payload.verbosity = rules.verbosity; + if (rules.serviceTier !== undefined) payload.service_tier = rules.serviceTier; + if (rules.anthropicSpeed !== undefined) payload.anthropic_speed = rules.anthropicSpeed; + if (rules.anthropicBeta?.length) payload.anthropic_beta = [...rules.anthropicBeta]; +}; + +export const applyAliasRulesToResponses = (payload: ResponsesPayload, rules: ModelAliasRules): void => { + // Mutates `payload` in place. reasoning.{effort, summary} and text.verbosity / service_tier are native; + // effort, summary and verbosity are spread-merged so sibling fields already on `reasoning` / `text` survive. budget/adaptive ride on extension slots; the two anthropic_* knobs only + // matter when this Responses inbound lands on a Messages upstream.
+ if (rules.reasoning?.effort !== undefined) payload.reasoning = { ...payload.reasoning, effort: rules.reasoning.effort }; + if (rules.reasoning?.summary !== undefined) payload.reasoning = { ...payload.reasoning, summary: rules.reasoning.summary }; + if (rules.reasoning?.budgetTokens !== undefined) payload.thinking_budget = rules.reasoning.budgetTokens; + if (rules.reasoning?.adaptive === true) payload.adaptive_thinking = true; + if (rules.verbosity !== undefined) payload.text = { ...payload.text, verbosity: rules.verbosity }; + if (rules.serviceTier !== undefined) payload.service_tier = rules.serviceTier; + if (rules.anthropicSpeed !== undefined) payload.anthropic_speed = rules.anthropicSpeed; + if (rules.anthropicBeta?.length) payload.anthropic_beta = [...rules.anthropicBeta]; +}; + +export const applyAliasRulesToMessages = (payload: MessagesPayload, rules: ModelAliasRules): void => { + // Mutates `payload` in place. Anthropic has natives for effort, thinking, speed, and service_tier; only + // verbosity is a Floway extension on this inbound. anthropic_beta is the + // wire header — the attempt layer reads `candidate.aliasRules.anthropicBeta` + // and merges via mergeAnthropicBetaTokens, so we do not stamp the body here. + if (rules.reasoning?.effort !== undefined) { + payload.output_config = { ...payload.output_config, effort: rules.reasoning.effort }; + } + // Adaptive wins over budgetTokens when both arrive — the write-side + // validator forbids the combination, but the apply step has to make a + // choice if both slip through and the translate-layer policy is + // adaptive-first. Either branch replaces `payload.thinking` wholesale, dropping any client-sent value.
+ if (rules.reasoning?.adaptive === true) { + payload.thinking = { type: 'adaptive' }; + } else if (rules.reasoning?.budgetTokens !== undefined) { + payload.thinking = { type: 'enabled', budget_tokens: rules.reasoning.budgetTokens }; + } + if (rules.reasoning?.summary !== undefined) { + const display = mapSummaryToAnthropicDisplay(rules.reasoning.summary); + if (display !== undefined) { + // `display` is layered onto whatever `payload.thinking` holds at this point + // (set by the adaptive/budget branch above or sent by the client). When it is + // absent, synthesize `thinking: {type:'enabled', display}` so the operator's summary intent survives — Anthropic discards `display` + // without `type`. Matches `buildMessagesThinkingFromExtensions`. + const base = payload.thinking ?? { type: 'enabled' as const }; + payload.thinking = { ...base, display: display as MessagesThinkingDisplay }; + } + } + if (rules.verbosity !== undefined) payload.verbosity = rules.verbosity; + if (rules.serviceTier !== undefined) payload.service_tier = rules.serviceTier; + if (rules.anthropicSpeed !== undefined) payload.speed = rules.anthropicSpeed; +}; + +export const applyAliasRulesToGemini = (payload: GeminiPayload, rules: ModelAliasRules): void => { + // All four reasoning knobs ride on the native thinkingConfig; verbosity and + // serviceTier ride on extension slots under generationConfig; the + // anthropic_* knobs ride on top-level extension slots so the existing + // gemini-via-messages translator picks them up there. generationConfig and thinkingConfig are shallow-copied before being written back onto `payload`; the adaptive write (`thinkingBudget = -1`) runs after the budgetTokens write, so adaptive wins when a rule carries both.
+ const hasThinking = rules.reasoning?.effort !== undefined + || rules.reasoning?.budgetTokens !== undefined + || rules.reasoning?.adaptive === true + || rules.reasoning?.summary !== undefined; + const hasGenerationConfig = hasThinking || rules.verbosity !== undefined || rules.serviceTier !== undefined; + + if (hasGenerationConfig) { + const generationConfig = { ...payload.generationConfig }; + const thinkingConfig = { ...generationConfig.thinkingConfig }; + if (rules.reasoning?.effort !== undefined) thinkingConfig.thinkingLevel = rules.reasoning.effort; + if (rules.reasoning?.budgetTokens !== undefined) thinkingConfig.thinkingBudget = rules.reasoning.budgetTokens; + if (rules.reasoning?.adaptive === true) thinkingConfig.thinkingBudget = -1; + if (rules.reasoning?.summary !== undefined) { + // Gemini exposes a single boolean for summary; map summary='omitted' to + // false and every other value (auto / concise / detailed / freeform) to + // true. Operators that want to fall back to Gemini's account default + // simply omit `reasoning.summary` from the rule.
+ thinkingConfig.includeThoughts = rules.reasoning.summary !== 'omitted'; + } + if (hasThinking) generationConfig.thinkingConfig = thinkingConfig; + if (rules.verbosity !== undefined) generationConfig.verbosity = rules.verbosity; + if (rules.serviceTier !== undefined) generationConfig.serviceTier = rules.serviceTier; + payload.generationConfig = generationConfig; + } + if (rules.anthropicSpeed !== undefined) payload.anthropicSpeed = rules.anthropicSpeed; + if (rules.anthropicBeta?.length) payload.anthropicBeta = [...rules.anthropicBeta]; +}; diff --git a/packages/gateway/src/data-plane/model-aliases/apply_test.ts b/packages/gateway/src/data-plane/model-aliases/apply_test.ts new file mode 100644 index 000000000..e05f40c91 --- /dev/null +++ b/packages/gateway/src/data-plane/model-aliases/apply_test.ts @@ -0,0 +1,218 @@ +import { describe, expect, test } from 'vitest'; + +import { + applyAliasRulesToChatCompletions, + applyAliasRulesToGemini, + applyAliasRulesToMessages, + applyAliasRulesToResponses, +} from './apply.ts'; +import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions'; +import type { GeminiPayload } from '@floway-dev/protocols/gemini'; +import type { MessagesPayload } from '@floway-dev/protocols/messages'; +import type { ResponsesPayload } from '@floway-dev/protocols/responses'; + +// Empty-shaped payload helpers; the apply functions only touch the alias-rule +// slots so the rest can stay structurally minimal.
+const cc = (overrides: Partial = {}): ChatCompletionsPayload => ({ model: 'x', messages: [], ...overrides }); +const resp = (overrides: Partial = {}): ResponsesPayload => ({ model: 'x', input: 'hi', ...overrides }); +const msg = (overrides: Partial = {}): MessagesPayload => ({ model: 'x', messages: [], max_tokens: 1, ...overrides }); +const gem = (overrides: Partial = {}): GeminiPayload => ({ ...overrides }); + +describe('applyAliasRulesToChatCompletions', () => { + test('writes effort to native reasoning_effort and overrides user value', () => { + const payload = cc({ reasoning_effort: 'low' }); + applyAliasRulesToChatCompletions(payload, { reasoning: { effort: 'high' } }); + expect(payload.reasoning_effort).toBe('high'); + }); + + test('writes budgetTokens to extension thinking_budget', () => { + const payload = cc(); + applyAliasRulesToChatCompletions(payload, { reasoning: { budgetTokens: 4096 } }); + expect(payload.thinking_budget).toBe(4096); + }); + + test('writes adaptive to extension adaptive_thinking', () => { + const payload = cc(); + applyAliasRulesToChatCompletions(payload, { reasoning: { adaptive: true } }); + expect(payload.adaptive_thinking).toBe(true); + }); + + test('writes summary to extension reasoning_summary', () => { + const payload = cc(); + applyAliasRulesToChatCompletions(payload, { reasoning: { summary: 'detailed' } }); + expect(payload.reasoning_summary).toBe('detailed'); + }); + + test('writes verbosity, serviceTier, anthropicSpeed, anthropicBeta to their slots', () => { + const payload = cc(); + applyAliasRulesToChatCompletions(payload, { + verbosity: 'low', serviceTier: 'flex', anthropicSpeed: 'fast', anthropicBeta: ['ctx-1m'], + }); + expect(payload.verbosity).toBe('low'); + expect(payload.service_tier).toBe('flex'); + expect(payload.anthropic_speed).toBe('fast'); + expect(payload.anthropic_beta).toEqual(['ctx-1m']); + }); + + test('leaves payload untouched when rules carry no fields', () => { + const payload = cc({ reasoning_effort: 
'medium', verbosity: 'high' }); + applyAliasRulesToChatCompletions(payload, {}); + expect(payload.reasoning_effort).toBe('medium'); + expect(payload.verbosity).toBe('high'); + }); +}); + +describe('applyAliasRulesToResponses', () => { + test('writes effort to native reasoning.effort and overrides user value', () => { + const payload = resp({ reasoning: { effort: 'low' } }); + applyAliasRulesToResponses(payload, { reasoning: { effort: 'high' } }); + expect(payload.reasoning?.effort).toBe('high'); + }); + + test('writes summary to native reasoning.summary', () => { + const payload = resp(); + applyAliasRulesToResponses(payload, { reasoning: { summary: 'detailed' } }); + expect(payload.reasoning?.summary).toBe('detailed'); + }); + + test('writes budgetTokens to extension thinking_budget', () => { + const payload = resp(); + applyAliasRulesToResponses(payload, { reasoning: { budgetTokens: 4096 } }); + expect(payload.thinking_budget).toBe(4096); + }); + + test('writes adaptive to extension adaptive_thinking', () => { + const payload = resp(); + applyAliasRulesToResponses(payload, { reasoning: { adaptive: true } }); + expect(payload.adaptive_thinking).toBe(true); + }); + + test('writes verbosity to native text.verbosity, preserving format', () => { + const payload = resp({ text: { format: { type: 'json_object' } } }); + applyAliasRulesToResponses(payload, { verbosity: 'low' }); + expect(payload.text?.verbosity).toBe('low'); + expect(payload.text?.format).toEqual({ type: 'json_object' }); + }); + + test('writes serviceTier to native service_tier', () => { + const payload = resp(); + applyAliasRulesToResponses(payload, { serviceTier: 'flex' }); + expect(payload.service_tier).toBe('flex'); + }); + + test('writes anthropicSpeed / anthropicBeta to extension slots', () => { + const payload = resp(); + applyAliasRulesToResponses(payload, { anthropicSpeed: 'fast', anthropicBeta: ['ctx-1m'] }); + expect(payload.anthropic_speed).toBe('fast'); + 
expect(payload.anthropic_beta).toEqual(['ctx-1m']); + }); +}); + +describe('applyAliasRulesToMessages', () => { + test('writes effort to native output_config.effort', () => { + const payload = msg(); + applyAliasRulesToMessages(payload, { reasoning: { effort: 'high' } }); + expect(payload.output_config?.effort).toBe('high'); + }); + + test('writes budgetTokens to thinking.enabled', () => { + const payload = msg(); + applyAliasRulesToMessages(payload, { reasoning: { budgetTokens: 4096 } }); + expect(payload.thinking).toEqual({ type: 'enabled', budget_tokens: 4096 }); + }); + + test('writes adaptive to thinking.type=adaptive', () => { + const payload = msg(); + applyAliasRulesToMessages(payload, { reasoning: { adaptive: true } }); + expect(payload.thinking).toEqual({ type: 'adaptive' }); + }); + + test('writes summary to thinking.display (mapped from OpenAI vocabulary)', () => { + const payload = msg({ thinking: { type: 'enabled', budget_tokens: 1024 } }); + applyAliasRulesToMessages(payload, { reasoning: { summary: 'detailed' } }); + expect(payload.thinking).toEqual({ type: 'enabled', budget_tokens: 1024, display: 'summarized' }); + }); + + test('writes anthropicSpeed to native speed', () => { + const payload = msg(); + applyAliasRulesToMessages(payload, { anthropicSpeed: 'fast' }); + expect(payload.speed).toBe('fast'); + }); + + test('writes serviceTier to native service_tier', () => { + const payload = msg(); + applyAliasRulesToMessages(payload, { serviceTier: 'priority' }); + expect(payload.service_tier).toBe('priority'); + }); + + test('writes verbosity to the extension slot', () => { + const payload = msg(); + applyAliasRulesToMessages(payload, { verbosity: 'low' }); + expect(payload.verbosity).toBe('low'); + }); + + test('adaptive overrides budgetTokens when both arrive on the same call', () => { + // The write-side validator forbids both, but if both still arrive the + // adaptive choice has to win to match the translate-layer policy. 
+ const payload = msg(); + applyAliasRulesToMessages(payload, { reasoning: { budgetTokens: 1024, adaptive: true } }); + expect(payload.thinking).toEqual({ type: 'adaptive' }); + }); +}); + +describe('applyAliasRulesToGemini', () => { + test('writes effort to generationConfig.thinkingConfig.thinkingLevel', () => { + const payload = gem(); + applyAliasRulesToGemini(payload, { reasoning: { effort: 'high' } }); + expect(payload.generationConfig?.thinkingConfig?.thinkingLevel).toBe('high'); + }); + + test('writes budgetTokens to generationConfig.thinkingConfig.thinkingBudget', () => { + const payload = gem(); + applyAliasRulesToGemini(payload, { reasoning: { budgetTokens: 4096 } }); + expect(payload.generationConfig?.thinkingConfig?.thinkingBudget).toBe(4096); + }); + + test('writes adaptive to generationConfig.thinkingConfig.thinkingBudget = -1', () => { + const payload = gem(); + applyAliasRulesToGemini(payload, { reasoning: { adaptive: true } }); + expect(payload.generationConfig?.thinkingConfig?.thinkingBudget).toBe(-1); + }); + + test('writes summary to generationConfig.thinkingConfig.includeThoughts when not omitted', () => { + const payload = gem(); + applyAliasRulesToGemini(payload, { reasoning: { summary: 'detailed' } }); + expect(payload.generationConfig?.thinkingConfig?.includeThoughts).toBe(true); + }); + + test('writes summary=omitted to generationConfig.thinkingConfig.includeThoughts=false', () => { + const payload = gem(); + applyAliasRulesToGemini(payload, { reasoning: { summary: 'omitted' } }); + expect(payload.generationConfig?.thinkingConfig?.includeThoughts).toBe(false); + }); + + test('writes verbosity to generationConfig.verbosity extension', () => { + const payload = gem(); + applyAliasRulesToGemini(payload, { verbosity: 'low' }); + expect(payload.generationConfig?.verbosity).toBe('low'); + }); + + test('writes serviceTier to generationConfig.serviceTier extension', () => { + const payload = gem(); + applyAliasRulesToGemini(payload, { serviceTier: 
'flex' }); + expect(payload.generationConfig?.serviceTier).toBe('flex'); + }); + + test('writes anthropicSpeed / anthropicBeta to top-level extension slots', () => { + const payload = gem(); + applyAliasRulesToGemini(payload, { anthropicSpeed: 'fast', anthropicBeta: ['ctx-1m'] }); + expect(payload.anthropicSpeed).toBe('fast'); + expect(payload.anthropicBeta).toEqual(['ctx-1m']); + }); + + test('preserves existing thinkingConfig entries when adding a new one', () => { + const payload = gem({ generationConfig: { thinkingConfig: { thinkingBudget: 1024 } } }); + applyAliasRulesToGemini(payload, { reasoning: { summary: 'detailed' } }); + expect(payload.generationConfig?.thinkingConfig).toEqual({ thinkingBudget: 1024, includeThoughts: true }); + }); +}); diff --git a/packages/gateway/src/data-plane/model-aliases/match_test.ts b/packages/gateway/src/data-plane/model-aliases/match_test.ts index b1dd8ff2f..7252078c9 100644 --- a/packages/gateway/src/data-plane/model-aliases/match_test.ts +++ b/packages/gateway/src/data-plane/model-aliases/match_test.ts @@ -10,6 +10,7 @@ const make = (overrides: Partial): ModelAlias => ({ rules: {}, visibleInModelsList: true, onConflict: 'real-only', + createdAt: 0, ...overrides, }); diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts index 93f4b6822..33dbefa40 100644 --- a/packages/gateway/src/data-plane/models/gemini.ts +++ b/packages/gateway/src/data-plane/models/gemini.ts @@ -1,8 +1,10 @@ import type { Context } from 'hono'; import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts'; +import type { ModelAlias } from '../../control-plane/model-aliases/types.ts'; import { createPerRequestFetcher } from '../../dial/per-request.ts'; import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts'; +import { getRepo } from '../../repo/index.ts'; import { backgroundSchedulerFromContext } from '../../runtime/background.ts'; import { getCurrentColo } from 
'../../runtime/runtime-info.ts'; import { geminiStatusForHttpStatus } from '../chat/gemini/errors.ts'; @@ -66,16 +68,36 @@ const loadGeminiModels = async ( upstreamFilter: readonly string[] | null, fetcherForUpstream: (upstreamId: string) => Fetcher, scheduler: BackgroundScheduler, + aliases: readonly ModelAlias[], ): Promise => { const models = await getInternalModels(upstreamFilter, fetcherForUpstream, scheduler); // Only chat models are representable in the Gemini /models shape. - return models.filter(model => model.kind === 'chat').map(toGeminiModel); + const realChatEntries = models.filter(model => model.kind === 'chat').map(toGeminiModel); + // Visible aliases append in `loadAllAliases` order; the Gemini surface + // carries no `aliasedFrom` extension (Gemini's `Model` resource is closed) + // so the entry advertises the alias id plus the target's display fields. + const byId = new Map(models.map(m => [m.id, m])); + const aliasEntries: GeminiModel[] = []; + for (const alias of aliases) { + if (!alias.visibleInModelsList) continue; + const target = byId.get(alias.targetModelId); + if (target && target.kind !== 'chat') continue; + aliasEntries.push(toGeminiModel({ + ...(target ?? {} as InternalModel), + id: alias.alias, + display_name: alias.alias, + kind: 'chat', + limits: target?.limits ?? 
{}, + })); + } + return [...realChatEntries, ...aliasEntries]; }; export const serveGeminiModels = async (c: Context): Promise => { try { const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw)); - return Response.json({ models: await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c)) }); + const aliases = await getRepo().modelAliases.loadAll(); + return Response.json({ models: await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), aliases) }); } catch (error) { return geminiModelLoadError(error); } @@ -88,7 +110,8 @@ export const serveGeminiModelInfo = async (c: Context): Promise => { const modelId = rawModelId.replace(/^models\//, ''); try { const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw)); - const model = (await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c))).find(candidate => candidate.baseModelId === modelId || candidate.name === `models/${modelId}`); + const aliases = await getRepo().modelAliases.loadAll(); + const model = (await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), aliases)).find(candidate => candidate.baseModelId === modelId || candidate.name === `models/${modelId}`); if (!model) return geminiError(404, `Model not found: ${modelId}`); return Response.json(model); } catch (error) { diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts index eed33c9de..585b5d638 100644 --- a/packages/gateway/src/data-plane/models/load.ts +++ b/packages/gateway/src/data-plane/models/load.ts @@ -1,3 +1,4 @@ +import type { ModelAlias } from '../../control-plane/model-aliases/types.ts'; import { getInternalModels } from '../providers/registry.ts'; import type { BackgroundScheduler } from '@floway-dev/platform'; import 
type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common'; @@ -21,12 +22,48 @@ export const toPublicModel = (model: InternalModel): PublicModel => { return info; }; +// Synthesize one PublicModel for each visible alias, appended after the real +// entries. The owner falls back to the alias-target's `owned_by` on whichever +// real entry resolves it; if the target isn't present on any reachable +// upstream, the entry still appears (operator-declared; the listing reflects +// operator intent) with a `floway` owner so the row is unambiguous. +export const toPublicModelFromAlias = (alias: ModelAlias, byId: ReadonlyMap): PublicModel => { + const target = byId.get(alias.targetModelId); + const info: PublicModel = { + id: alias.alias, + object: 'model', + type: 'model', + display_name: alias.alias, + limits: target?.limits ? { ...target.limits } : {}, + kind: target?.kind ?? 'chat', + created: alias.createdAt, + created_at: new Date(alias.createdAt * 1000).toISOString(), + aliasedFrom: { + targetModelId: alias.targetModelId, + upstreamIds: alias.upstreamIds, + rules: alias.rules, + onConflict: alias.onConflict, + }, + }; + info.owned_by = target?.owned_by ?? alias.upstreamIds[0] ?? 'floway'; + return info; +}; + export const loadModels = async ( upstreamFilter: readonly string[] | null, fetcherForUpstream: (upstreamId: string) => Fetcher, scheduler: BackgroundScheduler, + aliases: readonly ModelAlias[], ): Promise => { - const data = (await getInternalModels(upstreamFilter, fetcherForUpstream, scheduler)).map(toPublicModel); + const internal = await getInternalModels(upstreamFilter, fetcherForUpstream, scheduler); + const realEntries = internal.map(toPublicModel); + const byId = new Map(internal.map(m => [m.id, m])); + // Visible aliases append in `loadAllAliases` order, after every real entry. 
+ // The spec's no-silent-hide policy keeps disabled-target aliases visible — + // the user-facing failure on call is the canonical signal, not the + // listing. + const aliasEntries = aliases.filter(a => a.visibleInModelsList).map(a => toPublicModelFromAlias(a, byId)); + const data = [...realEntries, ...aliasEntries]; return { object: 'list', has_more: false, diff --git a/packages/gateway/src/data-plane/models/serve.ts b/packages/gateway/src/data-plane/models/serve.ts index 9b8b510f9..60736266b 100644 --- a/packages/gateway/src/data-plane/models/serve.ts +++ b/packages/gateway/src/data-plane/models/serve.ts @@ -7,6 +7,7 @@ import { loadModels } from './load.ts'; import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts'; import { createPerRequestFetcher } from '../../dial/per-request.ts'; import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts'; +import { getRepo } from '../../repo/index.ts'; import { backgroundSchedulerFromContext } from '../../runtime/background.ts'; import { getCurrentColo } from '../../runtime/runtime-info.ts'; import { ProviderModelsUnavailableError } from '@floway-dev/provider'; @@ -14,7 +15,8 @@ import { ProviderModelsUnavailableError } from '@floway-dev/provider'; export const models = async (c: Context) => { try { const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw)); - return Response.json(await loadModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c))); + const aliases = await getRepo().modelAliases.loadAll(); + return Response.json(await loadModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), aliases)); } catch (e) { // Upstream HTTP/parse failures squash to a generic message so we do not // leak upstream identity. Other registry-thrown errors (e.g. 
the "no diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts index 1408f10a6..855eca5fa 100644 --- a/packages/gateway/src/data-plane/models/serve_test.ts +++ b/packages/gateway/src/data-plane/models/serve_test.ts @@ -1,5 +1,6 @@ import { test } from 'vitest'; +import type { MemoryModelAliasesRepo } from '../../repo/memory.ts'; import { buildCopilotUpstreamRecord, buildCustomUpstreamRecord, copilotModels, requestApp, setupAppTest } from '../../test-helpers.ts'; import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot'; import { jsonResponse, withMockedFetch, assertEquals } from '@floway-dev/test-utils'; @@ -586,3 +587,180 @@ test('/v1/models returns the last real error when every account model load fails }, ); }); + +// /v1/models alias-listing coverage. Each test exercises one slice of the +// spec's visibility contract: visible alias appears with `aliasedFrom`, +// hidden alias does not appear, alias-with-disabled-target is still listed, +// the `aliasedFrom` shape matches the spec byte-for-byte. 
+test('/v1/models appends a visible alias with aliasedFrom after the real entries', async () => { + const { repo, apiKey } = await setupAppTest(); + + (repo.modelAliases as MemoryModelAliasesRepo).setAll([ + { + alias: 'codex-auto-review', + targetModelId: 'gpt-5.4', + upstreamIds: [], + rules: { reasoning: { effort: 'low' } }, + visibleInModelsList: true, + onConflict: 'real-only', + createdAt: 1_700_000_000, + }, + ]); + + await repo.upstreams.save(buildCustomUpstreamRecord({ + id: 'up_oai', + name: 'Test OpenAI', + sortOrder: 100, + config: { + baseUrl: 'https://oai.example.com', + authStyle: 'bearer', + apiKey: 'sk-test', + endpoints: { chatCompletions: {} }, + }, + })); + + await withMockedFetch( + request => { + const url = new URL(request.url); + if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']); + if (url.pathname === '/copilot_internal/v2/token') { + return jsonResponse({ + token: 'copilot-access-token', + expires_at: 4102444800, + refresh_in: 3600, + endpoints: { api: 'https://api.individual.githubcopilot.com' }, + }); + } + if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') { + return jsonResponse(copilotModels([])); + } + if (url.pathname === '/v1/models' && url.hostname === 'oai.example.com') { + return jsonResponse({ + object: 'list', + data: [{ id: 'gpt-5.4', owned_by: 'openai' }], + }); + } + throw new Error(`Unhandled fetch ${request.url}`); + }, + async () => { + const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } }); + assertEquals(response.status, 200); + const body = await response.json() as { data: Array<{ id: string; owned_by?: string; aliasedFrom?: unknown }> }; + const ids = body.data.map(m => m.id); + assertEquals(ids[ids.length - 1], 'codex-auto-review'); + const aliasEntry = body.data.find(m => m.id === 'codex-auto-review'); + if (!aliasEntry) throw new Error('expected codex-auto-review alias entry'); + 
assertEquals(aliasEntry.aliasedFrom, { + targetModelId: 'gpt-5.4', + upstreamIds: [], + rules: { reasoning: { effort: 'low' } }, + onConflict: 'real-only', + }); + assertEquals(aliasEntry.owned_by, 'openai'); + }, + ); +}); + +test('/v1/models omits aliases marked visibleInModelsList=false', async () => { + const { repo, apiKey } = await setupAppTest(); + + (repo.modelAliases as MemoryModelAliasesRepo).setAll([ + { + alias: 'hidden-alias', + targetModelId: 'gpt-5.4', + upstreamIds: [], + rules: {}, + visibleInModelsList: false, + onConflict: 'real-only', + createdAt: 0, + }, + ]); + + await repo.upstreams.save(buildCustomUpstreamRecord({ + id: 'up_oai', + name: 'Test OpenAI', + sortOrder: 100, + config: { + baseUrl: 'https://oai.example.com', + authStyle: 'bearer', + apiKey: 'sk-test', + endpoints: { chatCompletions: {} }, + }, + })); + + await withMockedFetch( + request => { + const url = new URL(request.url); + if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']); + if (url.pathname === '/copilot_internal/v2/token') { + return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } }); + } + if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') { + return jsonResponse(copilotModels([])); + } + if (url.pathname === '/v1/models' && url.hostname === 'oai.example.com') { + return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] }); + } + throw new Error(`Unhandled fetch ${request.url}`); + }, + async () => { + const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } }); + const body = await response.json() as { data: Array<{ id: string }> }; + assertEquals(body.data.map(m => m.id).includes('hidden-alias'), false); + }, + ); +}); + +test('/v1/models lists an alias whose target is not present on any upstream (no silent hide)', async () => { + const { repo, apiKey } = await 
setupAppTest(); + + (repo.modelAliases as MemoryModelAliasesRepo).setAll([ + { + alias: 'orphan-alias', + targetModelId: 'never-resolves', + upstreamIds: ['up_oai'], + rules: {}, + visibleInModelsList: true, + onConflict: 'real-only', + createdAt: 0, + }, + ]); + + await repo.upstreams.save(buildCustomUpstreamRecord({ + id: 'up_oai', + name: 'Test OpenAI', + sortOrder: 100, + config: { + baseUrl: 'https://oai.example.com', + authStyle: 'bearer', + apiKey: 'sk-test', + endpoints: { chatCompletions: {} }, + }, + })); + + await withMockedFetch( + request => { + const url = new URL(request.url); + if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']); + if (url.pathname === '/copilot_internal/v2/token') { + return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } }); + } + if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') { + return jsonResponse(copilotModels([])); + } + if (url.pathname === '/v1/models' && url.hostname === 'oai.example.com') { + return jsonResponse({ object: 'list', data: [] }); + } + throw new Error(`Unhandled fetch ${request.url}`); + }, + async () => { + const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } }); + const body = await response.json() as { data: Array<{ id: string; aliasedFrom?: { targetModelId: string }; owned_by?: string }> }; + const orphan = body.data.find(m => m.id === 'orphan-alias'); + if (!orphan) throw new Error('expected orphan-alias entry'); + assertEquals(orphan.aliasedFrom?.targetModelId, 'never-resolves'); + // No matching real entry → owner falls back to the alias's primary upstream id. 
+ assertEquals(orphan.owned_by, 'up_oai'); + }, + ); +}); diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts index 8ca75c518..e29df1c6a 100644 --- a/packages/gateway/src/data-plane/providers/registry.ts +++ b/packages/gateway/src/data-plane/providers/registry.ts @@ -285,6 +285,11 @@ export interface ProviderModelResolution { id: string; model: UpstreamModel; binding: ProviderModelRecord; + // Set when this resolution came from an alias-rewrite interpretation. The + // gateway-side passthrough callers (embeddings/images/completions) stamp + // this onto the `x-floway-alias` response header so alias-served calls are + // observable without enabling any extra mode. + aliasName?: string; } export interface ModelInterpretation { @@ -381,6 +386,10 @@ const pushInterpretation = ( out.push(aliasInterp); out.push(realInterp); return; + default: { + const exhaustive: never = alias.onConflict; + throw new Error(`pushInterpretation: unhandled onConflict '${exhaustive as string}'`); + } } }; @@ -470,7 +479,14 @@ export const resolveModelForRequest = async ( const interpretations = enumerateModelInterpretations(modelId, providers, aliases); const { resolutions, failedUpstreams } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler); - return { matches: resolutions.map(r => r.resolved), failedUpstreams }; + // Project each resolution's alias-rewrite interpretation onto the + // returned ProviderModelResolution so passthrough callers can stamp the + // `x-floway-alias` header without re-deriving the match. + const matches: ProviderModelResolution[] = resolutions.map(r => + r.interpretation.aliasName !== undefined + ? 
{ ...r.resolved, aliasName: r.interpretation.aliasName } + : r.resolved); + return { matches, failedUpstreams }; }; export const resolveModelForProvider = async ( diff --git a/packages/gateway/src/data-plane/providers/registry_test.ts b/packages/gateway/src/data-plane/providers/registry_test.ts index d470f9169..9823aeb98 100644 --- a/packages/gateway/src/data-plane/providers/registry_test.ts +++ b/packages/gateway/src/data-plane/providers/registry_test.ts @@ -922,6 +922,7 @@ describe('enumerateModelInterpretations with alias matching', () => { rules: { reasoning: { effort: 'low' } }, visibleInModelsList: true, onConflict: 'real-only', + createdAt: 0, ...over, }); @@ -1065,6 +1066,7 @@ describe('resolveModelForRequest applies alias onConflict pruning', () => { rules: { reasoning: { effort: 'low' } }, visibleInModelsList: true, onConflict, + createdAt: 0, }); test('alias-only resolves to a single match against the alias target id', async () => { diff --git a/packages/gateway/src/data-plane/shared/passthrough-serve.ts b/packages/gateway/src/data-plane/shared/passthrough-serve.ts index 1add1a115..6c917fb86 100644 --- a/packages/gateway/src/data-plane/shared/passthrough-serve.ts +++ b/packages/gateway/src/data-plane/shared/passthrough-serve.ts @@ -21,6 +21,7 @@ import { createUpstreamLatencyRecorder, recordPerformanceError, recordPerformanc import { recordTokenUsage } from './telemetry/usage.ts'; import { createPerRequestFetcher } from '../../dial/per-request.ts'; import type { AuthedContext } from '../../middleware/auth.ts'; +import { getRepo } from '../../repo/index.ts'; import type { TokenUsage } from '../../repo/types.ts'; import type { GatewayCtx } from '../chat/shared/gateway-ctx.ts'; import { type StreamCompletion, writeSSEFrames } from '../chat/shared/stream/sse.ts'; @@ -117,6 +118,31 @@ interface PassthroughServeContext { export const passthroughApiError = (c: Context, message: string, status: ContentfulStatusCode): Response => c.json({ error: { message, type: 
'api_error' } }, status); +// Emit one trace line per rule field present on the matched alias when the +// inbound endpoint has no slot for the rule. The passthrough endpoints +// (embeddings, images, /v1/completions) carry no Floway-extension fields +// so a non-empty `rules` object is structurally dropped before the upstream +// call; emitting one trace line per knob gives an operator the same signal +// the chat sanitizers do. +const traceDroppedAliasRulesForPassthrough = ( + aliasName: string, + aliases: readonly { alias: string; rules: Record }[], + sourceApi: PassthroughServeApiName, +): void => { + const matched = aliases.find(a => a.alias === aliasName); + if (!matched) return; + const rules = matched.rules as { reasoning?: Record; verbosity?: unknown; serviceTier?: unknown; anthropicSpeed?: unknown; anthropicBeta?: readonly unknown[] }; + const fields: string[] = []; + if (rules.reasoning) for (const key of Object.keys(rules.reasoning)) fields.push(`reasoning.${key}`); + if (rules.verbosity !== undefined) fields.push('verbosity'); + if (rules.serviceTier !== undefined) fields.push('serviceTier'); + if (rules.anthropicSpeed !== undefined) fields.push('anthropicSpeed'); + if (rules.anthropicBeta?.length) fields.push('anthropicBeta'); + for (const field of fields) { + console.warn('floway.alias.drop', JSON.stringify({ alias: aliasName, field, targetProtocol: sourceApi })); + } +}; + export const passthroughServe = async (input: PassthroughServeContext): Promise => { const { c, ctx, sourceApi, model, bindingServesEndpoint, call, response: responseHandling } = input; const requestStartedAt = performance.now(); @@ -124,12 +150,20 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise< try { const fetcherForUpstream = await createPerRequestFetcher(ctx.currentColo); + // Aliases pass through so a `(model, lookupId)` interpretation can rewrite + // to the alias's target id even for non-LLM-shaped endpoints. 
The alias + // rules themselves never apply here — the inbound payload (embeddings, + // images, /v1/completions) has no protocol-extension slots for the rule + // knobs. We still surface the matched alias name on the + // `x-floway-alias` response header and trace one log line per dropped + // rule so an operator can confirm the rewrite ran. + const aliases = await getRepo().modelAliases.loadAll(); // Each match is one (upstream, upstream-catalog id) pair that interprets // the inbound public id. Iteration order follows configured sort_order // across upstreams, with the unprefixed interpretation pushed before the // prefixed one within a single upstream. The first match whose binding // satisfies the endpoint capability wins. - const { matches, failedUpstreams } = await resolveModelForRequest(model, ctx.upstreamIds, fetcherForUpstream, ctx.backgroundScheduler); + const { matches, failedUpstreams } = await resolveModelForRequest(model, ctx.upstreamIds, fetcherForUpstream, ctx.backgroundScheduler, aliases); if (matches.length === 0) { ctx.dump?.error('gateway'); return passthroughApiError(c, appendFailedUpstreams(`Model ${model} is not available on any configured upstream.`, failedUpstreams), 404); @@ -137,6 +171,10 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise< for (const match of matches) { if (!bindingServesEndpoint(match.binding)) continue; + if (match.aliasName !== undefined) { + ctx.responseHeaders.set('x-floway-alias', match.aliasName); + traceDroppedAliasRulesForPassthrough(match.aliasName, aliases, sourceApi); + } const recorder = createUpstreamLatencyRecorder(); const { response, modelKey } = await call(match.binding, { diff --git a/packages/gateway/src/test-helpers/gateway-ctx.ts b/packages/gateway/src/test-helpers/gateway-ctx.ts index 3a19af303..3ebf1f474 100644 --- a/packages/gateway/src/test-helpers/gateway-ctx.ts +++ b/packages/gateway/src/test-helpers/gateway-ctx.ts @@ -15,5 +15,6 @@ export const mockGatewayCtx = 
(overrides: Partial = {}): GatewayCtx dump: null, backgroundScheduler: promise => { void promise; }, requestStartedAt: 0, + responseHeaders: new Headers(), ...overrides, }); diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts index fddc80318..383e0ffe0 100644 --- a/packages/protocols/src/common/models.ts +++ b/packages/protocols/src/common/models.ts @@ -110,6 +110,30 @@ export interface PublicModel { }; kind: ModelKind; cost?: ModelPricing; + // Floway protocol extension. Present on synthesized alias entries the + // gateway appends to the listing. Clients that do not know about the + // field ignore it; alias-aware clients (dashboard, CLI shims) render the + // alias's target id and rules from this payload directly. + // See docs/superpowers/specs/2026-06-25-model-aliases-design.md. + aliasedFrom?: PublicModelAliasedFrom; +} + +export interface PublicModelAliasedFrom { + targetModelId: string; + upstreamIds: readonly string[]; + rules: { + reasoning?: { + effort?: string; + budgetTokens?: number; + adaptive?: boolean; + summary?: string; + }; + verbosity?: string; + serviceTier?: string; + anthropicSpeed?: string; + anthropicBeta?: readonly string[]; + }; + onConflict: 'alias-only' | 'real-only' | 'both-real-first' | 'both-alias-first'; } export interface PublicModelsResponse { diff --git a/packages/translate/package.json b/packages/translate/package.json index 462bf2d1c..e18d7f564 100644 --- a/packages/translate/package.json +++ b/packages/translate/package.json @@ -5,7 +5,8 @@ "type": "module", "exports": { ".": { "import": "./src/index.ts", "types": "./src/index.ts" }, - "./via-responses/responses-items": { "import": "./src/shared/via-responses/responses-items.ts", "types": "./src/shared/via-responses/responses-items.ts" } + "./via-responses/responses-items": { "import": "./src/shared/via-responses/responses-items.ts", "types": "./src/shared/via-responses/responses-items.ts" }, + "./via-messages/anthropic-extensions": 
{ "import": "./src/shared/via-messages/anthropic-extensions.ts", "types": "./src/shared/via-messages/anthropic-extensions.ts" } }, "scripts": { "typecheck": "tsc --noEmit" From 8f2ce38b7a896c093fb397b71dd681c04ab3d59a Mon Sep 17 00:00:00 2001 From: Menci Date: Thu, 25 Jun 2026 23:55:54 +0800 Subject: [PATCH 007/170] fix(aliases): address final review (header on passthrough, streaming-safe, idempotent seed, ordered listing) Final-review fix wave on top of the model-aliases data-plane series. Each finding from the whole-branch review is addressed; one shim is kept and documented per the reviewer's option-B recommendation. - Critical #1: `/v1/embeddings`, `/v1/images/*`, and `/v1/completions` returned the response through the legacy `ctx.dump?.finalize` pattern instead of `finalizeGatewayResponse`, so the `x-floway-alias` header the passthrough scaffold stamped on the per-ctx bag was silently dropped. Route all three call sites through `finalizeGatewayResponse` for a uniform finalize seam. - Important #4: Make the `x-floway-alias` stamp streaming-safe by introducing `stageGatewayResponseHeader(ctx, name, value)` that writes the header to BOTH Hono's `c.header` (the documented knob that survives `streamSSE`'s internal `c.newResponse`) and the per-ctx `responseHeaders` bag `finalizeGatewayResponse` merges onto Web- `Response.json`-built non-streaming responses. The chat serve.ts layers (messages, gemini, responses, chat-completions) and passthrough-serve all go through this helper, eliminating the reliance on post-construction `response.headers.set` for streaming. - Important #3: Add coverage in `gemini_test.ts` that a visible alias appears in `/v1beta/models` as a synthetic Gemini model entry with the expected name, displayName, and supportedGenerationMethods. The prior code path was untested; a refactor of `loadGeminiModels` would not have been caught. 
- Important #2: Keep the pre-alias-table `rewriteResponsesEntryModelAlias` shim that swaps `codex-auto-review` -> `gpt-5.4` before the matcher runs (option B from the review). Add a code comment above it explaining the carveout: the seeded alias is `on_conflict='real-only'` and on a Codex upstream that exposes a real `codex-auto-review` model the alias would otherwise lose, breaking parity with Codex CLI's native behavior. The shim is temporary pending a deliberate Codex behavior change. - Minor #6: Switch the `0046_model_aliases.sql` seed `INSERT` to `INSERT OR IGNORE` so a fresh local-dev replay doesn't trip the PRIMARY KEY uniqueness check. - Minor #8: Add `ORDER BY alias` to `loadAllAliases` so the `/v1/models` listing emits alias entries deterministically across runtimes. The unit-test fan-out reflects adding `c: AuthedContext` to `GatewayCtx` so the serve layer can call Hono's `c.header` directly. Test stubs go through the shared `stubAuthedContext` helper. --- .../gateway/migrations/0046_model_aliases.sql | 2 +- .../src/control-plane/model-aliases/repo.ts | 4 +- .../chat/chat-completions/attempt_test.ts | 2 + .../demote-developer-to-system_test.ts | 2 + .../demote-interleaved-system-to-user_test.ts | 2 + ...le-reasoning-on-forced-tool-choice_test.ts | 2 + .../include-usage-stream-options_test.ts | 2 + .../interceptors/normalize-usage_test.ts | 2 + .../vendor-deepseek-normalize_test.ts | 2 + .../vendor-kimi-normalize_test.ts | 2 + .../vendor-qwen-normalize_test.ts | 2 + .../data-plane/chat/chat-completions/serve.ts | 6 +- .../chat/chat-completions/serve_test.ts | 2 + .../data-plane/chat/gemini/attempt_test.ts | 2 + .../strip-safety-settings_test.ts | 2 + .../strip-unsupported-part-fields_test.ts | 2 + .../strip-unsupported-tools_test.ts | 2 + .../suppress-thought-parts_test.ts | 2 + .../data-plane/chat/gemini/respond_test.ts | 2 + .../src/data-plane/chat/gemini/serve.ts | 9 +-- .../src/data-plane/chat/gemini/serve_test.ts | 2 + 
.../data-plane/chat/messages/attempt_test.ts | 2 + .../demote-interleaved-system-to-user_test.ts | 2 + ...le-reasoning-on-forced-tool-choice_test.ts | 2 + .../strip-billing-attribution_test.ts | 2 + .../interceptors/web-search-shim_test.ts | 2 + .../data-plane/chat/messages/respond_test.ts | 2 + .../src/data-plane/chat/messages/serve.ts | 9 +-- .../data-plane/chat/messages/serve_test.ts | 2 + .../data-plane/chat/responses/attempt_test.ts | 2 + .../src/data-plane/chat/responses/http.ts | 17 ++++++ .../canonicalize-encrypted-content_test.ts | 2 + .../demote-developer-to-system_test.ts | 2 + .../demote-interleaved-system-to-user_test.ts | 2 + ...le-reasoning-on-forced-tool-choice_test.ts | 2 + .../interceptors/retry-cyber-policy_test.ts | 2 + .../interceptors/server-tool-shim_test.ts | 3 + .../image-generation-integration_test.ts | 2 + .../server-tools/image-generation_test.ts | 2 + .../vendor-deepseek-normalize_test.ts | 2 + .../vendor-qwen-normalize_test.ts | 2 + .../src/data-plane/chat/responses/serve.ts | 9 +-- .../data-plane/chat/responses/serve_test.ts | 2 + .../src/data-plane/chat/shared/gateway-ctx.ts | 39 +++++++++--- .../data-plane/chat/shared/respond_test.ts | 2 + .../chat/shared/upstream-telemetry_test.ts | 2 + .../src/data-plane/completions/serve.ts | 7 +-- .../src/data-plane/completions/serve_test.ts | 47 +++++++++++++++ .../src/data-plane/embeddings/serve.ts | 7 +-- .../src/data-plane/embeddings/serve_test.ts | 56 +++++++++++++++++ .../gateway/src/data-plane/images/serve.ts | 15 ++--- .../src/data-plane/images/serve_test.ts | 60 +++++++++++++++++++ .../src/data-plane/models/gemini_test.ts | 46 ++++++++++++++ .../data-plane/shared/passthrough-serve.ts | 9 ++- .../gateway/src/test-helpers/gateway-ctx.ts | 10 ++++ 55 files changed, 384 insertions(+), 45 deletions(-) diff --git a/packages/gateway/migrations/0046_model_aliases.sql b/packages/gateway/migrations/0046_model_aliases.sql index c934d77b6..d76687bbd 100644 --- 
a/packages/gateway/migrations/0046_model_aliases.sql +++ b/packages/gateway/migrations/0046_model_aliases.sql @@ -10,5 +10,5 @@ CREATE TABLE model_aliases ( updated_at INTEGER NOT NULL DEFAULT (unixepoch()) ); -INSERT INTO model_aliases (alias, target_model_id, rules_json, on_conflict) +INSERT OR IGNORE INTO model_aliases (alias, target_model_id, rules_json, on_conflict) VALUES ('codex-auto-review', 'gpt-5.4', '{"reasoning":{"effort":"low"}}', 'real-only'); diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts index 4c13cd09b..a7cfd963f 100644 --- a/packages/gateway/src/control-plane/model-aliases/repo.ts +++ b/packages/gateway/src/control-plane/model-aliases/repo.ts @@ -13,9 +13,11 @@ interface ModelAliasRow { // The model_aliases table is operator-managed and small (dozens of rows at // most), so the data plane reads the full table per request — no cache layer. +// `ORDER BY alias` makes the read deterministic so `/v1/models` and friends +// emit alias entries in a stable, operator-predictable order across runtimes. 
export const loadAllAliases = async (db: SqlDatabase): Promise<ModelAlias[]> => { const { results } = await db - .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at FROM model_aliases') + .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at FROM model_aliases ORDER BY alias') .all(); return results.map(toModelAlias); }; diff --git a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts index 62b814359..29f9d1b1b 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts @@ -3,6 +3,7 @@ import { test, vi } from 'vitest'; import { chatCompletionsAttempt } from './attempt.ts'; import { initRepo } from '../../../repo/index.ts'; import { InMemoryRepo } from '../../../repo/memory.ts'; +import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts'; import { createNonResponsesSourceStore } from '../responses/items/store.ts'; import type { ProviderCandidate } from '../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; @@ -23,6 +24,7 @@ const makeGatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts index 83d9bccb7..b84aea40f 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts @@ -2,6 +2,7 @@ import { test } from 'vitest';
import { withDemoteDeveloperToSystem } from './demote-developer-to-system.ts'; import type { ChatCompletionsInvocation } from './types.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions'; import { eventResult } from '@floway-dev/provider'; @@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts index 156389a46..4ecb21e72 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts @@ -2,6 +2,7 @@ import { test } from 'vitest'; import { withInterleavedSystemDemotedToUser } from './demote-interleaved-system-to-user.ts'; import type { ChatCompletionsInvocation } from './types.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions'; import { eventResult } from '@floway-dev/provider'; @@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts 
b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts index e46726510..0ff191572 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts @@ -2,6 +2,7 @@ import { test } from 'vitest'; import { withReasoningDisabledOnForcedToolChoice } from './disable-reasoning-on-forced-tool-choice.ts'; import type { ChatCompletionsInvocation } from './types.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions'; import { eventResult } from '@floway-dev/provider'; @@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts index e3e4147a2..f8d0c33c5 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts @@ -2,6 +2,7 @@ import { test } from 'vitest'; import { withUsageStreamOptionsIncluded } from './include-usage-stream-options.ts'; import type { ChatCompletionsInvocation } from './types.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions'; import { eventResult } 
from '@floway-dev/provider'; @@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts index 0b6fed4f1..b9dcb1b9c 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts @@ -2,6 +2,7 @@ import { test } from 'vitest'; import { withUsageNormalized } from './normalize-usage.ts'; import type { ChatCompletionsInvocation } from './types.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions'; import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common'; @@ -16,6 +17,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts index 81be2c3ab..7f96ee5f2 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts @@ -2,6 +2,7 @@ import { test } from 'vitest'; import type { ChatCompletionsInvocation } from './types.ts'; import { withVendorDeepseekChatCompletionsNormalize } from 
'./vendor-deepseek-normalize.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions'; import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common'; @@ -20,6 +21,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts index 1cfc304b7..2547a30b4 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts @@ -2,6 +2,7 @@ import { test } from 'vitest'; import type { ChatCompletionsInvocation } from './types.ts'; import { withVendorKimiChatCompletionsNormalize } from './vendor-kimi-normalize.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions'; import { eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common'; @@ -16,6 +17,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts index 0506a1e25..e374ea3b4 100644 --- 
a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts @@ -2,6 +2,7 @@ import { test } from 'vitest'; import type { ChatCompletionsInvocation } from './types.ts'; import { withVendorQwenChatCompletionsNormalize } from './vendor-qwen-normalize.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions'; import { eventResult } from '@floway-dev/provider'; @@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts index 1347dd6bd..28e10cfd5 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts @@ -6,6 +6,7 @@ import { applyAliasRulesToChatCompletions } from '../../model-aliases/apply.ts'; import type { StatefulResponsesStore } from '../responses/items/store.ts'; import { enumerateProviderCandidates } from '../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; +import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts'; import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions'; import type { ProtocolFrame } from '@floway-dev/protocols/common'; import type { ExecuteResult } from '@floway-dev/provider'; @@ -50,9 +51,10 @@ export const chatCompletionsServe = { } // Apply operator-locked alias rules to the inbound IR before the // attempt runs its interceptor chain. 
The matching `x-floway-alias` - // header rides out via ctx.responseHeaders. + // header is staged via Hono's `c.header` so it survives `streamSSE`'s + // internal `c.newResponse`. if (candidate.aliasRules) applyAliasRulesToChatCompletions(payload, candidate.aliasRules); - if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName); + if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName); return await chatCompletionsAttempt.generate({ payload, ctx, store, candidate, headers }); }, }; diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts index 402803203..37441a5ba 100644 --- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts +++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts @@ -2,6 +2,7 @@ import { test, vi } from 'vitest'; import { initRepo } from '../../../repo/index.ts'; import { InMemoryRepo } from '../../../repo/memory.ts'; +import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts'; import { createNonResponsesSourceStore } from '../responses/items/store.ts'; import type { ProviderCandidate } from '../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; @@ -49,6 +50,7 @@ const makeGatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts index 29a4e9bc5..bc7c7949b 100644 --- a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts +++ b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts @@ -3,6 +3,7 @@ import { test, vi } from 'vitest'; import { geminiAttempt } from './attempt.ts'; import { initRepo } from '../../../repo/index.ts'; import { 
InMemoryRepo } from '../../../repo/memory.ts'; +import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts'; import { createNonResponsesSourceStore } from '../responses/items/store.ts'; import type { ProviderCandidate } from '../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; @@ -24,6 +25,7 @@ const makeGatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts index d4b48ac3a..98d15dd39 100644 --- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts +++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts @@ -1,6 +1,7 @@ import { test } from 'vitest'; import { stripSafetySettings } from './strip-safety-settings.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import type { ProtocolFrame } from '@floway-dev/protocols/common'; import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini'; @@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts index 3b02b63f8..e74d45a4f 100644 --- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts +++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts @@ -1,6 +1,7 
@@ import { test } from 'vitest'; import { stripUnsupportedPartFields } from './strip-unsupported-part-fields.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import type { ProtocolFrame } from '@floway-dev/protocols/common'; import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini'; @@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts index 6a2c20ef7..689ee6d0e 100644 --- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts +++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts @@ -1,6 +1,7 @@ import { test } from 'vitest'; import { stripUnsupportedTools } from './strip-unsupported-tools.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import type { ProtocolFrame } from '@floway-dev/protocols/common'; import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini'; @@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts index eb67a0092..33e49a791 100644 --- a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts +++ 
b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts @@ -1,6 +1,7 @@ import { test } from 'vitest'; import { suppressThoughtParts } from './suppress-thought-parts.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import { eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common'; import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini'; @@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts index 31981b544..61e0132a7 100644 --- a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts +++ b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts @@ -2,6 +2,7 @@ import { Hono } from 'hono'; import { test } from 'vitest'; import { respondGemini } from './respond.ts'; +import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; import type { ProtocolFrame } from '@floway-dev/protocols/common'; import { eventFrame } from '@floway-dev/protocols/common'; @@ -26,6 +27,7 @@ const ctx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts index e1f61d628..7d1cf345e 100644 --- a/packages/gateway/src/data-plane/chat/gemini/serve.ts +++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts @@ -6,6 +6,7 @@ import { applyAliasRulesToGemini } from '../../model-aliases/apply.ts'; import type { StatefulResponsesStore } from 
'../responses/items/store.ts'; import { enumerateProviderCandidates } from '../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; +import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts'; import type { ProtocolFrame } from '@floway-dev/protocols/common'; import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini'; import type { ExecuteResult, PlainResult } from '@floway-dev/provider'; @@ -60,10 +61,10 @@ export const geminiServe = { ); } // Operator-locked alias rules apply to the Gemini IR before the attempt - // runs; the matching `x-floway-alias` header rides out via - // ctx.responseHeaders. + // runs; the matching `x-floway-alias` header is staged via Hono's + // `c.header` so it survives `streamSSE`'s internal `c.newResponse`. if (candidate.aliasRules) applyAliasRulesToGemini(payload, candidate.aliasRules); - if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName); + if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName); return await geminiAttempt.generate({ payload, ctx, store, candidate, headers }); }, @@ -97,7 +98,7 @@ export const geminiServe = { ); } if (candidate.aliasRules) applyAliasRulesToGemini(payload, candidate.aliasRules); - if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName); + if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName); return await geminiAttempt.countTokens({ payload, ctx, store, candidate, headers }); }, }; diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts index 070d44471..3ef8114e8 100644 --- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts +++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts @@ -2,6 +2,7 @@ import { test, vi } from 'vitest'; import { initRepo } from '../../../repo/index.ts'; import { 
InMemoryRepo } from '../../../repo/memory.ts'; +import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts'; import { createNonResponsesSourceStore } from '../responses/items/store.ts'; import type { ProviderCandidate } from '../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; @@ -48,6 +49,7 @@ const makeGatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts index f9192e289..41a96f0de 100644 --- a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts +++ b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts @@ -3,6 +3,7 @@ import { test, vi } from 'vitest'; import { messagesAttempt } from './attempt.ts'; import { initRepo } from '../../../repo/index.ts'; import { InMemoryRepo } from '../../../repo/memory.ts'; +import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts'; import { createNonResponsesSourceStore } from '../responses/items/store.ts'; import type { ProviderCandidate } from '../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; @@ -23,6 +24,7 @@ const makeGatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts index 9df67c5c0..bf29636a7 100644 --- a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts +++ 
b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts @@ -2,6 +2,7 @@ import { test } from 'vitest'; import { demoteInterleavedSystemToUser } from './demote-interleaved-system-to-user.ts'; import type { MessagesInvocation } from './types.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import type { ProtocolFrame } from '@floway-dev/protocols/common'; import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages'; @@ -16,6 +17,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts index 7b7045355..9db2fe856 100644 --- a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts +++ b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts @@ -2,6 +2,7 @@ import { test } from 'vitest'; import { withReasoningDisabledOnForcedToolChoice } from './disable-reasoning-on-forced-tool-choice.ts'; import type { MessagesInvocation } from './types.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import type { ProtocolFrame } from '@floway-dev/protocols/common'; import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages'; @@ -16,6 +17,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git 
a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts index 8ae90e232..c08720c71 100644 --- a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts +++ b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts @@ -2,6 +2,7 @@ import { test } from 'vitest'; import { stripBillingAttribution } from './strip-billing-attribution.ts'; import type { MessagesInvocation } from './types.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import type { ProtocolFrame } from '@floway-dev/protocols/common'; import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages'; @@ -16,6 +17,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts index dca97addd..8ddfa03e7 100644 --- a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts +++ b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts @@ -13,6 +13,7 @@ import { } from './web-search-shim.ts'; import { initRepo } from '../../../../repo/index.ts'; import { InMemoryRepo } from '../../../../repo/memory.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import { DEFAULT_SEARCH_CONFIG } from '../../../tools/web-search/search-config.ts'; import type { WebSearchProvider, WebSearchProviderResult } from '../../../tools/web-search/types.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; @@ -58,6 +59,7 @@ 
const gatewayCtx = (apiKeyId: string = 'test-key'): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/messages/respond_test.ts b/packages/gateway/src/data-plane/chat/messages/respond_test.ts index 79d0a9db6..0caef38b6 100644 --- a/packages/gateway/src/data-plane/chat/messages/respond_test.ts +++ b/packages/gateway/src/data-plane/chat/messages/respond_test.ts @@ -4,6 +4,7 @@ import { test } from 'vitest'; import { createMessagesStreamUsageState, respondMessages, tokenUsageFromMessagesFrame } from './respond.ts'; import { initRepo } from '../../../repo/index.ts'; import { InMemoryRepo } from '../../../repo/memory.ts'; +import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common'; import type { MessagesStreamEvent } from '@floway-dev/protocols/messages'; @@ -534,6 +535,7 @@ const makeRespondCtx = (): GatewayCtx => ({ wantsStream: false, runtimeLocation: 'TEST', backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, currentColo: 'TEST', diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts index 30282afd4..8b0b337f1 100644 --- a/packages/gateway/src/data-plane/chat/messages/serve.ts +++ b/packages/gateway/src/data-plane/chat/messages/serve.ts @@ -6,6 +6,7 @@ import { applyAliasRulesToMessages } from '../../model-aliases/apply.ts'; import type { StatefulResponsesStore } from '../responses/items/store.ts'; import { enumerateProviderCandidates } from '../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; +import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts'; import type { 
ProtocolFrame } from '@floway-dev/protocols/common'; import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages'; import type { ExecuteResult, PlainResult } from '@floway-dev/provider'; @@ -59,10 +60,10 @@ export const messagesServe = { // Operator-locked alias rules go onto the inbound IR before the attempt // begins so the per-protocol interceptor chain (and any downstream // translate pass) sees the already-injected fields. The matching - // `x-floway-alias` response header is staged on the gateway-stamped - // header set; the http wrapper flushes it onto the outgoing Response. + // `x-floway-alias` header is staged via Hono's `c.header` so it + // survives `streamSSE`'s internal `c.newResponse`. if (candidate.aliasRules) applyAliasRulesToMessages(payload, candidate.aliasRules); - if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName); + if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName); return await messagesAttempt.generate({ payload, ctx, store, candidate, headers }); }, @@ -96,7 +97,7 @@ export const messagesServe = { // rules apply uniformly regardless of endpoint, and the response header // rides out the same way. 
if (candidate.aliasRules) applyAliasRulesToMessages(payload, candidate.aliasRules); - if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName); + if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName); return await messagesAttempt.countTokens({ payload, ctx, store, candidate, headers }); }, }; diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts index 734bad296..bb1dfcfb0 100644 --- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts +++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts @@ -2,6 +2,7 @@ import { test, vi } from 'vitest'; import { initRepo } from '../../../repo/index.ts'; import { InMemoryRepo } from '../../../repo/memory.ts'; +import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts'; import { createNonResponsesSourceStore } from '../responses/items/store.ts'; import type { ProviderCandidate } from '../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; @@ -46,6 +47,7 @@ const makeGatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts index a952627b4..ceba118d7 100644 --- a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts @@ -7,6 +7,7 @@ import { createResponsesHttpStore } from './items/store.ts'; import { initRepo } from '../../../repo/index.ts'; import { InMemoryRepo } from '../../../repo/memory.ts'; import type { StoredResponsesItem } from '../../../repo/types.ts'; +import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts'; import type { ProviderCandidate } from 
'../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common'; @@ -25,6 +26,7 @@ const makeGatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/responses/http.ts b/packages/gateway/src/data-plane/chat/responses/http.ts index 54c497a0a..9346d5576 100644 --- a/packages/gateway/src/data-plane/chat/responses/http.ts +++ b/packages/gateway/src/data-plane/chat/responses/http.ts @@ -16,6 +16,23 @@ import { internalErrorResult, toInternalDebugError } from '@floway-dev/provider' // performance telemetry, and usage accounting all see the real model name // (and the `low` reasoning effort the alias implies — generate only; // compact carries no `reasoning` field). +// +// This shim predates the operator-managed alias table seeded by migration +// `0046_model_aliases.sql`. The two paths overlap on `/v1/responses` — +// rewriting at this entry swaps the inbound `model` to `gpt-5.4` BEFORE the +// alias matcher in `enumerateProviderCandidates` runs, so the alias row +// never matches for this surface. The carveout is deliberate: the seeded +// alias is stored with `on_conflict='real-only'`, which means on a Codex +// upstream that exposes a real `codex-auto-review` model the alias would +// silently lose to the real id and the `reasoning.effort=low` rule would +// never apply — breaking parity with Codex CLI's native auto-review +// behavior. Other inbound surfaces (`/v1/messages`, `/v1/chat/completions`, +// `/v1beta/…`) carry no entry-level shim and reach the alias matcher +// unchanged; they observe `real-only` semantics as designed. +// +// The shim is a temporary carveout pending a follow-up that either deletes +// it after a deliberate Codex behavior change (e.g. 
switching to +// `both-alias-first`) or migrates the entire surface to the alias table. const rewriteResponsesEntryModelAlias = (payload: ResponsesPayload, stampReasoningEffort: boolean): ResponsesPayload => { if (payload.model !== CODEX_AUTO_REVIEW_ALIAS) return payload; if (!stampReasoningEffort) return { ...payload, model: CODEX_AUTO_REVIEW_TARGET }; diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts index f904f85d5..444ce069d 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts @@ -2,6 +2,7 @@ import { test } from 'vitest'; import { withReasoningEncryptedContentCanonicalized } from './canonicalize-encrypted-content.ts'; import type { ResponsesInvocation } from './types.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts'; import { eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common'; @@ -17,6 +18,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts index a193d01db..a2951ef03 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts @@ -2,6 +2,7 @@ import { test } 
from 'vitest'; import { withDemoteDeveloperToSystem } from './demote-developer-to-system.ts'; import type { ResponsesInvocation } from './types.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts'; import { doneFrame } from '@floway-dev/protocols/common'; @@ -17,6 +18,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts index ae1fc3970..f4f26c112 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts @@ -2,6 +2,7 @@ import { test } from 'vitest'; import { withInterleavedSystemDemotedToUser } from './demote-interleaved-system-to-user.ts'; import type { ResponsesInvocation } from './types.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts'; import { doneFrame } from '@floway-dev/protocols/common'; @@ -17,6 +18,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts 
b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts index dcddbd6c8..ea7b872ab 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts @@ -2,6 +2,7 @@ import { test } from 'vitest'; import { withReasoningDisabledOnForcedToolChoice } from './disable-reasoning-on-forced-tool-choice.ts'; import type { ResponsesInvocation } from './types.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts'; import { doneFrame } from '@floway-dev/protocols/common'; @@ -17,6 +18,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts index 3cf947b01..5b9bb4c91 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts @@ -2,6 +2,7 @@ import { test } from 'vitest'; import { withCyberPolicyRetried } from './retry-cyber-policy.ts'; import type { ResponsesInvocation } from './types.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts'; import { eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common'; @@ -45,6 +46,7 @@ const stubCtx = 
(overrides: { abortSignal?: AbortSignal } = {}): GatewayCtx => ( currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, ...overrides, diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts index 6688dcec7..ff2e0f910 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts @@ -16,6 +16,7 @@ import { SHIM_TOOL_NAME, webSearchServerTool } from './server-tools/web-search.t import type { ResponsesInterceptor, ResponsesInvocation } from './types.ts'; import { initRepo } from '../../../../repo/index.ts'; import { InMemoryRepo } from '../../../../repo/memory.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import { resolveConfiguredWebSearchProvider } from '../../../tools/web-search/provider.ts'; import type { ConfiguredWebSearchProvider, @@ -348,6 +349,7 @@ const makeGatewayCtx = (apiKeyId: string = 'k1'): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }); @@ -4497,6 +4499,7 @@ test('downstream AbortSignal threads through to provider search / fetchPage and currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, abortSignal: controller.signal, diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts index 46e30b043..8f1abec4c 100644 --- 
a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts @@ -2,6 +2,7 @@ import { beforeEach, test, vi } from 'vitest'; import { initRepo } from '../../../../../repo/index.ts'; import { InMemoryRepo } from '../../../../../repo/memory.ts'; +import { stubAuthedContext } from '../../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../../shared/gateway-ctx.ts'; import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../../items/store.ts'; import type { ResponsesInvocation } from '../types.ts'; @@ -144,6 +145,7 @@ const gatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts index da94b3068..16f0415e1 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts @@ -19,6 +19,7 @@ import { } from './image-generation.ts'; import { initRepo } from '../../../../../repo/index.ts'; import { InMemoryRepo } from '../../../../../repo/memory.ts'; +import { stubAuthedContext } from '../../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../../shared/gateway-ctx.ts'; import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../../items/store.ts'; import type { ResponsesInvocation } from '../types.ts'; @@ -56,6 +57,7 @@ const gatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: 
stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts index 7db1b6360..c873f9225 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts @@ -2,6 +2,7 @@ import { test } from 'vitest'; import type { ResponsesInvocation } from './types.ts'; import { withVendorDeepseekResponsesNormalize } from './vendor-deepseek-normalize.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts'; import { doneFrame } from '@floway-dev/protocols/common'; @@ -17,6 +18,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts index 23afe8462..6417306ff 100644 --- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts @@ -2,6 +2,7 @@ import { test } from 'vitest'; import type { ResponsesInvocation } from './types.ts'; import { withVendorQwenResponsesNormalize } from './vendor-qwen-normalize.ts'; +import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts'; import type { GatewayCtx } from '../../shared/gateway-ctx.ts'; import { MemoryStatefulResponsesBacking, 
LayeredStatefulResponsesStore } from '../items/store.ts'; import { doneFrame } from '@floway-dev/protocols/common'; @@ -17,6 +18,7 @@ const stubCtx: GatewayCtx = { currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }; diff --git a/packages/gateway/src/data-plane/chat/responses/serve.ts b/packages/gateway/src/data-plane/chat/responses/serve.ts index e66a2a29a..81035f20d 100644 --- a/packages/gateway/src/data-plane/chat/responses/serve.ts +++ b/packages/gateway/src/data-plane/chat/responses/serve.ts @@ -4,6 +4,7 @@ import type { ResponsesSnapshotMode, StatefulResponsesStore } from './items/stor import { prepareResponsesServePlan } from './serve-prep.ts'; import { applyAliasRulesToResponses } from '../../model-aliases/apply.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; +import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts'; import type { ProtocolFrame } from '@floway-dev/protocols/common'; import type { ResponsesPayload, ResponsesStreamEvent } from '@floway-dev/protocols/responses'; import type { ExecuteResult } from '@floway-dev/provider'; @@ -48,10 +49,10 @@ export const responsesServe = { }); if (plan.kind === 'failure') return plan.result; // Operator-locked alias rules apply to the prepared inbound IR before - // the attempt runs; the `x-floway-alias` header rides out via - // ctx.responseHeaders. + // the attempt runs; the `x-floway-alias` header is staged via Hono's + // `c.header` so it survives `streamSSE`'s internal `c.newResponse`. 
if (plan.candidate.aliasRules) applyAliasRulesToResponses(plan.prepared, plan.candidate.aliasRules); - if (plan.candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', plan.candidate.aliasName); + if (plan.candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', plan.candidate.aliasName); const effectiveSnapshotMode: ResponsesSnapshotMode = snapshotMode !== 'none' && containsCompactionTrigger(plan.prepared.input) ? 'replace' : snapshotMode; @@ -73,7 +74,7 @@ export const responsesServe = { // applying uniformly keeps the operator's intent expressed at the // inbound boundary regardless of which endpoint runs. if (plan.candidate.aliasRules) applyAliasRulesToResponses(plan.prepared, plan.candidate.aliasRules); - if (plan.candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', plan.candidate.aliasName); + if (plan.candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', plan.candidate.aliasName); return await responsesAttempt.compact({ payload: plan.prepared, ctx, store, candidate: plan.candidate, headers }); }, }; diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts index 07369504d..b42f5bdbf 100644 --- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts @@ -5,6 +5,7 @@ import { createResponsesHttpStore, MemoryStatefulResponsesBacking, LayeredStatef import { initRepo } from '../../../repo/index.ts'; import { InMemoryRepo } from '../../../repo/memory.ts'; import type { StoredResponsesItem, StoredResponsesSnapshot } from '../../../repo/types.ts'; +import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts'; import type { ProviderCandidate } from '../shared/candidates.ts'; import type { GatewayCtx } from '../shared/gateway-ctx.ts'; import type { ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions'; @@ -57,6 +58,7 @@ const 
makeGatewayCtx = (): GatewayCtx => ({ currentColo: 'TEST', dump: null, backgroundScheduler: () => {}, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, }); diff --git a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts index 0e199e403..ad0ef2169 100644 --- a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts +++ b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts @@ -6,6 +6,14 @@ import { getCurrentColo } from '../../../runtime/runtime-info.ts'; import type { BackgroundScheduler } from '@floway-dev/platform'; export interface GatewayCtx { + // The inbound Hono context. Carried so the serve layer can stage + // response headers via `c.header(name, value)` — the Hono-documented + // knob that survives `streamSSE`'s internal `c.newResponse` for the + // streaming surfaces. For non-streaming surfaces that build the + // outgoing Response via the Web `Response.json` constructor (which + // bypasses Hono's context), the same value also lands on + // `responseHeaders` so `finalizeGatewayResponse` can stamp it. + readonly c: AuthedContext; readonly apiKeyId: string; readonly upstreamIds: readonly string[] | null; readonly abortSignal?: AbortSignal; @@ -23,14 +31,14 @@ export interface GatewayCtx { readonly currentColo: string; // Null when the api key has no retention configured, in which case // `finalizeGatewayResponse` short-circuits the dump tee and returns the - // response untouched (headers from `responseHeaders` are still applied). + // response untouched (entries from `responseHeaders` are still applied). readonly dump: DumpAccumulator | null; - // Per-request response-header staging. The data-plane writes alias-aware - // and similar non-upstream headers here mid-request; the inbound HTTP - // wrapper merges them onto the final outgoing Response before - // `dump?.finalize`. 
Mutable on purpose — the serve layer owns the - // chosen candidate and is the right seam for stamping the - // `x-floway-alias` header. + // Per-request response-header staging for the non-streaming and error + // paths that build their Response via the Web `Response.json` constructor + // rather than through Hono's `c.json`/`streamSSE`. The serve layer writes + // gateway-stamped headers (e.g. `x-floway-alias`) here in lockstep with + // its `ctx.c.header(...)` call; `finalizeGatewayResponse` then merges + // them onto the outgoing Response. readonly responseHeaders: Headers; } @@ -67,6 +75,7 @@ export const createGatewayCtxFromHono = (c: AuthedContext, opts: CreateGatewayCt if (opts.model !== undefined) dump?.requestedModel(opts.model); const colo = getCurrentColo(c.req.raw); return { + c, apiKeyId: apiKey.id, upstreamIds, abortSignal: controller?.signal, @@ -81,10 +90,22 @@ export const createGatewayCtxFromHono = (c: AuthedContext, opts: CreateGatewayCt }; }; +// Stage one gateway response header so it lands on the outgoing Response +// regardless of which builder produced it. Calls Hono's `c.header` (the +// only knob that survives `streamSSE`'s internal `c.newResponse`) AND +// stages on the per-ctx `responseHeaders` bag that `finalizeGatewayResponse` +// merges onto Web-`Response.json`-built non-streaming responses. +export const stageGatewayResponseHeader = (ctx: GatewayCtx, name: string, value: string): void => { + ctx.c.header(name, value); + ctx.responseHeaders.set(name, value); +}; + // Apply ctx-stamped response headers onto the outgoing Response and then run // the dump-accumulator's finalize tee. Every inbound HTTP wrapper returns its -// response through this seam so alias and other gateway-stamped headers ride -// out uniformly across happy-path, error, and passthrough paths. 
+// response through this seam so gateway-stamped headers ride out uniformly +// across happy-path, error, and passthrough paths — including the +// non-streaming surfaces that build their Response via Web `Response.json` +// rather than Hono's `c.json`. export const finalizeGatewayResponse = (ctx: GatewayCtx, response: Response): Response => { for (const [name, value] of ctx.responseHeaders) response.headers.set(name, value); return ctx.dump?.finalize(response) ?? response; diff --git a/packages/gateway/src/data-plane/chat/shared/respond_test.ts b/packages/gateway/src/data-plane/chat/shared/respond_test.ts index fd506b083..738bb6399 100644 --- a/packages/gateway/src/data-plane/chat/shared/respond_test.ts +++ b/packages/gateway/src/data-plane/chat/shared/respond_test.ts @@ -4,6 +4,7 @@ import type { GatewayCtx } from './gateway-ctx.ts'; import { SourceStreamState, recordPerformance, recordUsage } from './respond.ts'; import { initRepo } from '../../../repo/index.ts'; import { InMemoryRepo } from '../../../repo/memory.ts'; +import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts'; import type { PerformanceTelemetryContext, TelemetryModelIdentity } from '@floway-dev/provider'; import { assertEquals } from '@floway-dev/test-utils'; @@ -45,6 +46,7 @@ const setup = (): Harness => { dump: null, backgroundScheduler: promise => { background.push(promise); }, requestStartedAt, + c: stubAuthedContext(), responseHeaders: new Headers(), }), }; diff --git a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts index 374cab4fe..d3e9ad8b0 100644 --- a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts +++ b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts @@ -4,6 +4,7 @@ import type { GatewayCtx } from './gateway-ctx.ts'; import { withUpstreamTelemetry } from './upstream-telemetry.ts'; import { initRepo } from 
'../../../repo/index.ts'; import { InMemoryRepo } from '../../../repo/memory.ts'; +import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts'; import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common'; import type { MessagesStreamEvent } from '@floway-dev/protocols/messages'; import type { PerformanceTelemetryContext } from '@floway-dev/provider'; @@ -24,6 +25,7 @@ const baseCtx = (overrides: Partial<GatewayCtx> = {}): GatewayCtx => { apiKeyId: 'key_1', upstreamIds: null, wantsStream: true, + c: stubAuthedContext(), responseHeaders: new Headers(), requestStartedAt: 0, runtimeLocation: 'TEST', diff --git a/packages/gateway/src/data-plane/completions/serve.ts b/packages/gateway/src/data-plane/completions/serve.ts index 2ab560c4b..d72ed42c2 100644 --- a/packages/gateway/src/data-plane/completions/serve.ts +++ b/packages/gateway/src/data-plane/completions/serve.ts @@ -10,7 +10,7 @@ import type { Context } from 'hono'; import { tokenUsageFromCompletionsUsage } from './usage.ts'; import type { TokenUsage } from '../../repo/types.ts'; -import { createGatewayCtxFromHono } from '../chat/shared/gateway-ctx.ts'; +import { createGatewayCtxFromHono, finalizeGatewayResponse } from '../chat/shared/gateway-ctx.ts'; import { readRequestBody } from '../chat/shared/request-body.ts'; import { passthroughApiError, passthroughServe } from '../shared/passthrough-serve.ts'; import { isOpenAIUsageOnlyEventShape, type ProtocolFrame } from '@floway-dev/protocols/common'; @@ -65,8 +65,7 @@ export const completions = async (c: Context): Promise<Response> => { }); if (request.type === 'invalid') { ctx.dump?.error('gateway'); - const response = passthroughApiError(c, request.message, 400); - return (ctx.dump?.finalize(response) ?? 
response); + return finalizeGatewayResponse(ctx, passthroughApiError(c, request.message, 400)); } ctx.dump?.requestedModel(request.model); @@ -115,5 +114,5 @@ export const completions = async (c: Context): Promise<Response> => { }, }, }); - return (ctx.dump?.finalize(response) ?? response); + return finalizeGatewayResponse(ctx, response); }; diff --git a/packages/gateway/src/data-plane/completions/serve_test.ts b/packages/gateway/src/data-plane/completions/serve_test.ts index 1cd2263bb..1dd90c6ed 100644 --- a/packages/gateway/src/data-plane/completions/serve_test.ts +++ b/packages/gateway/src/data-plane/completions/serve_test.ts @@ -2,6 +2,7 @@ import { test } from 'vitest'; import { initDumpBroker, initDumpStore } from '../../dump/registry.ts'; import { installDumpStubs } from '../../dump/test-fixtures.ts'; +import type { MemoryModelAliasesRepo } from '../../repo/memory.ts'; import { buildCustomUpstreamRecord, flushAsyncWork, requestApp, setupAppTest } from '../../test-helpers.ts'; import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot'; import { assertEquals, assertExists, jsonResponse, withMockedFetch } from '@floway-dev/test-utils'; @@ -397,3 +398,49 @@ test('/v1/completions streaming records usage row, request_total+upstream_succes assertEquals(frames[3]?.type, 'done'); } }); + +// Alias header coverage for /v1/completions: the matched alias name rides +// out on `x-floway-alias`. Non-streaming path uses passthrough's `json` +// branch; the streaming path stamps the same header via Hono's `c.header` +// before `streamSSE` builds the response. 
+test('/v1/completions stamps x-floway-alias when the request hits an aliased model', async () => { + const { apiKey, repo } = await setupAppTest(); + await registerCompletionsUpstream(repo); + (repo.modelAliases as MemoryModelAliasesRepo).setAll([ + { + alias: 'completions-alias', + targetModelId: 'davinci-002', + upstreamIds: [], + rules: {}, + visibleInModelsList: true, + onConflict: 'real-only', + createdAt: 0, + }, + ]); + + await withMockedFetch( + request => { + const url = new URL(request.url); + if (url.hostname === 'passthrough.example.com' && url.pathname === '/v1/completions') { + return jsonResponse({ + id: 'cmpl_resp', + object: 'text_completion', + created: 1, + model: 'davinci-002', + choices: [{ index: 0, text: ' world', finish_reason: 'stop' }], + usage: { prompt_tokens: 5, completion_tokens: 1, total_tokens: 6 }, + }); + } + throw new Error(`Unhandled fetch ${request.url}`); + }, + async () => { + const response = await requestApp('/v1/completions', { + method: 'POST', + headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key }, + body: JSON.stringify({ model: 'completions-alias', prompt: 'hello' }), + }); + assertEquals(response.status, 200); + assertEquals(response.headers.get('x-floway-alias'), 'completions-alias'); + }, + ); +}); diff --git a/packages/gateway/src/data-plane/embeddings/serve.ts b/packages/gateway/src/data-plane/embeddings/serve.ts index 6262546e5..9c33e6736 100644 --- a/packages/gateway/src/data-plane/embeddings/serve.ts +++ b/packages/gateway/src/data-plane/embeddings/serve.ts @@ -3,7 +3,7 @@ import type { Context } from 'hono'; -import { createGatewayCtxFromHono } from '../chat/shared/gateway-ctx.ts'; +import { createGatewayCtxFromHono, finalizeGatewayResponse } from '../chat/shared/gateway-ctx.ts'; import { readRequestBody } from '../chat/shared/request-body.ts'; import { passthroughApiError, passthroughServe } from '../shared/passthrough-serve.ts'; import { tokenUsageFromEmbeddingsBody } from 
'../shared/telemetry/usage.ts'; @@ -49,8 +49,7 @@ export const embeddings = async (c: Context): Promise => { const request = prepareEmbeddingsRequest(requestBody.bytes); if (request.type === 'invalid') { ctx.dump?.error('gateway'); - const response = passthroughApiError(c, request.message, 400); - return (ctx.dump?.finalize(response) ?? response); + return finalizeGatewayResponse(ctx, passthroughApiError(c, request.message, 400)); } ctx.dump?.requestedModel(request.model); @@ -66,5 +65,5 @@ export const embeddings = async (c: Context): Promise => { }, response: { format: 'json', extractBilling: tokenUsageFromEmbeddingsBody }, }); - return (ctx.dump?.finalize(response) ?? response); + return finalizeGatewayResponse(ctx, response); }; diff --git a/packages/gateway/src/data-plane/embeddings/serve_test.ts b/packages/gateway/src/data-plane/embeddings/serve_test.ts index bf86dc9a7..c6c44f61b 100644 --- a/packages/gateway/src/data-plane/embeddings/serve_test.ts +++ b/packages/gateway/src/data-plane/embeddings/serve_test.ts @@ -1,5 +1,6 @@ import { test } from 'vitest'; +import type { MemoryModelAliasesRepo } from '../../repo/memory.ts'; import { buildCustomUpstreamRecord, copilotModels, flushAsyncWork, requestApp, setupAppTest } from '../../test-helpers.ts'; import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot'; import { jsonResponse, withMockedFetch, assertEquals, assertExists } from '@floway-dev/test-utils'; @@ -495,3 +496,58 @@ test('/v1/embeddings rejects malformed body at the provider-independent boundary }, ); }); + +// Critical alias header coverage for the passthrough surface: the matched +// alias name must ride out on `x-floway-alias` so downstream observers can +// tell a real-model hit from an alias-routed one. Goes through Hono's +// `c.header` in `passthroughServe`, mirroring the chat path. 
+test('/v1/embeddings stamps x-floway-alias when the request hits an aliased model', async () => { + const { apiKey, repo } = await setupAppTest(); + (repo.modelAliases as MemoryModelAliasesRepo).setAll([ + { + alias: 'embed-alias', + targetModelId: 'text-embedding-real', + upstreamIds: [], + rules: {}, + visibleInModelsList: true, + onConflict: 'real-only', + createdAt: 0, + }, + ]); + + await withMockedFetch( + request => { + const url = new URL(request.url); + if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']); + if (url.pathname === '/copilot_internal/v2/token') { + return jsonResponse({ + token: 'copilot-access-token', + expires_at: 4102444800, + refresh_in: 3600, + endpoints: { api: 'https://api.individual.githubcopilot.com' }, + }); + } + if (url.pathname === '/models') { + return jsonResponse(copilotModels([{ id: 'text-embedding-real', supported_endpoints: ['/embeddings'] }])); + } + if (url.pathname === '/embeddings') { + return jsonResponse({ + object: 'list', + model: 'text-embedding-real', + data: [{ object: 'embedding', index: 0, embedding: [0.1] }], + usage: { prompt_tokens: 1, total_tokens: 1 }, + }); + } + throw new Error(`Unhandled fetch ${request.url}`); + }, + async () => { + const response = await requestApp('/v1/embeddings', { + method: 'POST', + headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key }, + body: JSON.stringify({ model: 'embed-alias', input: 'hello' }), + }); + assertEquals(response.status, 200); + assertEquals(response.headers.get('x-floway-alias'), 'embed-alias'); + }, + ); +}); diff --git a/packages/gateway/src/data-plane/images/serve.ts b/packages/gateway/src/data-plane/images/serve.ts index 405b29b03..58f8a7a25 100644 --- a/packages/gateway/src/data-plane/images/serve.ts +++ b/packages/gateway/src/data-plane/images/serve.ts @@ -10,7 +10,7 @@ import type { Context } from 'hono'; -import { createGatewayCtxFromHono } from '../chat/shared/gateway-ctx.ts'; +import { 
createGatewayCtxFromHono, finalizeGatewayResponse } from '../chat/shared/gateway-ctx.ts'; import { readRequestBody } from '../chat/shared/request-body.ts'; import { passthroughApiError, passthroughServe } from '../shared/passthrough-serve.ts'; import { tokenUsageFromImagesBody } from '../shared/telemetry/usage.ts'; @@ -48,8 +48,7 @@ export const imagesGenerations = async (c: Context): Promise => { const request = prepareImagesGenerationsRequest(requestBody.bytes); if (request.type === 'invalid') { ctx.dump?.error('gateway'); - const response = passthroughApiError(c, request.message, 400); - return (ctx.dump?.finalize(response) ?? response); + return finalizeGatewayResponse(ctx, passthroughApiError(c, request.message, 400)); } ctx.dump?.requestedModel(request.model); @@ -65,7 +64,7 @@ export const imagesGenerations = async (c: Context): Promise => { }, response: { format: 'json', extractBilling: tokenUsageFromImagesBody }, }); - return (ctx.dump?.finalize(response) ?? response); + return finalizeGatewayResponse(ctx, response); }; export const imagesEdits = async (c: Context): Promise => { @@ -82,15 +81,13 @@ export const imagesEdits = async (c: Context): Promise => { // parser's error text. The wording is enough for a client to know // they sent the wrong content type or a malformed body. ctx.dump?.error('gateway'); - const response = passthroughApiError(c, 'Image edits request body must be a valid multipart/form-data payload.', 400); - return (ctx.dump?.finalize(response) ?? response); + return finalizeGatewayResponse(ctx, passthroughApiError(c, 'Image edits request body must be a valid multipart/form-data payload.', 400)); } const modelRaw = form.get('model'); if (typeof modelRaw !== 'string' || modelRaw.length === 0) { ctx.dump?.error('gateway'); - const response = passthroughApiError(c, 'Image edits request body must include a model field.', 400); - return (ctx.dump?.finalize(response) ?? 
response); + return finalizeGatewayResponse(ctx, passthroughApiError(c, 'Image edits request body must include a model field.', 400)); } ctx.dump?.requestedModel(modelRaw); @@ -115,5 +112,5 @@ export const imagesEdits = async (c: Context): Promise => { }, response: { format: 'json', extractBilling: tokenUsageFromImagesBody }, }); - return (ctx.dump?.finalize(response) ?? response); + return finalizeGatewayResponse(ctx, response); }; diff --git a/packages/gateway/src/data-plane/images/serve_test.ts b/packages/gateway/src/data-plane/images/serve_test.ts index 85b5f1adf..f241ad89d 100644 --- a/packages/gateway/src/data-plane/images/serve_test.ts +++ b/packages/gateway/src/data-plane/images/serve_test.ts @@ -1,5 +1,6 @@ import { test } from 'vitest'; +import type { MemoryModelAliasesRepo } from '../../repo/memory.ts'; import { buildCustomUpstreamRecord, copilotModels, flushAsyncWork, requestApp, setupAppTest } from '../../test-helpers.ts'; import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot'; import { jsonResponse, withMockedFetch, assertEquals, assertExists } from '@floway-dev/test-utils'; @@ -233,3 +234,62 @@ test('/v1/images/edits forwards a multipart request through an Azure model and r const usageRows = await repo.usage.listAll(); assertEquals(usageRows.some(row => row.model === 'gpt-image-2' && row.tokens.input === 7 && row.tokens.output === 11), true); }); + +// Alias header coverage for /v1/images/generations: an alias whose target is +// an image-generation model must surface its name on `x-floway-alias` for +// downstream observability. 
+test('/v1/images/generations stamps x-floway-alias when the request hits an aliased model', async () => { + const { apiKey, repo } = await setupAppTest(); + clearInProcessCopilotTokenCache(); + (repo.modelAliases as MemoryModelAliasesRepo).setAll([ + { + alias: 'image-alias', + targetModelId: 'gpt-image-2', + upstreamIds: [], + rules: {}, + visibleInModelsList: true, + onConflict: 'real-only', + createdAt: 0, + }, + ]); + await repo.upstreams.save(buildCustomUpstreamRecord({ + id: 'up_images', + name: 'Custom Image Provider', + sortOrder: 100, + config: { + baseUrl: 'https://images.example.com', + authStyle: 'bearer', + apiKey: 'sk-images', + endpoints: {}, + }, + })); + + await withMockedFetch( + request => { + const url = new URL(request.url); + if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']); + if (url.pathname === '/copilot_internal/v2/token') { + return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } }); + } + if (url.hostname === 'api.individual.githubcopilot.com' && url.pathname === '/models') { + return jsonResponse(copilotModels([{ id: 'copilot-chat', supported_endpoints: ['/chat/completions'] }])); + } + if (url.hostname === 'images.example.com' && url.pathname === '/v1/models') { + return jsonResponse({ data: [{ id: 'gpt-image-2' }] }); + } + if (url.hostname === 'images.example.com' && url.pathname === '/v1/images/generations') { + return jsonResponse({ data: [{ b64_json: 'aGVsbG8=' }] }); + } + throw new Error(`Unhandled fetch ${request.url}`); + }, + async () => { + const response = await requestApp('/v1/images/generations', { + method: 'POST', + headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key }, + body: JSON.stringify({ model: 'image-alias', prompt: 'hi' }), + }); + assertEquals(response.status, 200); + assertEquals(response.headers.get('x-floway-alias'), 'image-alias'); + }, + ); +}); 
diff --git a/packages/gateway/src/data-plane/models/gemini_test.ts b/packages/gateway/src/data-plane/models/gemini_test.ts index 7f4cedaec..0327bae33 100644 --- a/packages/gateway/src/data-plane/models/gemini_test.ts +++ b/packages/gateway/src/data-plane/models/gemini_test.ts @@ -1,5 +1,6 @@ import { test } from 'vitest'; +import type { MemoryModelAliasesRepo } from '../../repo/memory.ts'; import { buildCustomUpstreamRecord, copilotModels, requestApp, setupAppTest } from '../../test-helpers.ts'; import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot'; import { jsonResponse, withMockedFetch, assertEquals } from '@floway-dev/test-utils'; @@ -408,3 +409,48 @@ test('/v1beta/models hides malformed upstream response bodies', async () => { }, ); }); + +// Gemini's `Model` resource is closed (no `aliasedFrom` extension), so the +// `/v1beta/models` surface advertises an alias entry as a synthetic Gemini +// model carrying the alias id and the target's display fields. This test +// guards the synthetic shape — name, displayName, supportedGenerationMethods +// — so a future refactor of `loadGeminiModels` cannot silently drop the +// alias entries. 
+test('/v1beta/models appends visible aliases as synthetic Gemini model entries', async () => { + const { repo, apiKey } = await setupAppTest(); + + (repo.modelAliases as MemoryModelAliasesRepo).setAll([ + { + alias: 'codex-auto-review', + targetModelId: 'gpt-gemini-list', + upstreamIds: [], + rules: { reasoning: { effort: 'low' } }, + visibleInModelsList: true, + onConflict: 'real-only', + createdAt: 1_700_000_000, + }, + ]); + + await withMockedFetch( + request => { + const url = new URL(request.url); + if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']); + if (url.pathname === '/copilot_internal/v2/token') { + return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } }); + } + if (url.pathname === '/models') { + return jsonResponse(copilotModels([{ id: 'gpt-gemini-list', display_name: 'GPT Gemini List' }])); + } + throw new Error(`Unhandled fetch ${request.url}`); + }, + async () => { + const response = await requestApp('/v1beta/models', { headers: { 'x-api-key': apiKey.key } }); + assertEquals(response.status, 200); + const body = await response.json() as { models: Array<{ name: string; displayName: string; supportedGenerationMethods: string[] }> }; + const aliasEntry = body.models.find(m => m.name === 'models/codex-auto-review'); + if (!aliasEntry) throw new Error('expected codex-auto-review alias entry'); + assertEquals(aliasEntry.displayName, 'codex-auto-review'); + assertEquals(aliasEntry.supportedGenerationMethods, ['generateContent', 'streamGenerateContent', 'countTokens']); + }, + ); +}); diff --git a/packages/gateway/src/data-plane/shared/passthrough-serve.ts b/packages/gateway/src/data-plane/shared/passthrough-serve.ts index 6c917fb86..b566b582d 100644 --- a/packages/gateway/src/data-plane/shared/passthrough-serve.ts +++ b/packages/gateway/src/data-plane/shared/passthrough-serve.ts @@ -24,6 +24,7 @@ import type { 
AuthedContext } from '../../middleware/auth.ts'; import { getRepo } from '../../repo/index.ts'; import type { TokenUsage } from '../../repo/types.ts'; import type { GatewayCtx } from '../chat/shared/gateway-ctx.ts'; +import { stageGatewayResponseHeader } from '../chat/shared/gateway-ctx.ts'; import { type StreamCompletion, writeSSEFrames } from '../chat/shared/stream/sse.ts'; import { resolveModelForRequest } from '../providers/registry.ts'; import type { BackgroundScheduler } from '@floway-dev/platform'; @@ -155,8 +156,10 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise< // rules themselves never apply here — the inbound payload (embeddings, // images, /v1/completions) has no protocol-extension slots for the rule // knobs. We still surface the matched alias name on the - // `x-floway-alias` response header and trace one log line per dropped - // rule so an operator can confirm the rewrite ran. + // `x-floway-alias` response header (staged via Hono's `c.header` so it + // survives `streamSSE`'s internal `c.newResponse` on the streaming + // `/v1/completions` path) and trace one log line per dropped rule so an + // operator can confirm the rewrite ran. const aliases = await getRepo().modelAliases.loadAll(); // Each match is one (upstream, upstream-catalog id) pair that interprets // the inbound public id. 
Iteration order follows configured sort_order @@ -172,7 +175,7 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise< for (const match of matches) { if (!bindingServesEndpoint(match.binding)) continue; if (match.aliasName !== undefined) { - ctx.responseHeaders.set('x-floway-alias', match.aliasName); + stageGatewayResponseHeader(ctx, 'x-floway-alias', match.aliasName); traceDroppedAliasRulesForPassthrough(match.aliasName, aliases, sourceApi); } diff --git a/packages/gateway/src/test-helpers/gateway-ctx.ts b/packages/gateway/src/test-helpers/gateway-ctx.ts index 3ebf1f474..047981ed1 100644 --- a/packages/gateway/src/test-helpers/gateway-ctx.ts +++ b/packages/gateway/src/test-helpers/gateway-ctx.ts @@ -1,4 +1,13 @@ import type { GatewayCtx } from '../data-plane/chat/shared/gateway-ctx.ts'; +import type { AuthedContext } from '../middleware/auth.ts'; + +// Minimal stub for the Hono `c` carried on `GatewayCtx`. Only `c.header` +// is touched by the serve layer (to stamp `x-floway-alias`); unit tests +// that don't exercise the alias branch never call it. Integration tests +// that need real Hono behavior build the ctx via `createGatewayCtxFromHono` +// against a real `makeApp()` request rather than going through this stub. +export const stubAuthedContext = (): AuthedContext => + ({ header: () => {} } as unknown as AuthedContext); // Shared minimal GatewayCtx for tests that exercise serve / respond / // interceptor code in isolation. Defaults satisfy every required field; pass @@ -7,6 +16,7 @@ import type { GatewayCtx } from '../data-plane/chat/shared/gateway-ctx.ts'; // construct one and spread `{ abortSignal: controller.signal, // downstreamAbortController: controller }` into the overrides. 
export const mockGatewayCtx = (overrides: Partial = {}): GatewayCtx => ({ + c: stubAuthedContext(), apiKeyId: 'key_test', upstreamIds: null, wantsStream: false, From 17a7877c5fdcda16064bf9d972675583b5c69acb Mon Sep 17 00:00:00 2001 From: Menci Date: Fri, 26 Jun 2026 01:38:34 +0800 Subject: [PATCH 008/170] revert(translate): restore pre-extension native field translation on *-via-messages MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Task 3 (`e1891e1d`) added synthesis of `thinking.display` from Responses-native `reasoning.summary` and Gemini-native `thinkingConfig.includeThoughts`, plus a new native-to-native `service_tier` carry on Responses → Messages. These are NATIVE fields with translation behavior the prior pairs had already decided; the alias work should not have reshaped that contract. Revert the native-field paths in: - responses-via-messages: drop `reasoning.summary` → `thinking.display` and `service_tier` → `service_tier` propagation. Keep the new extension-field carries (`thinking_budget`, `adaptive_thinking`, `anthropic_speed`). - gemini-via-messages: drop `thinkingConfig.includeThoughts` → `thinking.display` propagation. Keep `generationConfig.serviceTier`, `verbosity`, and top-level `anthropicSpeed` extension carries. Tests that asserted the new native-field synthesis are removed; the existing extension-field tests stay untouched. 
--- .../src/gemini-via-messages/request.ts | 8 ------ .../src/gemini-via-messages/request_test.ts | 16 ----------- .../src/responses-via-messages/request.ts | 28 ++++++------------- .../responses-via-messages/request_test.ts | 23 --------------- 4 files changed, 8 insertions(+), 67 deletions(-) diff --git a/packages/translate/src/gemini-via-messages/request.ts b/packages/translate/src/gemini-via-messages/request.ts index 91fbe0493..c109da54d 100644 --- a/packages/translate/src/gemini-via-messages/request.ts +++ b/packages/translate/src/gemini-via-messages/request.ts @@ -161,14 +161,6 @@ const applyThinkingConfig = (request: MessagesPayload, thinkingConfig?: GeminiTh } } - // `includeThoughts` materializes onto `thinking.display`: true → summarized - // (Anthropic redacts to a single-block summary), false → omitted (no - // thinking surface at all). Skip when the source did not express either. - if (thinkingConfig.includeThoughts !== undefined && request.thinking?.type !== 'disabled') { - const display = thinkingConfig.includeThoughts === true ? ('summarized' as const) : ('omitted' as const); - request.thinking = request.thinking ? { ...request.thinking, display } : { type: 'enabled', display }; - } - const effort = geminiThinkingLevelEffort(thinkingConfig); // Spread to merge with any output_config fields a sibling helper has // already written (e.g. 
structured-output `format` from diff --git a/packages/translate/src/gemini-via-messages/request_test.ts b/packages/translate/src/gemini-via-messages/request_test.ts index b10339a49..555c12aa4 100644 --- a/packages/translate/src/gemini-via-messages/request_test.ts +++ b/packages/translate/src/gemini-via-messages/request_test.ts @@ -428,22 +428,6 @@ test('buildTargetRequest emits generationConfig.serviceTier onto Messages servic assertEquals(result.service_tier, 'priority'); }); -test('buildTargetRequest maps includeThoughts onto thinking.display (true → summarized, false → omitted)', () => { - const summarized = buildTargetRequest( - { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { includeThoughts: true } } }, - 'claude-test', - noOptions, - ); - const omitted = buildTargetRequest( - { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { includeThoughts: false } } }, - 'claude-test', - noOptions, - ); - - assertEquals(summarized.thinking, { type: 'enabled', display: 'summarized' }); - assertEquals(omitted.thinking, { type: 'enabled', display: 'omitted' }); -}); - test('buildTargetRequest drops verbosity extension on Messages (no slot)', () => { const result = buildTargetRequest( { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { verbosity: 'low' } }, diff --git a/packages/translate/src/responses-via-messages/request.ts b/packages/translate/src/responses-via-messages/request.ts index 504ca45fa..f16acb936 100644 --- a/packages/translate/src/responses-via-messages/request.ts +++ b/packages/translate/src/responses-via-messages/request.ts @@ -1,7 +1,7 @@ import { parseToolArgumentsObject } from '../shared/messages/tool-arguments.ts'; import { responsesReasoningToMessagesUpstreamBlock } from '../shared/messages-and-responses/reasoning.ts'; import { buildCustomToolInputSchema } from '../shared/responses-via/custom-tool-wrap.ts'; -import { 
buildMessagesThinkingFromExtensions, mapSummaryToAnthropicDisplay } from '../shared/via-messages/anthropic-extensions.ts'; +import { buildMessagesThinkingFromExtensions } from '../shared/via-messages/anthropic-extensions.ts'; import { applyLastMessageCacheBreakpoint, applyLastToolCacheBreakpoint, EPHEMERAL_CACHE_CONTROL } from '../shared/via-messages/cache-breakpoints.ts'; import { fetchRemoteImage, type RemoteImageLoader, resolveImageUrlToMessagesImage } from '../shared/via-messages/remote-images.ts'; import { @@ -332,29 +332,18 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op if (formatSchema) outputConfig.format = { type: 'json_schema', schema: formatSchema }; const hasOutputConfig = Object.keys(outputConfig).length > 0; - // Native Responses → Messages: `reasoning.summary` materializes onto the - // Messages-native `thinking.display`. Extension-driven thinking - // (`thinking_budget`, `adaptive_thinking`) takes precedence over the - // summary-only fallback because the alias write-side validator pins - // facets one-at-a-time; when neither extension is set and summary is the - // only signal, we synthesize `thinking.{type:'enabled', display}` so the - // display reaches the wire. + // Extension-driven thinking (`thinking_budget`, `adaptive_thinking`) wins + // over the native `effort === 'none'` disable, so the alias write-side + // facets that target the structured thinking slot survive the legacy + // disable shortcut. Native `reasoning.summary` and `service_tier` do not + // surface onto Messages — the Responses-native vocabulary keeps its + // pre-existing translation contract and rides the upstream sanitizer. const extensionThinking = buildMessagesThinkingFromExtensions({ thinkingBudget: payload.thinking_budget, adaptiveThinking: payload.adaptive_thinking, }); const disabledThinking = effort === 'none' ? { type: 'disabled' as const } : undefined; - const summaryDisplay = payload.reasoning?.summary !== undefined ? 
mapSummaryToAnthropicDisplay(payload.reasoning.summary) : undefined; - const fallbackDisplayThinking = - !extensionThinking && !disabledThinking && summaryDisplay !== undefined - ? { type: 'enabled' as const, display: summaryDisplay as NonNullable['display'] } - : undefined; - const thinkingFromExtensions = extensionThinking - ? summaryDisplay !== undefined - ? { ...extensionThinking, display: summaryDisplay as NonNullable['display'] } - : extensionThinking - : undefined; - const thinking = thinkingFromExtensions ?? disabledThinking ?? fallbackDisplayThinking; + const thinking = extensionThinking ?? disabledThinking; // Responses `metadata` is intentionally omitted on the Messages path; // not coerced into Anthropic metadata.user_id, prompt-cache, or safety @@ -372,7 +361,6 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op ...(thinking ? { thinking } : {}), ...(hasOutputConfig ? { output_config: outputConfig } : {}), ...(payload.anthropic_speed != null ? { speed: payload.anthropic_speed } : {}), - ...(payload.service_tier != null ? 
{ service_tier: payload.service_tier } : {}), }; return { target, customToolNames }; diff --git a/packages/translate/src/responses-via-messages/request_test.ts b/packages/translate/src/responses-via-messages/request_test.ts index f36ff89f6..57951048c 100644 --- a/packages/translate/src/responses-via-messages/request_test.ts +++ b/packages/translate/src/responses-via-messages/request_test.ts @@ -664,35 +664,12 @@ test('translateResponsesToMessages emits adaptive_thinking onto thinking.{adapti assertEquals(result.target.thinking, { type: 'adaptive' }); }); -test('translateResponsesToMessages maps reasoning.summary onto thinking.display (concise|detailed → summarized, omitted → omitted)', async () => { - const concise = await translateResponsesToMessages(minimalResponsesPayload({ reasoning: { effort: 'high', summary: 'concise' } })); - const detailed = await translateResponsesToMessages(minimalResponsesPayload({ reasoning: { effort: 'high', summary: 'detailed' } })); - const omitted = await translateResponsesToMessages(minimalResponsesPayload({ reasoning: { effort: 'high', summary: 'omitted' } })); - - assertEquals(concise.target.thinking, { type: 'enabled', display: 'summarized' }); - assertEquals(detailed.target.thinking, { type: 'enabled', display: 'summarized' }); - assertEquals(omitted.target.thinking, { type: 'enabled', display: 'omitted' }); -}); - test('translateResponsesToMessages emits anthropic_speed onto speed', async () => { const result = await translateResponsesToMessages(minimalResponsesPayload({ anthropic_speed: 'fast' })); assertEquals(result.target.speed, 'fast'); }); -test('translateResponsesToMessages forwards service_tier verbatim', async () => { - const result = await translateResponsesToMessages(minimalResponsesPayload({ service_tier: 'priority' })); - assertEquals(result.target.service_tier, 'priority'); -}); - test('translateResponsesToMessages leaves anthropic_beta as inbound residue for the gateway header pass', async () => { const result = 
await translateResponsesToMessages(minimalResponsesPayload({ anthropic_beta: ['fast-mode-2026-02-01'] })); assertEquals('anthropic_beta' in result.target, false); }); - -test('translateResponsesToMessages emission stack: budget + summary writes display onto the budget-driven block', async () => { - const result = await translateResponsesToMessages(minimalResponsesPayload({ - thinking_budget: 2048, - reasoning: { effort: 'medium', summary: 'concise' }, - })); - assertEquals(result.target.thinking, { type: 'enabled', budget_tokens: 2048, display: 'summarized' }); -}); From 6d13258431e2f2bb5508739f44d1082a38642e02 Mon Sep 17 00:00:00 2001 From: Menci Date: Fri, 26 Jun 2026 01:38:52 +0800 Subject: [PATCH 009/170] feat(aliases): drop responses entry shim; enumerate aliases per upstream + form MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two follow-up changes to the alias data-plane: 1. Remove the `/v1/responses` entry-level `codex-auto-review → gpt-5.4` rewrite shim. The seed alias in `0046_model_aliases.sql` now routes `codex-auto-review` everywhere through the normal matcher. On a Codex upstream that exposes a real `codex-auto-review`, `on_conflict=real-only` lets the real id win — Codex CLI callers wanting the previous shim behaviour must set `effort=low` themselves or pick a different `onConflict`. All other inbound surfaces are unchanged. 2. List aliases per-upstream and per-addressable-form in `/v1/models` and the Gemini `/v1beta/models` listing, instead of one synthetic entry per alias. Each visible alias now emits one entry per (provider, listed form) pair whose raw catalog can resolve the target, so dual-listed upstreams emit both `codex-auto-review` and `<prefix>/codex-auto-review`. Aliases whose target is not reachable from any upstream produce zero entries; the previous "no silent hide" rule no longer fits a per-upstream model.
A new `display_name` column on `model_aliases` (migration `0047`) carries an operator-set label; the listing composes it as `${upstream}: ${alias displayName}` when set, or `${upstream}: ${target displayName}${rules summary}` otherwise. The rules-summary formatter and display-name composer live in `control-plane/model-aliases/display.ts` and are covered by unit tests. The shared per-upstream alias emission helper sits in `data-plane/models/alias-listing.ts` and is reused by both the OpenAI and Gemini listings. `getModelsForListing` exposes the per-upstream raw catalog alongside the merged public model list so we collect catalogs once per request even when many aliases need them. --- .../0047_model_aliases_display_name.sql | 3 + .../control-plane/model-aliases/display.ts | 39 ++++ .../model-aliases/display_test.ts | 75 +++++++ .../src/control-plane/model-aliases/repo.ts | 4 +- .../control-plane/model-aliases/repo_test.ts | 23 +++ .../src/control-plane/model-aliases/types.ts | 5 + .../src/data-plane/chat/responses/http.ts | 41 +--- .../data-plane/chat/responses/http_test.ts | 60 ------ .../src/data-plane/models/alias-listing.ts | 52 +++++ .../gateway/src/data-plane/models/gemini.ts | 39 ++-- .../src/data-plane/models/gemini_test.ts | 2 +- .../gateway/src/data-plane/models/load.ts | 51 +++-- .../src/data-plane/models/serve_test.ts | 184 +++++++++++++++++- .../src/data-plane/providers/registry.ts | 37 +++- 14 files changed, 472 insertions(+), 143 deletions(-) create mode 100644 packages/gateway/migrations/0047_model_aliases_display_name.sql create mode 100644 packages/gateway/src/control-plane/model-aliases/display.ts create mode 100644 packages/gateway/src/control-plane/model-aliases/display_test.ts create mode 100644 packages/gateway/src/data-plane/models/alias-listing.ts diff --git a/packages/gateway/migrations/0047_model_aliases_display_name.sql b/packages/gateway/migrations/0047_model_aliases_display_name.sql new file mode 100644 index 000000000..9d21ed9a1 --- 
/dev/null +++ b/packages/gateway/migrations/0047_model_aliases_display_name.sql @@ -0,0 +1,3 @@ +ALTER TABLE model_aliases ADD COLUMN display_name TEXT; + +UPDATE model_aliases SET display_name = 'Codex Auto Review' WHERE alias = 'codex-auto-review'; diff --git a/packages/gateway/src/control-plane/model-aliases/display.ts b/packages/gateway/src/control-plane/model-aliases/display.ts new file mode 100644 index 000000000..576c823a8 --- /dev/null +++ b/packages/gateway/src/control-plane/model-aliases/display.ts @@ -0,0 +1,39 @@ +import type { ModelAliasRules } from './types.ts'; + +// Render the rule set as a parenthesized, comma-joined string so the +// `/v1/models` listing can suffix it onto the target model's display name when +// the operator did not supply an explicit alias `displayName`. Empty rules +// produce an empty string (no parentheses); the join order is fixed across +// fields so a given rule set always renders the same way. +// +// `anthropicBeta` is sorted at format time so two operators carrying the same +// token set in different orders see the same label. +export const formatAliasRulesSummary = (rules: ModelAliasRules): string => { + const parts: string[] = []; + if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`); + if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`); + if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning'); + if (rules.reasoning?.summary !== undefined) parts.push(`${rules.reasoning.summary} summary`); + if (rules.verbosity !== undefined) parts.push(`${rules.verbosity} verbosity`); + if (rules.serviceTier !== undefined) parts.push(`${rules.serviceTier} tier`); + if (rules.anthropicSpeed !== undefined) parts.push(`${rules.anthropicSpeed} speed`); + if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) { + parts.push(rules.anthropicBeta.toSorted().join('/')); + } + return parts.length > 0 ? 
` (${parts.join(', ')})` : ''; +}; + +// Compose the final per-entry display name shown in `/v1/models`. The +// upstream name always leads so an operator scanning the listing sees which +// upstream each row belongs to before reading the alias-specific part. +export const composeAliasDisplayName = (input: { + upstreamDisplayName: string; + aliasDisplayName?: string; + targetDisplayName: string; + rules: ModelAliasRules; +}): string => { + if (input.aliasDisplayName !== undefined) { + return `${input.upstreamDisplayName}: ${input.aliasDisplayName}`; + } + return `${input.upstreamDisplayName}: ${input.targetDisplayName}${formatAliasRulesSummary(input.rules)}`; +}; diff --git a/packages/gateway/src/control-plane/model-aliases/display_test.ts b/packages/gateway/src/control-plane/model-aliases/display_test.ts new file mode 100644 index 000000000..7ba7700d0 --- /dev/null +++ b/packages/gateway/src/control-plane/model-aliases/display_test.ts @@ -0,0 +1,75 @@ +import { describe, expect, test } from 'vitest'; + +import { composeAliasDisplayName, formatAliasRulesSummary } from './display.ts'; + +describe('formatAliasRulesSummary', () => { + test('returns empty string when no rules are set', () => { + expect(formatAliasRulesSummary({})).toBe(''); + }); + + test('formats each rule field with its canonical suffix', () => { + expect(formatAliasRulesSummary({ reasoning: { effort: 'high' } })).toBe(' (high effort)'); + expect(formatAliasRulesSummary({ reasoning: { budgetTokens: 4096 } })).toBe(' (4096tk reasoning)'); + expect(formatAliasRulesSummary({ reasoning: { adaptive: true } })).toBe(' (adaptive reasoning)'); + expect(formatAliasRulesSummary({ reasoning: { summary: 'detailed' } })).toBe(' (detailed summary)'); + expect(formatAliasRulesSummary({ verbosity: 'low' })).toBe(' (low verbosity)'); + expect(formatAliasRulesSummary({ serviceTier: 'priority' })).toBe(' (priority tier)'); + expect(formatAliasRulesSummary({ anthropicSpeed: 'fast' })).toBe(' (fast speed)'); + }); + + 
test('sorts anthropicBeta tokens and joins with slashes', () => { + expect(formatAliasRulesSummary({ anthropicBeta: ['extended-thinking', 'fast-mode-2026-02-01'] })).toBe( + ' (extended-thinking/fast-mode-2026-02-01)', + ); + expect(formatAliasRulesSummary({ anthropicBeta: ['fast-mode-2026-02-01', 'extended-thinking'] })).toBe( + ' (extended-thinking/fast-mode-2026-02-01)', + ); + }); + + test('drops anthropicBeta when the token list is empty', () => { + expect(formatAliasRulesSummary({ anthropicBeta: [] })).toBe(''); + }); + + test('joins multiple fields with comma in deterministic order', () => { + expect( + formatAliasRulesSummary({ + reasoning: { effort: 'low', summary: 'concise' }, + verbosity: 'high', + anthropicSpeed: 'fast', + }), + ).toBe(' (low effort, concise summary, high verbosity, fast speed)'); + }); +}); + +describe('composeAliasDisplayName', () => { + test('uses alias displayName when set, suppressing the rules summary', () => { + expect( + composeAliasDisplayName({ + upstreamDisplayName: 'Azure', + aliasDisplayName: 'Codex Auto Review', + targetDisplayName: 'GPT-5.4', + rules: { reasoning: { effort: 'low' } }, + }), + ).toBe('Azure: Codex Auto Review'); + }); + + test('falls back to target displayName with rules suffix when alias displayName is missing', () => { + expect( + composeAliasDisplayName({ + upstreamDisplayName: 'Azure', + targetDisplayName: 'GPT-5.4', + rules: { reasoning: { effort: 'low' } }, + }), + ).toBe('Azure: GPT-5.4 (low effort)'); + }); + + test('omits the rules suffix when rules are empty', () => { + expect( + composeAliasDisplayName({ + upstreamDisplayName: 'Azure', + targetDisplayName: 'GPT-5.4', + rules: {}, + }), + ).toBe('Azure: GPT-5.4'); + }); +}); diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts index a7cfd963f..3718b5fd5 100644 --- a/packages/gateway/src/control-plane/model-aliases/repo.ts +++ 
b/packages/gateway/src/control-plane/model-aliases/repo.ts @@ -8,6 +8,7 @@ interface ModelAliasRow { rules_json: string; visible_in_models_list: number; on_conflict: OnConflict; + display_name: string | null; created_at: number; } @@ -17,7 +18,7 @@ interface ModelAliasRow { // emit alias entries in a stable, operator-predictable order across runtimes. export const loadAllAliases = async (db: SqlDatabase): Promise => { const { results } = await db - .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at FROM model_aliases ORDER BY alias') + .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at FROM model_aliases ORDER BY alias') .all(); return results.map(toModelAlias); }; @@ -29,6 +30,7 @@ const toModelAlias = (row: ModelAliasRow): ModelAlias => ({ rules: parseJsonField(row.alias, 'rules_json', row.rules_json), visibleInModelsList: row.visible_in_models_list === 1, onConflict: row.on_conflict, + ...(row.display_name !== null ? 
{ displayName: row.display_name } : {}), createdAt: row.created_at, }); diff --git a/packages/gateway/src/control-plane/model-aliases/repo_test.ts b/packages/gateway/src/control-plane/model-aliases/repo_test.ts index ff1efa046..5f1e4fa6d 100644 --- a/packages/gateway/src/control-plane/model-aliases/repo_test.ts +++ b/packages/gateway/src/control-plane/model-aliases/repo_test.ts @@ -22,6 +22,7 @@ test('loadAllAliases reads the seed row from a freshly migrated database', async rules: { reasoning: { effort: 'low' } }, visibleInModelsList: true, onConflict: 'real-only', + displayName: 'Codex Auto Review', }); }); @@ -72,6 +73,28 @@ test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_mo }); }); +test('loadAllAliases reads display_name and omits the field when SQL stored NULL', async () => { + const db = await createSqliteTestDb(); + await db.exec('DELETE FROM model_aliases'); + await db + .prepare( + 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)', + ) + .bind('with-label', 'gpt-5.4', '[]', '{}', 1, 'real-only', 'Pretty Label', 1_700_000_000) + .run(); + await db + .prepare( + 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)', + ) + .bind('no-label', 'gpt-5.4', '[]', '{}', 1, 'real-only', null, 1_700_000_001) + .run(); + + const byAlias = new Map((await loadAllAliases(db)).map(entry => [entry.alias, entry])); + assertEquals(byAlias.get('with-label')?.displayName, 'Pretty Label'); + // SQL NULL becomes undefined on the typed row so callers can branch on `=== undefined`. + assertEquals('displayName' in (byAlias.get('no-label') ?? 
{}), false); +}); + test('loadAllAliases surfaces malformed rules_json as a descriptive error', async () => { const db = await createSqliteTestDb(); await db.exec('DELETE FROM model_aliases'); diff --git a/packages/gateway/src/control-plane/model-aliases/types.ts b/packages/gateway/src/control-plane/model-aliases/types.ts index 7594ceff6..3170b4b47 100644 --- a/packages/gateway/src/control-plane/model-aliases/types.ts +++ b/packages/gateway/src/control-plane/model-aliases/types.ts @@ -23,6 +23,11 @@ export type ModelAlias = { readonly rules: ModelAliasRules; readonly visibleInModelsList: boolean; readonly onConflict: OnConflict; + // Operator-set human-readable label shown after the upstream display name in + // `/v1/models` entries (e.g. "Azure: Codex Auto Review"). When unset, the + // listing falls back to the alias target's display name plus a rules-summary + // suffix; see `control-plane/model-aliases/display.ts`. + readonly displayName?: string; // Unix epoch seconds stamped at row insertion. Surfaced on the // `/v1/models` synthesized alias entry so callers see when an alias was // declared, matching the `created` semantics of the real entries.
diff --git a/packages/gateway/src/data-plane/chat/responses/http.ts b/packages/gateway/src/data-plane/chat/responses/http.ts index 9346d5576..671e935ed 100644 --- a/packages/gateway/src/data-plane/chat/responses/http.ts +++ b/packages/gateway/src/data-plane/chat/responses/http.ts @@ -3,7 +3,6 @@ import { respondResponses } from './respond.ts'; import { PreviousResponseNotFoundError } from './serve-prep.ts'; import { responsesServe } from './serve.ts'; import type { AuthedContext } from '../../../middleware/auth.ts'; -import { CODEX_AUTO_REVIEW_ALIAS, CODEX_AUTO_REVIEW_TARGET } from '../../codex/auto-review-alias.ts'; import { inboundHeadersForUpstream } from '../../shared/inbound-headers.ts'; import { createGatewayCtxFromHono, finalizeGatewayResponse, type GatewayCtx } from '../shared/gateway-ctx.ts'; import { readRequestBody, type RequestBody } from '../shared/request-body.ts'; @@ -11,38 +10,6 @@ import { providerModelsUnavailableResponse } from '../shared/upstream-models-err import type { ResponsesPayload } from '@floway-dev/protocols/responses'; import { internalErrorResult, toInternalDebugError } from '@floway-dev/provider'; -// Codex sends auto-review requests over the Responses wire API as a -// `codex-auto-review` model id; rewrite at the entry so downstream routing, -// performance telemetry, and usage accounting all see the real model name -// (and the `low` reasoning effort the alias implies — generate only; -// compact carries no `reasoning` field). -// -// This shim predates the operator-managed alias table seeded by migration -// `0046_model_aliases.sql`. The two paths overlap on `/v1/responses` — -// rewriting at this entry swaps the inbound `model` to `gpt-5.4` BEFORE the -// alias matcher in `enumerateProviderCandidates` runs, so the alias row -// never matches for this surface. 
The carveout is deliberate: the seeded -// alias is stored with `on_conflict='real-only'`, which means on a Codex -// upstream that exposes a real `codex-auto-review` model the alias would -// silently lose to the real id and the `reasoning.effort=low` rule would -// never apply — breaking parity with Codex CLI's native auto-review -// behavior. Other inbound surfaces (`/v1/messages`, `/v1/chat/completions`, -// `/v1beta/…`) carry no entry-level shim and reach the alias matcher -// unchanged; they observe `real-only` semantics as designed. -// -// The shim is a temporary carveout pending a follow-up that either deletes -// it after a deliberate Codex behavior change (e.g. switching to -// `both-alias-first`) or migrates the entire surface to the alias table. -const rewriteResponsesEntryModelAlias = (payload: ResponsesPayload, stampReasoningEffort: boolean): ResponsesPayload => { - if (payload.model !== CODEX_AUTO_REVIEW_ALIAS) return payload; - if (!stampReasoningEffort) return { ...payload, model: CODEX_AUTO_REVIEW_TARGET }; - return { - ...payload, - model: CODEX_AUTO_REVIEW_TARGET, - reasoning: { ...(payload.reasoning ?? {}), effort: 'low' }, - }; -}; - // OpenAI's verbatim previous_response_not_found envelope. Codex compares this // body byte-for-byte against upstream — see the cross-references on // `PreviousResponseNotFoundError` in serve-prep.ts. 
@@ -77,15 +44,15 @@ const respondWithInternalError = async (c: AuthedContext, error: unknown, reques return finalizeGatewayResponse(effectiveCtx, response); }; -const parsePayload = (requestBody: RequestBody, stampReasoningEffort: boolean): ResponsesPayload => - rewriteResponsesEntryModelAlias(JSON.parse(new TextDecoder().decode(requestBody.bytes)) as ResponsesPayload, stampReasoningEffort); +const parsePayload = (requestBody: RequestBody): ResponsesPayload => + JSON.parse(new TextDecoder().decode(requestBody.bytes)) as ResponsesPayload; export const responsesHttp = { generate: async (c: AuthedContext): Promise => { const requestBody = await readRequestBody(c); let ctx: GatewayCtx | undefined; try { - const payload = parsePayload(requestBody, true); + const payload = parsePayload(requestBody); const wantsStream = payload.stream === true; ctx = createGatewayCtxFromHono(c, { wantsStream, requestBody, model: payload.model }); const store = createResponsesHttpStore(ctx.apiKeyId, payload.store ?? undefined); @@ -106,7 +73,7 @@ export const responsesHttp = { const requestBody = await readRequestBody(c); let ctx: GatewayCtx | undefined; try { - const payload = parsePayload(requestBody, false); + const payload = parsePayload(requestBody); ctx = createGatewayCtxFromHono(c, { wantsStream: false, requestBody, model: payload.model }); const store = createResponsesHttpStore(ctx.apiKeyId, payload.store ?? 
undefined); const result = await responsesServe.compact({ payload, ctx, store, headers: inboundHeadersForUpstream(c) }); diff --git a/packages/gateway/src/data-plane/chat/responses/http_test.ts b/packages/gateway/src/data-plane/chat/responses/http_test.ts index d0e78cdaf..7425b640b 100644 --- a/packages/gateway/src/data-plane/chat/responses/http_test.ts +++ b/packages/gateway/src/data-plane/chat/responses/http_test.ts @@ -284,63 +284,3 @@ test('POST /v1/responses renders a routing-unavailable 400 when a forcing item n const body = await response.json() as { error: { code: string } }; assertEquals(body.error.code, 'responses_item_routing_unavailable'); }); - -test('POST /v1/responses rewrites the codex-auto-review alias before routing', async () => { - installRepo(); - seenModels.length = 0; - const observedBodies: { reasoning?: { effort?: string } }[] = []; - const callResponses = vi.fn(async (_model: unknown, body: unknown): Promise> => { - observedBodies.push(body as { reasoning?: { effort?: string } }); - return { - ok: true, - events: makeProviderEvents([completedEvent()]), - modelKey: 'test-model-key', - headers: new Headers(), - }; - }); - queueCandidates([makeCandidate({ callResponses })]); - - const response = await makeApp().request('/v1/responses', { - method: 'POST', - headers: new Headers({ 'content-type': 'application/json' }), - body: JSON.stringify({ model: 'codex-auto-review', input: 'hello' }), - }); - - assertEquals(response.status, 200); - assertEquals(seenModels, ['gpt-5.4']); - const observed = observedBodies[0]; - if (observed === undefined) throw new Error('expected callResponses to receive a body'); - assertEquals(observed.reasoning?.effort, 'low'); -}); - -test('POST /v1/responses/compact rewrites the codex-auto-review alias to gpt-5.4 with no reasoning field', async () => { - installRepo(); - seenModels.length = 0; - const observedBodies: { reasoning?: unknown }[] = []; - const compactionItem = { type: 'compaction' as const, id: 'cmp_1', 
encrypted_content: 'ENC' }; - const compactionResult: ResponsesResult = { - ...makeResponsesResult(), - object: 'response.compaction', - output: [compactionItem] as unknown as ResponsesResult['output'], - }; - const callResponsesCompact = vi.fn(async (_model: unknown, body: unknown) => { - observedBodies.push(body as { reasoning?: unknown }); - return { ok: true as const, result: compactionResult, modelKey: 'test-model-key' }; - }); - queueCandidates([makeCandidate({ callResponsesCompact })]); - - const response = await makeApp().request('/v1/responses/compact', { - method: 'POST', - headers: new Headers({ 'content-type': 'application/json' }), - body: JSON.stringify({ - model: 'codex-auto-review', - input: [{ type: 'message', role: 'user', content: 'kept' }], - }), - }); - - assertEquals(response.status, 200); - assertEquals(seenModels, ['gpt-5.4']); - const observed = observedBodies[0]; - if (observed === undefined) throw new Error('expected callResponsesCompact to receive a body'); - assertEquals(observed.reasoning, undefined); -}); diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts new file mode 100644 index 000000000..e41400111 --- /dev/null +++ b/packages/gateway/src/data-plane/models/alias-listing.ts @@ -0,0 +1,52 @@ +import type { ModelAlias } from '../../control-plane/model-aliases/types.ts'; +import type { ModelProviderInstance, UpstreamModel } from '@floway-dev/provider'; + +// One emission slot for an alias: a (provider, addressable form) pair where +// the provider's raw catalog carries the alias target id, plus the matched +// UpstreamModel so the synthesized listing entry can borrow the target's +// limits, owner, and cost without re-querying. 
+export interface AliasListingEmission { + provider: ModelProviderInstance; + form: 'unprefixed' | 'prefixed'; + target: UpstreamModel; +} + +// Per-upstream alias enumeration shared by `/v1/models` and the Gemini +// `/models` listings. An alias with empty `upstreamIds` matches every +// reachable provider; a non-empty list narrows the candidate set. Per +// provider, the alias emits one entry per `listed` form when its target sits +// in the upstream's raw catalog. Upstreams that do not carry the target — or +// whose operator disabled the target — drop the alias entirely for that row. +export const aliasListingEmissions = ( + alias: ModelAlias, + providers: readonly ModelProviderInstance[], + rawCatalogs: ReadonlyMap, +): AliasListingEmission[] => { + const out: AliasListingEmission[] = []; + const upstreamFilter = alias.upstreamIds.length > 0 ? new Set(alias.upstreamIds) : null; + for (const provider of providers) { + if (upstreamFilter !== null && !upstreamFilter.has(provider.upstream)) continue; + const catalog = rawCatalogs.get(provider.upstream); + if (catalog === undefined) continue; + const disabled = new Set(provider.disabledPublicModelIds); + const target = catalog.find(m => m.id === alias.targetModelId && !disabled.has(m.id)); + if (target === undefined) continue; + const cfg = provider.modelPrefix; + if (cfg === null) { + out.push({ provider, form: 'unprefixed', target }); + } else { + for (const form of cfg.listed) { + out.push({ provider, form, target }); + } + } + } + return out; +}; + +// The public id form an alias emission carries on the wire. Bare alias name +// for the unprefixed form; provider prefix + alias name for the prefixed +// form. Mirrors how real models are surfaced in the same listing pass. +export const aliasPublicId = (alias: ModelAlias, emission: AliasListingEmission): string => { + const cfg = emission.provider.modelPrefix; + return emission.form === 'prefixed' && cfg !== null ? 
`${cfg.prefix}${alias.alias}` : alias.alias; +}; diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts index 33dbefa40..0d08e4445 100644 --- a/packages/gateway/src/data-plane/models/gemini.ts +++ b/packages/gateway/src/data-plane/models/gemini.ts @@ -1,6 +1,8 @@ import type { Context } from 'hono'; +import { aliasListingEmissions, aliasPublicId } from './alias-listing.ts'; import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts'; +import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts'; import type { ModelAlias } from '../../control-plane/model-aliases/types.ts'; import { createPerRequestFetcher } from '../../dial/per-request.ts'; import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts'; @@ -8,7 +10,7 @@ import { getRepo } from '../../repo/index.ts'; import { backgroundSchedulerFromContext } from '../../runtime/background.ts'; import { getCurrentColo } from '../../runtime/runtime-info.ts'; import { geminiStatusForHttpStatus } from '../chat/gemini/errors.ts'; -import { getInternalModels } from '../providers/registry.ts'; +import { getModelsForListing } from '../providers/registry.ts'; import type { BackgroundScheduler } from '@floway-dev/platform'; import type { ModelPricing } from '@floway-dev/protocols/common'; import { ProviderModelsUnavailableError } from '@floway-dev/provider'; @@ -70,25 +72,32 @@ const loadGeminiModels = async ( scheduler: BackgroundScheduler, aliases: readonly ModelAlias[], ): Promise => { - const models = await getInternalModels(upstreamFilter, fetcherForUpstream, scheduler); + const { models, providers, rawCatalogs } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler); // Only chat models are representable in the Gemini /models shape. 
const realChatEntries = models.filter(model => model.kind === 'chat').map(toGeminiModel); - // Visible aliases append in `loadAllAliases` order; the Gemini surface - // carries no `aliasedFrom` extension (Gemini's `Model` resource is closed) - // so the entry advertises the alias id plus the target's display fields. - const byId = new Map(models.map(m => [m.id, m])); + // Per-upstream alias enumeration mirrors `/v1/models`. Each emission becomes + // one Gemini Model entry whose id and displayName reflect that specific + // (provider, addressable form) pair; emissions are not filtered by kind, so + // non-chat targets are skipped by the explicit kind check in the loop below. const aliasEntries: GeminiModel[] = []; for (const alias of aliases) { if (!alias.visibleInModelsList) continue; - const target = byId.get(alias.targetModelId); - if (target && target.kind !== 'chat') continue; - aliasEntries.push(toGeminiModel({ - ...(target ?? {} as InternalModel), - id: alias.alias, - display_name: alias.alias, - kind: 'chat', - limits: target?.limits ?? {}, - })); + for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) { + if (emission.target.kind !== 'chat') continue; + const targetDisplayName = emission.target.display_name ?? emission.target.id; + aliasEntries.push(toGeminiModel({ + ...emission.target, + id: aliasPublicId(alias, emission), + display_name: composeAliasDisplayName({ + upstreamDisplayName: emission.provider.name, + aliasDisplayName: alias.displayName, + targetDisplayName, + rules: alias.rules, + }), + kind: 'chat', + limits: emission.target.limits ??
{}, + })); + } } return [...realChatEntries, ...aliasEntries]; }; diff --git a/packages/gateway/src/data-plane/models/gemini_test.ts b/packages/gateway/src/data-plane/models/gemini_test.ts index 0327bae33..6a2d9a887 100644 --- a/packages/gateway/src/data-plane/models/gemini_test.ts +++ b/packages/gateway/src/data-plane/models/gemini_test.ts @@ -449,7 +449,7 @@ test('/v1beta/models appends visible aliases as synthetic Gemini model entries', const body = await response.json() as { models: Array<{ name: string; displayName: string; supportedGenerationMethods: string[] }> }; const aliasEntry = body.models.find(m => m.name === 'models/codex-auto-review'); if (!aliasEntry) throw new Error('expected codex-auto-review alias entry'); - assertEquals(aliasEntry.displayName, 'codex-auto-review'); + assertEquals(aliasEntry.displayName, 'GitHub Copilot (tester): GPT Gemini List (low effort)'); assertEquals(aliasEntry.supportedGenerationMethods, ['generateContent', 'streamGenerateContent', 'countTokens']); }, ); diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts index 585b5d638..569a601e2 100644 --- a/packages/gateway/src/data-plane/models/load.ts +++ b/packages/gateway/src/data-plane/models/load.ts @@ -1,5 +1,7 @@ +import { aliasListingEmissions, aliasPublicId, type AliasListingEmission } from './alias-listing.ts'; +import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts'; import type { ModelAlias } from '../../control-plane/model-aliases/types.ts'; -import { getInternalModels } from '../providers/registry.ts'; +import { getModelsForListing } from '../providers/registry.ts'; import type { BackgroundScheduler } from '@floway-dev/platform'; import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common'; import type { Fetcher, InternalModel } from '@floway-dev/provider'; @@ -22,20 +24,21 @@ export const toPublicModel = (model: InternalModel): PublicModel => { return info; 
}; -// Synthesize one PublicModel for each visible alias, appended after the real -// entries. The owner falls back to the alias-target's `owned_by` on whichever -// real entry resolves it; if the target isn't present on any reachable -// upstream, the entry still appears (operator-declared; the listing reflects -// operator intent) with a `floway` owner so the row is unambiguous. -export const toPublicModelFromAlias = (alias: ModelAlias, byId: ReadonlyMap): PublicModel => { - const target = byId.get(alias.targetModelId); +const publicModelForAliasEmission = (alias: ModelAlias, emission: AliasListingEmission): PublicModel => { + const { provider, target } = emission; + const targetDisplayName = target.display_name ?? target.id; const info: PublicModel = { - id: alias.alias, + id: aliasPublicId(alias, emission), object: 'model', type: 'model', - display_name: alias.alias, - limits: target?.limits ? { ...target.limits } : {}, - kind: target?.kind ?? 'chat', + display_name: composeAliasDisplayName({ + upstreamDisplayName: provider.name, + aliasDisplayName: alias.displayName, + targetDisplayName, + rules: alias.rules, + }), + limits: target.limits ? { ...target.limits } : {}, + kind: target.kind, created: alias.createdAt, created_at: new Date(alias.createdAt * 1000).toISOString(), aliasedFrom: { @@ -45,7 +48,8 @@ export const toPublicModelFromAlias = (alias: ModelAlias, byId: ReadonlyMap => { - const internal = await getInternalModels(upstreamFilter, fetcherForUpstream, scheduler); - const realEntries = internal.map(toPublicModel); - const byId = new Map(internal.map(m => [m.id, m])); - // Visible aliases append in `loadAllAliases` order, after every real entry. - // The spec's no-silent-hide policy keeps disabled-target aliases visible — - // the user-facing failure on call is the canonical signal, not the - // listing. 
- const aliasEntries = aliases.filter(a => a.visibleInModelsList).map(a => toPublicModelFromAlias(a, byId)); + const { models, providers, rawCatalogs } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler); + const realEntries = models.map(toPublicModel); + // Per-upstream alias enumeration: for each visible alias, emit one entry per + // (provider, addressable form) pair where the provider can resolve the + // alias's target. Upstreams that do not carry the target produce no entry — + // the alias listing is strictly anchored to "can be served from here". + const aliasEntries: PublicModel[] = []; + for (const alias of aliases) { + if (!alias.visibleInModelsList) continue; + for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) { + aliasEntries.push(publicModelForAliasEmission(alias, emission)); + } + } const data = [...realEntries, ...aliasEntries]; return { object: 'list', diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts index 855eca5fa..17647565b 100644 --- a/packages/gateway/src/data-plane/models/serve_test.ts +++ b/packages/gateway/src/data-plane/models/serve_test.ts @@ -711,7 +711,7 @@ test('/v1/models omits aliases marked visibleInModelsList=false', async () => { ); }); -test('/v1/models lists an alias whose target is not present on any upstream (no silent hide)', async () => { +test('/v1/models omits an alias whose target is not in any reachable upstream catalog', async () => { const { repo, apiKey } = await setupAppTest(); (repo.modelAliases as MemoryModelAliasesRepo).setAll([ @@ -755,12 +755,182 @@ test('/v1/models lists an alias whose target is not present on any upstream (no }, async () => { const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } }); - const body = await response.json() as { data: Array<{ id: string; aliasedFrom?: { targetModelId: string }; owned_by?: string }> }; - const orphan = 
body.data.find(m => m.id === 'orphan-alias'); - if (!orphan) throw new Error('expected orphan-alias entry'); - assertEquals(orphan.aliasedFrom?.targetModelId, 'never-resolves'); - // No matching real entry → owner falls back to the alias's primary upstream id. - assertEquals(orphan.owned_by, 'up_oai'); + const body = await response.json() as { data: Array<{ id: string }> }; + // Per-upstream alias enumeration: an alias whose target cannot be served + // by any reachable upstream produces zero entries — there is no surface + // form to attach the alias to. A request for `orphan-alias` still + // returns the canonical user-facing model-missing error. + assertEquals(body.data.map(m => m.id).includes('orphan-alias'), false); + }, + ); +}); + +test('/v1/models emits the alias on each reachable upstream + listed form, with display_name composed from the upstream label', async () => { + const { repo, apiKey } = await setupAppTest(); + + (repo.modelAliases as MemoryModelAliasesRepo).setAll([ + { + alias: 'codex-auto-review', + targetModelId: 'gpt-5.4', + upstreamIds: [], + rules: { reasoning: { effort: 'low' } }, + visibleInModelsList: true, + onConflict: 'real-only', + displayName: 'Codex Auto Review', + createdAt: 1_700_000_000, + }, + ]); + + await repo.upstreams.save(buildCustomUpstreamRecord({ + id: 'up_azure', + name: 'Azure', + sortOrder: 100, + config: { + baseUrl: 'https://azure.example.com', + authStyle: 'bearer', + apiKey: 'sk-azure', + endpoints: { chatCompletions: {} }, + }, + modelPrefix: { prefix: 'azure/', addressable: ['unprefixed', 'prefixed'], listed: ['unprefixed', 'prefixed'] }, + })); + + await withMockedFetch( + request => { + const url = new URL(request.url); + if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']); + if (url.pathname === '/copilot_internal/v2/token') { + return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 
'https://api.individual.githubcopilot.com' } }); + } + if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([])); + if (url.pathname === '/v1/models' && url.hostname === 'azure.example.com') { + return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4', display_name: 'GPT-5.4' }] }); + } + throw new Error(`Unhandled fetch ${request.url}`); + }, + async () => { + const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } }); + const body = await response.json() as { data: Array<{ id: string; display_name: string; aliasedFrom?: unknown }> }; + // Both addressable forms appear because the upstream listed both. + const bare = body.data.find(m => m.id === 'codex-auto-review'); + const prefixed = body.data.find(m => m.id === 'azure/codex-auto-review'); + if (!bare || !prefixed) throw new Error('expected both bare and prefixed alias entries'); + assertEquals(bare.display_name, 'Azure: Codex Auto Review'); + assertEquals(prefixed.display_name, 'Azure: Codex Auto Review'); + }, + ); +}); + +test('/v1/models falls back to target display_name + rules summary when the alias has no displayName', async () => { + const { repo, apiKey } = await setupAppTest(); + + (repo.modelAliases as MemoryModelAliasesRepo).setAll([ + { + alias: 'codex-auto-review', + targetModelId: 'gpt-5.4', + upstreamIds: [], + rules: { reasoning: { effort: 'low' } }, + visibleInModelsList: true, + onConflict: 'real-only', + createdAt: 1_700_000_000, + }, + ]); + + await repo.upstreams.save(buildCustomUpstreamRecord({ + id: 'up_azure', + name: 'Azure', + sortOrder: 100, + config: { + baseUrl: 'https://azure.example.com', + authStyle: 'bearer', + apiKey: 'sk-azure', + endpoints: { chatCompletions: {} }, + }, + })); + + await withMockedFetch( + request => { + const url = new URL(request.url); + if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']); + if (url.pathname === 
'/copilot_internal/v2/token') { + return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } }); + } + if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([])); + if (url.pathname === '/v1/models' && url.hostname === 'azure.example.com') { + return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4', display_name: 'GPT-5.4' }] }); + } + throw new Error(`Unhandled fetch ${request.url}`); + }, + async () => { + const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } }); + const body = await response.json() as { data: Array<{ id: string; display_name: string }> }; + const entry = body.data.find(m => m.id === 'codex-auto-review'); + if (!entry) throw new Error('expected codex-auto-review alias entry'); + assertEquals(entry.display_name, 'Azure: GPT-5.4 (low effort)'); + }, + ); +}); + +test('/v1/models honours alias upstreamIds — only emits on the named upstream', async () => { + const { repo, apiKey } = await setupAppTest(); + + (repo.modelAliases as MemoryModelAliasesRepo).setAll([ + { + alias: 'codex-auto-review', + targetModelId: 'gpt-5.4', + upstreamIds: ['up_azure'], + rules: {}, + visibleInModelsList: true, + onConflict: 'real-only', + createdAt: 1_700_000_000, + }, + ]); + + await repo.upstreams.save(buildCustomUpstreamRecord({ + id: 'up_azure', + name: 'Azure', + sortOrder: 100, + config: { + baseUrl: 'https://azure.example.com', + authStyle: 'bearer', + apiKey: 'sk-azure', + endpoints: { chatCompletions: {} }, + }, + })); + await repo.upstreams.save(buildCustomUpstreamRecord({ + id: 'up_other', + name: 'Other', + sortOrder: 200, + config: { + baseUrl: 'https://other.example.com', + authStyle: 'bearer', + apiKey: 'sk-other', + endpoints: { chatCompletions: {} }, + }, + })); + + await withMockedFetch( + request => { + const url = new URL(request.url); + if 
(url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']); + if (url.pathname === '/copilot_internal/v2/token') { + return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } }); + } + if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([])); + // Both upstreams expose gpt-5.4 — but the alias is restricted to up_azure. + if (url.pathname === '/v1/models' && url.hostname === 'azure.example.com') { + return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] }); + } + if (url.pathname === '/v1/models' && url.hostname === 'other.example.com') { + return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] }); + } + throw new Error(`Unhandled fetch ${request.url}`); + }, + async () => { + const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } }); + const body = await response.json() as { data: Array<{ id: string; display_name: string }> }; + const aliasRows = body.data.filter(m => m.id === 'codex-auto-review'); + assertEquals(aliasRows.length, 1); + assertEquals(aliasRows[0].display_name, 'Azure: gpt-5.4'); }, ); }); diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts index e29df1c6a..c8556ca45 100644 --- a/packages/gateway/src/data-plane/providers/registry.ts +++ b/packages/gateway/src/data-plane/providers/registry.ts @@ -20,6 +20,10 @@ interface ProviderModelsResult { // order as the input `providers` list so the model-missing renderer can // surface a stable, dashboard-aligned list. failedUpstreams: string[]; + // Raw per-upstream catalogs collected during the fan-out. Aliases consume + // this to enumerate per-upstream entries by addressable form without paying + // a second round-trip. 
+ rawCatalogs: Map; } const NO_UPSTREAM_CONFIGURED_MESSAGE = 'No upstream provider configured — connect GitHub Copilot or add a Custom/Azure upstream in the dashboard'; @@ -143,6 +147,7 @@ const collectProviderModels = async ( scheduler: BackgroundScheduler, ): Promise => { const byId = new Map(); + const rawCatalogs = new Map(); let sawSuccess = false; let lastError: unknown = null; const failedUpstreams: string[] = []; @@ -174,6 +179,7 @@ const collectProviderModels = async ( } sawSuccess = true; const { instance, models: providedModels } = result.value; + rawCatalogs.set(instance.upstream, providedModels); // Operator-disabled public model ids vanish entirely for this upstream: // dropped before they reach the catalog map, so they appear in no /models // listing and resolve to nothing for routing. The disable is per-upstream, @@ -208,7 +214,7 @@ const collectProviderModels = async ( } } - return { models: [...byId.values()], sawSuccess, lastError, failedUpstreams }; + return { models: [...byId.values()], sawSuccess, lastError, failedUpstreams, rawCatalogs }; }; // Public-facing model-id ordering, applied in getModels() to every list that @@ -264,6 +270,35 @@ export const getModels = async ( return []; }; +// Returns the merged public model list AND the per-upstream raw catalogs and +// provider instances. Listing surfaces (`/v1/models`, Gemini `/models`) use the +// extra channels to synthesize alias entries that reflect which upstreams can +// actually serve each alias's target and in which addressable form. Computing +// both off the same `collectProviderModels` pass keeps catalog fetches to one +// round per upstream regardless of how many alias rows reference each target. 
+export interface PublicModelsListing { + models: ResolvedModel[]; + providers: readonly ModelProviderInstance[]; + rawCatalogs: ReadonlyMap; +} + +export const getModelsForListing = async ( + upstreamFilter: readonly string[] | null, + fetcherForUpstream: (upstreamId: string) => Fetcher, + scheduler: BackgroundScheduler, +): Promise => { + const providers = await listModelProviders(upstreamFilter); + if (providers.length === 0) { + throw new Error(NO_UPSTREAM_CONFIGURED_MESSAGE); + } + + const { models, sawSuccess, lastError, rawCatalogs } = await collectProviderModels(providers, fetcherForUpstream, scheduler); + + if (sawSuccess) return { models: models.sort((a, b) => compareModelIds(a.id, b.id)), providers, rawCatalogs }; + if (lastError) throw lastError; + return { models: [], providers, rawCatalogs }; +}; + export const getInternalModels = async ( upstreamFilter: readonly string[] | null, fetcherForUpstream: (upstreamId: string) => Fetcher, From fcb360afcb1cfc23982484dcae1d61c6f705fd84 Mon Sep 17 00:00:00 2001 From: Menci Date: Fri, 26 Jun 2026 01:51:45 +0800 Subject: [PATCH 010/170] revert(translate): restore pre-extension native field translation on remaining pairs MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Task 3 (`e1891e1d`) also reshaped NATIVE-field translation on the remaining three pairs the first revert wave (`17a7877c`) did not cover. The alias work should only have added emission of the new Floway extension fields; native-to-native handling on these pairs had been decided in the prior contract and is restored here. Revert the native-field paths in: - gemini-via-responses: restore the pre-Task-3 `reasoning` block shape where `includeThoughts: true` paired with a non-`none` effort produces `summary: 'detailed'`; drop the `false → 'omitted'` synthesis Task 3 added. Keep `verbosity` and `serviceTier` extension carries (Floway-only fields on Gemini IR). 
- messages-via-responses: drop `thinking.display` → `reasoning.summary` synthesis and the `service_tier` → `service_tier` native-to-native propagation. Keep the `verbosity` extension carry under `text`. The unused `mapAnthropicDisplayToSummary` helper is deleted. - messages-via-chat-completions: drop the `service_tier` → `service_tier` native-to-native propagation. Keep the `verbosity` extension carry. Tests that asserted the new native-field behavior are removed; the extension-field tests stay untouched. --- .../src/gemini-via-responses/request.ts | 18 ++++------ .../src/gemini-via-responses/request_test.ts | 20 ----------- .../messages-via-chat-completions/request.ts | 1 - .../request_test.ts | 11 ------ .../src/messages-via-responses/request.ts | 16 ++------- .../messages-via-responses/request_test.ts | 36 ------------------- .../shared/messages-via/reasoning-summary.ts | 21 ----------- 7 files changed, 9 insertions(+), 114 deletions(-) delete mode 100644 packages/translate/src/shared/messages-via/reasoning-summary.ts diff --git a/packages/translate/src/gemini-via-responses/request.ts b/packages/translate/src/gemini-via-responses/request.ts index df85e4729..36ceb37f0 100644 --- a/packages/translate/src/gemini-via-responses/request.ts +++ b/packages/translate/src/gemini-via-responses/request.ts @@ -152,18 +152,12 @@ const applyGenerationConfig = (request: ResponsesPayload, generationConfig?: Gem if (generationConfig.serviceTier != null) request.service_tier = generationConfig.serviceTier; const effort = geminiReasoningEffort(generationConfig.thinkingConfig); - const summary = - generationConfig.thinkingConfig?.includeThoughts === true - ? ('detailed' as const) - : generationConfig.thinkingConfig?.includeThoughts === false - ? ('omitted' as const) - : undefined; - if (effort || summary !== undefined) { - request.reasoning = { - ...(effort ? { effort } : {}), - ...(summary !== undefined && effort !== 'none' ? 
{ summary } : {}), - }; - } + if (!effort) return; + + request.reasoning = { + effort, + ...(effort !== 'none' && generationConfig.thinkingConfig?.includeThoughts === true ? { summary: 'detailed' as const } : {}), + }; }; const buildTools = (payload: GeminiPayload): ResponsesTool[] | undefined => { diff --git a/packages/translate/src/gemini-via-responses/request_test.ts b/packages/translate/src/gemini-via-responses/request_test.ts index 66476f0d8..9d568605f 100644 --- a/packages/translate/src/gemini-via-responses/request_test.ts +++ b/packages/translate/src/gemini-via-responses/request_test.ts @@ -432,26 +432,6 @@ test('buildTargetRequest emits generationConfig.serviceTier onto Responses servi assertEquals(result.service_tier, 'priority'); }); -test('buildTargetRequest maps includeThoughts onto reasoning.summary (true → detailed, false → omitted)', () => { - const withSummary = buildTargetRequest( - { - contents: [{ role: 'user', parts: [{ text: 'hi' }] }], - generationConfig: { thinkingConfig: { thinkingLevel: 'high', includeThoughts: true } }, - }, - 'gpt-test', - ); - const withoutSummary = buildTargetRequest( - { - contents: [{ role: 'user', parts: [{ text: 'hi' }] }], - generationConfig: { thinkingConfig: { thinkingLevel: 'high', includeThoughts: false } }, - }, - 'gpt-test', - ); - - assertEquals(withSummary.reasoning, { effort: 'high', summary: 'detailed' }); - assertEquals(withoutSummary.reasoning, { effort: 'high', summary: 'omitted' }); -}); - test('buildTargetRequest drops top-level Anthropic extensions on Responses', () => { const result = buildTargetRequest( { diff --git a/packages/translate/src/messages-via-chat-completions/request.ts b/packages/translate/src/messages-via-chat-completions/request.ts index bd08e1b26..6ae2d8a95 100644 --- a/packages/translate/src/messages-via-chat-completions/request.ts +++ b/packages/translate/src/messages-via-chat-completions/request.ts @@ -291,7 +291,6 @@ export const translateMessagesToChatCompletions = (payload: 
MessagesPayload): Ch tool_choice: translateMessagesToolChoice(payload.tool_choice, clientTools), ...(responseFormat ? { response_format: responseFormat } : {}), ...(payload.verbosity != null ? { verbosity: payload.verbosity } : {}), - ...(payload.service_tier != null ? { service_tier: payload.service_tier } : {}), }; }; diff --git a/packages/translate/src/messages-via-chat-completions/request_test.ts b/packages/translate/src/messages-via-chat-completions/request_test.ts index 50f326860..861962bf8 100644 --- a/packages/translate/src/messages-via-chat-completions/request_test.ts +++ b/packages/translate/src/messages-via-chat-completions/request_test.ts @@ -494,17 +494,6 @@ test('translateMessagesToChatCompletions emits verbosity extension verbatim', () assertEquals(result.verbosity, 'low'); }); -test('translateMessagesToChatCompletions forwards service_tier verbatim', () => { - const result = translateMessagesToChatCompletions({ - model: 'gpt-test', - max_tokens: 256, - messages: [{ role: 'user', content: 'hi' }], - service_tier: 'priority', - }); - - assertEquals(result.service_tier, 'priority'); -}); - test('translateMessagesToChatCompletions drops Anthropic-only knobs that have no Chat-completions slot', () => { const result = translateMessagesToChatCompletions({ model: 'gpt-test', diff --git a/packages/translate/src/messages-via-responses/request.ts b/packages/translate/src/messages-via-responses/request.ts index b1c593443..1adfcaeda 100644 --- a/packages/translate/src/messages-via-responses/request.ts +++ b/packages/translate/src/messages-via-responses/request.ts @@ -1,7 +1,6 @@ import { openAiJsonSchemaCoreFromMessagesFormat } from '../shared/messages/structured-output.ts'; import { messagesReasoningBlockToResponsesReasoning } from '../shared/messages-and-responses/reasoning.ts'; import { resolveMessagesReasoningEffort } from '../shared/messages-via/reasoning-effort.ts'; -import { mapAnthropicDisplayToSummary } from 
'../shared/messages-via/reasoning-summary.ts'; import { normalizeMessagesToolInputSchema } from '../shared/messages-via/tool-schema.ts'; import { type MessagesAssistantMessage, @@ -208,15 +207,7 @@ export const translateMessagesToResponses = (payload: MessagesPayload): Response // Responses upstream may reject it. Translation stays pairwise and leaves // target-side validation to the selected upstream endpoint. const effort = resolveMessagesReasoningEffort(payload); - const display = payload.thinking?.display; - const summary = display !== undefined ? mapAnthropicDisplayToSummary(display) : undefined; - const reasoning = - effort !== undefined || summary !== undefined - ? { - ...(effort !== undefined ? { effort } : {}), - ...(summary !== undefined ? { summary } : {}), - } - : undefined; + const reasoning = effort ? { effort } : undefined; const clientTools = getClientTools(payload.tools); const instructions = translateSystemPrompt(payload.system); const jsonSchema = openAiJsonSchemaCoreFromMessagesFormat(payload.output_config?.format); @@ -225,8 +216,8 @@ export const translateMessagesToResponses = (payload: MessagesPayload): Response const text = formatPart || verbosityPart ? { ...formatPart, ...verbosityPart } : undefined; // Keep fallback semantics strict: do not synthesize `temperature: 1`, - // `store: false`, or `parallel_tool_calls: true` when the Messages source - // did not express those knobs. + // `store: false`, `parallel_tool_calls: true`, or `reasoning.summary` when the + // Messages source did not express those knobs. return { model: payload.model, input: translateMessagesInput(payload.messages), @@ -240,7 +231,6 @@ export const translateMessagesToResponses = (payload: MessagesPayload): Response stream: true, ...(reasoning ? { reasoning } : {}), ...(text ? { text } : {}), - ...(payload.service_tier != null ? 
{ service_tier: payload.service_tier } : {}), }; }; diff --git a/packages/translate/src/messages-via-responses/request_test.ts b/packages/translate/src/messages-via-responses/request_test.ts index 2846f1d39..a773f72ef 100644 --- a/packages/translate/src/messages-via-responses/request_test.ts +++ b/packages/translate/src/messages-via-responses/request_test.ts @@ -529,42 +529,6 @@ test('translateMessagesToResponses co-emits verbosity with json_schema format un assertEquals(result.text?.format?.type, 'json_schema'); }); -test('translateMessagesToResponses maps thinking.display onto reasoning.summary (summarized → concise, omitted → omitted, full → detailed)', () => { - const summarized = translateMessagesToResponses({ - model: 'gpt-test', - max_tokens: 256, - messages: [{ role: 'user', content: 'hi' }], - thinking: { type: 'enabled', display: 'summarized' }, - }); - const omitted = translateMessagesToResponses({ - model: 'gpt-test', - max_tokens: 256, - messages: [{ role: 'user', content: 'hi' }], - thinking: { type: 'enabled', display: 'omitted' }, - }); - const full = translateMessagesToResponses({ - model: 'gpt-test', - max_tokens: 256, - messages: [{ role: 'user', content: 'hi' }], - thinking: { type: 'enabled', display: 'full' }, - }); - - assertEquals(summarized.reasoning?.summary, 'concise'); - assertEquals(omitted.reasoning?.summary, 'omitted'); - assertEquals(full.reasoning?.summary, 'detailed'); -}); - -test('translateMessagesToResponses forwards service_tier verbatim', () => { - const result = translateMessagesToResponses({ - model: 'gpt-test', - max_tokens: 256, - messages: [{ role: 'user', content: 'hi' }], - service_tier: 'priority', - }); - - assertEquals(result.service_tier, 'priority'); -}); - test('translateMessagesToResponses drops Anthropic-only mode knobs the Responses wire cannot express', () => { const result = translateMessagesToResponses({ model: 'gpt-test', diff --git a/packages/translate/src/shared/messages-via/reasoning-summary.ts 
b/packages/translate/src/shared/messages-via/reasoning-summary.ts deleted file mode 100644 index 6d12bab9b..000000000 --- a/packages/translate/src/shared/messages-via/reasoning-summary.ts +++ /dev/null @@ -1,21 +0,0 @@ -import type { MessagesThinkingDisplay } from '@floway-dev/protocols/messages'; - -// Reverse of via-messages/anthropic-extensions.ts mapSummaryToAnthropicDisplay. -// Anthropic's `summarized` collapsed both `concise` and `detailed`; we pick -// `concise` as the canonical reverse since it is Responses' more compact -// summary mode and round-tripping through the gateway should not silently -// inflate verbosity. Unknown operator-typed values pass through verbatim so -// the Responses upstream gets the original spelling and decides for itself -// whether to accept it. -export const mapAnthropicDisplayToSummary = (display: MessagesThinkingDisplay | string): string | undefined => { - switch (display) { - case 'summarized': - return 'concise'; - case 'omitted': - return 'omitted'; - case 'full': - return 'detailed'; - default: - return display; - } -}; From 96afa8376b3d5d08d9a69f243a70ca381c6dfd35 Mon Sep 17 00:00:00 2001 From: Menci Date: Fri, 26 Jun 2026 02:04:58 +0800 Subject: [PATCH 011/170] refactor(aliases): split upstream-label prefix from composeAliasDisplayName The alias-local display name (operator-set displayName, or synthesized target + rules summary) is independent of which addressable form the entry surfaces under. The upstream-label prefix (`${upstream.name}: `) belongs at the caller, mirroring the real-model path in `registry.ts` where the synthesized prefix is added only on the `prefixed` listing form. Result: a bare alias listing (`codex-auto-review` on a no-prefix or unprefixed-listed upstream) reads as `"Codex Auto Review"` or `"GPT-5.4 (low effort)"` without an upstream label, matching how a bare real model renders. The prefixed form (`azure/codex-auto-review`) keeps the `"Azure: Codex Auto Review"` shape unchanged. 
--- .../src/control-plane/model-aliases/display.ts | 15 +++++++-------- .../control-plane/model-aliases/display_test.ts | 9 +++------ packages/gateway/src/data-plane/models/gemini.ts | 13 ++++++------- .../gateway/src/data-plane/models/gemini_test.ts | 2 +- packages/gateway/src/data-plane/models/load.ts | 15 +++++++-------- .../gateway/src/data-plane/models/serve_test.ts | 8 ++++---- 6 files changed, 28 insertions(+), 34 deletions(-) diff --git a/packages/gateway/src/control-plane/model-aliases/display.ts b/packages/gateway/src/control-plane/model-aliases/display.ts index 576c823a8..cb82bc75f 100644 --- a/packages/gateway/src/control-plane/model-aliases/display.ts +++ b/packages/gateway/src/control-plane/model-aliases/display.ts @@ -23,17 +23,16 @@ export const formatAliasRulesSummary = (rules: ModelAliasRules): string => { return parts.length > 0 ? ` (${parts.join(', ')})` : ''; }; -// Compose the final per-entry display name shown in `/v1/models`. The -// upstream name always leads so an operator scanning the listing sees which -// upstream each row belongs to before reading the alias-specific part. +// Compose the alias-local display name — what the operator named the alias +// (when set) or a synthesized target + rules summary. Independent of which +// upstream is surfacing the alias; the prefixed listing form prepends the +// upstream display name at the call site, mirroring the real-model path in +// `registry.ts`. 
export const composeAliasDisplayName = (input: { - upstreamDisplayName: string; aliasDisplayName?: string; targetDisplayName: string; rules: ModelAliasRules; }): string => { - if (input.aliasDisplayName !== undefined) { - return `${input.upstreamDisplayName}: ${input.aliasDisplayName}`; - } - return `${input.upstreamDisplayName}: ${input.targetDisplayName}${formatAliasRulesSummary(input.rules)}`; + if (input.aliasDisplayName !== undefined) return input.aliasDisplayName; + return `${input.targetDisplayName}${formatAliasRulesSummary(input.rules)}`; }; diff --git a/packages/gateway/src/control-plane/model-aliases/display_test.ts b/packages/gateway/src/control-plane/model-aliases/display_test.ts index 7ba7700d0..d45a1b339 100644 --- a/packages/gateway/src/control-plane/model-aliases/display_test.ts +++ b/packages/gateway/src/control-plane/model-aliases/display_test.ts @@ -45,31 +45,28 @@ describe('composeAliasDisplayName', () => { test('uses alias displayName when set, suppressing the rules summary', () => { expect( composeAliasDisplayName({ - upstreamDisplayName: 'Azure', aliasDisplayName: 'Codex Auto Review', targetDisplayName: 'GPT-5.4', rules: { reasoning: { effort: 'low' } }, }), - ).toBe('Azure: Codex Auto Review'); + ).toBe('Codex Auto Review'); }); test('falls back to target displayName with rules suffix when alias displayName is missing', () => { expect( composeAliasDisplayName({ - upstreamDisplayName: 'Azure', targetDisplayName: 'GPT-5.4', rules: { reasoning: { effort: 'low' } }, }), - ).toBe('Azure: GPT-5.4 (low effort)'); + ).toBe('GPT-5.4 (low effort)'); }); test('omits the rules suffix when rules are empty', () => { expect( composeAliasDisplayName({ - upstreamDisplayName: 'Azure', targetDisplayName: 'GPT-5.4', rules: {}, }), - ).toBe('Azure: GPT-5.4'); + ).toBe('GPT-5.4'); }); }); diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts index 0d08e4445..3d3b5ab2c 100644 --- 
a/packages/gateway/src/data-plane/models/gemini.ts +++ b/packages/gateway/src/data-plane/models/gemini.ts @@ -84,16 +84,15 @@ const loadGeminiModels = async ( if (!alias.visibleInModelsList) continue; for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) { if (emission.target.kind !== 'chat') continue; - const targetDisplayName = emission.target.display_name ?? emission.target.id; + const aliasLocalName = composeAliasDisplayName({ + aliasDisplayName: alias.displayName, + targetDisplayName: emission.target.display_name ?? emission.target.id, + rules: alias.rules, + }); aliasEntries.push(toGeminiModel({ ...emission.target, id: aliasPublicId(alias, emission), - display_name: composeAliasDisplayName({ - upstreamDisplayName: emission.provider.name, - aliasDisplayName: alias.displayName, - targetDisplayName, - rules: alias.rules, - }), + display_name: emission.form === 'prefixed' ? `${emission.provider.name}: ${aliasLocalName}` : aliasLocalName, kind: 'chat', limits: emission.target.limits ?? 
{}, })); diff --git a/packages/gateway/src/data-plane/models/gemini_test.ts b/packages/gateway/src/data-plane/models/gemini_test.ts index 6a2d9a887..584054328 100644 --- a/packages/gateway/src/data-plane/models/gemini_test.ts +++ b/packages/gateway/src/data-plane/models/gemini_test.ts @@ -449,7 +449,7 @@ test('/v1beta/models appends visible aliases as synthetic Gemini model entries', const body = await response.json() as { models: Array<{ name: string; displayName: string; supportedGenerationMethods: string[] }> }; const aliasEntry = body.models.find(m => m.name === 'models/codex-auto-review'); if (!aliasEntry) throw new Error('expected codex-auto-review alias entry'); - assertEquals(aliasEntry.displayName, 'GitHub Copilot (tester): GPT Gemini List (low effort)'); + assertEquals(aliasEntry.displayName, 'GPT Gemini List (low effort)'); assertEquals(aliasEntry.supportedGenerationMethods, ['generateContent', 'streamGenerateContent', 'countTokens']); }, ); diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts index 569a601e2..2d054dc88 100644 --- a/packages/gateway/src/data-plane/models/load.ts +++ b/packages/gateway/src/data-plane/models/load.ts @@ -25,18 +25,17 @@ export const toPublicModel = (model: InternalModel): PublicModel => { }; const publicModelForAliasEmission = (alias: ModelAlias, emission: AliasListingEmission): PublicModel => { - const { provider, target } = emission; - const targetDisplayName = target.display_name ?? target.id; + const { provider, target, form } = emission; + const aliasLocalName = composeAliasDisplayName({ + aliasDisplayName: alias.displayName, + targetDisplayName: target.display_name ?? 
target.id, + rules: alias.rules, + }); const info: PublicModel = { id: aliasPublicId(alias, emission), object: 'model', type: 'model', - display_name: composeAliasDisplayName({ - upstreamDisplayName: provider.name, - aliasDisplayName: alias.displayName, - targetDisplayName, - rules: alias.rules, - }), + display_name: form === 'prefixed' ? `${provider.name}: ${aliasLocalName}` : aliasLocalName, limits: target.limits ? { ...target.limits } : {}, kind: target.kind, created: alias.createdAt, diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts index 17647565b..f3c45e313 100644 --- a/packages/gateway/src/data-plane/models/serve_test.ts +++ b/packages/gateway/src/data-plane/models/serve_test.ts @@ -765,7 +765,7 @@ test('/v1/models omits an alias whose target is not in any reachable upstream ca ); }); -test('/v1/models emits the alias on each reachable upstream + listed form, with display_name composed from the upstream label', async () => { +test('/v1/models emits the alias on each reachable upstream + listed form; prefixed entries carry the upstream label, unprefixed entries do not', async () => { const { repo, apiKey } = await setupAppTest(); (repo.modelAliases as MemoryModelAliasesRepo).setAll([ @@ -814,7 +814,7 @@ test('/v1/models emits the alias on each reachable upstream + listed form, with const bare = body.data.find(m => m.id === 'codex-auto-review'); const prefixed = body.data.find(m => m.id === 'azure/codex-auto-review'); if (!bare || !prefixed) throw new Error('expected both bare and prefixed alias entries'); - assertEquals(bare.display_name, 'Azure: Codex Auto Review'); + assertEquals(bare.display_name, 'Codex Auto Review'); assertEquals(prefixed.display_name, 'Azure: Codex Auto Review'); }, ); @@ -865,7 +865,7 @@ test('/v1/models falls back to target display_name + rules summary when the alia const body = await response.json() as { data: Array<{ id: string; display_name: string }> }; const 
entry = body.data.find(m => m.id === 'codex-auto-review'); if (!entry) throw new Error('expected codex-auto-review alias entry'); - assertEquals(entry.display_name, 'Azure: GPT-5.4 (low effort)'); + assertEquals(entry.display_name, 'GPT-5.4 (low effort)'); }, ); }); @@ -930,7 +930,7 @@ test('/v1/models honours alias upstreamIds — only emits on the named upstream' const body = await response.json() as { data: Array<{ id: string; display_name: string }> }; const aliasRows = body.data.filter(m => m.id === 'codex-auto-review'); assertEquals(aliasRows.length, 1); - assertEquals(aliasRows[0].display_name, 'Azure: gpt-5.4'); + assertEquals(aliasRows[0].display_name, 'gpt-5.4'); }, ); }); From 6e3a6d7a554d0ca806d5ebd31c79950454794bfd Mon Sep 17 00:00:00 2001 From: Menci Date: Fri, 26 Jun 2026 02:44:34 +0800 Subject: [PATCH 012/170] refactor(aliases): synthesize alias listing entries inside getModelsForListing The three listing endpoints (/v1/models data plane, /api/models control plane, /v1beta/models Gemini) each independently looped over aliases and re-built the per-emission entry. Move the fan-out to a single synthesizeListedAliases() called once inside getModelsForListing(); the function returns ListedModel[] (ResolvedModel + optional aliasedFrom) that every surface mapper consumes uniformly. Side effect: the control-plane /api/models was previously alias-blind, because the dashboard hit getModels() instead of the listing function. Now it goes through the shared path and the dashboard Models page surfaces alias rows with their aliasedFrom provenance. 
--- .../src/control-plane/models/routes.ts | 22 +++-- .../src/control-plane/models/routes_test.ts | 41 +++++++++ .../src/data-plane/models/alias-listing.ts | 85 +++++++++++++++++-- .../gateway/src/data-plane/models/gemini.ts | 38 ++------- .../gateway/src/data-plane/models/load.ts | 54 ++---------- .../src/data-plane/providers/registry.ts | 21 +++-- 6 files changed, 168 insertions(+), 93 deletions(-) diff --git a/packages/gateway/src/control-plane/models/routes.ts b/packages/gateway/src/control-plane/models/routes.ts index 3146a3edc..9db27e7f4 100644 --- a/packages/gateway/src/control-plane/models/routes.ts +++ b/packages/gateway/src/control-plane/models/routes.ts @@ -1,21 +1,25 @@ import type { Context } from 'hono'; +import type { ListedModel } from '../../data-plane/models/alias-listing.ts'; import { toPublicModel } from '../../data-plane/models/load.ts'; import { MODEL_LISTING_FAILURE_MESSAGE } from '../../data-plane/models/shared.ts'; -import { getModels } from '../../data-plane/providers/registry.ts'; +import { getModelsForListing } from '../../data-plane/providers/registry.ts'; import { createPerRequestFetcher } from '../../dial/per-request.ts'; import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts'; +import { getRepo } from '../../repo/index.ts'; import { backgroundSchedulerFromContext } from '../../runtime/background.ts'; import { getCurrentColo } from '../../runtime/runtime-info.ts'; import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common'; import { ProviderModelsUnavailableError } from '@floway-dev/provider'; -import type { ResolvedModel, UpstreamProviderKind } from '@floway-dev/provider'; +import type { UpstreamProviderKind } from '@floway-dev/provider'; // Same DTO as the public /models endpoint, plus one dashboard-only field: // `upstreams` lists every provider binding for this model as { kind, id, name } // triples. A single model id can be served by mixed provider kinds (e.g. 
one // azure deployment + one custom upstream both expose `gpt-5.5`), so a flat -// `provider`/`upstream_ids` split would misrepresent that. +// `provider`/`upstream_ids` split would misrepresent that. Alias entries +// carry a single binding (the upstream that resolves their target) and the +// `aliasedFrom` provenance flows through `toPublicModel`. interface ControlPlaneModel extends PublicModel { upstreams: { kind: UpstreamProviderKind; id: string; name: string }[]; } @@ -24,7 +28,7 @@ interface ControlPlaneModelsResponse extends Omit data: ControlPlaneModel[]; } -const toControlPlaneModel = (model: ResolvedModel): ControlPlaneModel => ({ +const toControlPlaneModel = (model: ListedModel): ControlPlaneModel => ({ ...toPublicModel(model), upstreams: model.providers.map(binding => ({ kind: binding.providerKind, id: binding.upstream, name: binding.upstreamName })), }); @@ -35,8 +39,16 @@ export const controlPlaneModels = async (c: Context) => { // like the data-plane /models endpoint. On a session request there is no // API key, so this resolves to the user's per-user upstream cap: a user who // has had an upstream removed must not see its models in the Models tab. + // Aliases come from the same repo singleton the data plane uses, so the + // dashboard sees exactly the alias rows the runtime would honour. 
const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw)); - const models = await getModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c)); + const aliases = await getRepo().modelAliases.loadAll(); + const { models } = await getModelsForListing( + effectiveUpstreamIdsFromContext(c), + fetcherForUpstream, + backgroundSchedulerFromContext(c), + aliases, + ); const data = models.map(toControlPlaneModel); const response: ControlPlaneModelsResponse = { object: 'list', diff --git a/packages/gateway/src/control-plane/models/routes_test.ts b/packages/gateway/src/control-plane/models/routes_test.ts index 7c611c31a..0be710c36 100644 --- a/packages/gateway/src/control-plane/models/routes_test.ts +++ b/packages/gateway/src/control-plane/models/routes_test.ts @@ -1,5 +1,6 @@ import { test } from 'vitest'; +import type { MemoryModelAliasesRepo } from '../../repo/memory.ts'; import { buildCustomUpstreamRecord, copilotModels, requestApp, setupAppTest } from '../../test-helpers.ts'; import type { UpstreamRecord } from '@floway-dev/provider'; import { assertEquals, jsonResponse, withMockedFetch } from '@floway-dev/test-utils'; @@ -113,3 +114,43 @@ test('/api/models is scoped to the caller\'s effective upstreams — a removed u assertEquals(ids.includes('azure-public'), false); }); }); + +test('/api/models appends visible alias entries with aliasedFrom alongside real catalog rows', async () => { + const { apiKey, repo } = await setupAppTest(); + await repo.upstreams.save(buildCustomUpstreamRecord({ id: 'up_custom_models', sortOrder: 100 })); + + (repo.modelAliases as MemoryModelAliasesRepo).setAll([ + { + alias: 'codex-auto-review', + displayName: 'Codex Auto Review', + targetModelId: 'custom-model', + upstreamIds: [], + rules: { reasoning: { effort: 'low' } }, + visibleInModelsList: true, + onConflict: 'real-only', + createdAt: 1_700_000_000, + }, + { + alias: 'hidden-alias', + targetModelId: 'custom-model', + 
upstreamIds: [], + rules: {}, + visibleInModelsList: false, + onConflict: 'real-only', + createdAt: 1_700_000_001, + }, + ]); + + await withMockedFetch(modelsFetchHandler, async () => { + const response = await requestApp('/api/models', { headers: { 'x-api-key': apiKey.key } }); + assertEquals(response.status, 200); + const body = (await response.json()) as { data: Array<{ id: string; display_name: string; upstreams: Array<{ kind: string; id: string; name: string }>; aliasedFrom?: { targetModelId: string; rules: Record } }> }; + const aliasEntry = body.data.find(model => model.id === 'codex-auto-review'); + if (!aliasEntry) throw new Error('expected codex-auto-review alias entry on /api/models'); + assertEquals(aliasEntry.display_name, 'Codex Auto Review'); + assertEquals(aliasEntry.upstreams, [{ kind: 'custom', id: 'up_custom_models', name: 'Custom Provider' }]); + assertEquals(aliasEntry.aliasedFrom?.targetModelId, 'custom-model'); + assertEquals(aliasEntry.aliasedFrom?.rules, { reasoning: { effort: 'low' } }); + assertEquals(body.data.some(model => model.id === 'hidden-alias'), false); + }); +}); diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts index e41400111..2f1880fe5 100644 --- a/packages/gateway/src/data-plane/models/alias-listing.ts +++ b/packages/gateway/src/data-plane/models/alias-listing.ts @@ -1,23 +1,33 @@ +import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts'; import type { ModelAlias } from '../../control-plane/model-aliases/types.ts'; -import type { ModelProviderInstance, UpstreamModel } from '@floway-dev/provider'; +import type { PublicModel } from '@floway-dev/protocols/common'; +import type { ModelProviderInstance, ProviderModelRecord, ResolvedModel, UpstreamModel } from '@floway-dev/provider'; // One emission slot for an alias: a (provider, addressable form) pair where // the provider's raw catalog carries the alias target id, plus the 
matched // UpstreamModel so the synthesized listing entry can borrow the target's // limits, owner, and cost without re-querying. -export interface AliasListingEmission { +interface AliasListingEmission { provider: ModelProviderInstance; form: 'unprefixed' | 'prefixed'; target: UpstreamModel; } -// Per-upstream alias enumeration shared by `/v1/models` and the Gemini -// `/models` listings. An alias with empty `upstreamIds` matches every -// reachable provider; a non-empty list narrows the candidate set. Per +// A `ResolvedModel` that may carry an `aliasedFrom` provenance — what +// `getModelsForListing` returns when alias entries have been interleaved into +// the catalog. Each listing endpoint's mapper (`toPublicModel`, +// `toControlPlaneModel`, `toGeminiModel`) reads the same shape, so the alias +// fan-out happens exactly once instead of being re-implemented per surface. +export type ListedModel = ResolvedModel & { + readonly aliasedFrom?: NonNullable; +}; + +// Per-upstream alias enumeration. An alias with empty `upstreamIds` matches +// every reachable provider; a non-empty list narrows the candidate set. Per // provider, the alias emits one entry per `listed` form when its target sits // in the upstream's raw catalog. Upstreams that do not carry the target — or // whose operator disabled the target — drop the alias entirely for that row. -export const aliasListingEmissions = ( +const aliasListingEmissions = ( alias: ModelAlias, providers: readonly ModelProviderInstance[], rawCatalogs: ReadonlyMap, @@ -46,7 +56,68 @@ export const aliasListingEmissions = ( // The public id form an alias emission carries on the wire. Bare alias name // for the unprefixed form; provider prefix + alias name for the prefixed // form. Mirrors how real models are surfaced in the same listing pass. 
-export const aliasPublicId = (alias: ModelAlias, emission: AliasListingEmission): string => { +const aliasPublicId = (alias: ModelAlias, emission: AliasListingEmission): string => { const cfg = emission.provider.modelPrefix; return emission.form === 'prefixed' && cfg !== null ? `${cfg.prefix}${alias.alias}` : alias.alias; }; + +// Turn an alias emission into a `ListedModel` that walks the same listing +// pipeline as real catalog entries. The synthesized `providers` array carries +// a single binding pointing at the alias's target on this upstream, so the +// dashboard's per-binding view renders correctly without alias-specific +// branching. `aliasedFrom` rides out as the public protocol extension. +// +// Display name: the alias-local part (operator displayName, or +// `${target.display_name} (rules summary)`) lives by itself for the +// `unprefixed` listing form; the `prefixed` form mirrors the real-model path +// in `registry.ts` and prepends `${provider.name}: ` so the upstream is +// visible at a glance. +const aliasEmissionToListedModel = (alias: ModelAlias, emission: AliasListingEmission): ListedModel => { + const { provider, target, form } = emission; + const aliasLocalName = composeAliasDisplayName({ + aliasDisplayName: alias.displayName, + targetDisplayName: target.display_name ?? target.id, + rules: alias.rules, + }); + const record: ProviderModelRecord = { + upstream: provider.upstream, + upstreamName: provider.name, + providerKind: provider.providerKind, + provider: provider.provider, + upstreamModel: target, + enabledFlags: target.enabledFlags, + supportsResponsesItemReference: provider.supportsResponsesItemReference, + }; + const { providerData: _providerData, endpoints, id: _targetId, display_name: _targetDisplay, created: _targetCreated, ...rest } = target; + return { + ...rest, + id: aliasPublicId(alias, emission), + display_name: form === 'prefixed' ? 
`${provider.name}: ${aliasLocalName}` : aliasLocalName, + created: alias.createdAt, + endpoints: { ...endpoints }, + providers: [record], + aliasedFrom: { + targetModelId: alias.targetModelId, + upstreamIds: alias.upstreamIds, + rules: alias.rules, + onConflict: alias.onConflict, + }, + }; +}; + +// Single-pass alias fan-out used by every listing surface. Visibility filter +// honoured here; per-surface callers just map ListedModel → their own DTO. +export const synthesizeListedAliases = ( + aliases: readonly ModelAlias[], + providers: readonly ModelProviderInstance[], + rawCatalogs: ReadonlyMap, +): ListedModel[] => { + const out: ListedModel[] = []; + for (const alias of aliases) { + if (!alias.visibleInModelsList) continue; + for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) { + out.push(aliasEmissionToListedModel(alias, emission)); + } + } + return out; +}; diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts index 3d3b5ab2c..f01579f9a 100644 --- a/packages/gateway/src/data-plane/models/gemini.ts +++ b/packages/gateway/src/data-plane/models/gemini.ts @@ -1,8 +1,6 @@ import type { Context } from 'hono'; -import { aliasListingEmissions, aliasPublicId } from './alias-listing.ts'; import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts'; -import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts'; import type { ModelAlias } from '../../control-plane/model-aliases/types.ts'; import { createPerRequestFetcher } from '../../dial/per-request.ts'; import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts'; @@ -34,6 +32,10 @@ interface GeminiModel { cost?: ModelPricing; } +// Gemini's Model resource is closed (no `aliasedFrom` extension), so an alias +// arrives here through `getModelsForListing` looking like any other chat +// model — `id`, `display_name`, `limits`, `cost` already finalized by +// `synthesizeListedAliases` — and the mapper 
has no alias-specific branch. const toGeminiModel = (model: InternalModel): GeminiModel => { const limits = model.limits; const inputTokenLimit = limits.max_prompt_tokens ?? limits.max_context_window_tokens; @@ -72,33 +74,11 @@ const loadGeminiModels = async ( scheduler: BackgroundScheduler, aliases: readonly ModelAlias[], ): Promise => { - const { models, providers, rawCatalogs } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler); - // Only chat models are representable in the Gemini /models shape. - const realChatEntries = models.filter(model => model.kind === 'chat').map(toGeminiModel); - // Per-upstream alias enumeration mirrors `/v1/models`. Each emission becomes - // one Gemini Model entry whose id and displayName reflect that specific - // (provider, addressable form) pair; targets of the wrong kind never reach - // here because they were already filtered out of the catalog walk. - const aliasEntries: GeminiModel[] = []; - for (const alias of aliases) { - if (!alias.visibleInModelsList) continue; - for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) { - if (emission.target.kind !== 'chat') continue; - const aliasLocalName = composeAliasDisplayName({ - aliasDisplayName: alias.displayName, - targetDisplayName: emission.target.display_name ?? emission.target.id, - rules: alias.rules, - }); - aliasEntries.push(toGeminiModel({ - ...emission.target, - id: aliasPublicId(alias, emission), - display_name: emission.form === 'prefixed' ? `${emission.provider.name}: ${aliasLocalName}` : aliasLocalName, - kind: 'chat', - limits: emission.target.limits ?? {}, - })); - } - } - return [...realChatEntries, ...aliasEntries]; + const { models } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler, aliases); + // Only chat models are representable in the Gemini /models shape — alias + // entries whose target is non-chat fall out of this filter just like real + // non-chat catalog entries do. 
+ return models.filter(model => model.kind === 'chat').map(toGeminiModel); }; export const serveGeminiModels = async (c: Context): Promise => { diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts index 2d054dc88..5f2a32d64 100644 --- a/packages/gateway/src/data-plane/models/load.ts +++ b/packages/gateway/src/data-plane/models/load.ts @@ -1,12 +1,15 @@ -import { aliasListingEmissions, aliasPublicId, type AliasListingEmission } from './alias-listing.ts'; -import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts'; +import type { ListedModel } from './alias-listing.ts'; import type { ModelAlias } from '../../control-plane/model-aliases/types.ts'; import { getModelsForListing } from '../providers/registry.ts'; import type { BackgroundScheduler } from '@floway-dev/platform'; import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common'; import type { Fetcher, InternalModel } from '@floway-dev/provider'; -export const toPublicModel = (model: InternalModel): PublicModel => { +// Maps a single listed catalog entry (real or alias) to the wire DTO. Alias +// entries arrive with `aliasedFrom` pre-populated by +// `synthesizeListedAliases`; this mapper just rides it through so every +// listing surface sees the same provenance field. 
+export const toPublicModel = (model: InternalModel & { aliasedFrom?: ListedModel['aliasedFrom'] }): PublicModel => { const info: PublicModel = { id: model.id, object: 'model', @@ -21,34 +24,7 @@ export const toPublicModel = (model: InternalModel): PublicModel => { info.created_at = new Date(model.created * 1000).toISOString(); } if (model.cost) info.cost = model.cost; - return info; -}; - -const publicModelForAliasEmission = (alias: ModelAlias, emission: AliasListingEmission): PublicModel => { - const { provider, target, form } = emission; - const aliasLocalName = composeAliasDisplayName({ - aliasDisplayName: alias.displayName, - targetDisplayName: target.display_name ?? target.id, - rules: alias.rules, - }); - const info: PublicModel = { - id: aliasPublicId(alias, emission), - object: 'model', - type: 'model', - display_name: form === 'prefixed' ? `${provider.name}: ${aliasLocalName}` : aliasLocalName, - limits: target.limits ? { ...target.limits } : {}, - kind: target.kind, - created: alias.createdAt, - created_at: new Date(alias.createdAt * 1000).toISOString(), - aliasedFrom: { - targetModelId: alias.targetModelId, - upstreamIds: alias.upstreamIds, - rules: alias.rules, - onConflict: alias.onConflict, - }, - }; - info.owned_by = target.owned_by ?? provider.upstream; - if (target.cost) info.cost = target.cost; + if (model.aliasedFrom) info.aliasedFrom = model.aliasedFrom; return info; }; @@ -58,20 +34,8 @@ export const loadModels = async ( scheduler: BackgroundScheduler, aliases: readonly ModelAlias[], ): Promise => { - const { models, providers, rawCatalogs } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler); - const realEntries = models.map(toPublicModel); - // Per-upstream alias enumeration: for each visible alias, emit one entry per - // (provider, addressable form) pair where the provider can resolve the - // alias's target. 
Upstreams that do not carry the target produce no entry — - // the alias listing is strictly anchored to "can be served from here". - const aliasEntries: PublicModel[] = []; - for (const alias of aliases) { - if (!alias.visibleInModelsList) continue; - for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) { - aliasEntries.push(publicModelForAliasEmission(alias, emission)); - } - } - const data = [...realEntries, ...aliasEntries]; + const { models } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler, aliases); + const data = models.map(toPublicModel); return { object: 'list', has_more: false, diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts index c8556ca45..e6dcb214f 100644 --- a/packages/gateway/src/data-plane/providers/registry.ts +++ b/packages/gateway/src/data-plane/providers/registry.ts @@ -2,6 +2,7 @@ import { fetchUpstreamModelsCached } from './models-cache.ts'; import type { ModelAlias, ModelAliasRules } from '../../control-plane/model-aliases/types.ts'; import { getRepo } from '../../repo/index.ts'; import { matchAlias } from '../model-aliases/match.ts'; +import { synthesizeListedAliases, type ListedModel } from '../models/alias-listing.ts'; import type { BackgroundScheduler } from '@floway-dev/platform'; import { type ModelEndpointKey, type ModelEndpoints, kindForEndpoints } from '@floway-dev/protocols/common'; import type { InternalModel, ModelProviderInstance, ProviderModelRecord, ResolvedModel, Fetcher, UpstreamModel, UpstreamProviderKind, UpstreamRecord } from '@floway-dev/provider'; @@ -271,13 +272,14 @@ export const getModels = async ( }; // Returns the merged public model list AND the per-upstream raw catalogs and -// provider instances. 
Listing surfaces (`/v1/models`, Gemini `/models`) use the -// extra channels to synthesize alias entries that reflect which upstreams can -// actually serve each alias's target and in which addressable form. Computing -// both off the same `collectProviderModels` pass keeps catalog fetches to one -// round per upstream regardless of how many alias rows reference each target. +// provider instances. Listing surfaces (`/v1/models`, `/api/models`, Gemini +// `/models`) use the same call so alias entries — synthesized once via +// `synthesizeListedAliases` against the same `(providers, rawCatalogs)` pair — +// are interleaved into the catalog before it returns. Per-surface mappers +// then walk one uniform `ListedModel[]` instead of re-implementing alias +// fan-out three times. export interface PublicModelsListing { - models: ResolvedModel[]; + models: ListedModel[]; providers: readonly ModelProviderInstance[]; rawCatalogs: ReadonlyMap; } @@ -286,6 +288,7 @@ export const getModelsForListing = async ( upstreamFilter: readonly string[] | null, fetcherForUpstream: (upstreamId: string) => Fetcher, scheduler: BackgroundScheduler, + aliases: readonly ModelAlias[], ): Promise => { const providers = await listModelProviders(upstreamFilter); if (providers.length === 0) { @@ -294,7 +297,11 @@ export const getModelsForListing = async ( const { models, sawSuccess, lastError, rawCatalogs } = await collectProviderModels(providers, fetcherForUpstream, scheduler); - if (sawSuccess) return { models: models.sort((a, b) => compareModelIds(a.id, b.id)), providers, rawCatalogs }; + if (sawSuccess) { + const real = models.sort((a, b) => compareModelIds(a.id, b.id)); + const aliasEntries = synthesizeListedAliases(aliases, providers, rawCatalogs); + return { models: [...real, ...aliasEntries], providers, rawCatalogs }; + } if (lastError) throw lastError; return { models: [], providers, rawCatalogs }; }; From 06c789b29b6180d16fc79721873064b352d85566 Mon Sep 17 00:00:00 2001 From: Menci Date: 
Fri, 26 Jun 2026 02:56:30 +0800 Subject: [PATCH 013/170] fix(aliases): dedupe alias listing emissions whose public id collides MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Two no-prefix upstreams both serving the alias target produced two identical `codex-auto-review` rows in /v1/models and /api/models — visible in the dashboard Models list as duplicate cards. mergeIntoCatalog dedupes real models the same way; alias entries now go through the equivalent union (endpoints OR-ed, kind re-derived, provider bindings concatenated) so a single alias surfaces as one row whose `upstreams` field carries every backing binding. --- .../src/data-plane/models/alias-listing.ts | 28 ++++++-- .../src/data-plane/models/serve_test.ts | 65 +++++++++++++++++++ .../src/data-plane/providers/registry.ts | 2 +- 3 files changed, 89 insertions(+), 6 deletions(-) diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts index 2f1880fe5..d1de04b09 100644 --- a/packages/gateway/src/data-plane/models/alias-listing.ts +++ b/packages/gateway/src/data-plane/models/alias-listing.ts @@ -1,6 +1,7 @@ import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts'; import type { ModelAlias } from '../../control-plane/model-aliases/types.ts'; -import type { PublicModel } from '@floway-dev/protocols/common'; +import { unionEndpoints } from '../providers/registry.ts'; +import { kindForEndpoints, type PublicModel } from '@floway-dev/protocols/common'; import type { ModelProviderInstance, ProviderModelRecord, ResolvedModel, UpstreamModel } from '@floway-dev/provider'; // One emission slot for an alias: a (provider, addressable form) pair where @@ -106,18 +107,35 @@ const aliasEmissionToListedModel = (alias: ModelAlias, emission: AliasListingEmi }; // Single-pass alias fan-out used by every listing surface. 
Visibility filter -// honoured here; per-surface callers just map ListedModel → their own DTO. +// honoured here. Emissions whose synthesized public id collides — two +// no-prefix upstreams both serving the alias target, or two prefix-aliased +// upstreams sharing a prefix — merge into one row with the bindings +// appended, mirroring how `mergeIntoCatalog` collapses duplicate real-model +// ids; the dashboard then renders a single alias row whose `upstreams` lists +// every backing binding instead of N identical rows. export const synthesizeListedAliases = ( aliases: readonly ModelAlias[], providers: readonly ModelProviderInstance[], rawCatalogs: ReadonlyMap, ): ListedModel[] => { - const out: ListedModel[] = []; + const byId = new Map(); for (const alias of aliases) { if (!alias.visibleInModelsList) continue; for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) { - out.push(aliasEmissionToListedModel(alias, emission)); + const next = aliasEmissionToListedModel(alias, emission); + const existing = byId.get(next.id); + if (existing === undefined) { + byId.set(next.id, next); + continue; + } + const endpoints = unionEndpoints(existing.endpoints, next.endpoints); + byId.set(next.id, { + ...existing, + endpoints, + kind: kindForEndpoints(endpoints), + providers: [...existing.providers, ...next.providers], + }); } } - return out; + return [...byId.values()]; }; diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts index f3c45e313..cc5227967 100644 --- a/packages/gateway/src/data-plane/models/serve_test.ts +++ b/packages/gateway/src/data-plane/models/serve_test.ts @@ -934,3 +934,68 @@ test('/v1/models honours alias upstreamIds — only emits on the named upstream' }, ); }); + +test('/v1/models merges alias emissions whose synthesized public id collides — one row, multiple backing upstreams', async () => { + const { repo, apiKey } = await setupAppTest(); + + (repo.modelAliases as 
MemoryModelAliasesRepo).setAll([ + { + alias: 'codex-auto-review', + displayName: 'Codex Auto Review', + targetModelId: 'gpt-5.4', + upstreamIds: [], + rules: { reasoning: { effort: 'low' } }, + visibleInModelsList: true, + onConflict: 'real-only', + createdAt: 1_700_000_000, + }, + ]); + + // Two no-prefix upstreams both serve gpt-5.4 — without dedupe, the alias + // would emit two `codex-auto-review` rows. With dedupe, the dashboard sees + // one row whose `upstreams` field lists both bindings, exactly like real + // models that exist on multiple upstreams. + await repo.upstreams.save(buildCustomUpstreamRecord({ + id: 'up_alpha', + name: 'Alpha', + sortOrder: 100, + config: { + baseUrl: 'https://alpha.example.com', + authStyle: 'bearer', + apiKey: 'sk-alpha', + endpoints: { chatCompletions: {} }, + }, + })); + await repo.upstreams.save(buildCustomUpstreamRecord({ + id: 'up_beta', + name: 'Beta', + sortOrder: 200, + config: { + baseUrl: 'https://beta.example.com', + authStyle: 'bearer', + apiKey: 'sk-beta', + endpoints: { chatCompletions: {} }, + }, + })); + + await withMockedFetch( + request => { + const url = new URL(request.url); + if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']); + if (url.pathname === '/copilot_internal/v2/token') { + return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } }); + } + if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([])); + if (url.pathname === '/v1/models' && (url.hostname === 'alpha.example.com' || url.hostname === 'beta.example.com')) { + return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] }); + } + throw new Error(`Unhandled fetch ${request.url}`); + }, + async () => { + const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } }); + const body = await response.json() as { 
data: Array<{ id: string }> }; + const rows = body.data.filter(m => m.id === 'codex-auto-review'); + assertEquals(rows.length, 1); + }, + ); +}); diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts index e6dcb214f..53619a613 100644 --- a/packages/gateway/src/data-plane/providers/registry.ts +++ b/packages/gateway/src/data-plane/providers/registry.ts @@ -89,7 +89,7 @@ export const listModelProviders = async ( // Merge two capability maps: a key present in either side is present in the // result, and its sub-capability flags are OR-ed so a sub-cap advertised by // either provider survives. -const unionEndpoints = (a: ModelEndpoints, b: ModelEndpoints): ModelEndpoints => { +export const unionEndpoints = (a: ModelEndpoints, b: ModelEndpoints): ModelEndpoints => { const result: ModelEndpoints = { ...a }; for (const key of Object.keys(b) as ModelEndpointKey[]) { const merged = { ...result[key], ...b[key] }; From e118cd1b0f056b3145abaea627d10b7da2be6735 Mon Sep 17 00:00:00 2001 From: Menci Date: Fri, 26 Jun 2026 03:07:47 +0800 Subject: [PATCH 014/170] feat(web): render alias rule badges on dashboard model rows Each rule field on an alias entry's aliasedFrom now appears as its own badge appended after the existing context/prompt/output badges, so the seed codex-auto-review shows "low effort" next to its upstream pills. Per-field labels move into a shared formatAliasRuleBadges helper in @floway-dev/protocols/common; the gateway's formatAliasRulesSummary derives from it (same wording, joined with commas, wrapped in parens when used as the synthesized display-name suffix). Dashboard and gateway therefore stay in lockstep on rule labels without parallel formatters drifting. 
--- apps/web/src/api/types.ts | 4 ++- .../src/components/models/ModelInfoBar.vue | 7 +++++ .../control-plane/model-aliases/display.ts | 26 +++++-------------- packages/protocols/src/common/models.ts | 22 ++++++++++++++++ 4 files changed, 39 insertions(+), 20 deletions(-) diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts index 1dd0c2c03..4ff2f417a 100644 --- a/apps/web/src/api/types.ts +++ b/apps/web/src/api/types.ts @@ -6,10 +6,11 @@ import type { ModelEndpoints, ModelKind, ModelPricing, + PublicModelAliasedFrom, } from '@floway-dev/protocols/common'; import type { AddressableForm, ModelPrefixConfig } from '@floway-dev/provider/model-prefix'; -export type { BillingDimension, ModelEndpointKey, ModelEndpoints, ModelKind, ModelPricing }; +export type { BillingDimension, ModelEndpointKey, ModelEndpoints, ModelKind, ModelPricing, PublicModelAliasedFrom }; export type { AddressableForm, ModelPrefixConfig }; export type UpstreamProviderKind = 'custom' | 'azure' | 'copilot' | 'codex' | 'claude-code' | 'ollama'; @@ -326,6 +327,7 @@ export interface PublicModel { endpoints?: Record; cost?: ModelPricing; kind?: ModelKind; + aliasedFrom?: PublicModelAliasedFrom; } export interface ControlPlaneModel extends PublicModel { diff --git a/apps/web/src/components/models/ModelInfoBar.vue b/apps/web/src/components/models/ModelInfoBar.vue index f8bf98b6e..b66b53a70 100644 --- a/apps/web/src/components/models/ModelInfoBar.vue +++ b/apps/web/src/components/models/ModelInfoBar.vue @@ -1,6 +1,7 @@