From 726814b483e2444d584c9f81da2d3a11cb5f6ac2 Mon Sep 17 00:00:00 2001
From: Menci
Date: Thu, 25 Jun 2026 20:40:21 +0800
Subject: [PATCH 001/170] feat(aliases): add model_aliases table, types, and
repo
Introduces the storage layer for the model-aliases data-plane feature.
The table is global, primary-keyed by alias name. Conflict resolution
is encoded as a CHECK-constrained TEXT column, freeform rule values
are stored as JSON, and the codex-auto-review seed entry lands with
the table.
loadAllAliases reads the full table per request (the table is
operator-managed and small; a cache layer is unnecessary for v0).
---
.../gateway/migrations/0046_model_aliases.sql | 14 +++
.../src/control-plane/model-aliases/repo.ts | 37 ++++++++
.../control-plane/model-aliases/repo_test.ts | 92 +++++++++++++++++++
.../src/control-plane/model-aliases/types.ts | 26 ++++++
4 files changed, 169 insertions(+)
create mode 100644 packages/gateway/migrations/0046_model_aliases.sql
create mode 100644 packages/gateway/src/control-plane/model-aliases/repo.ts
create mode 100644 packages/gateway/src/control-plane/model-aliases/repo_test.ts
create mode 100644 packages/gateway/src/control-plane/model-aliases/types.ts
diff --git a/packages/gateway/migrations/0046_model_aliases.sql b/packages/gateway/migrations/0046_model_aliases.sql
new file mode 100644
index 000000000..c934d77b6
--- /dev/null
+++ b/packages/gateway/migrations/0046_model_aliases.sql
@@ -0,0 +1,14 @@
+CREATE TABLE model_aliases ( -- operator-managed alias table, one row per alias name
+ alias TEXT PRIMARY KEY, -- alias name, unique across the whole table
+ target_model_id TEXT NOT NULL, -- target model id stored for the alias
+ upstream_ids_json TEXT NOT NULL DEFAULT '[]', -- JSON text, parsed by the repo into upstreamIds
+ rules_json TEXT NOT NULL DEFAULT '{}', -- JSON text, parsed by the repo into rules
+ visible_in_models_list INTEGER NOT NULL DEFAULT 1, -- boolean flag, the repo treats exactly 1 as true
+ on_conflict TEXT NOT NULL DEFAULT 'real-only'
+ CHECK (on_conflict IN ('alias-only', 'real-only', 'both-real-first', 'both-alias-first')), -- same four values as the OnConflict union in types.ts
+ created_at INTEGER NOT NULL DEFAULT (unixepoch()), -- Unix seconds at insert time
+ updated_at INTEGER NOT NULL DEFAULT (unixepoch()) -- insert-time default only, nothing in this migration refreshes it
+);
+
+INSERT INTO model_aliases (alias, target_model_id, rules_json, on_conflict)
+VALUES ('codex-auto-review', 'gpt-5.4', '{"reasoning":{"effort":"low"}}', 'real-only'); -- seed alias, the unlisted columns take their defaults
diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts
new file mode 100644
index 000000000..70024e0cd
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/repo.ts
@@ -0,0 +1,37 @@
+import type { ModelAlias, OnConflict } from './types.ts';
+import type { SqlDatabase } from '@floway-dev/platform';
+
+interface ModelAliasRow { // raw model_aliases row as selected by loadAllAliases (snake_case column names)
+ alias: string;
+ target_model_id: string;
+ upstream_ids_json: string; // JSON text, parsed into ModelAlias.upstreamIds
+ rules_json: string; // JSON text, parsed into ModelAlias.rules
+ visible_in_models_list: number; // integer flag, 1 maps to true and anything else to false
+ on_conflict: OnConflict;
+}
+
+// Reads every model_aliases row and maps it to a ModelAlias (rejects on malformed JSON columns). The table is
+// operator-managed and small (dozens of rows at most), so the data plane reads it per request — no cache layer.
+export const loadAllAliases = async (db: SqlDatabase): Promise<ModelAlias[]> => {
+  const { results } = await db
+    .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict FROM model_aliases')
+    .all<ModelAliasRow>();
+  return results.map(toModelAlias);
+};
+
+const toModelAlias = (row: ModelAliasRow): ModelAlias => {
+  // Maps one snake_case table row to the camelCase ModelAlias; JSON columns are parsed, upstream ids first.
+  const { alias, target_model_id: targetModelId, on_conflict: onConflict } = row;
+  const upstreamIds: ModelAlias['upstreamIds'] = parseJsonField(alias, 'upstream_ids_json', row.upstream_ids_json);
+  const rules: ModelAlias['rules'] = parseJsonField(alias, 'rules_json', row.rules_json);
+  const visibleInModelsList = row.visible_in_models_list === 1; // only an exact 1 counts as visible
+  return { alias, targetModelId, upstreamIds, rules, visibleInModelsList, onConflict };
+};
+
+const parseJsonField = <T>(alias: string, field: string, raw: string): T => { // parses one JSON TEXT column of the row keyed by alias
+  try {
+    return JSON.parse(raw) as T; // unchecked cast: only JSON syntax is validated, not the shape
+  } catch (cause) {
+    throw new Error(`Malformed model_aliases ${field} for ${alias}`, { cause }); // keeps the parse error as `cause`
+  }
+};
diff --git a/packages/gateway/src/control-plane/model-aliases/repo_test.ts b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
new file mode 100644
index 000000000..a4da76fde
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
@@ -0,0 +1,92 @@
+import { test } from 'vitest';
+
+import { loadAllAliases } from './repo.ts';
+import { createSqliteTestDb } from '../../repo/test-sqlite.ts';
+import { assertEquals, assertRejects } from '@floway-dev/test-utils';
+
+test('loadAllAliases reads the seed row from a freshly migrated database', async () => {
+ const db = await createSqliteTestDb();
+
+ const aliases = await loadAllAliases(db);
+
+ assertEquals(aliases, [
+ {
+ alias: 'codex-auto-review',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: [],
+ rules: { reasoning: { effort: 'low' } },
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ },
+ ]);
+});
+
+test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_models_list to a boolean', async () => {
+ const db = await createSqliteTestDb();
+ await db.exec('DELETE FROM model_aliases');
+ await db
+ .prepare(
+ 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)',
+ )
+ .bind(
+ 'opus-xhigh',
+ 'claude-opus-4-6',
+ '["up_priority","up_secondary"]',
+ '{"reasoning":{"effort":"xhigh"},"anthropicBeta":["fine-grained-tool-streaming"]}',
+ 0,
+ 'alias-only',
+ )
+ .run();
+ await db
+ .prepare(
+ 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)',
+ )
+ .bind('gpt-5-fast', 'gpt-5.4', '[]', '{"serviceTier":"priority"}', 1, 'both-alias-first')
+ .run();
+
+ const aliases = await loadAllAliases(db);
+ const byAlias = new Map(aliases.map(entry => [entry.alias, entry]));
+
+ assertEquals(byAlias.get('opus-xhigh'), {
+ alias: 'opus-xhigh',
+ targetModelId: 'claude-opus-4-6',
+ upstreamIds: ['up_priority', 'up_secondary'],
+ rules: { reasoning: { effort: 'xhigh' }, anthropicBeta: ['fine-grained-tool-streaming'] },
+ visibleInModelsList: false,
+ onConflict: 'alias-only',
+ });
+ assertEquals(byAlias.get('gpt-5-fast'), {
+ alias: 'gpt-5-fast',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: [],
+ rules: { serviceTier: 'priority' },
+ visibleInModelsList: true,
+ onConflict: 'both-alias-first',
+ });
+});
+
+test('loadAllAliases surfaces malformed rules_json as a descriptive error', async () => {
+ const db = await createSqliteTestDb();
+ await db.exec('DELETE FROM model_aliases');
+ await db
+ .prepare(
+ 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)',
+ )
+ .bind('bad-rules', 'gpt-5.4', '[]', '{not json', 1, 'real-only')
+ .run();
+
+ await assertRejects(() => loadAllAliases(db), Error, 'Malformed model_aliases rules_json for bad-rules');
+});
+
+test('loadAllAliases surfaces malformed upstream_ids_json as a descriptive error', async () => {
+ const db = await createSqliteTestDb();
+ await db.exec('DELETE FROM model_aliases');
+ await db
+ .prepare(
+ 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)',
+ )
+ .bind('bad-upstreams', 'gpt-5.4', '[bad', '{}', 1, 'real-only')
+ .run();
+
+ await assertRejects(() => loadAllAliases(db), Error, 'Malformed model_aliases upstream_ids_json for bad-upstreams');
+});
diff --git a/packages/gateway/src/control-plane/model-aliases/types.ts b/packages/gateway/src/control-plane/model-aliases/types.ts
new file mode 100644
index 000000000..8e1bff467
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/types.ts
@@ -0,0 +1,26 @@
+// Closed set of request-time mode knobs an operator can lock on a matched
+// alias. Each value is freeform — the gateway does not enum-gate operator
+// input so values pass through to upstream verbatim.
+export type ModelAliasRules = {
+ readonly reasoning?: {
+ readonly effort?: string;
+ readonly budgetTokens?: number;
+ readonly adaptive?: boolean;
+ readonly summary?: string;
+ };
+ readonly verbosity?: string;
+ readonly serviceTier?: string;
+ readonly anthropicSpeed?: string;
+ readonly anthropicBeta?: readonly string[];
+};
+
+export type OnConflict = 'alias-only' | 'real-only' | 'both-real-first' | 'both-alias-first';
+
+export type ModelAlias = { // camelCase, parsed view of one model_aliases row
+ readonly alias: string; // primary key of the row
+ readonly targetModelId: string;
+ readonly upstreamIds: readonly string[]; // parsed from upstream_ids_json (shape is not validated at load time)
+ readonly rules: ModelAliasRules; // parsed from rules_json (shape is not validated at load time)
+ readonly visibleInModelsList: boolean; // true only when the stored integer is exactly 1
+ readonly onConflict: OnConflict; // the table CHECK constraint limits stored values to this union
+};
From a4ac67e606297c4f988fa171f645647efa892b78 Mon Sep 17 00:00:00 2001
From: Menci
Date: Thu, 25 Jun 2026 20:53:03 +0800
Subject: [PATCH 002/170] feat(protocols): add Floway extension fields and
per-upstream sanitizers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Each inbound protocol IR gains the closed set of mode-knob fields it
cannot natively express (thinking_budget, adaptive_thinking,
reasoning_summary on chat-completions; thinking_budget, adaptive_thinking
on responses; verbosity on messages; verbosity, serviceTier inside
generationConfig on gemini; anthropic_speed/anthropicSpeed and
anthropic_beta/anthropicBeta everywhere they apply).
The extensions are public — a client can set them directly and they
behave identically to alias-injected rules. The per-upstream sanitizer
strips any extension residue before the upstream call and emits one
log line per drop when given a trace context, so cross-protocol drops
are observable without leaking the field to upstream.
---
.../src/data-plane/chat/shared/sanitize.ts | 41 +++++++++++
.../data-plane/chat/shared/sanitize_test.ts | 73 +++++++++++++++++++
packages/protocols/package.json | 3 +-
.../protocols/src/chat-completions/index.ts | 10 +++
packages/protocols/src/extensions/index.ts | 16 ++++
packages/protocols/src/gemini/index.ts | 8 ++
packages/protocols/src/index.ts | 1 +
packages/protocols/src/messages/index.ts | 2 +
packages/protocols/src/responses/index.ts | 8 ++
9 files changed, 161 insertions(+), 1 deletion(-)
create mode 100644 packages/gateway/src/data-plane/chat/shared/sanitize.ts
create mode 100644 packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
create mode 100644 packages/protocols/src/extensions/index.ts
diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize.ts b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
new file mode 100644
index 000000000..918156d16
--- /dev/null
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
@@ -0,0 +1,41 @@
+import { FLOWAY_EXTENSION_FIELDS } from '@floway-dev/protocols/extensions';
+
+export interface SanitizeTraceCtx { // optional observer for fields dropped by the sanitizers
+ readonly aliasName?: string; // forwarded as `alias` on every emitted line
+ readonly emit: (line: { alias?: string; field: string; targetProtocol: string }) => void; // called once per stripped field
+}
+
+// Deletes every listed key that is present on `body` (mutating it) and emits one trace line per deletion.
+const stripKeys = (
+  body: Record<string, unknown>,
+  keys: readonly string[],
+  targetProtocol: string,
+  trace: SanitizeTraceCtx | undefined,
+  fieldPrefix: string = '', // prepended to the key in the trace line, e.g. 'generationConfig.'
+): void => {
+  for (const key of keys) {
+    if (!(key in body)) continue;
+    delete body[key];
+    trace?.emit({ alias: trace.aliasName, field: `${fieldPrefix}${key}`, targetProtocol });
+  }
+};
+
+// Strips the Floway chat-completions extension fields from `body` in place, tracing each drop when `trace` is given.
+export const sanitizeForChatCompletionsUpstream = (body: Record<string, unknown>, trace?: SanitizeTraceCtx): void =>
+  stripKeys(body, FLOWAY_EXTENSION_FIELDS.chatCompletions, 'chat-completions', trace);
+
+// Strips the Floway responses extension fields from `body` in place, tracing each drop when `trace` is given.
+export const sanitizeForResponsesUpstream = (body: Record<string, unknown>, trace?: SanitizeTraceCtx): void =>
+  stripKeys(body, FLOWAY_EXTENSION_FIELDS.responses, 'responses', trace);
+
+// Strips the Floway messages extension fields from `body` in place, tracing each drop when `trace` is given.
+export const sanitizeForMessagesUpstream = (body: Record<string, unknown>, trace?: SanitizeTraceCtx): void =>
+  stripKeys(body, FLOWAY_EXTENSION_FIELDS.messages, 'messages', trace);
+
+// Strips Gemini extension fields from `body` and, when it is an object, from the nested `generationConfig` (both in place).
+export const sanitizeForGeminiUpstream = (body: Record<string, unknown>, trace?: SanitizeTraceCtx): void => {
+  stripKeys(body, FLOWAY_EXTENSION_FIELDS.gemini.topLevel, 'gemini', trace);
+  const generationConfig = body.generationConfig;
+  if (!generationConfig || typeof generationConfig !== 'object') return;
+  stripKeys(generationConfig as Record<string, unknown>, FLOWAY_EXTENSION_FIELDS.gemini.generationConfig, 'gemini', trace, 'generationConfig.');
+};
diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
new file mode 100644
index 000000000..eebcd5d06
--- /dev/null
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
@@ -0,0 +1,73 @@
+import { test } from 'vitest';
+
+import {
+ sanitizeForChatCompletionsUpstream,
+ sanitizeForGeminiUpstream,
+ sanitizeForMessagesUpstream,
+ sanitizeForResponsesUpstream,
+ type SanitizeTraceCtx,
+} from './sanitize.ts';
+import { assertEquals } from '@floway-dev/test-utils';
+
+type TraceLine = { alias?: string; field: string; targetProtocol: string };
+
+const makeTrace = (aliasName?: string): { ctx: SanitizeTraceCtx; lines: TraceLine[] } => {
+ const lines: TraceLine[] = [];
+ return {
+ ctx: { aliasName, emit: line => lines.push(line) },
+ lines,
+ };
+};
+
+test('sanitizeForMessagesUpstream strips verbosity and emits one trace line', () => {
+  const body: Record<string, unknown> = { verbosity: 'low', model: 'x' };
+  const { ctx, lines } = makeTrace('codex-auto-review');
+  sanitizeForMessagesUpstream(body, ctx);
+  assertEquals(body, { model: 'x' }); // native field survives, extension is gone
+  assertEquals(lines, [{ alias: 'codex-auto-review', field: 'verbosity', targetProtocol: 'messages' }]);
+});
+
+test('sanitizeForChatCompletionsUpstream strips Floway extensions and leaves native fields', () => {
+  const body: Record<string, unknown> = {
+    thinking_budget: 4096,
+    anthropic_speed: 'fast',
+    reasoning_effort: 'high',
+    model: 'x',
+  };
+  const { ctx, lines } = makeTrace('alias-1');
+  sanitizeForChatCompletionsUpstream(body, ctx);
+  assertEquals(body, { reasoning_effort: 'high', model: 'x' });
+  assertEquals(lines.length, 2);
+  assertEquals(lines.every(l => l.alias === 'alias-1' && l.targetProtocol === 'chat-completions'), true);
+  const droppedFields = lines.map(l => l.field).sort(); // sorted so the assertion does not depend on manifest order
+  assertEquals(droppedFields, ['anthropic_speed', 'thinking_budget']);
+});
+
+test('sanitizeForResponsesUpstream strips extensions without a trace context', () => {
+  const body: Record<string, unknown> = { adaptive_thinking: true, anthropic_beta: ['ctx-1m'] };
+  sanitizeForResponsesUpstream(body); // no trace argument: stripping must still happen
+  assertEquals(body, {});
+});
+
+test('sanitizeForGeminiUpstream walks top-level and generationConfig', () => {
+  const body: Record<string, unknown> = {
+    generationConfig: { verbosity: 'low', thinkingConfig: { thinkingBudget: 100 } },
+    anthropicSpeed: 'fast',
+  };
+  const { ctx, lines } = makeTrace('alias-g');
+  sanitizeForGeminiUpstream(body, ctx);
+  assertEquals(body, { generationConfig: { thinkingConfig: { thinkingBudget: 100 } } });
+  assertEquals(lines.length, 2);
+  const droppedFields = lines.map(l => l.field).sort(); // nested drops carry the 'generationConfig.' prefix
+  assertEquals(droppedFields, ['anthropicSpeed', 'generationConfig.verbosity']);
+  assertEquals(lines.every(l => l.alias === 'alias-g' && l.targetProtocol === 'gemini'), true);
+});
+
+test('sanitizer is idempotent — a second run emits no additional traces', () => {
+  const body: Record<string, unknown> = { verbosity: 'low', model: 'x' };
+  const { ctx, lines } = makeTrace();
+  sanitizeForMessagesUpstream(body, ctx);
+  assertEquals(lines.length, 1);
+  sanitizeForMessagesUpstream(body, ctx); // the key is already gone, so nothing is emitted
+  assertEquals(lines.length, 1);
+});
diff --git a/packages/protocols/package.json b/packages/protocols/package.json
index 5ada835f0..1a8409de0 100644
--- a/packages/protocols/package.json
+++ b/packages/protocols/package.json
@@ -12,7 +12,8 @@
"./messages": { "import": "./src/messages/index.ts", "types": "./src/messages/index.ts" },
"./gemini": { "import": "./src/gemini/index.ts", "types": "./src/gemini/index.ts" },
"./embeddings": { "import": "./src/embeddings/index.ts", "types": "./src/embeddings/index.ts" },
- "./images": { "import": "./src/images/index.ts", "types": "./src/images/index.ts" }
+ "./images": { "import": "./src/images/index.ts", "types": "./src/images/index.ts" },
+ "./extensions": { "import": "./src/extensions/index.ts", "types": "./src/extensions/index.ts" }
},
"scripts": {
"typecheck": "tsc --noEmit",
diff --git a/packages/protocols/src/chat-completions/index.ts b/packages/protocols/src/chat-completions/index.ts
index 582381555..64a62c91d 100644
--- a/packages/protocols/src/chat-completions/index.ts
+++ b/packages/protocols/src/chat-completions/index.ts
@@ -25,6 +25,16 @@ export interface ChatCompletionsPayload {
tool_choice?: 'none' | 'auto' | 'required' | { type: 'function'; function: { name: string } } | null;
/** Request usage stats in streaming responses */
stream_options?: { include_usage: boolean } | null;
+ /** Floway protocol extension. Translated to Anthropic `thinking.budget_tokens` / Gemini `thinkingConfig.thinkingBudget` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ thinking_budget?: number;
+ /** Floway protocol extension. Translated to Anthropic `thinking.type: "adaptive"` / Gemini `thinkingConfig.thinkingBudget: -1` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ adaptive_thinking?: boolean;
+ /** Floway protocol extension. Translated to OpenAI Responses `reasoning.summary` / Anthropic `thinking.display` / Gemini `thinkingConfig.includeThoughts` when routed to those upstreams; dropped on OpenAI Chat targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ reasoning_summary?: string;
+ /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ anthropic_speed?: string;
+ /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ anthropic_beta?: readonly string[];
}
export interface ChatCompletionsTool {
diff --git a/packages/protocols/src/extensions/index.ts b/packages/protocols/src/extensions/index.ts
new file mode 100644
index 000000000..b6579ce2b
--- /dev/null
+++ b/packages/protocols/src/extensions/index.ts
@@ -0,0 +1,16 @@
+/**
+ * Manifest of the Floway protocol extension fields, grouped by inbound
+ * protocol. These are the fields the gateway layers on top of each host
+ * protocol's own schema; the gateway's per-upstream sanitizer reads this
+ * object to strip extension residue before the upstream HTTP call. See
+ * docs/superpowers/specs/2026-06-25-model-aliases-design.md.
+ */
+export const FLOWAY_EXTENSION_FIELDS = {
+  chatCompletions: ['thinking_budget', 'adaptive_thinking', 'reasoning_summary', 'anthropic_speed', 'anthropic_beta'],
+  responses: ['thinking_budget', 'adaptive_thinking', 'anthropic_speed', 'anthropic_beta'],
+  messages: ['verbosity'],
+  gemini: {
+    topLevel: ['anthropicSpeed', 'anthropicBeta'],
+    generationConfig: ['verbosity', 'serviceTier'],
+  },
+} as const;
diff --git a/packages/protocols/src/gemini/index.ts b/packages/protocols/src/gemini/index.ts
index 1530fd2f5..ded7ebb36 100644
--- a/packages/protocols/src/gemini/index.ts
+++ b/packages/protocols/src/gemini/index.ts
@@ -6,6 +6,10 @@ export interface GeminiPayload {
generationConfig?: GeminiGenerationConfig;
safetySettings?: GeminiSafetySetting[];
cachedContent?: string;
+ /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ anthropicSpeed?: string;
+ /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ anthropicBeta?: readonly string[];
}
export interface GeminiContent {
@@ -38,6 +42,10 @@ export interface GeminiGenerationConfig {
responseMimeType?: string;
responseSchema?: unknown;
thinkingConfig?: GeminiThinkingConfig;
+ /** Floway protocol extension. Translated to OpenAI Chat `verbosity` / Responses `text.verbosity` when routed to those upstreams; dropped on Anthropic Messages and Gemini targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ verbosity?: string;
+ /** Floway protocol extension. Translated to OpenAI Chat `service_tier` / Responses `service_tier` / Anthropic `service_tier` when routed to those upstreams; dropped on Gemini targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ serviceTier?: string;
}
export interface GeminiThinkingConfig {
diff --git a/packages/protocols/src/index.ts b/packages/protocols/src/index.ts
index 981d4fda1..ceaa785f6 100644
--- a/packages/protocols/src/index.ts
+++ b/packages/protocols/src/index.ts
@@ -2,6 +2,7 @@ export * from './common/index.ts';
export * from './completions/index.ts';
export * from './chat-completions/index.ts';
export * from './embeddings/index.ts';
+export * from './extensions/index.ts';
export * from './gemini/index.ts';
export * from './messages/index.ts';
export * from './responses/index.ts';
diff --git a/packages/protocols/src/messages/index.ts b/packages/protocols/src/messages/index.ts
index 9689db240..94e44188e 100644
--- a/packages/protocols/src/messages/index.ts
+++ b/packages/protocols/src/messages/index.ts
@@ -56,6 +56,8 @@ export interface MessagesPayload {
// protocol layer because the gateway treats `speed: 'fast'` as the canonical
// client signal regardless of which upstream serves it.
speed?: 'standard' | 'fast' | (string & {});
+ /** Floway protocol extension. Translated to OpenAI Chat `verbosity` / Responses `text.verbosity` when routed to those upstreams; dropped on Anthropic Messages and Gemini targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ verbosity?: string;
}
export interface MessagesSearchResultLocationCitation {
diff --git a/packages/protocols/src/responses/index.ts b/packages/protocols/src/responses/index.ts
index 8822c1f3d..8cf83f0ea 100644
--- a/packages/protocols/src/responses/index.ts
+++ b/packages/protocols/src/responses/index.ts
@@ -33,6 +33,14 @@ export interface ResponsesPayload {
prompt_cache_key?: string | null;
safety_identifier?: string | null;
service_tier?: 'default' | 'auto' | 'flex' | 'priority' | 'scale' | (string & {}) | null;
+ /** Floway protocol extension. Translated to Anthropic `thinking.budget_tokens` / Gemini `thinkingConfig.thinkingBudget` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ thinking_budget?: number;
+ /** Floway protocol extension. Translated to Anthropic `thinking.type: "adaptive"` / Gemini `thinkingConfig.thinkingBudget: -1` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ adaptive_thinking?: boolean;
+ /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ anthropic_speed?: string;
+ /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ anthropic_beta?: readonly string[];
}
// Narrower payload for `/responses/compact`. The official endpoint accepts a
From e1891e1dddb03b519090e99fa1fa1f262a09a96e Mon Sep 17 00:00:00 2001
From: Menci
Date: Thu, 25 Jun 2026 21:15:38 +0800
Subject: [PATCH 003/170] feat(translate): emit Floway extension fields to
upstream slots
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Each translate pair now reads the inbound IR's native and Floway-extension
mode-knob fields and writes them to the upstream protocol's natural slot
per the model-aliases design table. Routing is purely by upstream wire
protocol; translate never branches on model version.
Coverage per rule:
- reasoning.effort: emitted onto OpenAI Chat reasoning_effort, Responses
reasoning.effort, Anthropic output_config.effort, Gemini
thinkingConfig.thinkingLevel (the inverse mappers stay where they were).
- reasoning.budgetTokens / reasoning.adaptive: emitted onto Anthropic
thinking.{type:'enabled', budget_tokens} and thinking.{type:'adaptive'}
via a shared via-messages helper; Gemini path keeps its native
thinkingBudget handling.
- reasoning.summary: bidirectional Responses reasoning.summary ↔ Anthropic
thinking.display mapping with concise|detailed → summarized, omitted →
omitted, auto → upstream default; reverse picks concise as the
Responses-side canonical form.
- verbosity: native fields on Chat and Responses (added now — the IR
did not carry them yet), Floway extension on Messages and Gemini.
- serviceTier: passes through verbatim onto each protocol's service_tier
slot; Messages' service_tier type relaxed to admit operator-typed
values per the alias design's freeform contract.
- anthropicSpeed: emitted onto Anthropic Messages speed; dropped on
non-Messages targets.
- anthropicBeta: translate cannot move it to the request header (the
translate signature has no headers), so it is left as body residue
and the gateway-side rule-apply pass owns header materialization in
the next task; a mergeAnthropicBetaTokens helper lives in
via-messages/ for that consumer.
Drop-side emission stays the per-upstream sanitizer's job; translate
emits only the non-drop cells of the table.
The shared reasoning_effort union (gemini-via/gemini.ts) extends to the
seven values the alias suggestion list publishes (none|minimal|low|
medium|high|xhigh|max) and stops collapsing minimal onto low.
---
.../protocols/src/chat-completions/index.ts | 5 +
packages/protocols/src/gemini/index.ts | 2 +-
packages/protocols/src/messages/index.ts | 2 +-
packages/protocols/src/responses/index.ts | 8 +-
.../chat-completions-via-messages/request.ts | 15 +++
.../chat-completions-via-responses/request.ts | 15 ++-
.../gemini-via-chat-completions/request.ts | 5 +
.../src/gemini-via-messages/request.ts | 18 ++++
.../src/gemini-via-responses/request.ts | 27 ++++--
.../messages-via-chat-completions/request.ts | 2 +
.../src/messages-via-responses/request.ts | 20 +++-
.../responses-via-chat-completions/request.ts | 1 +
.../src/responses-via-messages/request.ts | 29 +++++-
.../responses-via-messages/request_test.ts | 4 +-
.../translate/src/shared/gemini-via/gemini.ts | 16 +++-
.../shared/messages-via/reasoning-summary.ts | 21 +++++
.../via-messages/anthropic-extensions.ts | 93 +++++++++++++++++++
17 files changed, 262 insertions(+), 21 deletions(-)
create mode 100644 packages/translate/src/shared/messages-via/reasoning-summary.ts
create mode 100644 packages/translate/src/shared/via-messages/anthropic-extensions.ts
diff --git a/packages/protocols/src/chat-completions/index.ts b/packages/protocols/src/chat-completions/index.ts
index 64a62c91d..8804fd449 100644
--- a/packages/protocols/src/chat-completions/index.ts
+++ b/packages/protocols/src/chat-completions/index.ts
@@ -18,6 +18,11 @@ export interface ChatCompletionsPayload {
parallel_tool_calls?: boolean | null;
response_format?: Record | null;
reasoning_effort?: string | null;
+ // GPT-5-family response-length control. Native OpenAI Chat field; Floway
+ // mirrors it onto Responses `text.verbosity` and exposes it as an
+ // extension on Messages / Gemini IRs.
+ // Reference: https://platform.openai.com/docs/api-reference/chat/create
+ verbosity?: string | null;
prompt_cache_key?: string | null;
safety_identifier?: string | null;
service_tier?: 'default' | 'auto' | 'flex' | 'priority' | 'scale' | (string & {}) | null;
diff --git a/packages/protocols/src/gemini/index.ts b/packages/protocols/src/gemini/index.ts
index ded7ebb36..c3e7e646a 100644
--- a/packages/protocols/src/gemini/index.ts
+++ b/packages/protocols/src/gemini/index.ts
@@ -50,7 +50,7 @@ export interface GeminiGenerationConfig {
export interface GeminiThinkingConfig {
thinkingBudget?: number;
- thinkingLevel?: 'minimal' | 'low' | 'medium' | 'high' | string;
+ thinkingLevel?: 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' | 'max' | string;
includeThoughts?: boolean;
}
diff --git a/packages/protocols/src/messages/index.ts b/packages/protocols/src/messages/index.ts
index 94e44188e..663dcef24 100644
--- a/packages/protocols/src/messages/index.ts
+++ b/packages/protocols/src/messages/index.ts
@@ -49,7 +49,7 @@ export interface MessagesPayload {
// no `json_object` variant.
format?: { type: 'json_schema'; schema: Record };
};
- service_tier?: 'auto' | 'standard_only';
+ service_tier?: 'auto' | 'standard_only' | (string & {});
// https://docs.claude.com/en/build-with-claude/fast-mode — Fast Mode is
// opt-in per request. Beta-only on the upstream wire (gated by
// `anthropic-beta: fast-mode-2026-02-01`), but we expose the field at the
diff --git a/packages/protocols/src/responses/index.ts b/packages/protocols/src/responses/index.ts
index 8cf83f0ea..39af5e148 100644
--- a/packages/protocols/src/responses/index.ts
+++ b/packages/protocols/src/responses/index.ts
@@ -26,10 +26,14 @@ export interface ResponsesPayload {
parallel_tool_calls?: boolean | null;
reasoning?: {
effort?: string;
- summary?: 'detailed' | 'auto' | 'concise';
+ summary?: 'detailed' | 'auto' | 'concise' | (string & {});
};
include?: string[];
- text?: { format?: Record | null } | null;
+ // `text.verbosity` is a native GPT-5-family Responses field that controls
+ // response length; `text.format` carries structured-output schemas. Both
+ // ride on the same `text` object.
+ // Reference: https://platform.openai.com/docs/api-reference/responses/create
+ text?: { format?: Record | null; verbosity?: string | null } | null;
prompt_cache_key?: string | null;
safety_identifier?: string | null;
service_tier?: 'default' | 'auto' | 'flex' | 'priority' | 'scale' | (string & {}) | null;
diff --git a/packages/translate/src/chat-completions-via-messages/request.ts b/packages/translate/src/chat-completions-via-messages/request.ts
index a09bd44b9..82dd22c59 100644
--- a/packages/translate/src/chat-completions-via-messages/request.ts
+++ b/packages/translate/src/chat-completions-via-messages/request.ts
@@ -2,6 +2,7 @@ import { messagesThinkingBlockFromChatCompletionsScalarReasoning } from '../shar
import { parseToolArgumentsObject } from '../shared/messages/tool-arguments.ts';
import { applyLastMessageCacheBreakpoint, applyLastToolCacheBreakpoint } from '../shared/via-messages/cache-breakpoints.ts';
import { fetchRemoteImage, type RemoteImageLoader, resolveImageUrlToMessagesImage } from '../shared/via-messages/remote-images.ts';
+import { buildMessagesThinkingFromExtensions } from '../shared/via-messages/anthropic-extensions.ts';
import type { ChatCompletionsPayload, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsTool } from '@floway-dev/protocols/chat-completions';
import { MESSAGES_FALLBACK_MAX_TOKENS, type MessagesAssistantContentBlock, type MessagesMessage, type MessagesPayload, type MessagesTextBlock, type MessagesUserContentBlock } from '@floway-dev/protocols/messages';
@@ -188,6 +189,17 @@ export const translateChatCompletionsToMessages = async (payload: ChatCompletion
if (formatSchema) outputConfig.format = { type: 'json_schema', schema: formatSchema };
const hasOutputConfig = Object.keys(outputConfig).length > 0;
+ // Materialize the Floway extension fields onto their Messages-natural
+ // slots. `anthropic_beta` is body-side residue that the per-upstream
+ // sanitizer strips after translation; the gateway-side rule-apply pass owns
+ // moving its value onto the outbound `anthropic-beta` header before the
+ // upstream call. See docs/superpowers/specs/2026-06-25-model-aliases-design.md.
+ const thinking = buildMessagesThinkingFromExtensions({
+ thinkingBudget: payload.thinking_budget,
+ adaptiveThinking: payload.adaptive_thinking,
+ reasoningSummary: payload.reasoning_summary,
+ });
+
// Leave OpenAI `user` and generic metadata out of the Messages fallback instead
// of treating them as a backchannel for Anthropic `metadata.user_id`.
return {
@@ -205,6 +217,9 @@ export const translateChatCompletionsToMessages = async (payload: ChatCompletion
...(tools ? { tools } : {}),
...(payload.tool_choice != null ? { tool_choice: translateChatCompletionsToolChoice(payload.tool_choice) } : {}),
...(hasOutputConfig ? { output_config: outputConfig } : {}),
+ ...(thinking ? { thinking } : {}),
+ ...(payload.anthropic_speed != null ? { speed: payload.anthropic_speed } : {}),
+ ...(payload.service_tier != null ? { service_tier: payload.service_tier } : {}),
};
};
diff --git a/packages/translate/src/chat-completions-via-responses/request.ts b/packages/translate/src/chat-completions-via-responses/request.ts
index 6efd82c34..1865ff40b 100644
--- a/packages/translate/src/chat-completions-via-responses/request.ts
+++ b/packages/translate/src/chat-completions-via-responses/request.ts
@@ -112,6 +112,19 @@ export const translateChatCompletionsToResponses = (payload: ChatCompletionsPayl
const responseTextConfig = payload.response_format === undefined ? undefined : payload.response_format === null ? null : { format: payload.response_format };
+ // `reasoning_summary` is the inbound CC extension that materializes onto
+ // the Responses-native `reasoning.summary` slot. Co-emit alongside
+ // `reasoning.effort` so a single `reasoning` object captures both knobs.
+ const reasoningEffort = payload.reasoning_effort != null ? payload.reasoning_effort : undefined;
+ const reasoningSummary = payload.reasoning_summary;
+ const reasoning =
+ reasoningEffort !== undefined || reasoningSummary !== undefined
+ ? {
+ ...(reasoningEffort !== undefined ? { effort: reasoningEffort } : {}),
+ ...(reasoningSummary !== undefined ? { summary: reasoningSummary } : {}),
+ }
+ : undefined;
+
return {
model: payload.model,
input,
@@ -134,7 +147,7 @@ export const translateChatCompletionsToResponses = (payload: ChatCompletionsPayl
// https://developers.openai.com/api/docs/guides/migrate-to-responses
...(payload.store !== undefined ? { store: payload.store } : {}),
...(payload.parallel_tool_calls !== undefined ? { parallel_tool_calls: payload.parallel_tool_calls } : {}),
- ...(payload.reasoning_effort != null ? { reasoning: { effort: payload.reasoning_effort } } : {}),
+ ...(reasoning ? { reasoning } : {}),
...(responseTextConfig !== undefined ? { text: responseTextConfig } : {}),
...(payload.prompt_cache_key !== undefined ? { prompt_cache_key: payload.prompt_cache_key } : {}),
...(payload.safety_identifier !== undefined ? { safety_identifier: payload.safety_identifier } : {}),
diff --git a/packages/translate/src/gemini-via-chat-completions/request.ts b/packages/translate/src/gemini-via-chat-completions/request.ts
index 6f490dc25..07cc8bdce 100644
--- a/packages/translate/src/gemini-via-chat-completions/request.ts
+++ b/packages/translate/src/gemini-via-chat-completions/request.ts
@@ -188,6 +188,11 @@ const applyGenerationConfig = (request: ChatCompletionsPayload, generationConfig
const reasoningEffort = geminiReasoningEffort(generationConfig.thinkingConfig);
if (reasoningEffort) request.reasoning_effort = reasoningEffort;
+
+ // Extension fields landed on CC: `verbosity` flows verbatim; `serviceTier`
+ // crosses naming conventions (camelCase Gemini → snake_case OpenAI).
+ if (generationConfig.verbosity != null) request.verbosity = generationConfig.verbosity;
+ if (generationConfig.serviceTier != null) request.service_tier = generationConfig.serviceTier;
};
const buildTools = (payload: GeminiPayload): ChatCompletionsTool[] | undefined => {
diff --git a/packages/translate/src/gemini-via-messages/request.ts b/packages/translate/src/gemini-via-messages/request.ts
index 29dd7c066..91fbe0493 100644
--- a/packages/translate/src/gemini-via-messages/request.ts
+++ b/packages/translate/src/gemini-via-messages/request.ts
@@ -161,6 +161,14 @@ const applyThinkingConfig = (request: MessagesPayload, thinkingConfig?: GeminiTh
}
}
+ // `includeThoughts` materializes onto `thinking.display`: true → summarized
+ // (Anthropic redacts to a single-block summary), false → omitted (no
+ // thinking surface at all). Skip when the source did not express either.
+ if (thinkingConfig.includeThoughts !== undefined && request.thinking?.type !== 'disabled') {
+ const display = thinkingConfig.includeThoughts === true ? ('summarized' as const) : ('omitted' as const);
+ request.thinking = request.thinking ? { ...request.thinking, display } : { type: 'enabled', display };
+ }
+
const effort = geminiThinkingLevelEffort(thinkingConfig);
// Spread to merge with any output_config fields a sibling helper has
// already written (e.g. structured-output `format` from
@@ -196,6 +204,11 @@ const applyGenerationConfig = (request: MessagesPayload, generationConfig: Gemin
};
}
+ // `serviceTier` extension flows verbatim onto the Messages-native slot;
+ // `verbosity` has no Anthropic equivalent and stays as inbound residue
+ // that the sanitizer strips after translation.
+ if (generationConfig.serviceTier != null) request.service_tier = generationConfig.serviceTier;
+
applyThinkingConfig(request, generationConfig.thinkingConfig);
};
@@ -260,6 +273,11 @@ export const buildTargetRequest = (
applyGenerationConfig(request, payload.generationConfig, fallbackMaxOutputTokens);
+ // Top-level Gemini Floway extensions: `anthropicSpeed` is the only one
+ // with a Messages-natural slot. `anthropicBeta` is header-bound at the
+ // gateway boundary (Task 5) since translate functions do not own headers.
+ if (payload.anthropicSpeed != null) request.speed = payload.anthropicSpeed;
+
const tools = buildTools(payload);
if (tools) request.tools = tools;
applyLastToolCacheBreakpoint(request.tools);
diff --git a/packages/translate/src/gemini-via-responses/request.ts b/packages/translate/src/gemini-via-responses/request.ts
index 62d67c827..df85e4729 100644
--- a/packages/translate/src/gemini-via-responses/request.ts
+++ b/packages/translate/src/gemini-via-responses/request.ts
@@ -132,6 +132,7 @@ const applyGenerationConfig = (request: ResponsesPayload, generationConfig?: Gem
if (generationConfig.responseSchema !== undefined) {
request.text = {
+ ...request.text,
format: {
type: 'json_schema',
json_schema: {
@@ -141,16 +142,28 @@ const applyGenerationConfig = (request: ResponsesPayload, generationConfig?: Gem
},
};
} else if (generationConfig.responseMimeType === 'application/json') {
- request.text = { format: { type: 'json_object' } };
+ request.text = { ...request.text, format: { type: 'json_object' } };
}
- const effort = geminiReasoningEffort(generationConfig.thinkingConfig);
- if (!effort) return;
+ // `verbosity` extension rides under `text` alongside the structured-output
+ // format, matching the native Responses placement.
+ if (generationConfig.verbosity != null) request.text = { ...request.text, verbosity: generationConfig.verbosity };
- request.reasoning = {
- effort,
- ...(effort !== 'none' && generationConfig.thinkingConfig?.includeThoughts === true ? { summary: 'detailed' as const } : {}),
- };
+ if (generationConfig.serviceTier != null) request.service_tier = generationConfig.serviceTier;
+
+ const effort = geminiReasoningEffort(generationConfig.thinkingConfig);
+ const summary =
+ generationConfig.thinkingConfig?.includeThoughts === true
+ ? ('detailed' as const)
+ : generationConfig.thinkingConfig?.includeThoughts === false
+ ? ('omitted' as const)
+ : undefined;
+ if (effort || summary !== undefined) {
+ request.reasoning = {
+ ...(effort ? { effort } : {}),
+ ...(summary !== undefined && effort !== 'none' ? { summary } : {}),
+ };
+ }
};
const buildTools = (payload: GeminiPayload): ResponsesTool[] | undefined => {
diff --git a/packages/translate/src/messages-via-chat-completions/request.ts b/packages/translate/src/messages-via-chat-completions/request.ts
index 76f5347d8..bd08e1b26 100644
--- a/packages/translate/src/messages-via-chat-completions/request.ts
+++ b/packages/translate/src/messages-via-chat-completions/request.ts
@@ -290,6 +290,8 @@ export const translateMessagesToChatCompletions = (payload: MessagesPayload): Ch
tools: translateMessagesTools(clientTools),
tool_choice: translateMessagesToolChoice(payload.tool_choice, clientTools),
...(responseFormat ? { response_format: responseFormat } : {}),
+ ...(payload.verbosity != null ? { verbosity: payload.verbosity } : {}),
+ ...(payload.service_tier != null ? { service_tier: payload.service_tier } : {}),
};
};
diff --git a/packages/translate/src/messages-via-responses/request.ts b/packages/translate/src/messages-via-responses/request.ts
index 7bb365cfe..b1c593443 100644
--- a/packages/translate/src/messages-via-responses/request.ts
+++ b/packages/translate/src/messages-via-responses/request.ts
@@ -1,6 +1,7 @@
import { openAiJsonSchemaCoreFromMessagesFormat } from '../shared/messages/structured-output.ts';
import { messagesReasoningBlockToResponsesReasoning } from '../shared/messages-and-responses/reasoning.ts';
import { resolveMessagesReasoningEffort } from '../shared/messages-via/reasoning-effort.ts';
+import { mapAnthropicDisplayToSummary } from '../shared/messages-via/reasoning-summary.ts';
import { normalizeMessagesToolInputSchema } from '../shared/messages-via/tool-schema.ts';
import {
type MessagesAssistantMessage,
@@ -207,15 +208,25 @@ export const translateMessagesToResponses = (payload: MessagesPayload): Response
// Responses upstream may reject it. Translation stays pairwise and leaves
// target-side validation to the selected upstream endpoint.
const effort = resolveMessagesReasoningEffort(payload);
- const reasoning = effort ? { effort } : undefined;
+ const display = payload.thinking?.display;
+ const summary = display !== undefined ? mapAnthropicDisplayToSummary(display) : undefined;
+ const reasoning =
+ effort !== undefined || summary !== undefined
+ ? {
+ ...(effort !== undefined ? { effort } : {}),
+ ...(summary !== undefined ? { summary } : {}),
+ }
+ : undefined;
const clientTools = getClientTools(payload.tools);
const instructions = translateSystemPrompt(payload.system);
const jsonSchema = openAiJsonSchemaCoreFromMessagesFormat(payload.output_config?.format);
- const text = jsonSchema ? { format: { type: 'json_schema' as const, ...jsonSchema } } : undefined;
+ const formatPart = jsonSchema ? { format: { type: 'json_schema' as const, ...jsonSchema } } : undefined;
+ const verbosityPart = payload.verbosity != null ? { verbosity: payload.verbosity } : undefined;
+ const text = formatPart || verbosityPart ? { ...formatPart, ...verbosityPart } : undefined;
// Keep fallback semantics strict: do not synthesize `temperature: 1`,
- // `store: false`, `parallel_tool_calls: true`, or `reasoning.summary` when the
- // Messages source did not express those knobs.
+ // `store: false`, or `parallel_tool_calls: true` when the Messages source
+ // did not express those knobs.
return {
model: payload.model,
input: translateMessagesInput(payload.messages),
@@ -229,6 +240,7 @@ export const translateMessagesToResponses = (payload: MessagesPayload): Response
stream: true,
...(reasoning ? { reasoning } : {}),
...(text ? { text } : {}),
+ ...(payload.service_tier != null ? { service_tier: payload.service_tier } : {}),
};
};
diff --git a/packages/translate/src/responses-via-chat-completions/request.ts b/packages/translate/src/responses-via-chat-completions/request.ts
index 6d0230db3..e7d23486f 100644
--- a/packages/translate/src/responses-via-chat-completions/request.ts
+++ b/packages/translate/src/responses-via-chat-completions/request.ts
@@ -242,6 +242,7 @@ export const translateResponsesToChatCompletions = (payload: ResponsesPayload):
...(payload.prompt_cache_key !== undefined ? { prompt_cache_key: payload.prompt_cache_key } : {}),
...(payload.safety_identifier !== undefined ? { safety_identifier: payload.safety_identifier } : {}),
...(payload.reasoning?.effort != null ? { reasoning_effort: payload.reasoning.effort } : {}),
+ ...(payload.text?.verbosity != null ? { verbosity: payload.text.verbosity } : {}),
...(payload.service_tier !== undefined ? { service_tier: payload.service_tier } : {}),
// Chat Completions has no request-level counterpart for Responses
// `reasoning`; only explicit reasoning items survive this translation.
diff --git a/packages/translate/src/responses-via-messages/request.ts b/packages/translate/src/responses-via-messages/request.ts
index 54fcf93a6..786ce4a98 100644
--- a/packages/translate/src/responses-via-messages/request.ts
+++ b/packages/translate/src/responses-via-messages/request.ts
@@ -3,6 +3,7 @@ import { responsesReasoningToMessagesUpstreamBlock } from '../shared/messages-an
import { buildCustomToolInputSchema } from '../shared/responses-via/custom-tool-wrap.ts';
import { applyLastMessageCacheBreakpoint, applyLastToolCacheBreakpoint, EPHEMERAL_CACHE_CONTROL } from '../shared/via-messages/cache-breakpoints.ts';
import { fetchRemoteImage, type RemoteImageLoader, resolveImageUrlToMessagesImage } from '../shared/via-messages/remote-images.ts';
+import { buildMessagesThinkingFromExtensions, mapSummaryToAnthropicDisplay } from '../shared/via-messages/anthropic-extensions.ts';
import {
MESSAGES_FALLBACK_MAX_TOKENS,
type MessagesAssistantContentBlock,
@@ -331,6 +332,30 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op
if (formatSchema) outputConfig.format = { type: 'json_schema', schema: formatSchema };
const hasOutputConfig = Object.keys(outputConfig).length > 0;
+ // Native Responses → Messages: `reasoning.summary` materializes onto the
+ // Messages-native `thinking.display`. Extension-driven thinking
+ // (`thinking_budget`, `adaptive_thinking`) takes precedence over the
+ // summary-only fallback because the alias write-side validator pins
+ // facets one-at-a-time; when neither extension is set and summary is the
+ // only signal, we synthesize `thinking.{type:'enabled', display}` so the
+ // display reaches the wire.
+ const extensionThinking = buildMessagesThinkingFromExtensions({
+ thinkingBudget: payload.thinking_budget,
+ adaptiveThinking: payload.adaptive_thinking,
+ });
+ const disabledThinking = effort === 'none' ? { type: 'disabled' as const } : undefined;
+ const summaryDisplay = payload.reasoning?.summary !== undefined ? mapSummaryToAnthropicDisplay(payload.reasoning.summary) : undefined;
+ const fallbackDisplayThinking =
+ !extensionThinking && !disabledThinking && summaryDisplay !== undefined
+ ? { type: 'enabled' as const, display: summaryDisplay as NonNullable<MessagesPayload['thinking']>['display'] }
+ : undefined;
+ const thinkingFromExtensions = extensionThinking
+ ? summaryDisplay !== undefined
+ ? { ...extensionThinking, display: summaryDisplay as NonNullable<MessagesPayload['thinking']>['display'] }
+ : extensionThinking
+ : undefined;
+ const thinking = thinkingFromExtensions ?? disabledThinking ?? fallbackDisplayThinking;
+
// Responses `metadata` is intentionally omitted on the Messages path;
// not coerced into Anthropic metadata.user_id, prompt-cache, or safety
// semantics.
@@ -344,8 +369,10 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op
stream: true,
tools,
tool_choice: translateToolChoice(payload.tool_choice),
- ...(effort === 'none' ? { thinking: { type: 'disabled' as const } } : {}),
+ ...(thinking ? { thinking } : {}),
...(hasOutputConfig ? { output_config: outputConfig } : {}),
+ ...(payload.anthropic_speed != null ? { speed: payload.anthropic_speed } : {}),
+ ...(payload.service_tier != null ? { service_tier: payload.service_tier } : {}),
};
return { target, customToolNames };
diff --git a/packages/translate/src/responses-via-messages/request_test.ts b/packages/translate/src/responses-via-messages/request_test.ts
index 9b690ccbf..2aedc14b7 100644
--- a/packages/translate/src/responses-via-messages/request_test.ts
+++ b/packages/translate/src/responses-via-messages/request_test.ts
@@ -6,7 +6,7 @@ import { MESSAGES_FALLBACK_MAX_TOKENS, type MessagesClientTool, type MessagesToo
const stubRemoteImageLoader = (result: { mediaType: string | null; data: Uint8Array } | null) => () => Promise.resolve(result);
-test('translateResponsesToMessages maps reasoning.effort none to thinking.disabled', async () => {
+test('translateResponsesToMessages maps reasoning.effort none to thinking.disabled (summary ignored when reasoning is disabled)', async () => {
const result = await translateResponsesToMessages({
model: 'claude-test',
input: [{ type: 'message', role: 'user', content: 'hi' }],
@@ -41,7 +41,7 @@ test('translateResponsesToMessages maps reasoning.effort directly to output_conf
stream: null,
store: false,
parallel_tool_calls: true,
- reasoning: { effort: 'minimal', summary: 'detailed' },
+ reasoning: { effort: 'minimal' },
});
assertEquals(result.target.output_config, { effort: 'minimal' });
diff --git a/packages/translate/src/shared/gemini-via/gemini.ts b/packages/translate/src/shared/gemini-via/gemini.ts
index a5b4993ea..99d8b4872 100644
--- a/packages/translate/src/shared/gemini-via/gemini.ts
+++ b/packages/translate/src/shared/gemini-via/gemini.ts
@@ -117,21 +117,33 @@ export const geminiFunctionResponsePart = (part: GeminiPart, ids: GeminiToolCall
return { response, id: unmatched?.shift() ?? id };
};
-export const geminiThinkingLevelEffort = (thinkingConfig?: GeminiThinkingConfig): 'low' | 'medium' | 'high' | undefined => {
+// Reasoning effort is freeform on the inbound IRs (per Goal 2: never gate
+// operator-typed values), but the gateway publishes a canonical closed set so
+// translate-side mappers can normalize without rewriting unknown values.
+// References:
+// - docs/superpowers/specs/2026-06-25-model-aliases-design.md (Translate Layer)
+export type ReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' | 'max';
+
+export const geminiThinkingLevelEffort = (thinkingConfig?: GeminiThinkingConfig): ReasoningEffort | undefined => {
switch (thinkingConfig?.thinkingLevel) {
case 'minimal':
+ return 'minimal';
case 'low':
return 'low';
case 'medium':
return 'medium';
case 'high':
return 'high';
+ case 'xhigh':
+ return 'xhigh';
+ case 'max':
+ return 'max';
default:
return undefined;
}
};
-export const geminiReasoningEffort = (thinkingConfig?: GeminiThinkingConfig): 'none' | 'low' | 'medium' | 'high' | null => {
+export const geminiReasoningEffort = (thinkingConfig?: GeminiThinkingConfig): ReasoningEffort | null => {
if (!thinkingConfig) return null;
if (thinkingConfig.thinkingBudget !== undefined) {
diff --git a/packages/translate/src/shared/messages-via/reasoning-summary.ts b/packages/translate/src/shared/messages-via/reasoning-summary.ts
new file mode 100644
index 000000000..6d12bab9b
--- /dev/null
+++ b/packages/translate/src/shared/messages-via/reasoning-summary.ts
@@ -0,0 +1,21 @@
+import type { MessagesThinkingDisplay } from '@floway-dev/protocols/messages';
+
+// Reverse of via-messages/anthropic-extensions.ts mapSummaryToAnthropicDisplay.
+// Anthropic's `summarized` collapsed both `concise` and `detailed`; we pick
+// `concise` as the canonical reverse since it is Responses' more compact
+// summary mode and round-tripping through the gateway should not silently
+// inflate verbosity. Unknown operator-typed values pass through verbatim so
+// the Responses upstream gets the original spelling and decides for itself
+// whether to accept it.
+export const mapAnthropicDisplayToSummary = (display: MessagesThinkingDisplay | string): string | undefined => {
+ switch (display) {
+ case 'summarized':
+ return 'concise';
+ case 'omitted':
+ return 'omitted';
+ case 'full':
+ return 'detailed';
+ default:
+ return display;
+ }
+};
diff --git a/packages/translate/src/shared/via-messages/anthropic-extensions.ts b/packages/translate/src/shared/via-messages/anthropic-extensions.ts
new file mode 100644
index 000000000..513db32ce
--- /dev/null
+++ b/packages/translate/src/shared/via-messages/anthropic-extensions.ts
@@ -0,0 +1,93 @@
+import type { MessagesPayload, MessagesThinkingDisplay } from '@floway-dev/protocols/messages';
+
+// Anthropic structured `thinking.display` enumerates three modes; the
+// inbound IR's `reasoning_summary` extension and the Responses-native
+// `reasoning.summary` share an OpenAI-style {auto|concise|detailed|omitted}
+// vocabulary. The mapping collapses concise+detailed onto Anthropic's single
+// `summarized` mode (both surface a redacted summary, not the full chain),
+// `omitted` is the canonical hide-everything spelling, and `auto` returns
+// `undefined` so Anthropic's account-default takes over. Operator-typed
+// values that match neither vocabulary pass through verbatim — Anthropic
+// rejects unknown values at the wire, which is the explicit-failure path we
+// want per the alias design's no-enum-gating contract.
+export const mapSummaryToAnthropicDisplay = (summary: string): MessagesThinkingDisplay | string | undefined => {
+ switch (summary) {
+ case 'concise':
+ case 'detailed':
+ return 'summarized';
+ case 'omitted':
+ return 'omitted';
+ case 'auto':
+ return undefined;
+ default:
+ return summary;
+ }
+};
+
+// Merge a beta token list onto an existing `anthropic-beta` header value.
+// The header is a case-sensitive, comma-separated list per the Anthropic
+// docs; dedupe is by exact-match equality so operators can carry parallel
+// tokens that differ only by date suffix. Re-joined with `, ` so the wire
+// shape matches both Anthropic's own examples and downstream gateways
+// (envoyproxy/ai-gateway).
+// References:
+// - https://platform.claude.com/docs/en/api/beta-headers
+// - https://github.com/envoyproxy/ai-gateway
+export const mergeAnthropicBetaTokens = (existing: string | null | undefined, additions: readonly string[]): string => {
+ const seen = new Set<string>();
+ const merged: string[] = [];
+ const collect = (token: string): void => {
+ const trimmed = token.trim();
+ if (!trimmed || seen.has(trimmed)) return;
+ seen.add(trimmed);
+ merged.push(trimmed);
+ };
+
+ if (existing) {
+ for (const token of existing.split(',')) collect(token);
+ }
+ for (const token of additions) collect(token);
+
+ return merged.join(', ');
+};
+
+// Materialize the Messages-bound `anthropic_beta` extension list onto an
+// outbound request's `anthropic-beta` header. The helper takes a `Headers`
+// object so the caller (typically the gateway-side rule-apply pass) doesn't
+// have to re-parse and re-set the header itself.
+export const applyAnthropicBetaToHeaders = (headers: Headers, additions: readonly string[]): void => {
+ if (!additions.length) return;
+ const merged = mergeAnthropicBetaTokens(headers.get('anthropic-beta'), additions);
+ if (merged) headers.set('anthropic-beta', merged);
+};
+
+// Build a Messages `thinking` block from the Floway extension fields a
+// non-Messages inbound carries (`thinking_budget`, `adaptive_thinking`,
+// `reasoning_summary`). `adaptive_thinking: true` overrides `thinking_budget`
+// because the alias write-side validator enforces single-facet selection;
+// when both still arrive the adaptive choice wins.
+//
+// `reasoningSummary` is the OpenAI-style summary vocabulary
+// ({auto|concise|detailed|omitted} plus pass-through). It synthesizes
+// `thinking.{type:'enabled', display}` when the inbound carries summary
+// but no budget/adaptive signal — without an explicit thinking mode
+// Anthropic would otherwise discard the display field.
+export const buildMessagesThinkingFromExtensions = (input: {
+ thinkingBudget?: number;
+ adaptiveThinking?: boolean;
+ reasoningSummary?: string;
+}): MessagesPayload['thinking'] | undefined => {
+ const display = input.reasoningSummary !== undefined ? mapSummaryToAnthropicDisplay(input.reasoningSummary) : undefined;
+ const displayPart = display !== undefined ? { display: display as MessagesThinkingDisplay } : {};
+
+ if (input.adaptiveThinking === true) {
+ return { type: 'adaptive', ...displayPart };
+ }
+ if (input.thinkingBudget !== undefined) {
+ return { type: 'enabled', budget_tokens: input.thinkingBudget, ...displayPart };
+ }
+ if (input.reasoningSummary !== undefined && display !== undefined) {
+ return { type: 'enabled', ...displayPart };
+ }
+ return undefined;
+};
From 6ea94045b2b46b0eb144827e2dd2df36e1d81d3d Mon Sep 17 00:00:00 2001
From: Menci
Date: Thu, 25 Jun 2026 21:24:12 +0800
Subject: [PATCH 004/170] test(translate): cover Floway extension emission
across all nine pairs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
One assertion per non-drop cell of the model-aliases translate-emission
table: each test sets a single inbound rule (native or extension) and
checks the upstream-natural slot is present with the value forwarded
verbatim. Each pair also gets a drop-side assertion that the residue
field does not leak into the translated body — the per-upstream
sanitizer is the actual stripper, but translate must not invent a
target field where the mapping table says drop.
Pre-existing responses-via-messages tests that paired effort with
reasoning.summary keep their summary input (so the disabled-precedence
behavior is still verified) but no longer assume summary is silently
discarded; the new contract surfaces it as thinking.display where the
upstream has a slot, and the disabled case continues to win.
---
.../chat-completions-via-messages/request.ts | 2 +-
.../request_test.ts | 99 +++++++++++++++++++
.../chat-completions-via-responses/request.ts | 2 +-
.../request_test.ts | 53 ++++++++++
.../request_test.ts | 56 +++++++++++
.../src/gemini-via-messages/request_test.ts | 65 ++++++++++++
.../src/gemini-via-responses/request_test.ts | 55 +++++++++++
.../request_test.ts | 53 ++++++++++
.../messages-via-responses/request_test.ts | 80 +++++++++++++++
.../request_test.ts | 52 ++++++++++
.../src/responses-via-messages/request.ts | 2 +-
.../responses-via-messages/request_test.ts | 51 ++++++++++
12 files changed, 567 insertions(+), 3 deletions(-)
diff --git a/packages/translate/src/chat-completions-via-messages/request.ts b/packages/translate/src/chat-completions-via-messages/request.ts
index 82dd22c59..5e83a230b 100644
--- a/packages/translate/src/chat-completions-via-messages/request.ts
+++ b/packages/translate/src/chat-completions-via-messages/request.ts
@@ -1,8 +1,8 @@
import { messagesThinkingBlockFromChatCompletionsScalarReasoning } from '../shared/chat-completions-and-messages/reasoning.ts';
import { parseToolArgumentsObject } from '../shared/messages/tool-arguments.ts';
+import { buildMessagesThinkingFromExtensions } from '../shared/via-messages/anthropic-extensions.ts';
import { applyLastMessageCacheBreakpoint, applyLastToolCacheBreakpoint } from '../shared/via-messages/cache-breakpoints.ts';
import { fetchRemoteImage, type RemoteImageLoader, resolveImageUrlToMessagesImage } from '../shared/via-messages/remote-images.ts';
-import { buildMessagesThinkingFromExtensions } from '../shared/via-messages/anthropic-extensions.ts';
import type { ChatCompletionsPayload, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsTool } from '@floway-dev/protocols/chat-completions';
import { MESSAGES_FALLBACK_MAX_TOKENS, type MessagesAssistantContentBlock, type MessagesMessage, type MessagesPayload, type MessagesTextBlock, type MessagesUserContentBlock } from '@floway-dev/protocols/messages';
diff --git a/packages/translate/src/chat-completions-via-messages/request_test.ts b/packages/translate/src/chat-completions-via-messages/request_test.ts
index e0c04c65b..0fce785db 100644
--- a/packages/translate/src/chat-completions-via-messages/request_test.ts
+++ b/packages/translate/src/chat-completions-via-messages/request_test.ts
@@ -1161,3 +1161,102 @@ test('translateChatCompletionsToMessages rejects an unknown user content part ty
'does not accept video_url content parts',
);
});
+
+// ── Floway extension emission ──
+
+test('translateChatCompletionsToMessages emits thinking_budget extension onto thinking.{enabled, budget_tokens}', async () => {
+ const result = await translateChatCompletionsToMessages(
+ mkPayload({
+ messages: [{ role: 'user', content: 'hi' }],
+ thinking_budget: 4096,
+ }),
+ );
+
+ assertEquals(result.thinking, { type: 'enabled', budget_tokens: 4096 });
+});
+
+test('translateChatCompletionsToMessages emits adaptive_thinking extension onto thinking.{adaptive} (wins over budget)', async () => {
+ const result = await translateChatCompletionsToMessages(
+ mkPayload({
+ messages: [{ role: 'user', content: 'hi' }],
+ thinking_budget: 4096,
+ adaptive_thinking: true,
+ }),
+ );
+
+ assertEquals(result.thinking, { type: 'adaptive' });
+});
+
+test('translateChatCompletionsToMessages maps reasoning_summary onto thinking.display via concise|detailed → summarized', async () => {
+ const concise = await translateChatCompletionsToMessages(mkPayload({ messages: [{ role: 'user', content: 'hi' }], reasoning_summary: 'concise' }));
+ const detailed = await translateChatCompletionsToMessages(mkPayload({ messages: [{ role: 'user', content: 'hi' }], reasoning_summary: 'detailed' }));
+ const omitted = await translateChatCompletionsToMessages(mkPayload({ messages: [{ role: 'user', content: 'hi' }], reasoning_summary: 'omitted' }));
+ const auto = await translateChatCompletionsToMessages(mkPayload({ messages: [{ role: 'user', content: 'hi' }], reasoning_summary: 'auto' }));
+
+ assertEquals(concise.thinking, { type: 'enabled', display: 'summarized' });
+ assertEquals(detailed.thinking, { type: 'enabled', display: 'summarized' });
+ assertEquals(omitted.thinking, { type: 'enabled', display: 'omitted' });
+ // `auto` returns undefined display so Anthropic's account-default applies;
+ // with no budget/adaptive signal there is no thinking block to attach to.
+ assertEquals(auto.thinking, undefined);
+});
+
+test('translateChatCompletionsToMessages merges reasoning_summary onto budget-driven thinking block', async () => {
+ const result = await translateChatCompletionsToMessages(
+ mkPayload({
+ messages: [{ role: 'user', content: 'hi' }],
+ thinking_budget: 2048,
+ reasoning_summary: 'concise',
+ }),
+ );
+
+ assertEquals(result.thinking, { type: 'enabled', budget_tokens: 2048, display: 'summarized' });
+});
+
+test('translateChatCompletionsToMessages emits anthropic_speed onto Messages speed', async () => {
+ const result = await translateChatCompletionsToMessages(
+ mkPayload({
+ messages: [{ role: 'user', content: 'hi' }],
+ anthropic_speed: 'fast',
+ }),
+ );
+
+ assertEquals(result.speed, 'fast');
+});
+
+test('translateChatCompletionsToMessages forwards service_tier verbatim', async () => {
+ const result = await translateChatCompletionsToMessages(
+ mkPayload({
+ messages: [{ role: 'user', content: 'hi' }],
+ service_tier: 'priority',
+ }),
+ );
+
+ assertEquals(result.service_tier, 'priority');
+});
+
+test('translateChatCompletionsToMessages does not emit Messages-protocol fields when the extension is unset', async () => {
+ const result = await translateChatCompletionsToMessages(
+ mkPayload({
+ messages: [{ role: 'user', content: 'hi' }],
+ }),
+ );
+
+ assertEquals(result.thinking, undefined);
+ assertEquals(result.speed, undefined);
+ assertEquals(result.service_tier, undefined);
+});
+
+test('translateChatCompletionsToMessages leaves anthropic_beta as inbound residue (header injection is the gateway-side rule-apply step)', async () => {
+ const result = await translateChatCompletionsToMessages(
+ mkPayload({
+ messages: [{ role: 'user', content: 'hi' }],
+ anthropic_beta: ['fast-mode-2026-02-01', 'context-1m-2025-08-07'],
+ }),
+ );
+
+ // The translated body must not echo the OpenAI-family `anthropic_beta`
+ // field; the per-upstream sanitizer is responsible for stripping any
+ // residue, and the rule-apply pass handles the outbound header.
+ assertEquals('anthropic_beta' in result, false);
+});
diff --git a/packages/translate/src/chat-completions-via-responses/request.ts b/packages/translate/src/chat-completions-via-responses/request.ts
index 1865ff40b..8d33b9e8a 100644
--- a/packages/translate/src/chat-completions-via-responses/request.ts
+++ b/packages/translate/src/chat-completions-via-responses/request.ts
@@ -115,7 +115,7 @@ export const translateChatCompletionsToResponses = (payload: ChatCompletionsPayl
// `reasoning_summary` is the inbound CC extension that materializes onto
// the Responses-native `reasoning.summary` slot. Co-emit alongside
// `reasoning.effort` so a single `reasoning` object captures both knobs.
- const reasoningEffort = payload.reasoning_effort != null ? payload.reasoning_effort : undefined;
+ const reasoningEffort = payload.reasoning_effort ?? undefined;
const reasoningSummary = payload.reasoning_summary;
const reasoning =
reasoningEffort !== undefined || reasoningSummary !== undefined
diff --git a/packages/translate/src/chat-completions-via-responses/request_test.ts b/packages/translate/src/chat-completions-via-responses/request_test.ts
index 3c753a35c..137df562f 100644
--- a/packages/translate/src/chat-completions-via-responses/request_test.ts
+++ b/packages/translate/src/chat-completions-via-responses/request_test.ts
@@ -431,3 +431,56 @@ test('translateChatCompletionsToResponses rejects an unknown message role', () =
'does not accept function messages',
);
});
+
+// ── Floway extension emission ──
+
+test('translateChatCompletionsToResponses maps reasoning_summary onto reasoning.summary', () => {
+ const result = translateChatCompletionsToResponses({
+ model: 'gpt-test',
+ messages: [{ role: 'user', content: 'hi' }],
+ reasoning_summary: 'detailed',
+ });
+
+ assertEquals(result.reasoning, { summary: 'detailed' });
+});
+
+test('translateChatCompletionsToResponses co-emits reasoning_effort and reasoning_summary on the same reasoning object', () => {
+ const result = translateChatCompletionsToResponses({
+ model: 'gpt-test',
+ messages: [{ role: 'user', content: 'hi' }],
+ reasoning_effort: 'xhigh',
+ reasoning_summary: 'concise',
+ });
+
+ assertEquals(result.reasoning, { effort: 'xhigh', summary: 'concise' });
+});
+
+test('translateChatCompletionsToResponses leaves Messages-only extensions as inbound residue', () => {
+ const result = translateChatCompletionsToResponses({
+ model: 'gpt-test',
+ messages: [{ role: 'user', content: 'hi' }],
+ thinking_budget: 4096,
+ adaptive_thinking: true,
+ anthropic_speed: 'fast',
+ anthropic_beta: ['fast-mode-2026-02-01'],
+ });
+
+ // Responses has no slot for any of these; the sanitizer strips the
+ // residue. Translate must not invent a target field.
+ assertEquals('thinking_budget' in result, false);
+ assertEquals('adaptive_thinking' in result, false);
+ assertEquals('anthropic_speed' in result, false);
+ assertEquals('anthropic_beta' in result, false);
+});
+
+test('translateChatCompletionsToResponses passes a fully extension-free payload through unchanged from prior behavior', () => {
+ const result = translateChatCompletionsToResponses({
+ model: 'gpt-test',
+ messages: [{ role: 'user', content: 'hi' }],
+ reasoning_effort: 'medium',
+ service_tier: 'priority',
+ });
+
+ assertEquals(result.reasoning, { effort: 'medium' });
+ assertEquals(result.service_tier, 'priority');
+});
diff --git a/packages/translate/src/gemini-via-chat-completions/request_test.ts b/packages/translate/src/gemini-via-chat-completions/request_test.ts
index 7b9c8403f..45f98c146 100644
--- a/packages/translate/src/gemini-via-chat-completions/request_test.ts
+++ b/packages/translate/src/gemini-via-chat-completions/request_test.ts
@@ -477,3 +477,59 @@ test('buildTargetRequest rejects a part with no recognized content field', () =>
'has no recognized content',
);
});
+
+// ── Floway extension emission ──
+
+test('buildTargetRequest emits generationConfig.verbosity onto Chat verbosity', () => {
+ const result = buildTargetRequest(
+ { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { verbosity: 'low' } },
+ 'gpt-test',
+ );
+
+ assertEquals(result.verbosity, 'low');
+});
+
+test('buildTargetRequest emits generationConfig.serviceTier onto Chat service_tier (camelCase → snake_case)', () => {
+ const result = buildTargetRequest(
+ { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { serviceTier: 'priority' } },
+ 'gpt-test',
+ );
+
+ assertEquals(result.service_tier, 'priority');
+});
+
+test('buildTargetRequest drops top-level Anthropic extensions (anthropicSpeed, anthropicBeta) on Chat', () => {
+ const result = buildTargetRequest(
+ {
+ contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
+ anthropicSpeed: 'fast',
+ anthropicBeta: ['fast-mode-2026-02-01'],
+ },
+ 'gpt-test',
+ );
+
+ assertEquals('anthropicSpeed' in result, false);
+ assertEquals('anthropic_speed' in result, false);
+ assertEquals('speed' in result, false);
+ assertEquals('anthropicBeta' in result, false);
+ assertEquals('anthropic_beta' in result, false);
+});
+
+test('buildTargetRequest extends reasoning_effort enum to recognize xhigh and max', () => {
+ const xhigh = buildTargetRequest(
+ { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { thinkingLevel: 'xhigh' } } },
+ 'gpt-test',
+ );
+ const max = buildTargetRequest(
+ { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { thinkingLevel: 'max' } } },
+ 'gpt-test',
+ );
+ const minimal = buildTargetRequest(
+ { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { thinkingLevel: 'minimal' } } },
+ 'gpt-test',
+ );
+
+ assertEquals(xhigh.reasoning_effort, 'xhigh');
+ assertEquals(max.reasoning_effort, 'max');
+ assertEquals(minimal.reasoning_effort, 'minimal');
+});
diff --git a/packages/translate/src/gemini-via-messages/request_test.ts b/packages/translate/src/gemini-via-messages/request_test.ts
index 2bfd96510..b10339a49 100644
--- a/packages/translate/src/gemini-via-messages/request_test.ts
+++ b/packages/translate/src/gemini-via-messages/request_test.ts
@@ -405,3 +405,68 @@ test('buildTargetRequest rejects a part with no recognized content field', () =>
'has no recognized content',
);
});
+
+// ── Floway extension emission ──
+
+test('buildTargetRequest emits top-level anthropicSpeed onto Messages speed', () => {
+ const result = buildTargetRequest(
+ { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], anthropicSpeed: 'fast' },
+ 'claude-test',
+ noOptions,
+ );
+
+ assertEquals(result.speed, 'fast');
+});
+
+test('buildTargetRequest emits generationConfig.serviceTier onto Messages service_tier', () => {
+ const result = buildTargetRequest(
+ { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { serviceTier: 'priority' } },
+ 'claude-test',
+ noOptions,
+ );
+
+ assertEquals(result.service_tier, 'priority');
+});
+
+test('buildTargetRequest maps includeThoughts onto thinking.display (true → summarized, false → omitted)', () => {
+ const summarized = buildTargetRequest(
+ { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { includeThoughts: true } } },
+ 'claude-test',
+ noOptions,
+ );
+ const omitted = buildTargetRequest(
+ { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { includeThoughts: false } } },
+ 'claude-test',
+ noOptions,
+ );
+
+ assertEquals(summarized.thinking, { type: 'enabled', display: 'summarized' });
+ assertEquals(omitted.thinking, { type: 'enabled', display: 'omitted' });
+});
+
+test('buildTargetRequest drops verbosity extension on Messages (no slot)', () => {
+ const result = buildTargetRequest(
+ { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { verbosity: 'low' } },
+ 'claude-test',
+ noOptions,
+ );
+
+ assertEquals('verbosity' in result, false);
+});
+
+test('buildTargetRequest leaves anthropicBeta as inbound residue for the gateway header pass', () => {
+ const result = buildTargetRequest(
+ {
+ contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
+ anthropicBeta: ['fast-mode-2026-02-01'],
+ },
+ 'claude-test',
+ noOptions,
+ );
+
+ // Translate cannot move it to a header; the gateway-side rule-apply pass
+ // (Task 5) materializes anthropicBeta into the outbound anthropic-beta
+ // header. The body must not echo it.
+ assertEquals('anthropicBeta' in result, false);
+ assertEquals('anthropic_beta' in result, false);
+});
diff --git a/packages/translate/src/gemini-via-responses/request_test.ts b/packages/translate/src/gemini-via-responses/request_test.ts
index e140d715c..66476f0d8 100644
--- a/packages/translate/src/gemini-via-responses/request_test.ts
+++ b/packages/translate/src/gemini-via-responses/request_test.ts
@@ -411,3 +411,58 @@ test('buildTargetRequest rejects a part with no recognized content field', () =>
'has no recognized content',
);
});
+
+// ── Floway extension emission ──
+
+test('buildTargetRequest emits generationConfig.verbosity onto text.verbosity', () => {
+ const result = buildTargetRequest(
+ { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { verbosity: 'medium' } },
+ 'gpt-test',
+ );
+
+ assertEquals(result.text?.verbosity, 'medium');
+});
+
+test('buildTargetRequest emits generationConfig.serviceTier onto Responses service_tier', () => {
+ const result = buildTargetRequest(
+ { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { serviceTier: 'priority' } },
+ 'gpt-test',
+ );
+
+ assertEquals(result.service_tier, 'priority');
+});
+
+test('buildTargetRequest maps includeThoughts onto reasoning.summary (true → detailed, false → omitted)', () => {
+ const withSummary = buildTargetRequest(
+ {
+ contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
+ generationConfig: { thinkingConfig: { thinkingLevel: 'high', includeThoughts: true } },
+ },
+ 'gpt-test',
+ );
+ const withoutSummary = buildTargetRequest(
+ {
+ contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
+ generationConfig: { thinkingConfig: { thinkingLevel: 'high', includeThoughts: false } },
+ },
+ 'gpt-test',
+ );
+
+ assertEquals(withSummary.reasoning, { effort: 'high', summary: 'detailed' });
+ assertEquals(withoutSummary.reasoning, { effort: 'high', summary: 'omitted' });
+});
+
+test('buildTargetRequest drops top-level Anthropic extensions on Responses', () => {
+ const result = buildTargetRequest(
+ {
+ contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
+ anthropicSpeed: 'fast',
+ anthropicBeta: ['fast-mode-2026-02-01'],
+ },
+ 'gpt-test',
+ );
+
+ assertEquals('anthropicSpeed' in result, false);
+ assertEquals('anthropic_speed' in result, false);
+ assertEquals('anthropicBeta' in result, false);
+});
diff --git a/packages/translate/src/messages-via-chat-completions/request_test.ts b/packages/translate/src/messages-via-chat-completions/request_test.ts
index a76ec42b1..50f326860 100644
--- a/packages/translate/src/messages-via-chat-completions/request_test.ts
+++ b/packages/translate/src/messages-via-chat-completions/request_test.ts
@@ -480,3 +480,56 @@ test('translateMessagesToChatCompletions rejects an unknown message role', () =>
'does not accept role tool',
);
});
+
+// ── Floway extension emission ──
+
+test('translateMessagesToChatCompletions emits verbosity extension verbatim', () => {
+ const result = translateMessagesToChatCompletions({
+ model: 'gpt-test',
+ max_tokens: 256,
+ messages: [{ role: 'user', content: 'hi' }],
+ verbosity: 'low',
+ });
+
+ assertEquals(result.verbosity, 'low');
+});
+
+test('translateMessagesToChatCompletions forwards service_tier verbatim', () => {
+ const result = translateMessagesToChatCompletions({
+ model: 'gpt-test',
+ max_tokens: 256,
+ messages: [{ role: 'user', content: 'hi' }],
+ service_tier: 'priority',
+ });
+
+ assertEquals(result.service_tier, 'priority');
+});
+
+test('translateMessagesToChatCompletions drops Anthropic-only knobs that have no Chat-completions slot', () => {
+ const result = translateMessagesToChatCompletions({
+ model: 'gpt-test',
+ max_tokens: 256,
+ messages: [{ role: 'user', content: 'hi' }],
+ thinking: { type: 'enabled', budget_tokens: 4096, display: 'summarized' },
+ speed: 'fast',
+ });
+
+ // Only the OpenAI-canonical effort axis survives; budget_tokens, display,
+ // and speed have no Chat-completions equivalent and the translate function
+ // emits nothing for them. (The sanitizer would strip anything anyway.)
+ assertEquals(result.reasoning_effort, 'medium');
+ assertEquals('thinking_budget' in result, false);
+ assertEquals('reasoning_summary' in result, false);
+ assertEquals('speed' in result, false);
+ assertEquals('anthropic_speed' in result, false);
+});
+
+test('translateMessagesToChatCompletions does not emit verbosity when the extension is unset', () => {
+ const result = translateMessagesToChatCompletions({
+ model: 'gpt-test',
+ max_tokens: 256,
+ messages: [{ role: 'user', content: 'hi' }],
+ });
+
+ assertEquals('verbosity' in result, false);
+});
diff --git a/packages/translate/src/messages-via-responses/request_test.ts b/packages/translate/src/messages-via-responses/request_test.ts
index 6a6e3fa94..2846f1d39 100644
--- a/packages/translate/src/messages-via-responses/request_test.ts
+++ b/packages/translate/src/messages-via-responses/request_test.ts
@@ -502,3 +502,83 @@ test('translateMessagesToResponses rejects an unknown message role', () => {
'does not accept role tool',
);
});
+
+// ── Floway extension emission ──
+
+test('translateMessagesToResponses emits verbosity onto text.verbosity', () => {
+ const result = translateMessagesToResponses({
+ model: 'gpt-test',
+ max_tokens: 256,
+ messages: [{ role: 'user', content: 'hi' }],
+ verbosity: 'medium',
+ });
+
+ assertEquals(result.text?.verbosity, 'medium');
+});
+
+test('translateMessagesToResponses co-emits verbosity with json_schema format under text', () => {
+ const result = translateMessagesToResponses({
+ model: 'gpt-test',
+ max_tokens: 256,
+ messages: [{ role: 'user', content: 'hi' }],
+ verbosity: 'low',
+ output_config: { format: { type: 'json_schema', schema: { type: 'object', properties: {} } } },
+ });
+
+ assertEquals(result.text?.verbosity, 'low');
+ assertEquals(result.text?.format?.type, 'json_schema');
+});
+
+test('translateMessagesToResponses maps thinking.display onto reasoning.summary (summarized → concise, omitted → omitted, full → detailed)', () => {
+ const summarized = translateMessagesToResponses({
+ model: 'gpt-test',
+ max_tokens: 256,
+ messages: [{ role: 'user', content: 'hi' }],
+ thinking: { type: 'enabled', display: 'summarized' },
+ });
+ const omitted = translateMessagesToResponses({
+ model: 'gpt-test',
+ max_tokens: 256,
+ messages: [{ role: 'user', content: 'hi' }],
+ thinking: { type: 'enabled', display: 'omitted' },
+ });
+ const full = translateMessagesToResponses({
+ model: 'gpt-test',
+ max_tokens: 256,
+ messages: [{ role: 'user', content: 'hi' }],
+ thinking: { type: 'enabled', display: 'full' },
+ });
+
+ assertEquals(summarized.reasoning?.summary, 'concise');
+ assertEquals(omitted.reasoning?.summary, 'omitted');
+ assertEquals(full.reasoning?.summary, 'detailed');
+});
+
+test('translateMessagesToResponses forwards service_tier verbatim', () => {
+ const result = translateMessagesToResponses({
+ model: 'gpt-test',
+ max_tokens: 256,
+ messages: [{ role: 'user', content: 'hi' }],
+ service_tier: 'priority',
+ });
+
+ assertEquals(result.service_tier, 'priority');
+});
+
+test('translateMessagesToResponses drops Anthropic-only mode knobs the Responses wire cannot express', () => {
+ const result = translateMessagesToResponses({
+ model: 'gpt-test',
+ max_tokens: 256,
+ messages: [{ role: 'user', content: 'hi' }],
+ thinking: { type: 'enabled', budget_tokens: 4096 },
+ speed: 'fast',
+ });
+
+ // budget_tokens, adaptive, speed, anthropic-beta have no Responses slot;
+ // translate emits nothing for them. The sanitizer drops residue.
+ assertEquals('thinking_budget' in result, false);
+ assertEquals('adaptive_thinking' in result, false);
+ assertEquals('anthropic_speed' in result, false);
+ assertEquals('anthropic_beta' in result, false);
+ assertEquals('speed' in result, false);
+});
diff --git a/packages/translate/src/responses-via-chat-completions/request_test.ts b/packages/translate/src/responses-via-chat-completions/request_test.ts
index f708f5929..448222f45 100644
--- a/packages/translate/src/responses-via-chat-completions/request_test.ts
+++ b/packages/translate/src/responses-via-chat-completions/request_test.ts
@@ -1455,3 +1455,55 @@ test('translateResponsesToChatCompletions maps multimodal function_call_output i
{ type: 'image_url', image_url: { url: 'data:image/png;base64,AQID', detail: 'high' } },
]);
});
+
+// ── Floway extension emission ──
+
+test('translateResponsesToChatCompletions maps text.verbosity onto verbosity', () => {
+ const result = translateResponsesToChatCompletions({
+ model: 'gpt-test',
+ input: [{ type: 'message', role: 'user', content: 'hi' }],
+ text: { verbosity: 'low' },
+ });
+
+ assertEquals(result.target.verbosity, 'low');
+});
+
+test('translateResponsesToChatCompletions co-emits reasoning.effort onto reasoning_effort and service_tier verbatim', () => {
+ const result = translateResponsesToChatCompletions({
+ model: 'gpt-test',
+ input: [{ type: 'message', role: 'user', content: 'hi' }],
+ reasoning: { effort: 'xhigh' },
+ service_tier: 'priority',
+ });
+
+ assertEquals(result.target.reasoning_effort, 'xhigh');
+ assertEquals(result.target.service_tier, 'priority');
+});
+
+test('translateResponsesToChatCompletions leaves Messages-only extensions as inbound residue (CC has no slot)', () => {
+ const result = translateResponsesToChatCompletions({
+ model: 'gpt-test',
+ input: [{ type: 'message', role: 'user', content: 'hi' }],
+ thinking_budget: 4096,
+ adaptive_thinking: true,
+ anthropic_speed: 'fast',
+ anthropic_beta: ['fast-mode-2026-02-01'],
+ });
+
+ assertEquals('thinking_budget' in result.target, false);
+ assertEquals('adaptive_thinking' in result.target, false);
+ assertEquals('anthropic_speed' in result.target, false);
+ assertEquals('anthropic_beta' in result.target, false);
+});
+
+test('translateResponsesToChatCompletions drops reasoning.summary (Chat has no slot)', () => {
+ const result = translateResponsesToChatCompletions({
+ model: 'gpt-test',
+ input: [{ type: 'message', role: 'user', content: 'hi' }],
+ reasoning: { effort: 'medium', summary: 'concise' },
+ });
+
+ assertEquals(result.target.reasoning_effort, 'medium');
+ // Chat Completions has no slot for reasoning.summary; ensure no surrogate field is invented.
+ assertEquals('reasoning_summary' in result.target, false);
+});
diff --git a/packages/translate/src/responses-via-messages/request.ts b/packages/translate/src/responses-via-messages/request.ts
index 786ce4a98..504ca45fa 100644
--- a/packages/translate/src/responses-via-messages/request.ts
+++ b/packages/translate/src/responses-via-messages/request.ts
@@ -1,9 +1,9 @@
import { parseToolArgumentsObject } from '../shared/messages/tool-arguments.ts';
import { responsesReasoningToMessagesUpstreamBlock } from '../shared/messages-and-responses/reasoning.ts';
import { buildCustomToolInputSchema } from '../shared/responses-via/custom-tool-wrap.ts';
+import { buildMessagesThinkingFromExtensions, mapSummaryToAnthropicDisplay } from '../shared/via-messages/anthropic-extensions.ts';
import { applyLastMessageCacheBreakpoint, applyLastToolCacheBreakpoint, EPHEMERAL_CACHE_CONTROL } from '../shared/via-messages/cache-breakpoints.ts';
import { fetchRemoteImage, type RemoteImageLoader, resolveImageUrlToMessagesImage } from '../shared/via-messages/remote-images.ts';
-import { buildMessagesThinkingFromExtensions, mapSummaryToAnthropicDisplay } from '../shared/via-messages/anthropic-extensions.ts';
import {
MESSAGES_FALLBACK_MAX_TOKENS,
type MessagesAssistantContentBlock,
diff --git a/packages/translate/src/responses-via-messages/request_test.ts b/packages/translate/src/responses-via-messages/request_test.ts
index 2aedc14b7..f36ff89f6 100644
--- a/packages/translate/src/responses-via-messages/request_test.ts
+++ b/packages/translate/src/responses-via-messages/request_test.ts
@@ -645,3 +645,54 @@ test('translateResponsesToMessages keeps payload.instructions as the Messages to
assertEquals(result.target.messages[0], { role: 'system', content: 'mid-array note' });
assertEquals(result.target.messages[1].role, 'user');
});
+
+// ── Floway extension emission ──
+
+const minimalResponsesPayload = (overrides: Record<string, unknown>) => ({
+ model: 'claude-test' as const,
+ input: [{ type: 'message' as const, role: 'user' as const, content: 'hi' }],
+ ...overrides,
+});
+
+test('translateResponsesToMessages emits thinking_budget onto thinking.{enabled, budget_tokens}', async () => {
+ const result = await translateResponsesToMessages(minimalResponsesPayload({ thinking_budget: 8192 }));
+ assertEquals(result.target.thinking, { type: 'enabled', budget_tokens: 8192 });
+});
+
+test('translateResponsesToMessages emits adaptive_thinking onto thinking.{adaptive}', async () => {
+ const result = await translateResponsesToMessages(minimalResponsesPayload({ adaptive_thinking: true }));
+ assertEquals(result.target.thinking, { type: 'adaptive' });
+});
+
+test('translateResponsesToMessages maps reasoning.summary onto thinking.display (concise|detailed → summarized, omitted → omitted)', async () => {
+ const concise = await translateResponsesToMessages(minimalResponsesPayload({ reasoning: { effort: 'high', summary: 'concise' } }));
+ const detailed = await translateResponsesToMessages(minimalResponsesPayload({ reasoning: { effort: 'high', summary: 'detailed' } }));
+ const omitted = await translateResponsesToMessages(minimalResponsesPayload({ reasoning: { effort: 'high', summary: 'omitted' } }));
+
+ assertEquals(concise.target.thinking, { type: 'enabled', display: 'summarized' });
+ assertEquals(detailed.target.thinking, { type: 'enabled', display: 'summarized' });
+ assertEquals(omitted.target.thinking, { type: 'enabled', display: 'omitted' });
+});
+
+test('translateResponsesToMessages emits anthropic_speed onto speed', async () => {
+ const result = await translateResponsesToMessages(minimalResponsesPayload({ anthropic_speed: 'fast' }));
+ assertEquals(result.target.speed, 'fast');
+});
+
+test('translateResponsesToMessages forwards service_tier verbatim', async () => {
+ const result = await translateResponsesToMessages(minimalResponsesPayload({ service_tier: 'priority' }));
+ assertEquals(result.target.service_tier, 'priority');
+});
+
+test('translateResponsesToMessages leaves anthropic_beta as inbound residue for the gateway header pass', async () => {
+ const result = await translateResponsesToMessages(minimalResponsesPayload({ anthropic_beta: ['fast-mode-2026-02-01'] }));
+ assertEquals('anthropic_beta' in result.target, false);
+});
+
+test('translateResponsesToMessages emission stack: budget + summary writes display onto the budget-driven block', async () => {
+ const result = await translateResponsesToMessages(minimalResponsesPayload({
+ thinking_budget: 2048,
+ reasoning: { effort: 'medium', summary: 'concise' },
+ }));
+ assertEquals(result.target.thinking, { type: 'enabled', budget_tokens: 2048, display: 'summarized' });
+});
From d7e9fe0d2c0a4d7a62cef83ff4101fa170a41187 Mon Sep 17 00:00:00 2001
From: Menci
Date: Thu, 25 Jun 2026 21:54:09 +0800
Subject: [PATCH 005/170] feat(gateway): weave alias matching into model
resolution fan-out
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
enumerateModelInterpretations now matches each (provider, lookupId) pair
against the global alias table (post-prefix-strip, semantic P). Per the
matched alias's onConflict, the fan-out pushes either the alias-rewrite
interpretation, the real-name interpretation, or both (in either order).
A post-resolution prune drops the alias-rewrite interpretation when the
real-name lookup resolves under onConflict=real-only; when the real
lookup misses, the alias-rewrite is kept, so an empty upstream catalog
falls back to the alias's target id.
The aliasRules and aliasName ride through into a new ChatCandidate
wrapper type so downstream attempt logic can apply the rules and set
the x-floway-alias response header without polluting the
@floway-dev/provider package. RoutingDecision and classifyResponsesItemAffinity
become generic over the candidate type to carry alias metadata across
the affinity walk without re-deriving it.
modelAliases is added to the central Repo interface so each chat
serve.ts call site reaches it through getRepo() — the same pattern
the other operator-managed config tables follow.
---
.../chat/chat-completions/routing.ts | 4 +-
.../data-plane/chat/chat-completions/serve.ts | 3 +
.../src/data-plane/chat/gemini/routing.ts | 4 +-
.../src/data-plane/chat/gemini/serve.ts | 5 +
.../src/data-plane/chat/messages/routing.ts | 4 +-
.../src/data-plane/chat/messages/serve.ts | 5 +
.../chat/responses/items/affinity.ts | 12 +-
.../src/data-plane/chat/responses/routing.ts | 4 +-
.../data-plane/chat/responses/serve-prep.ts | 7 +-
.../src/data-plane/chat/shared/candidates.ts | 41 +++-
.../data-plane/chat/shared/candidates_test.ts | 12 +
.../src/data-plane/chat/shared/routing.ts | 12 +-
.../src/data-plane/model-aliases/match.ts | 19 ++
.../data-plane/model-aliases/match_test.ts | 54 +++++
.../src/data-plane/providers/registry.ts | 123 ++++++++++-
.../src/data-plane/providers/registry_test.ts | 208 +++++++++++++++++-
packages/gateway/src/repo/memory.ts | 19 ++
packages/gateway/src/repo/sql.ts | 13 ++
packages/gateway/src/repo/types.ts | 8 +
19 files changed, 515 insertions(+), 42 deletions(-)
create mode 100644 packages/gateway/src/data-plane/model-aliases/match.ts
create mode 100644 packages/gateway/src/data-plane/model-aliases/match_test.ts
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/routing.ts b/packages/gateway/src/data-plane/chat/chat-completions/routing.ts
index 381feaea9..efcc380d6 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/routing.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/routing.ts
@@ -1,13 +1,13 @@
import { classifyResponsesItemAffinity } from '../responses/items/affinity.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
+import type { ChatCandidate } from '../shared/candidates.ts';
import type { RoutingDecision } from '../shared/routing.ts';
import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
import { chatCompletionsViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
export const planChatCompletionsRouting = async (input: {
readonly payload: ChatCompletionsPayload;
- readonly candidates: readonly ProviderCandidate[];
+ readonly candidates: readonly ChatCandidate[];
readonly store: StatefulResponsesStore;
}): Promise =>
await classifyResponsesItemAffinity({
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
index 73e8c1afd..5d27541f9 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
@@ -1,6 +1,7 @@
import { chatCompletionsAttempt } from './attempt.ts';
import { renderChatCompletionsFailure } from './errors.ts';
import { planChatCompletionsRouting } from './routing.ts';
+import { getRepo } from '../../../repo/index.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
import { enumerateProviderCandidates } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -18,9 +19,11 @@ export interface ChatCompletionsServeGenerateArgs {
export const chatCompletionsServe = {
generate: async (args: ChatCompletionsServeGenerateArgs): Promise>> => {
const { payload, ctx, store, headers } = args;
+ const aliases = await getRepo().modelAliases.loadAll();
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: ctx.upstreamIds,
model: payload.model,
+ aliases,
pickTarget: endpoints =>
endpoints.chatCompletions ? 'chat-completions'
: endpoints.messages ? 'messages'
diff --git a/packages/gateway/src/data-plane/chat/gemini/routing.ts b/packages/gateway/src/data-plane/chat/gemini/routing.ts
index 2d5e37d87..28e353c59 100644
--- a/packages/gateway/src/data-plane/chat/gemini/routing.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/routing.ts
@@ -1,6 +1,6 @@
import { classifyResponsesItemAffinity } from '../responses/items/affinity.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
+import type { ChatCandidate } from '../shared/candidates.ts';
import type { RoutingDecision } from '../shared/routing.ts';
import type { GeminiPayload } from '@floway-dev/protocols/gemini';
import { geminiViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
@@ -9,7 +9,7 @@ export type GeminiRoutingDecision = RoutingDecision;
export const planGeminiRouting = async (input: {
readonly payload: GeminiPayload;
- readonly candidates: readonly ProviderCandidate[];
+ readonly candidates: readonly ChatCandidate[];
readonly store: StatefulResponsesStore;
}): Promise =>
await classifyResponsesItemAffinity({
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts
index f5daa1d86..840da62c7 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts
@@ -1,6 +1,7 @@
import { geminiAttempt } from './attempt.ts';
import { renderGeminiFailure } from './errors.ts';
import { planGeminiRouting } from './routing.ts';
+import { getRepo } from '../../../repo/index.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
import { enumerateProviderCandidates } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -30,9 +31,11 @@ export interface GeminiServeCountTokensArgs {
export const geminiServe = {
generate: async (args: GeminiServeGenerateArgs): Promise>> => {
const { payload, ctx, store, model, headers } = args;
+ const aliases = await getRepo().modelAliases.loadAll();
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: ctx.upstreamIds,
model,
+ aliases,
// Gemini has no native upstream target in the provider API; prefer
// Chat Completions, then Messages, then Responses.
pickTarget: endpoints => endpoints.chatCompletions ? 'chat-completions' : endpoints.messages ? 'messages' : endpoints.responses ? 'responses' : null,
@@ -60,9 +63,11 @@ export const geminiServe = {
countTokens: async (args: GeminiServeCountTokensArgs): Promise> | PlainResult> => {
const { payload, ctx, store, model, headers } = args;
+ const aliases = await getRepo().modelAliases.loadAll();
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: ctx.upstreamIds,
model,
+ aliases,
// Gemini countTokens has no native upstream support; only providers
// exposing the Messages endpoint qualify because we translate Gemini
// → Messages and call Messages count_tokens upstream.
diff --git a/packages/gateway/src/data-plane/chat/messages/routing.ts b/packages/gateway/src/data-plane/chat/messages/routing.ts
index d6de52107..e9783625c 100644
--- a/packages/gateway/src/data-plane/chat/messages/routing.ts
+++ b/packages/gateway/src/data-plane/chat/messages/routing.ts
@@ -1,6 +1,6 @@
import { classifyResponsesItemAffinity } from '../responses/items/affinity.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
+import type { ChatCandidate } from '../shared/candidates.ts';
import type { RoutingDecision } from '../shared/routing.ts';
import type { MessagesPayload } from '@floway-dev/protocols/messages';
import { messagesViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
@@ -9,7 +9,7 @@ export type MessagesRoutingDecision = RoutingDecision;
export const planMessagesRouting = async (input: {
readonly payload: MessagesPayload;
- readonly candidates: readonly ProviderCandidate[];
+ readonly candidates: readonly ChatCandidate[];
readonly store: StatefulResponsesStore;
}): Promise =>
await classifyResponsesItemAffinity({
diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts
index ae9bb5d6c..719091768 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve.ts
@@ -1,6 +1,7 @@
import { messagesAttempt } from './attempt.ts';
import { renderMessagesFailure } from './errors.ts';
import { planMessagesRouting } from './routing.ts';
+import { getRepo } from '../../../repo/index.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
import { enumerateProviderCandidates } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -25,9 +26,11 @@ export interface MessagesServeCountTokensArgs {
export const messagesServe = {
generate: async (args: MessagesServeGenerateArgs): Promise>> => {
const { payload, ctx, store, headers } = args;
+ const aliases = await getRepo().modelAliases.loadAll();
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: ctx.upstreamIds,
model: payload.model,
+ aliases,
pickTarget: endpoints =>
endpoints.messages ? 'messages'
: endpoints.responses ? 'responses'
@@ -57,9 +60,11 @@ export const messagesServe = {
countTokens: async (args: MessagesServeCountTokensArgs): Promise> | PlainResult> => {
const { payload, ctx, store, headers } = args;
+ const aliases = await getRepo().modelAliases.loadAll();
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: ctx.upstreamIds,
model: payload.model,
+ aliases,
pickTarget: endpoints => endpoints.messages ? 'messages' : null,
scheduler: ctx.backgroundScheduler,
currentColo: ctx.currentColo,
diff --git a/packages/gateway/src/data-plane/chat/responses/items/affinity.ts b/packages/gateway/src/data-plane/chat/responses/items/affinity.ts
index 8fbc1a146..d5b5e5e47 100644
--- a/packages/gateway/src/data-plane/chat/responses/items/affinity.ts
+++ b/packages/gateway/src/data-plane/chat/responses/items/affinity.ts
@@ -98,10 +98,10 @@ const collectStoredResponsesItemRefs = async (
return references;
};
-const orderCandidatesByStoredResponsesAffinity = (
- candidates: readonly ProviderCandidate[],
+const orderCandidatesByStoredResponsesAffinity = (
+ candidates: readonly T[],
preferredUpstreamIds: ReadonlySet,
-): readonly ProviderCandidate[] => {
+): readonly T[] => {
const preferred = [...preferredUpstreamIds].reverse();
if (preferred.length === 0) return candidates;
@@ -113,17 +113,17 @@ const orderCandidatesByStoredResponsesAffinity = (
return [...preferredCandidates, ...remainingCandidates];
};
-export const classifyResponsesItemAffinity = async (input: {
+export const classifyResponsesItemAffinity = async (input: {
sourceItems: TSourceItems;
view: ResponsesItemsView;
store: StatefulResponsesStore;
- candidates: readonly ProviderCandidate[];
+ candidates: readonly TCandidate[];
// Items the caller will stage as inputs after the affinity walk; passed
// here so `loadInputItems` can pre-load any stored row whose content hash
// matches one of them. Without this, a duplicate user message resent on
// a later turn cannot be reused — it would mint a fresh row each time.
inputItemsToStage?: readonly ResponsesInputItem[];
-}): Promise => {
+}): Promise> => {
const { sourceItems, view, store, candidates, inputItemsToStage } = input;
await store.loadInputItems({
sourceItems,
diff --git a/packages/gateway/src/data-plane/chat/responses/routing.ts b/packages/gateway/src/data-plane/chat/responses/routing.ts
index 519e95bfe..05661aa69 100644
--- a/packages/gateway/src/data-plane/chat/responses/routing.ts
+++ b/packages/gateway/src/data-plane/chat/responses/routing.ts
@@ -1,5 +1,5 @@
import { classifyResponsesItemAffinity } from './items/affinity.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
+import type { ChatCandidate } from '../shared/candidates.ts';
import type { RoutingDecision } from '../shared/routing.ts';
import type { StatefulResponsesStore } from './items/store.ts';
import type { ResponsesInputItem, ResponsesPayload } from '@floway-dev/protocols/responses';
@@ -7,7 +7,7 @@ import { responsesItemsView } from '@floway-dev/translate/via-responses/response
export const planResponsesRouting = async (input: {
readonly payload: ResponsesPayload;
- readonly candidates: readonly ProviderCandidate[];
+ readonly candidates: readonly ChatCandidate[];
readonly store: StatefulResponsesStore;
}): Promise => {
// A bare-string input is wrapped into a synthetic user message for staging;
diff --git a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
index 96096ee29..ec4a48afa 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
@@ -1,7 +1,8 @@
import { renderResponsesFailure } from './errors.ts';
import type { StatefulResponsesStore } from './items/store.ts';
import { planResponsesRouting } from './routing.ts';
-import { enumerateProviderCandidates, type ProviderCandidate } from '../shared/candidates.ts';
+import { getRepo } from '../../../repo/index.ts';
+import { enumerateProviderCandidates, type ChatCandidate } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
import type { ModelEndpoints, ProtocolFrame } from '@floway-dev/protocols/common';
import type { ResponsesInputItem, ResponsesPayload, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
@@ -72,7 +73,7 @@ const stageUserInputItems = async (input: ResponsesPayload['input'], store: Stat
export type ResponsesServePlan =
| { readonly kind: 'failure'; readonly result: ExecuteResult> }
- | { readonly kind: 'ready'; readonly prepared: ResponsesPayload; readonly candidate: ProviderCandidate };
+ | { readonly kind: 'ready'; readonly prepared: ResponsesPayload; readonly candidate: ChatCandidate };
// Runs the shared serve-side prep both `responsesServe.generate` and
// `responsesServe.compact` need before dispatching to `responsesAttempt`:
@@ -88,9 +89,11 @@ export const prepareResponsesServePlan = async (args: {
}): Promise => {
const { payload, ctx, store, pickTarget } = args;
const prepared = await expandPreviousResponseId(payload, store);
+ const aliases = await getRepo().modelAliases.loadAll();
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: ctx.upstreamIds,
model: prepared.model,
+ aliases,
pickTarget,
scheduler: ctx.backgroundScheduler,
currentColo: ctx.currentColo,
diff --git a/packages/gateway/src/data-plane/chat/shared/candidates.ts b/packages/gateway/src/data-plane/chat/shared/candidates.ts
index 86b02e721..7a58f9e08 100644
--- a/packages/gateway/src/data-plane/chat/shared/candidates.ts
+++ b/packages/gateway/src/data-plane/chat/shared/candidates.ts
@@ -1,3 +1,4 @@
+import type { ModelAlias, ModelAliasRules } from '../../../control-plane/model-aliases/types.ts';
import { createPerRequestFetcher } from '../../../dial/per-request.ts';
import { collectInterpretationOutcomes, enumerateModelInterpretations, listModelProviders } from '../../providers/registry.ts';
import type { BackgroundScheduler } from '@floway-dev/platform';
@@ -6,6 +7,18 @@ import type { ChatTargetApi, ProviderCandidate } from '@floway-dev/provider';
export type { ProviderCandidate };
+// Wrapper around `ProviderCandidate` that carries the matched alias's
+// operator-locked request-time rules and the alias name. The wrapper lives
+// here (in the gateway) rather than on `ProviderCandidate` itself to keep
+// the `@floway-dev/provider` package unaware of the gateway's alias
+// concept. Downstream attempt logic narrows the candidate when it needs
+// to apply rules or stamp the `x-floway-alias` response header; passthrough
+// consumers continue to treat the candidate as a plain `ProviderCandidate`.
+export type ChatCandidate = ProviderCandidate & {
+ readonly aliasRules?: ModelAliasRules;
+ readonly aliasName?: string;
+};
+
// Returns the candidates that satisfy both the model resolution and the
// target-endpoint pick, plus a `sawModel` flag that distinguishes the
// "model is missing entirely" failure from "model exists but does not
@@ -13,11 +26,16 @@ export type { ProviderCandidate };
// whose catalog fetch rejected this round so the caller's failure
// renderer can surface them parenthetically.
export const enumerateProviderCandidates = async ({
- upstreamIds, model, pickTarget, scheduler, currentColo,
+ upstreamIds, model, aliases, pickTarget, scheduler, currentColo,
}: {
// null = unrestricted; empty list = no providers visible.
upstreamIds: readonly string[] | null;
model: string;
+ // Operator-managed alias table loaded by the caller (typically via
+ // `getRepo().modelAliases.loadAll()`). The fan-out matches each
+ // (provider, lookupId) interpretation against this list; an empty list
+ // is a valid input and produces only literal interpretations.
+ aliases: readonly ModelAlias[];
pickTarget: (endpoints: ModelEndpoints) => ChatTargetApi | null;
// Threaded into `resolveModelForProvider` so the per-upstream catalog
// lookup hits the SWR-cached `fetchUpstreamModelsCached` instead of
@@ -27,7 +45,7 @@ export const enumerateProviderCandidates = async ({
// into the per-request fetcher so colo-scoped fallback entries can be
// honoured at dial time.
currentColo: string;
-}): Promise<{ readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean; readonly failedUpstreams: readonly string[] }> => {
+}): Promise<{ readonly candidates: readonly ChatCandidate[]; readonly sawModel: boolean; readonly failedUpstreams: readonly string[] }> => {
const fetcherForUpstream = await createPerRequestFetcher(currentColo);
const providers = await listModelProviders(upstreamIds);
@@ -39,17 +57,28 @@ export const enumerateProviderCandidates = async ({
// `resolveModelForRequest`; first-viable-wins ordering follows configured
// sort_order across upstreams, with the unprefixed interpretation pushed
// before the prefixed one within a single upstream.
- const interpretations = enumerateModelInterpretations(model, providers);
+ //
+ // Alias matching runs inside `enumerateModelInterpretations`: each
+ // (provider, lookupId) pair is checked against the alias table and the
+ // matched alias's `onConflict` decides what to push. The alias-rewrite
+ // metadata rides out alongside each resolved candidate so the attempt
+ // layer can apply the locked rules.
+ const interpretations = enumerateModelInterpretations(model, providers, aliases);
const { resolutions, failedUpstreams } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
- const candidates: ProviderCandidate[] = [];
+ const candidates: ChatCandidate[] = [];
let sawModel = false;
- for (const { provider, resolved } of resolutions) {
+ for (const { interpretation, provider, resolved } of resolutions) {
sawModel = true;
const targetApi = pickTarget(resolved.binding.upstreamModel.endpoints);
if (!targetApi) continue;
- candidates.push({ provider, binding: resolved.binding, targetApi, fetcher: fetcherForUpstream(provider.upstream) });
+ const base: ProviderCandidate = { provider, binding: resolved.binding, targetApi, fetcher: fetcherForUpstream(provider.upstream) };
+ candidates.push(
+ interpretation.aliasRules !== undefined
+ ? { ...base, aliasRules: interpretation.aliasRules, aliasName: interpretation.aliasName }
+ : base,
+ );
}
return { candidates, sawModel, failedUpstreams };
diff --git a/packages/gateway/src/data-plane/chat/shared/candidates_test.ts b/packages/gateway/src/data-plane/chat/shared/candidates_test.ts
index 627b631d5..381d395b9 100644
--- a/packages/gateway/src/data-plane/chat/shared/candidates_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/candidates_test.ts
@@ -54,6 +54,7 @@ describe('enumerateProviderCandidates', () => {
const { candidates, sawModel } = await enumerateProviderCandidates({
upstreamIds: null,
+ aliases: [],
model: 'test-model',
pickTarget: pickMessages,
scheduler: testScheduler,
@@ -74,6 +75,7 @@ describe('enumerateProviderCandidates', () => {
const { candidates, sawModel } = await enumerateProviderCandidates({
upstreamIds: null,
+ aliases: [],
model: 'test-model',
pickTarget: pickMessages,
scheduler: testScheduler,
@@ -94,6 +96,7 @@ describe('enumerateProviderCandidates', () => {
const { candidates, sawModel } = await enumerateProviderCandidates({
upstreamIds: null,
+ aliases: [],
model: 'test-model',
pickTarget: pickMessages,
scheduler: testScheduler,
@@ -113,6 +116,7 @@ describe('enumerateProviderCandidates', () => {
const { candidates } = await enumerateProviderCandidates({
upstreamIds: null,
+ aliases: [],
model: 'test-model',
pickTarget: pickMessages,
scheduler: testScheduler,
@@ -133,6 +137,7 @@ describe('enumerateProviderCandidates', () => {
const { candidates } = await enumerateProviderCandidates({
upstreamIds: ['up_c', 'up_a'],
+ aliases: [],
model: 'test-model',
pickTarget: pickMessages,
scheduler: testScheduler,
@@ -155,6 +160,7 @@ describe('enumerateProviderCandidates', () => {
const { candidates } = await enumerateProviderCandidates({
upstreamIds: null,
+ aliases: [],
model: 'test-model',
pickTarget: pickMessages,
scheduler: testScheduler,
@@ -172,6 +178,7 @@ describe('enumerateProviderCandidates', () => {
const { candidates: msgCandidates } = await enumerateProviderCandidates({
upstreamIds: null,
+ aliases: [],
model: 'test-model',
pickTarget: pickMessagesOrResponses,
scheduler: testScheduler,
@@ -182,6 +189,7 @@ describe('enumerateProviderCandidates', () => {
const { candidates: resCandidates } = await enumerateProviderCandidates({
upstreamIds: null,
+ aliases: [],
model: 'test-model',
pickTarget: pickResponses,
scheduler: testScheduler,
@@ -198,6 +206,7 @@ describe('enumerateProviderCandidates', () => {
const { candidates: anyCandidates } = await enumerateProviderCandidates({
upstreamIds: null,
+ aliases: [],
model: 'test-model',
pickTarget: pickAny,
scheduler: testScheduler,
@@ -208,6 +217,7 @@ describe('enumerateProviderCandidates', () => {
const { candidates: msgCandidates, sawModel } = await enumerateProviderCandidates({
upstreamIds: null,
+ aliases: [],
model: 'test-model',
pickTarget: pickMessages,
scheduler: testScheduler,
@@ -245,6 +255,7 @@ describe('enumerateProviderCandidates', () => {
async () => {
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: null,
+ aliases: [],
model: 'test-model',
pickTarget: pickMessages,
scheduler: testScheduler,
@@ -288,6 +299,7 @@ describe('enumerateProviderCandidates', () => {
async () => {
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: null,
+ aliases: [],
model: 'test-model',
pickTarget: pickMessages,
scheduler: testScheduler,
diff --git a/packages/gateway/src/data-plane/chat/shared/routing.ts b/packages/gateway/src/data-plane/chat/shared/routing.ts
index 96785da70..392c50cf5 100644
--- a/packages/gateway/src/data-plane/chat/shared/routing.ts
+++ b/packages/gateway/src/data-plane/chat/shared/routing.ts
@@ -1,6 +1,12 @@
-import type { ProviderCandidate } from './candidates.ts';
+import type { ChatCandidate, ProviderCandidate } from './candidates.ts';
import type { ChatServeFailure } from './errors.ts';
-export type RoutingDecision =
- | { readonly kind: 'success'; readonly candidates: readonly ProviderCandidate[] }
+// Generic over the candidate type so call sites that hand in `ChatCandidate`
+// receive a decision whose surviving candidates retain the alias metadata.
+// The candidate filtering and ordering inside routing is shape-agnostic —
+// it touches `binding.upstream` and `binding.supportsResponsesItemReference`
+// only — so the alias fields ride through routing untouched and callers
+// never have to re-derive them afterwards.
+export type RoutingDecision =
+ | { readonly kind: 'success'; readonly candidates: readonly T[] }
| { readonly kind: 'failure'; readonly failure: ChatServeFailure };
diff --git a/packages/gateway/src/data-plane/model-aliases/match.ts b/packages/gateway/src/data-plane/model-aliases/match.ts
new file mode 100644
index 000000000..f297d1a50
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/match.ts
@@ -0,0 +1,19 @@
+import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
+
+export interface AliasMatchResult {
+ readonly alias: ModelAlias;
+}
+
+// Look up the alias whose name equals the (post-prefix-strip) lookupId, then
+// apply its upstream filter: an empty `upstreamIds` means "match any upstream";
+// a non-empty filter must include the upstream's id or the lookup misses.
+export const matchAlias = (
+ lookupId: string,
+ upstreamId: string,
+ aliases: readonly ModelAlias[],
+): AliasMatchResult | undefined => {
+ const hit = aliases.find(a => a.alias === lookupId);
+ if (!hit) return undefined;
+ if (hit.upstreamIds.length > 0 && !hit.upstreamIds.includes(upstreamId)) return undefined;
+ return { alias: hit };
+};
diff --git a/packages/gateway/src/data-plane/model-aliases/match_test.ts b/packages/gateway/src/data-plane/model-aliases/match_test.ts
new file mode 100644
index 000000000..b1dd8ff2f
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/match_test.ts
@@ -0,0 +1,54 @@
+import { describe, expect, test } from 'vitest';
+
+import { matchAlias } from './match.ts';
+import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
+
+const make = (overrides: Partial): ModelAlias => ({
+ alias: 'a',
+ targetModelId: 't',
+ upstreamIds: [],
+ rules: {},
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ ...overrides,
+});
+
+describe('matchAlias', () => {
+ test('matches by exact lookupId when alias has no upstream filter', () => {
+ const aliases = [make({ alias: 'codex-auto-review', targetModelId: 'gpt-5.4' })];
+ expect(matchAlias('codex-auto-review', 'up-1', aliases)?.alias.alias).toBe('codex-auto-review');
+ });
+
+ test('does not match when lookupId differs', () => {
+ const aliases = [make({ alias: 'codex-auto-review' })];
+ expect(matchAlias('something-else', 'up-1', aliases)).toBeUndefined();
+ });
+
+ test('respects upstreamIds allowlist (member matches)', () => {
+ const aliases = [make({ alias: 'a', upstreamIds: ['up-1', 'up-2'] })];
+ expect(matchAlias('a', 'up-1', aliases)).toBeDefined();
+ expect(matchAlias('a', 'up-2', aliases)).toBeDefined();
+ });
+
+ test('respects upstreamIds allowlist (non-member misses)', () => {
+ const aliases = [make({ alias: 'a', upstreamIds: ['up-1'] })];
+ expect(matchAlias('a', 'up-3', aliases)).toBeUndefined();
+ });
+
+ test('empty upstreamIds means match-any', () => {
+ const aliases = [make({ alias: 'a', upstreamIds: [] })];
+ expect(matchAlias('a', 'anywhere', aliases)).toBeDefined();
+ });
+
+ test('returns the first matching alias entry verbatim', () => {
+ const aliases = [
+ make({ alias: 'a', targetModelId: 'first', rules: { reasoning: { effort: 'low' } } }),
+ make({ alias: 'a', targetModelId: 'second' }),
+ ];
+ expect(matchAlias('a', 'up-x', aliases)?.alias).toEqual(aliases[0]);
+ });
+
+ test('returns undefined for an empty alias list', () => {
+ expect(matchAlias('a', 'up-x', [])).toBeUndefined();
+ });
+});
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index 79a4f1c5a..8ca75c518 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -1,5 +1,7 @@
import { fetchUpstreamModelsCached } from './models-cache.ts';
+import type { ModelAlias, ModelAliasRules } from '../../control-plane/model-aliases/types.ts';
import { getRepo } from '../../repo/index.ts';
+import { matchAlias } from '../model-aliases/match.ts';
import type { BackgroundScheduler } from '@floway-dev/platform';
import { type ModelEndpointKey, type ModelEndpoints, kindForEndpoints } from '@floway-dev/protocols/common';
import type { InternalModel, ModelProviderInstance, ProviderModelRecord, ResolvedModel, Fetcher, UpstreamModel, UpstreamProviderKind, UpstreamRecord } from '@floway-dev/provider';
@@ -289,8 +291,22 @@ export interface ModelInterpretation {
provider: ModelProviderInstance;
// The bare id to query the upstream's catalog with. Equals the inbound
// model id for the unprefixed surface; equals `inbound.slice(prefix.length)`
- // for the prefixed surface.
+ // for the prefixed surface. For an alias-rewrite interpretation it equals
+ // the matched alias's `targetModelId`.
lookupId: string;
+ // Operator-locked request-time rules carried alongside an alias-rewrite
+ // interpretation. Set only when this interpretation is the alias-rewrite
+ // half of a matched alias; the real-name interpretation in the same
+ // `conflictGroup` (and every non-aliased interpretation) leaves this
+ // undefined.
+ aliasRules?: ModelAliasRules;
+ // The alias name as authored by the operator. Set in lockstep with
+ // `aliasRules` and carried out for the `x-floway-alias` response header.
+ aliasName?: string;
+ // Identity-keyed group shared by the two interpretations a single
+ // `onConflict: 'real-only'` alias emits. The post-resolution prune uses
+ // this to drop the alias-rewrite member when both halves resolved.
+ conflictGroup?: { readonly originalLookupId: string };
}
// Expands one inbound model id into every (provider, catalog-lookup-id) pair
@@ -298,44 +314,102 @@ export interface ModelInterpretation {
// when the inbound id literally equals one of the public-id surfaces the
// upstream advertises (bare and/or prefixed, per `modelPrefix.addressable`).
// The unprefixed interpretation is always pushed first when both apply.
+//
+// Each (provider, lookupId) candidate is then matched against the global
+// alias table — semantic P, post-prefix-strip — and the matched alias's
+// `onConflict` decides whether to push only the alias-rewrite interpretation
+// or both it and the real-name one (in either order). The fan-out never
+// consults the upstream catalog: it emits its interpretations even when the
+// upstream exposes neither the alias name nor the target id, and resolution
+// simply drops every half that misses.
export const enumerateModelInterpretations = (
modelId: string,
providers: readonly ModelProviderInstance[],
+ aliases: readonly ModelAlias[],
): ModelInterpretation[] => {
const out: ModelInterpretation[] = [];
for (const provider of providers) {
const cfg = provider.modelPrefix;
if (cfg === null || cfg.addressable.includes('unprefixed')) {
- out.push({ provider, lookupId: modelId });
+ pushInterpretation(out, provider, modelId, aliases);
}
if (cfg !== null && cfg.addressable.includes('prefixed') && modelId.startsWith(cfg.prefix)) {
- out.push({ provider, lookupId: modelId.slice(cfg.prefix.length) });
+ pushInterpretation(out, provider, modelId.slice(cfg.prefix.length), aliases);
}
}
return out;
};
+const pushInterpretation = (
+ out: ModelInterpretation[],
+ provider: ModelProviderInstance,
+ lookupId: string,
+ aliases: readonly ModelAlias[],
+): void => {
+ const hit = matchAlias(lookupId, provider.upstream, aliases);
+ if (!hit) {
+ out.push({ provider, lookupId });
+ return;
+ }
+ const { alias } = hit;
+ const aliasInterp: ModelInterpretation = {
+ provider,
+ lookupId: alias.targetModelId,
+ aliasRules: alias.rules,
+ aliasName: alias.alias,
+ };
+ const realInterp: ModelInterpretation = { provider, lookupId };
+ switch (alias.onConflict) {
+ case 'alias-only':
+ out.push(aliasInterp);
+ return;
+ case 'real-only': {
+ // Both halves enter the resolution pass; the post-resolution prune
+ // drops the alias-rewrite member when the real-name resolved too.
+ // Identity-keyed group so the prune step can rejoin them without
+ // re-deriving an alias key.
+ const group = { originalLookupId: lookupId };
+ out.push({ ...realInterp, conflictGroup: group });
+ out.push({ ...aliasInterp, conflictGroup: group });
+ return;
+ }
+ case 'both-real-first':
+ out.push(realInterp);
+ out.push(aliasInterp);
+ return;
+ case 'both-alias-first':
+ out.push(aliasInterp);
+ out.push(realInterp);
+ return;
+ }
+};
+
// Fan out per-interpretation against the SWR cache and collect the resolved
// matches plus a deduped list of upstreams whose catalog fetch rejected.
// Shared by `resolveModelForRequest` and `enumerateProviderCandidates`; the
// per-caller divergence (passthrough vs LLM-candidate shape) happens after
// this returns. Cancellation (`AbortError`) propagates so the per-request
// abort signal cannot be masked by a slow upstream's rejection.
+//
+// Each successful resolution carries its source `interpretation` back to
+// the caller so the alias-rewrite metadata (`aliasRules`, `aliasName`)
+// rides through to the candidate, and so the `real-only` post-resolution
+// prune can rejoin the two halves of a conflict group.
export const collectInterpretationOutcomes = async (
interpretations: readonly ModelInterpretation[],
fetcherForUpstream: (upstreamId: string) => Fetcher,
scheduler: BackgroundScheduler,
): Promise<{
- resolutions: Array<{ provider: ModelProviderInstance; resolved: ProviderModelResolution }>;
+ resolutions: Array<{ interpretation: ModelInterpretation; provider: ModelProviderInstance; resolved: ProviderModelResolution }>;
failedUpstreams: string[];
}> => {
- const settled = await Promise.allSettled(interpretations.map(({ provider, lookupId }) =>
- resolveModelForProvider(provider, lookupId, fetcherForUpstream(provider.upstream), scheduler)
- .then(resolved => ({ provider, resolved }))));
+ const settled = await Promise.allSettled(interpretations.map(interpretation =>
+ resolveModelForProvider(interpretation.provider, interpretation.lookupId, fetcherForUpstream(interpretation.provider.upstream), scheduler)
+ .then(resolved => ({ interpretation, resolved }))));
const failedUpstreams: string[] = [];
const failedSeen = new Set();
- const resolutions: Array<{ provider: ModelProviderInstance; resolved: ProviderModelResolution }> = [];
+ const resolutions: Array<{ interpretation: ModelInterpretation; provider: ModelProviderInstance; resolved: ProviderModelResolution }> = [];
for (const [index, result] of settled.entries()) {
if (result.status === 'rejected') {
@@ -350,12 +424,36 @@ export const collectInterpretationOutcomes = async (
}
continue;
}
- const { provider, resolved } = result.value;
+ const { interpretation, resolved } = result.value;
if (!resolved) continue;
- resolutions.push({ provider, resolved });
+ resolutions.push({ interpretation, provider: interpretation.provider, resolved });
}
- return { resolutions, failedUpstreams };
+ // `onConflict: 'real-only'`: when both halves of a conflict group
+ // resolved, drop the alias-rewrite half so the real-name match is the
+ // only one downstream sees. When only the alias-rewrite half resolved
+ // (the upstream has no model named after the alias itself), keep it —
+ // the operator's intent is to fall back to the alias when no real model
+ // collides.
+ const droppedInterpretations = new Set();
+ const byGroup = new Map<{ readonly originalLookupId: string }, ModelInterpretation[]>();
+ for (const { interpretation } of resolutions) {
+ const group = interpretation.conflictGroup;
+ if (!group) continue;
+ const list = byGroup.get(group) ?? [];
+ list.push(interpretation);
+ byGroup.set(group, list);
+ }
+ for (const members of byGroup.values()) {
+ if (members.length < 2) continue;
+ const aliasRewriteMember = members.find(i => i.aliasRules !== undefined);
+ if (aliasRewriteMember) droppedInterpretations.add(aliasRewriteMember);
+ }
+
+ return {
+ resolutions: resolutions.filter(r => !droppedInterpretations.has(r.interpretation)),
+ failedUpstreams,
+ };
};
export const resolveModelForRequest = async (
@@ -363,13 +461,14 @@ export const resolveModelForRequest = async (
upstreamFilter: readonly string[] | null,
fetcherForUpstream: (upstreamId: string) => Fetcher,
scheduler: BackgroundScheduler,
+ aliases: readonly ModelAlias[] = [],
): Promise => {
const providers = await listModelProviders(upstreamFilter);
if (providers.length === 0) {
throw new Error(NO_UPSTREAM_CONFIGURED_MESSAGE);
}
- const interpretations = enumerateModelInterpretations(modelId, providers);
+ const interpretations = enumerateModelInterpretations(modelId, providers, aliases);
const { resolutions, failedUpstreams } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
return { matches: resolutions.map(r => r.resolved), failedUpstreams };
};
diff --git a/packages/gateway/src/data-plane/providers/registry_test.ts b/packages/gateway/src/data-plane/providers/registry_test.ts
index c330abfee..d470f9169 100644
--- a/packages/gateway/src/data-plane/providers/registry_test.ts
+++ b/packages/gateway/src/data-plane/providers/registry_test.ts
@@ -2,6 +2,7 @@ import { describe, expect, test } from 'vitest';
import { clearInFlightForTesting } from './models-cache.ts';
import { compareModelIds, enumerateModelInterpretations, getInternalModels, listModelProviders, resolveModelForProvider, resolveModelForRequest } from './registry.ts';
+import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
import { buildCopilotUpstreamRecord, buildCustomUpstreamRecord, copilotModels, setupAppTest } from '../../test-helpers.ts';
import { directFetcher, type ModelProviderInstance } from '@floway-dev/provider';
import { createCopilotProvider } from '@floway-dev/provider-copilot';
@@ -609,20 +610,20 @@ describe('enumerateModelInterpretations', () => {
// A: no prefix, bare always accepted. B: prefixed-only addressable — bare
// is not accepted. C: dual-addressable, bare accepted; the prefixed form
// does not apply because `gpt-4o` does not start with `cx/`.
- assertEquals(shape(enumerateModelInterpretations('gpt-4o', [A, B, C])), [
+ assertEquals(shape(enumerateModelInterpretations('gpt-4o', [A, B, C], [])), [
{ upstream: 'A', lookupId: 'gpt-4o' },
{ upstream: 'C', lookupId: 'gpt-4o' },
]);
});
test('prefix-only-addressable upstream strips the prefix when it matches', () => {
- assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [B])), [
+ assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [B], [])), [
{ upstream: 'B', lookupId: 'gpt-4o' },
]);
});
test('prefix-only-addressable upstream is silent when the prefix does not match', () => {
- assertEquals(enumerateModelInterpretations('gpt-4o', [B]), []);
+ assertEquals(enumerateModelInterpretations('gpt-4o', [B], []), []);
});
test('dual-addressable upstream produces two interpretations when the prefix matches', () => {
@@ -633,7 +634,7 @@ describe('enumerateModelInterpretations', () => {
upstream: 'D', name: 'd',
modelPrefix: { prefix: 'or/', addressable: ['unprefixed', 'prefixed'], listed: ['prefixed'] },
});
- assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [D])), [
+ assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [D], [])), [
{ upstream: 'D', lookupId: 'or/gpt-4o' },
{ upstream: 'D', lookupId: 'gpt-4o' },
]);
@@ -653,7 +654,7 @@ describe('enumerateModelInterpretations', () => {
modelPrefix: { prefix: 'aa/bb/', addressable: ['prefixed'], listed: ['prefixed'] },
});
const Z = fakeProvider({ upstream: 'Z', name: 'z', modelPrefix: null });
- assertEquals(shape(enumerateModelInterpretations('aa/bb/gpt-5', [X, Y, Z])), [
+ assertEquals(shape(enumerateModelInterpretations('aa/bb/gpt-5', [X, Y, Z], [])), [
{ upstream: 'X', lookupId: 'bb/gpt-5' },
{ upstream: 'Y', lookupId: 'gpt-5' },
{ upstream: 'Z', lookupId: 'aa/bb/gpt-5' },
@@ -906,3 +907,200 @@ describe('catalog listing under modelPrefix', () => {
);
});
});
+
+// Synthetic-catalog alias matching against a single provider. Verifies that
+// each `onConflict` mode emits the right interpretation shape from
+// `enumerateModelInterpretations`. The downstream `collectInterpretationOutcomes`
+// pass is exercised in the e2e suite below.
+describe('enumerateModelInterpretations with alias matching', () => {
+ const provider = fakeProvider({ upstream: 'U', name: 'u', modelPrefix: null });
+
+ const makeAlias = (over: Partial<ModelAlias>): ModelAlias => ({
+ alias: 'codex-auto-review',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: [],
+ rules: { reasoning: { effort: 'low' } },
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ ...over, // spread last so caller-supplied fields win over the defaults above
+ });
+
+ test('alias-only emits exactly the alias-rewrite interpretation, with rules', () => {
+ const aliases = [makeAlias({ onConflict: 'alias-only' })];
+ const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
+ assertEquals(out.length, 1);
+ assertEquals(out[0].lookupId, 'gpt-5.4');
+ assertEquals(out[0].aliasRules, { reasoning: { effort: 'low' } });
+ assertEquals(out[0].aliasName, 'codex-auto-review');
+ assertEquals(out[0].conflictGroup, undefined);
+ });
+
+ test('real-only emits both halves, tagged with a shared conflictGroup', () => {
+ const aliases = [makeAlias({ onConflict: 'real-only' })];
+ const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
+ assertEquals(out.length, 2);
+ // Real first, alias second — the prune step removes the alias when
+ // real resolved, so real-first keeps the natural iteration order.
+ assertEquals(out[0].lookupId, 'codex-auto-review');
+ assertEquals(out[0].aliasRules, undefined);
+ assertEquals(out[1].lookupId, 'gpt-5.4');
+ assertEquals(out[1].aliasRules, { reasoning: { effort: 'low' } });
+ expect(out[0].conflictGroup).toBeDefined();
+ expect(out[0].conflictGroup).toBe(out[1].conflictGroup);
+ });
+
+ test('both-real-first emits real then alias, neither group-tagged', () => {
+ const aliases = [makeAlias({ onConflict: 'both-real-first' })];
+ const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
+ assertEquals(out.length, 2);
+ assertEquals(out[0].lookupId, 'codex-auto-review');
+ assertEquals(out[0].aliasRules, undefined);
+ assertEquals(out[1].lookupId, 'gpt-5.4');
+ assertEquals(out[1].aliasRules, { reasoning: { effort: 'low' } });
+ assertEquals(out[0].conflictGroup, undefined);
+ assertEquals(out[1].conflictGroup, undefined);
+ });
+
+ test('both-alias-first emits alias then real, neither group-tagged', () => {
+ const aliases = [makeAlias({ onConflict: 'both-alias-first' })];
+ const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
+ assertEquals(out.length, 2);
+ assertEquals(out[0].lookupId, 'gpt-5.4');
+ assertEquals(out[0].aliasRules, { reasoning: { effort: 'low' } });
+ assertEquals(out[1].lookupId, 'codex-auto-review');
+ assertEquals(out[1].aliasRules, undefined);
+ });
+
+ test('upstreamIds filter skips the alias on providers outside the allowlist', () => {
+ const aliases = [makeAlias({ onConflict: 'alias-only', upstreamIds: ['OTHER'] })];
+ const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
+ // The alias only applies to OTHER, so this provider sees a literal
+ // (no-rewrite) interpretation.
+ assertEquals(out.length, 1);
+ assertEquals(out[0].lookupId, 'codex-auto-review');
+ assertEquals(out[0].aliasRules, undefined);
+ });
+
+ test('prefix-strip happens before alias matching (semantic P)', () => {
+ // Configure the provider with a prefix; the inbound `cx/codex-auto-review`
+ // strips to `codex-auto-review` and matches the alias. The alias-rewrite
+ // interpretation carries the target id `gpt-5.4`.
+ const prefixedProvider = fakeProvider({
+ upstream: 'P', name: 'p',
+ modelPrefix: { prefix: 'cx/', addressable: ['prefixed'], listed: ['prefixed'] },
+ });
+ const aliases = [makeAlias({ onConflict: 'alias-only' })];
+ const out = enumerateModelInterpretations('cx/codex-auto-review', [prefixedProvider], aliases);
+ assertEquals(out.length, 1);
+ assertEquals(out[0].lookupId, 'gpt-5.4');
+ assertEquals(out[0].aliasName, 'codex-auto-review');
+ });
+});
+
+// E2E coverage of the post-resolution prune. Uses a real Azure-backed
+// catalog (resolved without HTTP) so the conflict pruning behavior is
+// observed end-to-end via `resolveModelForRequest`.
+describe('resolveModelForRequest applies alias onConflict pruning', () => {
+ // Helper that stages a single Azure upstream exposing both the real
+ // alias-named model and the alias's target model.
+ const stageBothNamesUpstream = async (): Promise<void> => {
+ const { repo } = await setupAppTest();
+ await repo.upstreams.deleteAll();
+ await repo.upstreams.save({
+ id: 'up_a',
+ provider: 'azure',
+ name: 'A',
+ enabled: true,
+ sortOrder: 1,
+ createdAt: '2026-05-21T00:00:00.000Z',
+ updatedAt: '2026-05-21T00:00:00.000Z',
+ config: {
+ endpoint: 'https://a.openai.azure.com',
+ apiKey: 'az-key',
+ models: [
+ { upstreamModelId: 'codex-auto-review', endpoints: { chatCompletions: {} } },
+ { upstreamModelId: 'gpt-5.4', endpoints: { chatCompletions: {} } },
+ ],
+ },
+ flagOverrides: {},
+ disabledPublicModelIds: [],
+ proxyFallbackList: [],
+ modelPrefix: null,
+ state: null,
+ });
+ };
+
+ // Helper that stages a single Azure upstream exposing ONLY the alias's
+ // target model (no real `codex-auto-review` collision).
+ const stageTargetOnlyUpstream = async (): Promise<void> => {
+ const { repo } = await setupAppTest();
+ await repo.upstreams.deleteAll();
+ await repo.upstreams.save({
+ id: 'up_a',
+ provider: 'azure',
+ name: 'A',
+ enabled: true,
+ sortOrder: 1,
+ createdAt: '2026-05-21T00:00:00.000Z',
+ updatedAt: '2026-05-21T00:00:00.000Z',
+ config: {
+ endpoint: 'https://a.openai.azure.com',
+ apiKey: 'az-key',
+ models: [
+ { upstreamModelId: 'gpt-5.4', endpoints: { chatCompletions: {} } },
+ ],
+ },
+ flagOverrides: {},
+ disabledPublicModelIds: [],
+ proxyFallbackList: [],
+ modelPrefix: null,
+ state: null,
+ });
+ };
+
+ const aliasOf = (onConflict: ModelAlias['onConflict']): ModelAlias => ({
+ alias: 'codex-auto-review',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: [],
+ rules: { reasoning: { effort: 'low' } },
+ visibleInModelsList: true,
+ onConflict,
+ });
+
+ test('alias-only resolves to a single match against the alias target id', async () => {
+ await stageBothNamesUpstream();
+ const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('alias-only')]);
+ assertEquals(resolved.matches.length, 1);
+ assertEquals(resolved.matches[0].id, 'gpt-5.4');
+ });
+
+ test('real-only drops the alias-rewrite resolution when the real-name resolves too', async () => {
+ await stageBothNamesUpstream();
+ const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('real-only')]);
+ assertEquals(resolved.matches.length, 1);
+ assertEquals(resolved.matches[0].id, 'codex-auto-review');
+ });
+
+ test('real-only keeps the alias-rewrite resolution when the real-name catalog lookup misses', async () => {
+ await stageTargetOnlyUpstream();
+ const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('real-only')]);
+ assertEquals(resolved.matches.length, 1);
+ assertEquals(resolved.matches[0].id, 'gpt-5.4');
+ });
+
+ test('both-real-first resolves to two matches, real first', async () => {
+ await stageBothNamesUpstream();
+ const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('both-real-first')]);
+ assertEquals(resolved.matches.length, 2);
+ assertEquals(resolved.matches[0].id, 'codex-auto-review');
+ assertEquals(resolved.matches[1].id, 'gpt-5.4');
+ });
+
+ test('both-alias-first resolves to two matches, alias first', async () => {
+ await stageBothNamesUpstream();
+ const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('both-alias-first')]);
+ assertEquals(resolved.matches.length, 2);
+ assertEquals(resolved.matches[0].id, 'gpt-5.4');
+ assertEquals(resolved.matches[1].id, 'codex-auto-review');
+ });
+});
diff --git a/packages/gateway/src/repo/memory.ts b/packages/gateway/src/repo/memory.ts
index 5a85dba39..85f01b621 100644
--- a/packages/gateway/src/repo/memory.ts
+++ b/packages/gateway/src/repo/memory.ts
@@ -13,6 +13,7 @@ import type {
ApiKeyRepo,
BackoffRow,
CachedModelsRow,
+ ModelAliasesRepo,
ModelsCacheRepo,
PerformanceDimensions,
PerformanceErrorSample,
@@ -39,6 +40,7 @@ import type {
UsersRepo,
} from './types.ts';
import { serializeStoredState } from './upstream-json.ts';
+import type { ModelAlias } from '../control-plane/model-aliases/types.ts';
import { latencyBucketForMs } from '../shared/performance-histogram.ts';
import { generateSessionToken } from '../shared/session-tokens.ts';
import { assertWebSearchProviderName } from '../shared/web-search-providers.ts';
@@ -896,6 +898,7 @@ export class InMemoryRepo implements Repo {
proxyBackoffs: ProxyBackoffRepo;
responsesItems: ResponsesItemsRepo;
responsesSnapshots: ResponsesSnapshotsRepo;
+ modelAliases: ModelAliasesRepo;
constructor() {
this.users = new MemoryUsersRepo();
@@ -911,5 +914,21 @@ export class InMemoryRepo implements Repo {
this.proxyBackoffs = new MemoryProxyBackoffRepo();
this.responsesItems = new MemoryResponsesItemsRepo();
this.responsesSnapshots = new MemoryResponsesSnapshotsRepo();
+ this.modelAliases = new MemoryModelAliasesRepo();
+ }
+}
+
+// Test-only in-memory backing for the alias table. The list starts empty
+// and can be reseeded via `setAll` so tests exercising alias-resolution
+// behavior do not depend on a live SQL database.
+export class MemoryModelAliasesRepo implements ModelAliasesRepo {
+ private rows: readonly ModelAlias[] = [];
+
+ loadAll(): Promise<readonly ModelAlias[]> {
+ return Promise.resolve(this.rows);
+ }
+
+ setAll(rows: readonly ModelAlias[]): void {
+ this.rows = rows;
}
}
diff --git a/packages/gateway/src/repo/sql.ts b/packages/gateway/src/repo/sql.ts
index b716d07e4..109b35024 100644
--- a/packages/gateway/src/repo/sql.ts
+++ b/packages/gateway/src/repo/sql.ts
@@ -7,6 +7,7 @@ import type {
ApiKeyRepo,
BackoffRow,
CachedModelsRow,
+ ModelAliasesRepo,
ModelsCacheRepo,
PerformanceDimensions,
PerformanceErrorSample,
@@ -34,6 +35,8 @@ import type {
UsersRepo,
} from './types.ts';
import { serializeStoredConfig, serializeStoredState } from './upstream-json.ts';
+import { loadAllAliases } from '../control-plane/model-aliases/repo.ts';
+import type { ModelAlias } from '../control-plane/model-aliases/types.ts';
import { latencyBucketForMs } from '../shared/performance-histogram.ts';
import { generateSessionToken } from '../shared/session-tokens.ts';
import { assertWebSearchProviderName } from '../shared/web-search-providers.ts';
@@ -1599,6 +1602,7 @@ export class SqlRepo implements Repo {
proxyBackoffs: ProxyBackoffRepo;
responsesItems: ResponsesItemsRepo;
responsesSnapshots: ResponsesSnapshotsRepo;
+ modelAliases: ModelAliasesRepo;
constructor(db: SqlDatabase) {
this.users = new SqlUsersRepo(db);
@@ -1614,5 +1618,14 @@ export class SqlRepo implements Repo {
this.proxyBackoffs = new SqlProxyBackoffRepo(db);
this.responsesItems = new SqlResponsesItemsRepo(db);
this.responsesSnapshots = new SqlResponsesSnapshotsRepo(db);
+ this.modelAliases = new SqlModelAliasesRepo(db);
+ }
+}
+
+class SqlModelAliasesRepo implements ModelAliasesRepo {
+ constructor(private db: SqlDatabase) {}
+
+ loadAll(): Promise<readonly ModelAlias[]> {
+ return loadAllAliases(this.db);
}
}
diff --git a/packages/gateway/src/repo/types.ts b/packages/gateway/src/repo/types.ts
index 0341d41ef..d282aaa98 100644
--- a/packages/gateway/src/repo/types.ts
+++ b/packages/gateway/src/repo/types.ts
@@ -1,3 +1,4 @@
+import type { ModelAlias } from '../control-plane/model-aliases/types.ts';
import type { HistogramBucket } from '../shared/performance-histogram.ts';
import type { WebSearchProviderName } from '../shared/web-search-providers.ts';
import type { BillingDimension, ModelPricing } from '@floway-dev/protocols/common';
@@ -332,4 +333,11 @@ export interface Repo {
proxyBackoffs: ProxyBackoffRepo;
responsesItems: ResponsesItemsRepo;
responsesSnapshots: ResponsesSnapshotsRepo;
+ modelAliases: ModelAliasesRepo;
+}
+
+// Operator-managed alias table; small (dozens of rows at most) and read
+// per request, so the repo deliberately exposes only a full-table fetch.
+export interface ModelAliasesRepo {
+ loadAll(): Promise<readonly ModelAlias[]>;
}
From 8305153a58fac34afd42b8c92f5a1346a83c67b4 Mon Sep 17 00:00:00 2001
From: Menci
Date: Thu, 25 Jun 2026 23:12:35 +0800
Subject: [PATCH 006/170] feat(gateway): apply alias rules, synthesize
/v1/models entries, set x-floway-alias
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
applyAliasRulesTo writes rule values into each inbound
IR's native slot when the protocol supports the concept and the Floway
extension slot otherwise. Alias values override user-supplied values per
the operator-locked semantics in Goal 3 of the design.
/v1/models appends alias entries with aliasedFrom carrying the target,
upstream filter, rules, and conflict mode. Aliases with
visibleInModelsList=false are omitted; aliases whose targets are
unreachable are still listed — operator-declared, no silent hide. The
Gemini /v1beta/models surface mirrors the same alias-listing policy.
The x-floway-alias response header carries the matched alias name on
every call served via an alias, giving callers a no-mode-required debug
hook for understanding routing.
Per-upstream sanitizers run just before each upstream HTTP call,
emitting one drop-trace line per stripped extension field with the
matched alias name attached. The same sanitize emission point fires for
client-sent extension residue regardless of alias provenance.
Embeddings, images, and /v1/completions thread aliases through
resolveModelForRequest so alias-name resolution still rewrites the
target id; rules don't apply to these passthrough endpoints (no protocol
slots) but the matched alias name still rides out on the response
header, and one drop trace line per declared rule lands so an operator
can confirm the rewrite ran.
Side touches:
- ChatCandidate replaces ProviderCandidate on every chat attempt arg
type, restoring the alias-metadata propagation the routing layer
already preserves.
- GatewayCtx grows a per-request responseHeaders bag; the http wrappers
flush it onto the outgoing Response through a new
finalizeGatewayResponse helper that also routes through the dump
accumulator.
- ProviderModelResolution gains an optional aliasName; passthrough
callers read it directly off the resolved match.
- pushInterpretation's onConflict switch grows an assertNever default.
---
.../src/control-plane/model-aliases/repo.ts | 4 +-
.../control-plane/model-aliases/repo_test.ts | 35 +--
.../src/control-plane/model-aliases/types.ts | 4 +
.../chat/chat-completions/attempt.ts | 10 +-
.../chat/chat-completions/attempt_test.ts | 1 +
.../data-plane/chat/chat-completions/http.ts | 6 +-
.../demote-developer-to-system_test.ts | 1 +
.../demote-interleaved-system-to-user_test.ts | 1 +
...le-reasoning-on-forced-tool-choice_test.ts | 1 +
.../include-usage-stream-options_test.ts | 1 +
.../interceptors/normalize-usage_test.ts | 1 +
.../vendor-deepseek-normalize_test.ts | 1 +
.../vendor-kimi-normalize_test.ts | 1 +
.../vendor-qwen-normalize_test.ts | 1 +
.../data-plane/chat/chat-completions/serve.ts | 6 +
.../chat/chat-completions/serve_test.ts | 1 +
.../src/data-plane/chat/gemini/attempt.ts | 6 +-
.../data-plane/chat/gemini/attempt_test.ts | 1 +
.../src/data-plane/chat/gemini/http.ts | 10 +-
.../strip-safety-settings_test.ts | 1 +
.../strip-unsupported-part-fields_test.ts | 1 +
.../strip-unsupported-tools_test.ts | 1 +
.../suppress-thought-parts_test.ts | 1 +
.../data-plane/chat/gemini/respond_test.ts | 1 +
.../src/data-plane/chat/gemini/serve.ts | 8 +
.../src/data-plane/chat/gemini/serve_test.ts | 1 +
.../src/data-plane/chat/messages/attempt.ts | 28 ++-
.../data-plane/chat/messages/attempt_test.ts | 1 +
.../src/data-plane/chat/messages/http.ts | 8 +-
.../src/data-plane/chat/messages/http_test.ts | 65 +++++-
.../demote-interleaved-system-to-user_test.ts | 1 +
...le-reasoning-on-forced-tool-choice_test.ts | 1 +
.../strip-billing-attribution_test.ts | 1 +
.../interceptors/web-search-shim_test.ts | 1 +
.../data-plane/chat/messages/respond_test.ts | 1 +
.../src/data-plane/chat/messages/serve.ts | 13 ++
.../data-plane/chat/messages/serve_test.ts | 1 +
.../src/data-plane/chat/responses/attempt.ts | 15 +-
.../data-plane/chat/responses/attempt_test.ts | 1 +
.../src/data-plane/chat/responses/http.ts | 14 +-
.../canonicalize-encrypted-content_test.ts | 1 +
.../demote-developer-to-system_test.ts | 1 +
.../demote-interleaved-system-to-user_test.ts | 1 +
...le-reasoning-on-forced-tool-choice_test.ts | 1 +
.../interceptors/retry-cyber-policy_test.ts | 1 +
.../interceptors/server-tool-shim_test.ts | 2 +
.../image-generation-integration_test.ts | 1 +
.../server-tools/image-generation.ts | 9 +-
.../server-tools/image-generation_test.ts | 1 +
.../vendor-deepseek-normalize_test.ts | 1 +
.../vendor-qwen-normalize_test.ts | 1 +
.../src/data-plane/chat/responses/serve.ts | 12 +
.../data-plane/chat/responses/serve_test.ts | 1 +
.../src/data-plane/chat/shared/gateway-ctx.ts | 23 +-
.../data-plane/chat/shared/respond_test.ts | 1 +
.../src/data-plane/chat/shared/sanitize.ts | 10 +
.../chat/shared/upstream-telemetry_test.ts | 1 +
.../src/data-plane/model-aliases/apply.ts | 105 +++++++++
.../data-plane/model-aliases/apply_test.ts | 218 ++++++++++++++++++
.../data-plane/model-aliases/match_test.ts | 1 +
.../gateway/src/data-plane/models/gemini.ts | 29 ++-
.../gateway/src/data-plane/models/load.ts | 39 +++-
.../gateway/src/data-plane/models/serve.ts | 4 +-
.../src/data-plane/models/serve_test.ts | 178 ++++++++++++++
.../src/data-plane/providers/registry.ts | 18 +-
.../src/data-plane/providers/registry_test.ts | 2 +
.../data-plane/shared/passthrough-serve.ts | 40 +++-
.../gateway/src/test-helpers/gateway-ctx.ts | 1 +
packages/protocols/src/common/models.ts | 24 ++
packages/translate/package.json | 3 +-
70 files changed, 918 insertions(+), 69 deletions(-)
create mode 100644 packages/gateway/src/data-plane/model-aliases/apply.ts
create mode 100644 packages/gateway/src/data-plane/model-aliases/apply_test.ts
diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts
index 70024e0cd..4c13cd09b 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo.ts
@@ -8,13 +8,14 @@ interface ModelAliasRow {
rules_json: string;
visible_in_models_list: number;
on_conflict: OnConflict;
+ created_at: number;
}
// The model_aliases table is operator-managed and small (dozens of rows at
// most), so the data plane reads the full table per request — no cache layer.
export const loadAllAliases = async (db: SqlDatabase): Promise => {
const { results } = await db
- .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict FROM model_aliases')
+ .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at FROM model_aliases')
.all();
return results.map(toModelAlias);
};
@@ -26,6 +27,7 @@ const toModelAlias = (row: ModelAliasRow): ModelAlias => ({
rules: parseJsonField(row.alias, 'rules_json', row.rules_json),
visibleInModelsList: row.visible_in_models_list === 1,
onConflict: row.on_conflict,
+ createdAt: row.created_at,
});
const parseJsonField = (alias: string, field: string, raw: string): T => {
diff --git a/packages/gateway/src/control-plane/model-aliases/repo_test.ts b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
index a4da76fde..ff1efa046 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo_test.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
@@ -8,17 +8,21 @@ test('loadAllAliases reads the seed row from a freshly migrated database', async
const db = await createSqliteTestDb();
const aliases = await loadAllAliases(db);
-
- assertEquals(aliases, [
- {
- alias: 'codex-auto-review',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: { reasoning: { effort: 'low' } },
- visibleInModelsList: true,
- onConflict: 'real-only',
- },
- ]);
+ assertEquals(aliases.length, 1);
+ const [seed] = aliases;
+ // `createdAt` rides off the migration's `DEFAULT (unixepoch())`, so the
+ // exact value is wall-clock dependent. Assert structurally that it landed
+ // as a number and strip it before comparing the rest of the row.
+ assertEquals(typeof seed.createdAt, 'number');
+ const { createdAt: _createdAt, ...withoutTimestamp } = seed;
+ assertEquals(withoutTimestamp, {
+ alias: 'codex-auto-review',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: [],
+ rules: { reasoning: { effort: 'low' } },
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ });
});
test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_models_list to a boolean', async () => {
@@ -26,7 +30,7 @@ test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_mo
await db.exec('DELETE FROM model_aliases');
await db
.prepare(
- 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)',
+ 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)',
)
.bind(
'opus-xhigh',
@@ -35,13 +39,14 @@ test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_mo
'{"reasoning":{"effort":"xhigh"},"anthropicBeta":["fine-grained-tool-streaming"]}',
0,
'alias-only',
+ 1_700_000_000,
)
.run();
await db
.prepare(
- 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)',
+ 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)',
)
- .bind('gpt-5-fast', 'gpt-5.4', '[]', '{"serviceTier":"priority"}', 1, 'both-alias-first')
+ .bind('gpt-5-fast', 'gpt-5.4', '[]', '{"serviceTier":"priority"}', 1, 'both-alias-first', 1_700_000_001)
.run();
const aliases = await loadAllAliases(db);
@@ -54,6 +59,7 @@ test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_mo
rules: { reasoning: { effort: 'xhigh' }, anthropicBeta: ['fine-grained-tool-streaming'] },
visibleInModelsList: false,
onConflict: 'alias-only',
+ createdAt: 1_700_000_000,
});
assertEquals(byAlias.get('gpt-5-fast'), {
alias: 'gpt-5-fast',
@@ -62,6 +68,7 @@ test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_mo
rules: { serviceTier: 'priority' },
visibleInModelsList: true,
onConflict: 'both-alias-first',
+ createdAt: 1_700_000_001,
});
});
diff --git a/packages/gateway/src/control-plane/model-aliases/types.ts b/packages/gateway/src/control-plane/model-aliases/types.ts
index 8e1bff467..7594ceff6 100644
--- a/packages/gateway/src/control-plane/model-aliases/types.ts
+++ b/packages/gateway/src/control-plane/model-aliases/types.ts
@@ -23,4 +23,8 @@ export type ModelAlias = {
readonly rules: ModelAliasRules;
readonly visibleInModelsList: boolean;
readonly onConflict: OnConflict;
+ // Unix epoch seconds stamped at row insertion. Surfaced on the
+ // `/v1/models` synthesized alias entry so callers see when an alias was
+ // declared, matching the `created` semantics of the real entries.
+ readonly createdAt: number;
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts b/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts
index 5862192e5..71c3a8288 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts
@@ -5,9 +5,10 @@ import { responsesAttempt } from '../responses/attempt.ts';
import { rewriteStoredResponsesItemsForCandidate } from '../responses/items/rewrite.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
import { providerStreamResultToExecuteResult, buildUpstreamCallOptions } from '../shared/attempt-helpers.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
+import type { ChatCandidate } from '../shared/candidates.ts';
import { tryCatchChatServeFailure } from '../shared/errors.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
+import { createSanitizeTraceCtx, sanitizeForChatCompletionsUpstream } from '../shared/sanitize.ts';
import { traverseTranslation } from '../shared/translate-traverse.ts';
import { createUpstreamLatencyRecorder } from '../shared/upstream-telemetry.ts';
import { runInterceptors } from '@floway-dev/interceptor';
@@ -21,7 +22,7 @@ export interface ChatCompletionsAttemptArgs {
readonly payload: ChatCompletionsPayload;
readonly ctx: GatewayCtx;
readonly store: StatefulResponsesStore;
- readonly candidate: ProviderCandidate;
+ readonly candidate: ChatCandidate;
readonly headers: Headers;
}
@@ -67,7 +68,7 @@ export const chatCompletionsAttempt = {
const rewriteOrRenderChatCompletionsFailure = async (
payload: ChatCompletionsPayload,
store: StatefulResponsesStore,
- candidate: ProviderCandidate,
+ candidate: ChatCandidate,
): Promise<{ payload: ChatCompletionsPayload; failure?: undefined } | { payload?: undefined; failure: ExecuteResult> & { type: 'api-error' } }> => {
try {
const rewrittenMessages = await rewriteStoredResponsesItemsForCandidate(
@@ -98,10 +99,11 @@ const rewriteOrRenderChatCompletionsFailure = async (
const callChatCompletionsAsExecuteResult = async (
payload: ChatCompletionsPayload,
ctx: GatewayCtx,
- candidate: ProviderCandidate,
+ candidate: ChatCandidate,
headers: Headers,
): Promise>> => {
const { model: _model, ...body } = payload;
+ sanitizeForChatCompletionsUpstream(body as Record, createSanitizeTraceCtx(candidate.aliasName));
const recorder = createUpstreamLatencyRecorder();
const providerResult = await candidate.binding.provider.callChatCompletions(
candidate.binding.upstreamModel,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
index 747a93de5..62b814359 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
@@ -23,6 +23,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/http.ts b/packages/gateway/src/data-plane/chat/chat-completions/http.ts
index a46d537ed..a22a86543 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/http.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/http.ts
@@ -3,7 +3,7 @@ import { chatCompletionsServe } from './serve.ts';
import type { AuthedContext } from '../../../middleware/auth.ts';
import { inboundHeadersForUpstream } from '../../shared/inbound-headers.ts';
import { createNonResponsesSourceStore } from '../responses/items/store.ts';
-import { createGatewayCtxFromHono, type GatewayCtx } from '../shared/gateway-ctx.ts';
+import { createGatewayCtxFromHono, finalizeGatewayResponse, type GatewayCtx } from '../shared/gateway-ctx.ts';
import { readRequestBody, type RequestBody } from '../shared/request-body.ts';
import { providerModelsUnavailableResponse } from '../shared/upstream-models-error.ts';
import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
@@ -24,7 +24,7 @@ const respondWithInternalError = async (c: AuthedContext, error: unknown, reques
const effectiveCtx = ctx ?? createGatewayCtxFromHono(c, { wantsStream: false, requestBody });
const result = internalErrorResult(502, toInternalDebugError(error));
const { response } = await respondChatCompletions(c, result, false, false, effectiveCtx);
- return (effectiveCtx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(effectiveCtx, response);
};
export const chatCompletionsHttp = {
@@ -44,7 +44,7 @@ export const chatCompletionsHttp = {
const store = createNonResponsesSourceStore(ctx.apiKeyId);
const result = await chatCompletionsServe.generate({ payload, ctx, store, headers: inboundHeadersForUpstream(c) });
const { response } = await respondChatCompletions(c, result, wantsStream, includeUsageChunk, ctx);
- return (ctx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(ctx, response);
} catch (error) {
return await respondWithInternalError(c, error, requestBody, ctx);
}
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
index 1d3c8252e..83d9bccb7 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
index c7f560cb3..156389a46 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 7ba1962fd..e46726510 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
index 1a28fef4a..e3e4147a2 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
index 0969e8d8e..0b6fed4f1 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
index d72f890f5..81be2c3ab 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
@@ -20,6 +20,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
index 74de17c38..1cfc304b7 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
index 4f7197da8..0506a1e25 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
index 5d27541f9..1347dd6bd 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
@@ -2,6 +2,7 @@ import { chatCompletionsAttempt } from './attempt.ts';
import { renderChatCompletionsFailure } from './errors.ts';
import { planChatCompletionsRouting } from './routing.ts';
import { getRepo } from '../../../repo/index.ts';
+import { applyAliasRulesToChatCompletions } from '../../model-aliases/apply.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
import { enumerateProviderCandidates } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -47,6 +48,11 @@ export const chatCompletionsServe = {
: { kind: 'model-missing', model: payload.model, failedUpstreams },
);
}
+ // Apply operator-locked alias rules to the inbound IR before the
+ // attempt runs its interceptor chain. The matching `x-floway-alias`
+ // header rides out via ctx.responseHeaders.
+ if (candidate.aliasRules) applyAliasRulesToChatCompletions(payload, candidate.aliasRules);
+ if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
return await chatCompletionsAttempt.generate({ payload, ctx, store, candidate, headers });
},
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
index 616b2ba66..402803203 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
@@ -49,6 +49,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/gemini/attempt.ts b/packages/gateway/src/data-plane/chat/gemini/attempt.ts
index 1d120ca00..880b0d62a 100644
--- a/packages/gateway/src/data-plane/chat/gemini/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/attempt.ts
@@ -6,7 +6,7 @@ import { chatCompletionsAttempt } from '../chat-completions/attempt.ts';
import { messagesAttempt } from '../messages/attempt.ts';
import { responsesAttempt } from '../responses/attempt.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
+import type { ChatCandidate } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
import { traverseTranslation } from '../shared/translate-traverse.ts';
import { runInterceptors } from '@floway-dev/interceptor';
@@ -19,7 +19,7 @@ export interface GeminiAttemptGenerateArgs {
readonly payload: GeminiPayload;
readonly ctx: GatewayCtx;
readonly store: StatefulResponsesStore;
- readonly candidate: ProviderCandidate;
+ readonly candidate: ChatCandidate;
readonly headers: Headers;
}
@@ -27,7 +27,7 @@ export interface GeminiAttemptCountTokensArgs {
readonly payload: GeminiPayload;
readonly ctx: GatewayCtx;
readonly store: StatefulResponsesStore;
- readonly candidate: ProviderCandidate;
+ readonly candidate: ChatCandidate;
readonly headers: Headers;
}
diff --git a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
index 9d08e557f..29a4e9bc5 100644
--- a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
@@ -24,6 +24,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/gemini/http.ts b/packages/gateway/src/data-plane/chat/gemini/http.ts
index 35a6e921f..8126d13e7 100644
--- a/packages/gateway/src/data-plane/chat/gemini/http.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/http.ts
@@ -3,7 +3,7 @@ import { geminiServe } from './serve.ts';
import type { AuthedContext } from '../../../middleware/auth.ts';
import { inboundHeadersForUpstream } from '../../shared/inbound-headers.ts';
import { createNonResponsesSourceStore } from '../responses/items/store.ts';
-import { createGatewayCtxFromHono, type GatewayCtx } from '../shared/gateway-ctx.ts';
+import { createGatewayCtxFromHono, finalizeGatewayResponse, type GatewayCtx } from '../shared/gateway-ctx.ts';
import { readRequestBody, type RequestBody } from '../shared/request-body.ts';
import type { GeminiContent, GeminiPayload } from '@floway-dev/protocols/gemini';
import { internalErrorResult, ProviderModelsUnavailableError, toInternalDebugError } from '@floway-dev/provider';
@@ -66,11 +66,11 @@ const respondWithGeminiError = async (
body: new TextEncoder().encode(body),
};
const { response } = await respondGemini(c, apiErrorResult, wantsStream, ctx);
- return (ctx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(ctx, response);
}
const internalResult = internalErrorResult(500, toInternalDebugError(error));
const { response } = await respondGemini(c, internalResult, wantsStream, ctx);
- return (ctx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(ctx, response);
};
// Single entry for `/v1beta/models/:modelAction`. Splits the model and action
@@ -97,7 +97,7 @@ const runGeminiGenerate = async (c: AuthedContext, model: string, wantsStream: b
try {
const result = await geminiServe.generate({ payload, ctx, store, model, headers: inboundHeadersForUpstream(c) });
const { response } = await respondGemini(c, result, wantsStream, ctx);
- return (ctx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(ctx, response);
} catch (error) {
return await respondWithGeminiError(c, error, ctx, wantsStream);
}
@@ -113,7 +113,7 @@ const runGeminiCountTokens = async (c: AuthedContext, model: string): Promise {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
index 6a4608cf4..3b02b63f8 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
index 7cbfc4593..6a2c20ef7 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
index e526bb9e3..eb67a0092 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
index a5c887c14..31981b544 100644
--- a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
@@ -26,6 +26,7 @@ const ctx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts
index 840da62c7..e1f61d628 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts
@@ -2,6 +2,7 @@ import { geminiAttempt } from './attempt.ts';
import { renderGeminiFailure } from './errors.ts';
import { planGeminiRouting } from './routing.ts';
import { getRepo } from '../../../repo/index.ts';
+import { applyAliasRulesToGemini } from '../../model-aliases/apply.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
import { enumerateProviderCandidates } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -58,6 +59,11 @@ export const geminiServe = {
'generate',
);
}
+ // Operator-locked alias rules apply to the Gemini IR before the attempt
+ // runs; the matching `x-floway-alias` header rides out via
+ // ctx.responseHeaders.
+ if (candidate.aliasRules) applyAliasRulesToGemini(payload, candidate.aliasRules);
+ if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
return await geminiAttempt.generate({ payload, ctx, store, candidate, headers });
},
@@ -90,6 +96,8 @@ export const geminiServe = {
'countTokens',
);
}
+ if (candidate.aliasRules) applyAliasRulesToGemini(payload, candidate.aliasRules);
+ if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
return await geminiAttempt.countTokens({ payload, ctx, store, candidate, headers });
},
};
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
index 42945a6d8..070d44471 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
@@ -48,6 +48,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/messages/attempt.ts b/packages/gateway/src/data-plane/chat/messages/attempt.ts
index e67387b85..3dcde67c7 100644
--- a/packages/gateway/src/data-plane/chat/messages/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/messages/attempt.ts
@@ -6,10 +6,11 @@ import { responsesAttempt } from '../responses/attempt.ts';
import { rewriteStoredResponsesItemsForCandidate } from '../responses/items/rewrite.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
import { providerStreamResultToExecuteResult, buildUpstreamCallOptions } from '../shared/attempt-helpers.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
+import type { ChatCandidate } from '../shared/candidates.ts';
import { tryCatchChatServeFailure } from '../shared/errors.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
import { plainResultFromResponse } from '../shared/respond.ts';
+import { sanitizeForMessagesUpstream, createSanitizeTraceCtx } from '../shared/sanitize.ts';
import { traverseTranslation } from '../shared/translate-traverse.ts';
import { createUpstreamLatencyRecorder } from '../shared/upstream-telemetry.ts';
import { runInterceptors } from '@floway-dev/interceptor';
@@ -17,13 +18,14 @@ import type { ProtocolFrame } from '@floway-dev/protocols/common';
import type { MessagesMessage, MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
import { type ExecuteResult, type PlainResult } from '@floway-dev/provider';
import { translateMessagesViaChatCompletions, translateMessagesViaResponses } from '@floway-dev/translate';
+import { applyAnthropicBetaToHeaders } from '@floway-dev/translate/via-messages/anthropic-extensions';
import { messagesViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
export interface MessagesAttemptGenerateArgs {
readonly payload: MessagesPayload;
readonly ctx: GatewayCtx;
readonly store: StatefulResponsesStore;
- readonly candidate: ProviderCandidate;
+ readonly candidate: ChatCandidate;
readonly headers: Headers;
}
@@ -31,7 +33,7 @@ export interface MessagesAttemptCountTokensArgs {
readonly payload: MessagesPayload;
readonly ctx: GatewayCtx;
readonly store: StatefulResponsesStore;
- readonly candidate: ProviderCandidate;
+ readonly candidate: ChatCandidate;
readonly headers: Headers;
}
@@ -48,12 +50,21 @@ export const messagesAttempt = {
return await runInterceptors(invocation, ctx, messagesInterceptors, async () => {
if (candidate.targetApi === 'messages') {
const { model: _model, ...body } = invocation.payload;
+ // The candidate's `anthropic_beta` alias rule merges onto the
+ // anthropic-beta header (the wire path; the body slot is rejected
+ // by the http entry). Body extensions are stripped just before the
+ // upstream call, after every interceptor has had its say.
+ const outgoingHeaders = new Headers(invocation.headers);
+ if (candidate.aliasRules?.anthropicBeta?.length) {
+ applyAnthropicBetaToHeaders(outgoingHeaders, candidate.aliasRules.anthropicBeta);
+ }
+ sanitizeForMessagesUpstream(body as Record<string, unknown>, createSanitizeTraceCtx(candidate.aliasName));
const recorder = createUpstreamLatencyRecorder();
const providerResult = await candidate.binding.provider.callMessages(
candidate.binding.upstreamModel,
body,
ctx.abortSignal,
- buildUpstreamCallOptions(candidate, ctx, recorder.record, invocation.headers),
+ buildUpstreamCallOptions(candidate, ctx, recorder.record, outgoingHeaders),
);
return await providerStreamResultToExecuteResult(providerResult, candidate, ctx, recorder);
}
@@ -98,11 +109,16 @@ export const messagesAttempt = {
const recorder = createUpstreamLatencyRecorder();
const response = await runInterceptors(invocation, ctx, messagesCountTokensInterceptors, async () => {
const { model: _model, ...body } = invocation.payload;
+ const outgoingHeaders = new Headers(invocation.headers);
+ if (candidate.aliasRules?.anthropicBeta?.length) {
+ applyAnthropicBetaToHeaders(outgoingHeaders, candidate.aliasRules.anthropicBeta);
+ }
+ sanitizeForMessagesUpstream(body as Record<string, unknown>, createSanitizeTraceCtx(candidate.aliasName));
const { response } = await candidate.binding.provider.callMessagesCountTokens(
candidate.binding.upstreamModel,
body,
ctx.abortSignal,
- buildUpstreamCallOptions(candidate, ctx, recorder.record, invocation.headers),
+ buildUpstreamCallOptions(candidate, ctx, recorder.record, outgoingHeaders),
);
return response;
});
@@ -124,7 +140,7 @@ export const messagesAttempt = {
const rewriteOrRenderMessagesFailure = async (
payload: MessagesPayload,
store: StatefulResponsesStore,
- candidate: ProviderCandidate,
+ candidate: ChatCandidate,
): Promise<{ payload: MessagesPayload; failure?: undefined } | { payload?: undefined; failure: ExecuteResult<ProtocolFrame<MessagesStreamEvent>> & { type: 'api-error' } }> => {
try {
const rewrittenMessages = await rewriteStoredResponsesItemsForCandidate(
diff --git a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
index 2cd89323e..f9192e289 100644
--- a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
@@ -23,6 +23,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/messages/http.ts b/packages/gateway/src/data-plane/chat/messages/http.ts
index 8dfc4dc6e..b138a9ccf 100644
--- a/packages/gateway/src/data-plane/chat/messages/http.ts
+++ b/packages/gateway/src/data-plane/chat/messages/http.ts
@@ -3,7 +3,7 @@ import { messagesServe } from './serve.ts';
import type { AuthedContext } from '../../../middleware/auth.ts';
import { inboundHeadersForUpstream } from '../../shared/inbound-headers.ts';
import { createNonResponsesSourceStore } from '../responses/items/store.ts';
-import { createGatewayCtxFromHono, type GatewayCtx } from '../shared/gateway-ctx.ts';
+import { createGatewayCtxFromHono, finalizeGatewayResponse, type GatewayCtx } from '../shared/gateway-ctx.ts';
import { readRequestBody, type RequestBody } from '../shared/request-body.ts';
import { providerModelsUnavailableResponse } from '../shared/upstream-models-error.ts';
import type { MessagesPayload } from '@floway-dev/protocols/messages';
@@ -44,7 +44,7 @@ const respondWithInternalError = async (c: AuthedContext, error: unknown, reques
const effectiveCtx = ctx ?? createGatewayCtxFromHono(c, { wantsStream: false, requestBody });
const result = internalErrorResult(502, toInternalDebugError(error));
const { response } = await respondMessages(c, result, false, effectiveCtx);
- return (effectiveCtx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(effectiveCtx, response);
};
const parsePayload = (requestBody: RequestBody): MessagesPayload =>
@@ -64,7 +64,7 @@ export const messagesHttp = {
const store = createNonResponsesSourceStore(ctx.apiKeyId);
const result = await messagesServe.generate({ payload, ctx, store, headers: inboundHeadersForUpstream(c) });
const { response } = await respondMessages(c, result, wantsStream, ctx);
- return (ctx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(ctx, response);
} catch (error) {
return await respondWithInternalError(c, error, requestBody, ctx);
}
@@ -82,7 +82,7 @@ export const messagesHttp = {
const store = createNonResponsesSourceStore(ctx.apiKeyId);
const result = await messagesServe.countTokens({ payload, ctx, store, headers: inboundHeadersForUpstream(c) });
const { response } = await respondMessages(c, result, false, ctx);
- return (ctx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(ctx, response);
} catch (error) {
return await respondWithInternalError(c, error, requestBody, ctx);
}
diff --git a/packages/gateway/src/data-plane/chat/messages/http_test.ts b/packages/gateway/src/data-plane/chat/messages/http_test.ts
index 844a05177..cf32509d2 100644
--- a/packages/gateway/src/data-plane/chat/messages/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/http_test.ts
@@ -5,13 +5,13 @@ import type { AuthVars } from '../../../middleware/auth.ts';
import { initRepo } from '../../../repo/index.ts';
import { InMemoryRepo } from '../../../repo/memory.ts';
import type { ApiKey, User } from '../../../repo/types.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
+import type { ChatCandidate } from '../shared/candidates.ts';
import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
import type { MessagesStreamEvent } from '@floway-dev/protocols/messages';
import { directFetcher, type ProviderCallResult, type ProviderStreamResult, type UpstreamCallOptions } from '@floway-dev/provider';
import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
-const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
+const candidatesQueue: { readonly candidates: readonly ChatCandidate[]; readonly sawModel: boolean }[] = [];
vi.mock('../shared/candidates.ts', async importOriginal => {
const original = await importOriginal();
return {
@@ -28,7 +28,7 @@ const { messagesHttp } = await import('./http.ts');
const API_KEY_ID = 'key_messages_http_test';
-const queueCandidates = (candidates: readonly ProviderCandidate[], sawModel = candidates.length > 0): void => {
+const queueCandidates = (candidates: readonly ChatCandidate[], sawModel = candidates.length > 0): void => {
candidatesQueue.push({ candidates, sawModel });
};
@@ -104,7 +104,7 @@ const makeCandidate = (overrides: {
upstream?: string;
callMessages?: (model: unknown, body: unknown, signal?: AbortSignal, opts?: UpstreamCallOptions) => Promise<ProviderStreamResult<MessagesStreamEvent>>;
callMessagesCountTokens?: (model: unknown, body: unknown, signal?: AbortSignal, opts?: UpstreamCallOptions) => Promise;
-} = {}): ProviderCandidate => {
+} = {}): ChatCandidate => {
const upstream = overrides.upstream ?? 'up_test';
const upstreamModel = stubUpstreamModel();
const provider = stubProvider({
@@ -272,3 +272,60 @@ test('POST /v1/messages forwards upstream response headers end-to-end (non-strea
assertEquals(response.headers.get('anthropic-ratelimit-unified-status'), 'allowed');
assertEquals(response.headers.get('cf-ray'), 'cf_ray_e2e');
});
+
+test('POST /v1/messages stamps x-floway-alias when the candidate is alias-matched', async () => {
+  installRepo();
+  const callMessages = vi.fn(async (): Promise<ProviderStreamResult<MessagesStreamEvent>> => ({
+    ok: true, events: makeProtocolFrames(makeMessagesEvents()), modelKey: 'k', headers: new Headers(),
+  }));
+  const candidate = makeCandidate({ callMessages });
+  queueCandidates([{ ...candidate, aliasRules: { reasoning: { effort: 'low' } }, aliasName: 'codex-auto-review' }]);
+  // The queued candidate carries an aliasName; the HTTP response must echo it back as x-floway-alias.
+  const response = await makeApp().request('/v1/messages', {
+    method: 'POST',
+    headers: { 'content-type': 'application/json' },
+    body: JSON.stringify({ model: 'codex-auto-review', max_tokens: 32, messages: [{ role: 'user', content: 'hello' }] }),
+  });
+
+  assertEquals(response.status, 200);
+  assertEquals(response.headers.get('x-floway-alias'), 'codex-auto-review');
+});
+
+test('POST /v1/messages does not set x-floway-alias when no alias matched', async () => {
+  installRepo();
+  const callMessages = vi.fn(async (): Promise<ProviderStreamResult<MessagesStreamEvent>> => ({
+    ok: true, events: makeProtocolFrames(makeMessagesEvents()), modelKey: 'k', headers: new Headers(),
+  }));
+  queueCandidates([makeCandidate({ callMessages })]);
+  // Plain candidate: no aliasName / aliasRules attached, so the header must be absent (null), not empty.
+  const response = await makeApp().request('/v1/messages', {
+    method: 'POST',
+    headers: { 'content-type': 'application/json' },
+    body: JSON.stringify({ model: 'test-model', max_tokens: 32, messages: [{ role: 'user', content: 'hello' }] }),
+  });
+
+  assertEquals(response.status, 200);
+  assertEquals(response.headers.get('x-floway-alias'), null);
+});
+
+test('POST /v1/messages applies alias reasoning.effort onto output_config before upstream call', async () => {
+  installRepo();
+  const observedBodies: { output_config?: { effort?: string } }[] = [];
+  const callMessages = vi.fn(async (_model: unknown, body: unknown): Promise<ProviderStreamResult<MessagesStreamEvent>> => {
+    observedBodies.push(body as { output_config?: { effort?: string } });
+    return { ok: true, events: makeProtocolFrames(makeMessagesEvents()), modelKey: 'k', headers: new Headers() };
+  });
+  const candidate = makeCandidate({ callMessages });
+  queueCandidates([{ ...candidate, aliasRules: { reasoning: { effort: 'high' } }, aliasName: 'alias-x' }]);
+  // The inbound body sets no effort; whatever the stub provider records below was injected from the alias rules.
+  const response = await makeApp().request('/v1/messages', {
+    method: 'POST',
+    headers: { 'content-type': 'application/json' },
+    body: JSON.stringify({ model: 'alias-x', max_tokens: 32, messages: [{ role: 'user', content: 'hello' }] }),
+  });
+
+  assertEquals(response.status, 200);
+  const observed = observedBodies[0];
+  if (observed === undefined) throw new Error('expected callMessages to receive a body');
+  assertEquals(observed.output_config?.effort, 'high');
+});
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
index 7fcb07153..9df67c5c0 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 7ed0ca556..7b7045355 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
index 3c74ea4a9..8ae90e232 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
index 27c5cad83..dca97addd 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
@@ -58,6 +58,7 @@ const gatewayCtx = (apiKeyId: string = 'test-key'): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/messages/respond_test.ts b/packages/gateway/src/data-plane/chat/messages/respond_test.ts
index dfd3b10e3..79d0a9db6 100644
--- a/packages/gateway/src/data-plane/chat/messages/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/respond_test.ts
@@ -534,6 +534,7 @@ const makeRespondCtx = (): GatewayCtx => ({
wantsStream: false,
runtimeLocation: 'TEST',
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
currentColo: 'TEST',
dump: null,
diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts
index 719091768..30282afd4 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve.ts
@@ -2,6 +2,7 @@ import { messagesAttempt } from './attempt.ts';
import { renderMessagesFailure } from './errors.ts';
import { planMessagesRouting } from './routing.ts';
import { getRepo } from '../../../repo/index.ts';
+import { applyAliasRulesToMessages } from '../../model-aliases/apply.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
import { enumerateProviderCandidates } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -55,6 +56,13 @@ export const messagesServe = {
'generate',
);
}
+ // Operator-locked alias rules go onto the inbound IR before the attempt
+ // begins so the per-protocol interceptor chain (and any downstream
+ // translate pass) sees the already-injected fields. The matching
+ // `x-floway-alias` response header is staged on the gateway-stamped
+ // header set; the http wrapper flushes it onto the outgoing Response.
+ if (candidate.aliasRules) applyAliasRulesToMessages(payload, candidate.aliasRules);
+ if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
return await messagesAttempt.generate({ payload, ctx, store, candidate, headers });
},
@@ -84,6 +92,11 @@ export const messagesServe = {
'countTokens',
);
}
+ // count_tokens carries the same alias semantics as generate — operator
+ // rules apply uniformly regardless of endpoint, and the response header
+ // rides out the same way.
+ if (candidate.aliasRules) applyAliasRulesToMessages(payload, candidate.aliasRules);
+ if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
return await messagesAttempt.countTokens({ payload, ctx, store, candidate, headers });
},
};
diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
index 3bf0faace..734bad296 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
@@ -46,6 +46,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/responses/attempt.ts b/packages/gateway/src/data-plane/chat/responses/attempt.ts
index db7220990..c0fb6a902 100644
--- a/packages/gateway/src/data-plane/chat/responses/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/responses/attempt.ts
@@ -10,9 +10,10 @@ import { recordPerformanceLatency, requireRecordedDurationMs } from '../../share
import { chatCompletionsAttempt } from '../chat-completions/attempt.ts';
import { messagesAttempt } from '../messages/attempt.ts';
import { providerStreamResultToExecuteResult, buildUpstreamCallOptions, telemetryModelIdentity } from '../shared/attempt-helpers.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
+import type { ChatCandidate } from '../shared/candidates.ts';
import { tryCatchChatServeFailure } from '../shared/errors.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
+import { createSanitizeTraceCtx, sanitizeForResponsesUpstream } from '../shared/sanitize.ts';
import { traverseTranslation } from '../shared/translate-traverse.ts';
import { createUpstreamLatencyRecorder, recordUpstreamHttpFailure, upstreamPerformanceContext } from '../shared/upstream-telemetry.ts';
import { runInterceptors } from '@floway-dev/interceptor';
@@ -26,7 +27,7 @@ export interface ResponsesAttemptGenerateArgs {
readonly payload: ResponsesPayload;
readonly ctx: GatewayCtx;
readonly store: StatefulResponsesStore;
- readonly candidate: ProviderCandidate;
+ readonly candidate: ChatCandidate;
// Native HTTP/WS entry passes 'append'; the cross-protocol translation-in
// path (another protocol's attempt translating into Responses) passes
// 'none' so the outer source owns snapshot persistence.
@@ -38,7 +39,7 @@ export interface ResponsesAttemptCompactArgs {
readonly payload: ResponsesPayload;
readonly ctx: GatewayCtx;
readonly store: StatefulResponsesStore;
- readonly candidate: ProviderCandidate;
+ readonly candidate: ChatCandidate;
readonly headers: Headers;
}
@@ -148,7 +149,7 @@ type RewriteOutcome =
const rewriteOrRenderFailure = async (
payload: ResponsesPayload,
store: StatefulResponsesStore,
- candidate: ProviderCandidate,
+ candidate: ChatCandidate,
): Promise => {
try {
return await rewriteResponsesItemsForCandidate(payload, store, candidate);
@@ -185,12 +186,13 @@ const dispatchResponses = async (
payload: ResponsesPayload,
ctx: GatewayCtx,
store: StatefulResponsesStore,
- candidate: ProviderCandidate,
+ candidate: ChatCandidate,
headers: Headers,
): Promise>> => {
switch (candidate.targetApi) {
case 'responses': {
const { model: _model, ...body } = payload;
+ sanitizeForResponsesUpstream(body as Record, createSanitizeTraceCtx(candidate.aliasName));
const recorder = createUpstreamLatencyRecorder();
const providerResult = await candidate.binding.provider.callResponses(
candidate.binding.upstreamModel,
@@ -236,10 +238,11 @@ const dispatchResponses = async (
const callResponsesCompactAsExecuteResult = async (
payload: ResponsesPayload,
ctx: GatewayCtx,
- candidate: ProviderCandidate,
+ candidate: ChatCandidate,
headers: Headers,
): Promise>> => {
const { model: _model, stream: _stream, store: _store, ...body } = payload;
+ sanitizeForResponsesUpstream(body as Record, createSanitizeTraceCtx(candidate.aliasName));
const recorder = createUpstreamLatencyRecorder();
const providerResult = await candidate.binding.provider.callResponsesCompact(
candidate.binding.upstreamModel,
diff --git a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
index d698b5189..a952627b4 100644
--- a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
@@ -25,6 +25,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/responses/http.ts b/packages/gateway/src/data-plane/chat/responses/http.ts
index dc6e1694b..54c497a0a 100644
--- a/packages/gateway/src/data-plane/chat/responses/http.ts
+++ b/packages/gateway/src/data-plane/chat/responses/http.ts
@@ -5,7 +5,7 @@ import { responsesServe } from './serve.ts';
import type { AuthedContext } from '../../../middleware/auth.ts';
import { CODEX_AUTO_REVIEW_ALIAS, CODEX_AUTO_REVIEW_TARGET } from '../../codex/auto-review-alias.ts';
import { inboundHeadersForUpstream } from '../../shared/inbound-headers.ts';
-import { createGatewayCtxFromHono, type GatewayCtx } from '../shared/gateway-ctx.ts';
+import { createGatewayCtxFromHono, finalizeGatewayResponse, type GatewayCtx } from '../shared/gateway-ctx.ts';
import { readRequestBody, type RequestBody } from '../shared/request-body.ts';
import { providerModelsUnavailableResponse } from '../shared/upstream-models-error.ts';
import type { ResponsesPayload } from '@floway-dev/protocols/responses';
@@ -57,7 +57,7 @@ const respondWithInternalError = async (c: AuthedContext, error: unknown, reques
const effectiveCtx = ctx ?? createGatewayCtxFromHono(c, { wantsStream: false, requestBody });
const result = internalErrorResult(502, toInternalDebugError(error));
const { response } = await respondResponses(c, result, false, effectiveCtx);
- return (effectiveCtx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(effectiveCtx, response);
};
const parsePayload = (requestBody: RequestBody, stampReasoningEffort: boolean): ResponsesPayload =>
@@ -74,12 +74,12 @@ export const responsesHttp = {
const store = createResponsesHttpStore(ctx.apiKeyId, payload.store ?? undefined);
const result = await responsesServe.generate({ payload, ctx, store, snapshotMode: payload.store === false ? 'none' : 'append', headers: inboundHeadersForUpstream(c) });
const { response } = await respondResponses(c, result, wantsStream, ctx);
- return (ctx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(ctx, response);
} catch (error) {
if (error instanceof PreviousResponseNotFoundError) {
const response = previousResponseNotFoundResponse(error.previousResponseId);
ctx?.dump?.error('gateway');
- return (ctx?.dump?.finalize(response) ?? response);
+ return ctx ? finalizeGatewayResponse(ctx, response) : response;
}
return await respondWithInternalError(c, error, requestBody, ctx);
}
@@ -96,15 +96,15 @@ export const responsesHttp = {
if (result.type === 'result') {
ctx.dump?.success(result.modelIdentity, result.usage);
const compactResponse = Response.json(result.result);
- return (ctx.dump?.finalize(compactResponse) ?? compactResponse);
+ return finalizeGatewayResponse(ctx, compactResponse);
}
const { response } = await respondResponses(c, result, false, ctx);
- return (ctx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(ctx, response);
} catch (error) {
if (error instanceof PreviousResponseNotFoundError) {
const response = previousResponseNotFoundResponse(error.previousResponseId);
ctx?.dump?.error('gateway');
- return (ctx?.dump?.finalize(response) ?? response);
+ return ctx ? finalizeGatewayResponse(ctx, response) : response;
}
return await respondWithInternalError(c, error, requestBody, ctx);
}
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
index de97faf65..f904f85d5 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
index 0a705fd12..a193d01db 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
index e32752001..ae1fc3970 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 7c4ee2f6b..dcddbd6c8 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
index 98fa7fdcb..3cf947b01 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
@@ -45,6 +45,7 @@ const stubCtx = (overrides: { abortSignal?: AbortSignal } = {}): GatewayCtx => (
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
...overrides,
});
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
index f9d5cf7b2..6688dcec7 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
@@ -348,6 +348,7 @@ const makeGatewayCtx = (apiKeyId: string = 'k1'): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
});
@@ -4496,6 +4497,7 @@ test('downstream AbortSignal threads through to provider search / fetchPage and
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
abortSignal: controller.signal,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
index 908b18489..46e30b043 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
@@ -144,6 +144,7 @@ const gatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts
index a66fe9995..77904bb63 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts
@@ -1,4 +1,5 @@
import { createPerRequestFetcher } from '../../../../../dial/per-request.ts';
+import { getRepo } from '../../../../../repo/index.ts';
import { sleep } from '../../../../../shared/sleep.ts';
import { resolveModelForRequest } from '../../../../providers/registry.ts';
import { appendFailedUpstreams } from '../../../../shared/failed-upstreams.ts';
@@ -535,7 +536,13 @@ const resolveImageBinding = async (
const endpointPath = isEdit ? '/images/edits' : '/images/generations';
let resolution;
try {
- resolution = await resolveModelForRequest(state.config.model, state.upstreamIds, fetcherForUpstream, state.backgroundScheduler);
+ // The image-generation server-tool runs inside a Responses request; the
+ // outer request's matched alias (if any) has already stamped the
+ // response header. Threading aliases here keeps the second
+ // resolveModelForRequest (for the image tool's own model id) consistent
+ // with how the outer LLM call resolved its candidate.
+ const aliases = await getRepo().modelAliases.loadAll();
+ resolution = await resolveModelForRequest(state.config.model, state.upstreamIds, fetcherForUpstream, state.backgroundScheduler, aliases);
} catch (e) {
return { ok: false, error: serverError(e) };
}
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
index 515b4ef6c..da94b3068 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
@@ -56,6 +56,7 @@ const gatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
index fbed0749c..7db1b6360 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
index 45de471f4..23afe8462 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/serve.ts b/packages/gateway/src/data-plane/chat/responses/serve.ts
index bed8b6df3..e66a2a29a 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve.ts
@@ -2,6 +2,7 @@ import { responsesAttempt } from './attempt.ts';
import type { ResponsesAttemptResult } from './interceptors/types.ts';
import type { ResponsesSnapshotMode, StatefulResponsesStore } from './items/store.ts';
import { prepareResponsesServePlan } from './serve-prep.ts';
+import { applyAliasRulesToResponses } from '../../model-aliases/apply.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
import type { ProtocolFrame } from '@floway-dev/protocols/common';
import type { ResponsesPayload, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
@@ -46,6 +47,11 @@ export const responsesServe = {
: null,
});
if (plan.kind === 'failure') return plan.result;
+ // Operator-locked alias rules apply to the prepared inbound IR before
+ // the attempt runs; the `x-floway-alias` header rides out via
+ // ctx.responseHeaders.
+ if (plan.candidate.aliasRules) applyAliasRulesToResponses(plan.prepared, plan.candidate.aliasRules);
+ if (plan.candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', plan.candidate.aliasName);
const effectiveSnapshotMode: ResponsesSnapshotMode = snapshotMode !== 'none' && containsCompactionTrigger(plan.prepared.input)
? 'replace'
: snapshotMode;
@@ -62,6 +68,12 @@ export const responsesServe = {
pickTarget: endpoints => endpoints.responses ? 'responses' : null,
});
if (plan.kind === 'failure') return plan.result;
+ // Alias rules also apply on the compact path. The upstream compact
+ // endpoint silently drops fields like `reasoning` it does not honor;
+ // applying uniformly keeps the operator's intent expressed at the
+ // inbound boundary regardless of which endpoint runs.
+ if (plan.candidate.aliasRules) applyAliasRulesToResponses(plan.prepared, plan.candidate.aliasRules);
+ if (plan.candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', plan.candidate.aliasName);
return await responsesAttempt.compact({ payload: plan.prepared, ctx, store, candidate: plan.candidate, headers });
},
};
diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
index 389771d69..07369504d 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
@@ -57,6 +57,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
index 21f14d66d..0e199e403 100644
--- a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
+++ b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
@@ -21,10 +21,17 @@ export interface GatewayCtx {
// provider-call boundary.
readonly runtimeLocation: string;
readonly currentColo: string;
- // Null when the api key has no retention configured, in which case the
- // respond layer's `ctx.dump?.X(...)` calls collapse to no-ops and
- // `ctx.dump?.finalize(response) ?? response` returns the response unchanged.
+ // Null when the api key has no retention configured, in which case
+ // `finalizeGatewayResponse` skips the dump tee and returns the response
+ // with only the staged `responseHeaders` applied.
readonly dump: DumpAccumulator | null;
+ // Per-request response-header staging. The data-plane writes alias-aware
+ // and similar non-upstream headers here mid-request; the inbound HTTP
+ // wrapper merges them onto the final outgoing Response before
+ // `dump?.finalize`. Mutable on purpose — the serve layer owns the
+ // chosen candidate and is the right seam for stamping the
+ // `x-floway-alias` header.
+ readonly responseHeaders: Headers;
}
export interface CreateGatewayCtxOptions {
@@ -70,5 +77,15 @@ export const createGatewayCtxFromHono = (c: AuthedContext, opts: CreateGatewayCt
runtimeLocation: colo,
currentColo: colo,
dump,
+ responseHeaders: new Headers(),
};
};
+
+// Copies every header staged on `ctx.responseHeaders` onto `response` in place (`set`, so a staged
+// value replaces a same-named header), then returns `ctx.dump.finalize(response)` — or `response`
+// itself when there is no dump (or finalize yields nothing). Inbound HTTP wrappers return through this seam so alias and
+// other gateway-stamped headers ride out uniformly. NOTE(review): `set` throws on immutable headers — confirm callers never pass one.
+export const finalizeGatewayResponse = (ctx: GatewayCtx, response: Response): Response => {
+ for (const [name, value] of ctx.responseHeaders) response.headers.set(name, value);
+ return ctx.dump?.finalize(response) ?? response;
+};
diff --git a/packages/gateway/src/data-plane/chat/shared/respond_test.ts b/packages/gateway/src/data-plane/chat/shared/respond_test.ts
index e57f52ae4..fd506b083 100644
--- a/packages/gateway/src/data-plane/chat/shared/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/respond_test.ts
@@ -45,6 +45,7 @@ const setup = (): Harness => {
dump: null,
backgroundScheduler: promise => { background.push(promise); },
requestStartedAt,
+ responseHeaders: new Headers(),
}),
};
};
diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize.ts b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
index 918156d16..832f8f41d 100644
--- a/packages/gateway/src/data-plane/chat/shared/sanitize.ts
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
@@ -5,6 +5,16 @@ export interface SanitizeTraceCtx {
readonly emit: (line: { alias?: string; field: string; targetProtocol: string }) => void;
}
+// Builds the default per-request sanitize trace ctx. `emit` logs each line it
+// is handed through `console.warn`, as the tag 'floway.alias.drop' followed by
+// the JSON-serialized line. `aliasName` is copied onto the ctx only when
+// defined, so a drop with no matched alias (residue from a client-sent extension) is still logged, just
+// without alias attribution (presumably the sanitizer forwards it as `line.alias` — verify against stripKeys).
+export const createSanitizeTraceCtx = (aliasName: string | undefined): SanitizeTraceCtx => ({
+ ...(aliasName !== undefined ? { aliasName } : {}),
+ emit: line => console.warn('floway.alias.drop', JSON.stringify(line)),
+});
+
const stripKeys = (
body: Record,
keys: readonly string[],
diff --git a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
index d1d10a103..374cab4fe 100644
--- a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
@@ -24,6 +24,7 @@ const baseCtx = (overrides: Partial = {}): GatewayCtx => {
apiKeyId: 'key_1',
upstreamIds: null,
wantsStream: true,
+ responseHeaders: new Headers(),
requestStartedAt: 0,
runtimeLocation: 'TEST',
currentColo: 'TEST',
diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts
new file mode 100644
index 000000000..9a2bb5950
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/apply.ts
@@ -0,0 +1,105 @@
+import type { ModelAliasRules } from '../../control-plane/model-aliases/types.ts';
+import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
+import type { GeminiPayload } from '@floway-dev/protocols/gemini';
+import type { MessagesPayload, MessagesThinkingDisplay } from '@floway-dev/protocols/messages';
+import type { ResponsesPayload } from '@floway-dev/protocols/responses';
+import { mapSummaryToAnthropicDisplay } from '@floway-dev/translate/via-messages/anthropic-extensions';
+
+// Each function writes the alias rules into the inbound IR's slot best suited
+// to the host protocol: native when the protocol can express the concept,
+// extension otherwise. Writes overwrite any user-supplied value — aliases are
+// operator-locked per Goal 3. Mapping table is the single source of truth in
+// docs/superpowers/specs/2026-06-25-model-aliases-design.md.
+
+export const applyAliasRulesToChatCompletions = (payload: ChatCompletionsPayload, rules: ModelAliasRules): void => {
+ // Mutates `payload` in place, writing only the rule fields that are set. reasoning.effort is native;
+ // budget/adaptive/summary ride on extension slots because Chat Completions has no native expression for those.
+ if (rules.reasoning?.effort !== undefined) payload.reasoning_effort = rules.reasoning.effort;
+ if (rules.reasoning?.budgetTokens !== undefined) payload.thinking_budget = rules.reasoning.budgetTokens;
+ if (rules.reasoning?.adaptive === true) payload.adaptive_thinking = true;
+ if (rules.reasoning?.summary !== undefined) payload.reasoning_summary = rules.reasoning.summary;
+ if (rules.verbosity !== undefined) payload.verbosity = rules.verbosity;
+ if (rules.serviceTier !== undefined) payload.service_tier = rules.serviceTier;
+ if (rules.anthropicSpeed !== undefined) payload.anthropic_speed = rules.anthropicSpeed;
+ if (rules.anthropicBeta?.length) payload.anthropic_beta = [...rules.anthropicBeta];
+};
+
+export const applyAliasRulesToResponses = (payload: ResponsesPayload, rules: ModelAliasRules): void => {
+ // Mutates `payload` in place. reasoning.{effort, summary}, text.verbosity and service_tier are native —
+ // `reasoning` / `text` are rebuilt by spread, so sibling keys already on the payload survive; budget/adaptive ride
+ // on extension slots; the two anthropic_* knobs only matter when this Responses inbound lands on a Messages upstream.
+ if (rules.reasoning?.effort !== undefined) payload.reasoning = { ...payload.reasoning, effort: rules.reasoning.effort };
+ if (rules.reasoning?.summary !== undefined) payload.reasoning = { ...payload.reasoning, summary: rules.reasoning.summary };
+ if (rules.reasoning?.budgetTokens !== undefined) payload.thinking_budget = rules.reasoning.budgetTokens;
+ if (rules.reasoning?.adaptive === true) payload.adaptive_thinking = true;
+ if (rules.verbosity !== undefined) payload.text = { ...payload.text, verbosity: rules.verbosity };
+ if (rules.serviceTier !== undefined) payload.service_tier = rules.serviceTier;
+ if (rules.anthropicSpeed !== undefined) payload.anthropic_speed = rules.anthropicSpeed;
+ if (rules.anthropicBeta?.length) payload.anthropic_beta = [...rules.anthropicBeta];
+};
+
+export const applyAliasRulesToMessages = (payload: MessagesPayload, rules: ModelAliasRules): void => {
+ // Mutates `payload` in place. Anthropic has natives for effort, thinking, speed, and service_tier; only
+ // verbosity is a Floway extension on this inbound. anthropic_beta is the
+ // wire header — the attempt layer reads `candidate.aliasRules.anthropicBeta`
+ // and merges via mergeAnthropicBetaTokens, so we do not stamp the body here.
+ if (rules.reasoning?.effort !== undefined) {
+ payload.output_config = { ...payload.output_config, effort: rules.reasoning.effort };
+ }
+ // Adaptive wins over budgetTokens when both arrive: the write-side
+ // validator forbids the combination, but if both slip through this step
+ // still has to pick one, and the translate-layer policy is adaptive-first.
+ // Either branch replaces `payload.thinking` wholesale, dropping any client-sent value.
+ if (rules.reasoning?.adaptive === true) {
+ payload.thinking = { type: 'adaptive' };
+ } else if (rules.reasoning?.budgetTokens !== undefined) {
+ payload.thinking = { type: 'enabled', budget_tokens: rules.reasoning.budgetTokens };
+ }
+ if (rules.reasoning?.summary !== undefined) {
+ const display = mapSummaryToAnthropicDisplay(rules.reasoning.summary);
+ if (display !== undefined) {
+ // `display` is layered onto whatever `payload.thinking` holds at this point (the adaptive/budget
+ // result above, or a client-sent value — effort only touches `output_config`). When it is unset,
+ // synthesize `thinking: {type:'enabled', display}` so the operator's summary intent
+ // survives — Anthropic discards `display` without `type`. Matches `buildMessagesThinkingFromExtensions`.
+ const base = payload.thinking ?? { type: 'enabled' as const };
+ payload.thinking = { ...base, display: display as MessagesThinkingDisplay };
+ }
+ }
+ if (rules.verbosity !== undefined) payload.verbosity = rules.verbosity;
+ if (rules.serviceTier !== undefined) payload.service_tier = rules.serviceTier;
+ if (rules.anthropicSpeed !== undefined) payload.speed = rules.anthropicSpeed;
+};
+
+export const applyAliasRulesToGemini = (payload: GeminiPayload, rules: ModelAliasRules): void => {
+ // All four reasoning knobs ride on the native thinkingConfig (adaptive writes thinkingBudget = -1 last,
+ // so it wins over budgetTokens); verbosity and serviceTier ride on extension slots under generationConfig; the
+ // anthropic_* knobs ride on top-level extension slots so the existing gemini-via-messages translator
+ // picks them up there. generationConfig / thinkingConfig are shallow-copied before being written back onto `payload`.
+ const hasThinking = rules.reasoning?.effort !== undefined
+ || rules.reasoning?.budgetTokens !== undefined
+ || rules.reasoning?.adaptive === true
+ || rules.reasoning?.summary !== undefined;
+ const hasGenerationConfig = hasThinking || rules.verbosity !== undefined || rules.serviceTier !== undefined;
+
+ if (hasGenerationConfig) {
+ const generationConfig = { ...payload.generationConfig };
+ const thinkingConfig = { ...generationConfig.thinkingConfig };
+ if (rules.reasoning?.effort !== undefined) thinkingConfig.thinkingLevel = rules.reasoning.effort;
+ if (rules.reasoning?.budgetTokens !== undefined) thinkingConfig.thinkingBudget = rules.reasoning.budgetTokens;
+ if (rules.reasoning?.adaptive === true) thinkingConfig.thinkingBudget = -1;
+ if (rules.reasoning?.summary !== undefined) {
+ // Gemini exposes a single boolean for summary; map summary='omitted' to
+ // false and every other value (auto / concise / detailed / freeform) to
+ // true. Operators that want to fall back to Gemini's account default
+ // simply omit `reasoning.summary` from the rule.
+ thinkingConfig.includeThoughts = rules.reasoning.summary !== 'omitted';
+ }
+ if (hasThinking) generationConfig.thinkingConfig = thinkingConfig;
+ if (rules.verbosity !== undefined) generationConfig.verbosity = rules.verbosity;
+ if (rules.serviceTier !== undefined) generationConfig.serviceTier = rules.serviceTier;
+ payload.generationConfig = generationConfig;
+ }
+ if (rules.anthropicSpeed !== undefined) payload.anthropicSpeed = rules.anthropicSpeed;
+ if (rules.anthropicBeta?.length) payload.anthropicBeta = [...rules.anthropicBeta];
+};
diff --git a/packages/gateway/src/data-plane/model-aliases/apply_test.ts b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
new file mode 100644
index 000000000..e05f40c91
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
@@ -0,0 +1,218 @@
+import { describe, expect, test } from 'vitest';
+
+import {
+ applyAliasRulesToChatCompletions,
+ applyAliasRulesToGemini,
+ applyAliasRulesToMessages,
+ applyAliasRulesToResponses,
+} from './apply.ts';
+import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
+import type { GeminiPayload } from '@floway-dev/protocols/gemini';
+import type { MessagesPayload } from '@floway-dev/protocols/messages';
+import type { ResponsesPayload } from '@floway-dev/protocols/responses';
+
+// Minimal payload factories, one per protocol. The apply functions only touch
+// the alias-rule slots, so each factory fills just the fields its literal needs.
+const cc = (overrides: Partial<ChatCompletionsPayload> = {}): ChatCompletionsPayload => ({ model: 'x', messages: [], ...overrides });
+const resp = (overrides: Partial<ResponsesPayload> = {}): ResponsesPayload => ({ model: 'x', input: 'hi', ...overrides });
+const msg = (overrides: Partial<MessagesPayload> = {}): MessagesPayload => ({ model: 'x', messages: [], max_tokens: 1, ...overrides });
+const gem = (overrides: Partial<GeminiPayload> = {}): GeminiPayload => ({ ...overrides });
+
+describe('applyAliasRulesToChatCompletions', () => {
+  test('writes effort to native reasoning_effort and overrides user value', () => {
+    const body = cc({ reasoning_effort: 'low' });
+    applyAliasRulesToChatCompletions(body, { reasoning: { effort: 'high' } });
+    expect(body.reasoning_effort).toBe('high');
+  });
+
+  test('writes budgetTokens to extension thinking_budget', () => {
+    const body = cc();
+    applyAliasRulesToChatCompletions(body, { reasoning: { budgetTokens: 4096 } });
+    expect(body.thinking_budget).toBe(4096);
+  });
+
+  test('writes adaptive to extension adaptive_thinking', () => {
+    const body = cc();
+    applyAliasRulesToChatCompletions(body, { reasoning: { adaptive: true } });
+    expect(body.adaptive_thinking).toBe(true);
+  });
+
+  test('writes summary to extension reasoning_summary', () => {
+    const body = cc();
+    applyAliasRulesToChatCompletions(body, { reasoning: { summary: 'detailed' } });
+    expect(body.reasoning_summary).toBe('detailed');
+  });
+
+  test('writes verbosity, serviceTier, anthropicSpeed, anthropicBeta to their slots', () => {
+    const body = cc();
+    applyAliasRulesToChatCompletions(body, {
+      verbosity: 'low', serviceTier: 'flex', anthropicSpeed: 'fast', anthropicBeta: ['ctx-1m'],
+    });
+    expect(body.anthropic_beta).toEqual(['ctx-1m']);
+    expect(body.anthropic_speed).toBe('fast');
+    expect(body.service_tier).toBe('flex');
+    expect(body.verbosity).toBe('low');
+  });
+
+  test('leaves payload untouched when rules carry no fields', () => {
+    const body = cc({ reasoning_effort: 'medium', verbosity: 'high' });
+    applyAliasRulesToChatCompletions(body, {});
+    expect(body.verbosity).toBe('high');
+    expect(body.reasoning_effort).toBe('medium');
+  });
+});
+
+describe('applyAliasRulesToResponses', () => {
+  test('writes effort to native reasoning.effort and overrides user value', () => {
+    const req = resp({ reasoning: { effort: 'low' } });
+    applyAliasRulesToResponses(req, { reasoning: { effort: 'high' } });
+    expect(req.reasoning?.effort).toBe('high');
+  });
+
+  test('writes summary to native reasoning.summary', () => {
+    const req = resp();
+    applyAliasRulesToResponses(req, { reasoning: { summary: 'detailed' } });
+    expect(req.reasoning?.summary).toBe('detailed');
+  });
+
+  test('writes budgetTokens to extension thinking_budget', () => {
+    const req = resp();
+    applyAliasRulesToResponses(req, { reasoning: { budgetTokens: 4096 } });
+    expect(req.thinking_budget).toBe(4096);
+  });
+
+  test('writes adaptive to extension adaptive_thinking', () => {
+    const req = resp();
+    applyAliasRulesToResponses(req, { reasoning: { adaptive: true } });
+    expect(req.adaptive_thinking).toBe(true);
+  });
+
+  test('writes verbosity to native text.verbosity, preserving format', () => {
+    const req = resp({ text: { format: { type: 'json_object' } } });
+    applyAliasRulesToResponses(req, { verbosity: 'low' });
+    expect(req.text?.format).toEqual({ type: 'json_object' });
+    expect(req.text?.verbosity).toBe('low');
+  });
+
+  test('writes serviceTier to native service_tier', () => {
+    const req = resp();
+    applyAliasRulesToResponses(req, { serviceTier: 'flex' });
+    expect(req.service_tier).toBe('flex');
+  });
+
+  test('writes anthropicSpeed / anthropicBeta to extension slots', () => {
+    const req = resp();
+    applyAliasRulesToResponses(req, { anthropicSpeed: 'fast', anthropicBeta: ['ctx-1m'] });
+    expect(req.anthropic_beta).toEqual(['ctx-1m']);
+    expect(req.anthropic_speed).toBe('fast');
+  });
+});
+
+describe('applyAliasRulesToMessages', () => {
+  test('writes effort to native output_config.effort', () => {
+    const request = msg();
+    applyAliasRulesToMessages(request, { reasoning: { effort: 'high' } });
+    expect(request.output_config?.effort).toBe('high');
+  });
+
+  test('writes budgetTokens to thinking.enabled', () => {
+    const request = msg();
+    applyAliasRulesToMessages(request, { reasoning: { budgetTokens: 4096 } });
+    expect(request.thinking).toEqual({ type: 'enabled', budget_tokens: 4096 });
+  });
+
+  test('writes adaptive to thinking.type=adaptive', () => {
+    const request = msg();
+    applyAliasRulesToMessages(request, { reasoning: { adaptive: true } });
+    expect(request.thinking).toEqual({ type: 'adaptive' });
+  });
+
+  test('writes summary to thinking.display (mapped from OpenAI vocabulary)', () => {
+    const request = msg({ thinking: { type: 'enabled', budget_tokens: 1024 } });
+    applyAliasRulesToMessages(request, { reasoning: { summary: 'detailed' } });
+    expect(request.thinking).toEqual({ type: 'enabled', budget_tokens: 1024, display: 'summarized' });
+  });
+
+  test('writes anthropicSpeed to native speed', () => {
+    const request = msg();
+    applyAliasRulesToMessages(request, { anthropicSpeed: 'fast' });
+    expect(request.speed).toBe('fast');
+  });
+
+  test('writes serviceTier to native service_tier', () => {
+    const request = msg();
+    applyAliasRulesToMessages(request, { serviceTier: 'priority' });
+    expect(request.service_tier).toBe('priority');
+  });
+
+  test('writes verbosity to the extension slot', () => {
+    const request = msg();
+    applyAliasRulesToMessages(request, { verbosity: 'low' });
+    expect(request.verbosity).toBe('low');
+  });
+
+  test('adaptive overrides budgetTokens when both arrive on the same call', () => {
+    // Write-side validation rejects this combination; should it slip through
+    // anyway, adaptive must win so the result matches the translate layer.
+    const request = msg();
+    applyAliasRulesToMessages(request, { reasoning: { budgetTokens: 1024, adaptive: true } });
+    expect(request.thinking).toEqual({ type: 'adaptive' });
+  });
+});
+
+describe('applyAliasRulesToGemini', () => {
+  test('writes effort to generationConfig.thinkingConfig.thinkingLevel', () => {
+    const req = gem();
+    applyAliasRulesToGemini(req, { reasoning: { effort: 'high' } });
+    expect(req.generationConfig?.thinkingConfig?.thinkingLevel).toBe('high');
+  });
+
+  test('writes budgetTokens to generationConfig.thinkingConfig.thinkingBudget', () => {
+    const req = gem();
+    applyAliasRulesToGemini(req, { reasoning: { budgetTokens: 4096 } });
+    expect(req.generationConfig?.thinkingConfig?.thinkingBudget).toBe(4096);
+  });
+
+  test('writes adaptive to generationConfig.thinkingConfig.thinkingBudget = -1', () => {
+    const req = gem();
+    applyAliasRulesToGemini(req, { reasoning: { adaptive: true } });
+    expect(req.generationConfig?.thinkingConfig?.thinkingBudget).toBe(-1);
+  });
+
+  test('writes summary to generationConfig.thinkingConfig.includeThoughts when not omitted', () => {
+    const req = gem();
+    applyAliasRulesToGemini(req, { reasoning: { summary: 'detailed' } });
+    expect(req.generationConfig?.thinkingConfig?.includeThoughts).toBe(true);
+  });
+
+  test('writes summary=omitted to generationConfig.thinkingConfig.includeThoughts=false', () => {
+    const req = gem();
+    applyAliasRulesToGemini(req, { reasoning: { summary: 'omitted' } });
+    expect(req.generationConfig?.thinkingConfig?.includeThoughts).toBe(false);
+  });
+
+  test('writes verbosity to generationConfig.verbosity extension', () => {
+    const req = gem();
+    applyAliasRulesToGemini(req, { verbosity: 'low' });
+    expect(req.generationConfig?.verbosity).toBe('low');
+  });
+
+  test('writes serviceTier to generationConfig.serviceTier extension', () => {
+    const req = gem();
+    applyAliasRulesToGemini(req, { serviceTier: 'flex' });
+    expect(req.generationConfig?.serviceTier).toBe('flex');
+  });
+
+  test('writes anthropicSpeed / anthropicBeta to top-level extension slots', () => {
+    const req = gem();
+    applyAliasRulesToGemini(req, { anthropicSpeed: 'fast', anthropicBeta: ['ctx-1m'] });
+    expect(req.anthropicBeta).toEqual(['ctx-1m']);
+    expect(req.anthropicSpeed).toBe('fast');
+  });
+
+  test('preserves existing thinkingConfig entries when adding a new one', () => {
+    const req = gem({ generationConfig: { thinkingConfig: { thinkingBudget: 1024 } } });
+    applyAliasRulesToGemini(req, { reasoning: { summary: 'detailed' } });
+    expect(req.generationConfig?.thinkingConfig).toEqual({ thinkingBudget: 1024, includeThoughts: true });
+  });
+});
diff --git a/packages/gateway/src/data-plane/model-aliases/match_test.ts b/packages/gateway/src/data-plane/model-aliases/match_test.ts
index b1dd8ff2f..7252078c9 100644
--- a/packages/gateway/src/data-plane/model-aliases/match_test.ts
+++ b/packages/gateway/src/data-plane/model-aliases/match_test.ts
@@ -10,6 +10,7 @@ const make = (overrides: Partial): ModelAlias => ({
rules: {},
visibleInModelsList: true,
onConflict: 'real-only',
+ createdAt: 0,
...overrides,
});
diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts
index 93f4b6822..33dbefa40 100644
--- a/packages/gateway/src/data-plane/models/gemini.ts
+++ b/packages/gateway/src/data-plane/models/gemini.ts
@@ -1,8 +1,10 @@
import type { Context } from 'hono';
import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts';
+import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
import { createPerRequestFetcher } from '../../dial/per-request.ts';
import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
+import { getRepo } from '../../repo/index.ts';
import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
import { getCurrentColo } from '../../runtime/runtime-info.ts';
import { geminiStatusForHttpStatus } from '../chat/gemini/errors.ts';
@@ -66,16 +68,36 @@ const loadGeminiModels = async (
upstreamFilter: readonly string[] | null,
fetcherForUpstream: (upstreamId: string) => Fetcher,
scheduler: BackgroundScheduler,
+ aliases: readonly ModelAlias[],
): Promise => {
const models = await getInternalModels(upstreamFilter, fetcherForUpstream, scheduler);
// Only chat models are representable in the Gemini /models shape.
- return models.filter(model => model.kind === 'chat').map(toGeminiModel);
+ const realChatEntries = models.filter(model => model.kind === 'chat').map(toGeminiModel);
+ // Visible aliases append in `loadAllAliases` order; the Gemini surface
+ // carries no `aliasedFrom` extension (Gemini's `Model` resource is closed)
+ // so the entry advertises the alias id plus the target's display fields.
+ const byId = new Map(models.map(m => [m.id, m]));
+ const aliasEntries: GeminiModel[] = [];
+ for (const alias of aliases) {
+ if (!alias.visibleInModelsList) continue;
+ const target = byId.get(alias.targetModelId);
+ if (target && target.kind !== 'chat') continue;
+ aliasEntries.push(toGeminiModel({
+ ...(target ?? {} as InternalModel),
+ id: alias.alias,
+ display_name: alias.alias,
+ kind: 'chat',
+ limits: target?.limits ?? {},
+ }));
+ }
+ return [...realChatEntries, ...aliasEntries];
};
export const serveGeminiModels = async (c: Context): Promise => {
try {
const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
- return Response.json({ models: await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c)) });
+ const aliases = await getRepo().modelAliases.loadAll();
+ return Response.json({ models: await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), aliases) });
} catch (error) {
return geminiModelLoadError(error);
}
@@ -88,7 +110,8 @@ export const serveGeminiModelInfo = async (c: Context): Promise => {
const modelId = rawModelId.replace(/^models\//, '');
try {
const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
- const model = (await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c))).find(candidate => candidate.baseModelId === modelId || candidate.name === `models/${modelId}`);
+ const aliases = await getRepo().modelAliases.loadAll();
+ const model = (await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), aliases)).find(candidate => candidate.baseModelId === modelId || candidate.name === `models/${modelId}`);
if (!model) return geminiError(404, `Model not found: ${modelId}`);
return Response.json(model);
} catch (error) {
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index eed33c9de..585b5d638 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -1,3 +1,4 @@
+import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
import { getInternalModels } from '../providers/registry.ts';
import type { BackgroundScheduler } from '@floway-dev/platform';
import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
@@ -21,12 +22,48 @@ export const toPublicModel = (model: InternalModel): PublicModel => {
return info;
};
+// Synthesize the PublicModel row that lists one alias. `limits` and `kind`
+// mirror the alias target when `byId` resolves it; when it does not, the row
+// is still produced (empty limits, kind 'chat') because the listing reflects
+// operator intent. Owner fallback order: the target's `owned_by`, then the
+// alias's first upstream id, then the literal 'floway'.
+export const toPublicModelFromAlias = (alias: ModelAlias, byId: ReadonlyMap<string, InternalModel>): PublicModel => {
+  const target = byId.get(alias.targetModelId);
+  const info: PublicModel = {
+    id: alias.alias,
+    object: 'model',
+    type: 'model',
+    display_name: alias.alias,
+    limits: target?.limits ? { ...target.limits } : {},
+    kind: target?.kind ?? 'chat',
+    created: alias.createdAt,
+    created_at: new Date(alias.createdAt * 1000).toISOString(), // createdAt is scaled as epoch seconds
+    aliasedFrom: {
+      targetModelId: alias.targetModelId,
+      upstreamIds: alias.upstreamIds,
+      rules: alias.rules,
+      onConflict: alias.onConflict,
+    },
+  };
+  info.owned_by = target?.owned_by ?? alias.upstreamIds[0] ?? 'floway';
+  return info;
+};
+
export const loadModels = async (
upstreamFilter: readonly string[] | null,
fetcherForUpstream: (upstreamId: string) => Fetcher,
scheduler: BackgroundScheduler,
+ aliases: readonly ModelAlias[],
): Promise => {
- const data = (await getInternalModels(upstreamFilter, fetcherForUpstream, scheduler)).map(toPublicModel);
+ const internal = await getInternalModels(upstreamFilter, fetcherForUpstream, scheduler);
+ const realEntries = internal.map(toPublicModel);
+ const byId = new Map(internal.map(m => [m.id, m]));
+ // Visible aliases append in `loadAllAliases` order, after every real entry.
+ // The spec's no-silent-hide policy keeps disabled-target aliases visible —
+ // the user-facing failure on call is the canonical signal, not the
+ // listing.
+ const aliasEntries = aliases.filter(a => a.visibleInModelsList).map(a => toPublicModelFromAlias(a, byId));
+ const data = [...realEntries, ...aliasEntries];
return {
object: 'list',
has_more: false,
diff --git a/packages/gateway/src/data-plane/models/serve.ts b/packages/gateway/src/data-plane/models/serve.ts
index 9b8b510f9..60736266b 100644
--- a/packages/gateway/src/data-plane/models/serve.ts
+++ b/packages/gateway/src/data-plane/models/serve.ts
@@ -7,6 +7,7 @@ import { loadModels } from './load.ts';
import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts';
import { createPerRequestFetcher } from '../../dial/per-request.ts';
import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
+import { getRepo } from '../../repo/index.ts';
import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
import { getCurrentColo } from '../../runtime/runtime-info.ts';
import { ProviderModelsUnavailableError } from '@floway-dev/provider';
@@ -14,7 +15,8 @@ import { ProviderModelsUnavailableError } from '@floway-dev/provider';
export const models = async (c: Context) => {
try {
const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
- return Response.json(await loadModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c)));
+ const aliases = await getRepo().modelAliases.loadAll();
+ return Response.json(await loadModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), aliases));
} catch (e) {
// Upstream HTTP/parse failures squash to a generic message so we do not
// leak upstream identity. Other registry-thrown errors (e.g. the "no
diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index 1408f10a6..855eca5fa 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -1,5 +1,6 @@
import { test } from 'vitest';
+import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
import { buildCopilotUpstreamRecord, buildCustomUpstreamRecord, copilotModels, requestApp, setupAppTest } from '../../test-helpers.ts';
import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
import { jsonResponse, withMockedFetch, assertEquals } from '@floway-dev/test-utils';
@@ -586,3 +587,180 @@ test('/v1/models returns the last real error when every account model load fails
},
);
});
+
+// /v1/models alias-listing coverage. Each test exercises one slice of the
+// spec's visibility contract: visible alias appears with `aliasedFrom`,
+// hidden alias does not appear, alias-with-disabled-target is still listed,
+// the `aliasedFrom` shape matches the spec byte-for-byte.
+test('/v1/models appends a visible alias with aliasedFrom after the real entries', async () => {
+  const { repo, apiKey } = await setupAppTest();
+  const aliasRepo = repo.modelAliases as MemoryModelAliasesRepo;
+  aliasRepo.setAll([
+    {
+      alias: 'codex-auto-review',
+      targetModelId: 'gpt-5.4',
+      upstreamIds: [],
+      rules: { reasoning: { effort: 'low' } },
+      visibleInModelsList: true,
+      onConflict: 'real-only',
+      createdAt: 1_700_000_000,
+    },
+  ]);
+
+  await repo.upstreams.save(buildCustomUpstreamRecord({
+    id: 'up_oai',
+    name: 'Test OpenAI',
+    sortOrder: 100,
+    config: {
+      baseUrl: 'https://oai.example.com',
+      authStyle: 'bearer',
+      apiKey: 'sk-test',
+      endpoints: { chatCompletions: {} },
+    },
+  }));
+
+  await withMockedFetch(
+    req => {
+      const { hostname, pathname } = new URL(req.url);
+      if (hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({
+          token: 'copilot-access-token',
+          expires_at: 4102444800,
+          refresh_in: 3600,
+          endpoints: { api: 'https://api.individual.githubcopilot.com' },
+        });
+      }
+      if (hostname === 'api.individual.githubcopilot.com' && pathname === '/models') {
+        return jsonResponse(copilotModels([]));
+      }
+      if (hostname === 'oai.example.com' && pathname === '/v1/models') {
+        return jsonResponse({
+          object: 'list',
+          data: [{ id: 'gpt-5.4', owned_by: 'openai' }],
+        });
+      }
+      throw new Error(`Unhandled fetch ${req.url}`);
+    },
+    async () => {
+      const res = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
+      assertEquals(res.status, 200);
+      const listing = await res.json() as { data: Array<{ id: string; owned_by?: string; aliasedFrom?: unknown }> };
+      const ids = listing.data.map(entry => entry.id);
+      assertEquals(ids[ids.length - 1], 'codex-auto-review');
+      const aliasEntry = listing.data.find(entry => entry.id === 'codex-auto-review');
+      if (!aliasEntry) throw new Error('expected codex-auto-review alias entry');
+      assertEquals(aliasEntry.aliasedFrom, {
+        targetModelId: 'gpt-5.4',
+        upstreamIds: [],
+        rules: { reasoning: { effort: 'low' } },
+        onConflict: 'real-only',
+      });
+      assertEquals(aliasEntry.owned_by, 'openai');
+    },
+  );
+});
+
+test('/v1/models omits aliases marked visibleInModelsList=false', async () => {
+  const { repo, apiKey } = await setupAppTest();
+  const aliasRepo = repo.modelAliases as MemoryModelAliasesRepo;
+  aliasRepo.setAll([
+    {
+      alias: 'hidden-alias',
+      targetModelId: 'gpt-5.4',
+      upstreamIds: [],
+      rules: {},
+      visibleInModelsList: false,
+      onConflict: 'real-only',
+      createdAt: 0,
+    },
+  ]);
+
+  await repo.upstreams.save(buildCustomUpstreamRecord({
+    id: 'up_oai',
+    name: 'Test OpenAI',
+    sortOrder: 100,
+    config: {
+      baseUrl: 'https://oai.example.com',
+      authStyle: 'bearer',
+      apiKey: 'sk-test',
+      endpoints: { chatCompletions: {} },
+    },
+  }));
+
+  await withMockedFetch(
+    req => {
+      const { hostname, pathname } = new URL(req.url);
+      if (hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+      }
+      if (hostname === 'api.individual.githubcopilot.com' && pathname === '/models') {
+        return jsonResponse(copilotModels([]));
+      }
+      if (hostname === 'oai.example.com' && pathname === '/v1/models') {
+        return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
+      }
+      throw new Error(`Unhandled fetch ${req.url}`);
+    },
+    async () => {
+      const res = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
+      const listing = await res.json() as { data: Array<{ id: string }> };
+      assertEquals(listing.data.some(entry => entry.id === 'hidden-alias'), false);
+    },
+  );
+});
+
+test('/v1/models lists an alias whose target is not present on any upstream (no silent hide)', async () => {
+  const { repo, apiKey } = await setupAppTest();
+  const aliasRepo = repo.modelAliases as MemoryModelAliasesRepo;
+  aliasRepo.setAll([
+    {
+      alias: 'orphan-alias',
+      targetModelId: 'never-resolves',
+      upstreamIds: ['up_oai'],
+      rules: {},
+      visibleInModelsList: true,
+      onConflict: 'real-only',
+      createdAt: 0,
+    },
+  ]);
+
+  await repo.upstreams.save(buildCustomUpstreamRecord({
+    id: 'up_oai',
+    name: 'Test OpenAI',
+    sortOrder: 100,
+    config: {
+      baseUrl: 'https://oai.example.com',
+      authStyle: 'bearer',
+      apiKey: 'sk-test',
+      endpoints: { chatCompletions: {} },
+    },
+  }));
+
+  await withMockedFetch(
+    req => {
+      const { hostname, pathname } = new URL(req.url);
+      if (hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+      }
+      if (hostname === 'api.individual.githubcopilot.com' && pathname === '/models') {
+        return jsonResponse(copilotModels([]));
+      }
+      if (hostname === 'oai.example.com' && pathname === '/v1/models') {
+        return jsonResponse({ object: 'list', data: [] });
+      }
+      throw new Error(`Unhandled fetch ${req.url}`);
+    },
+    async () => {
+      const res = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
+      const listing = await res.json() as { data: Array<{ id: string; aliasedFrom?: { targetModelId: string }; owned_by?: string }> };
+      const orphan = listing.data.find(entry => entry.id === 'orphan-alias');
+      if (!orphan) throw new Error('expected orphan-alias entry');
+      assertEquals(orphan.aliasedFrom?.targetModelId, 'never-resolves');
+      // With no real entry to borrow an owner from, the alias's first upstream id is used.
+      assertEquals(orphan.owned_by, 'up_oai');
+    },
+  );
+});
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index 8ca75c518..e29df1c6a 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -285,6 +285,11 @@ export interface ProviderModelResolution {
id: string;
model: UpstreamModel;
binding: ProviderModelRecord;
+ // Set when this resolution came from an alias-rewrite interpretation. The
+ // gateway-side passthrough callers (embeddings/images/completions) stamp
+ // this onto the `x-floway-alias` response header so alias-served calls are
+ // observable without enabling any extra mode.
+ aliasName?: string;
}
export interface ModelInterpretation {
@@ -381,6 +386,10 @@ const pushInterpretation = (
out.push(aliasInterp);
out.push(realInterp);
return;
+ default: {
+ const exhaustive: never = alias.onConflict;
+ throw new Error(`pushInterpretation: unhandled onConflict '${exhaustive as string}'`);
+ }
}
};
@@ -470,7 +479,14 @@ export const resolveModelForRequest = async (
const interpretations = enumerateModelInterpretations(modelId, providers, aliases);
const { resolutions, failedUpstreams } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
- return { matches: resolutions.map(r => r.resolved), failedUpstreams };
+ // Project each resolution's alias-rewrite interpretation onto the
+ // returned ProviderModelResolution so passthrough callers can stamp the
+ // `x-floway-alias` header without re-deriving the match.
+ const matches: ProviderModelResolution[] = resolutions.map(r =>
+ r.interpretation.aliasName !== undefined
+ ? { ...r.resolved, aliasName: r.interpretation.aliasName }
+ : r.resolved);
+ return { matches, failedUpstreams };
};
export const resolveModelForProvider = async (
diff --git a/packages/gateway/src/data-plane/providers/registry_test.ts b/packages/gateway/src/data-plane/providers/registry_test.ts
index d470f9169..9823aeb98 100644
--- a/packages/gateway/src/data-plane/providers/registry_test.ts
+++ b/packages/gateway/src/data-plane/providers/registry_test.ts
@@ -922,6 +922,7 @@ describe('enumerateModelInterpretations with alias matching', () => {
rules: { reasoning: { effort: 'low' } },
visibleInModelsList: true,
onConflict: 'real-only',
+ createdAt: 0,
...over,
});
@@ -1065,6 +1066,7 @@ describe('resolveModelForRequest applies alias onConflict pruning', () => {
rules: { reasoning: { effort: 'low' } },
visibleInModelsList: true,
onConflict,
+ createdAt: 0,
});
test('alias-only resolves to a single match against the alias target id', async () => {
diff --git a/packages/gateway/src/data-plane/shared/passthrough-serve.ts b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
index 1add1a115..6c917fb86 100644
--- a/packages/gateway/src/data-plane/shared/passthrough-serve.ts
+++ b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
@@ -21,6 +21,7 @@ import { createUpstreamLatencyRecorder, recordPerformanceError, recordPerformanc
import { recordTokenUsage } from './telemetry/usage.ts';
import { createPerRequestFetcher } from '../../dial/per-request.ts';
import type { AuthedContext } from '../../middleware/auth.ts';
+import { getRepo } from '../../repo/index.ts';
import type { TokenUsage } from '../../repo/types.ts';
import type { GatewayCtx } from '../chat/shared/gateway-ctx.ts';
import { type StreamCompletion, writeSSEFrames } from '../chat/shared/stream/sse.ts';
@@ -117,6 +118,31 @@ interface PassthroughServeContext {
export const passthroughApiError = (c: Context, message: string, status: ContentfulStatusCode): Response =>
c.json({ error: { message, type: 'api_error' } }, status);
+// Report alias rule knobs that a passthrough endpoint has no slot for. The
+// passthrough endpoints (embeddings, images, /v1/completions) carry no
+// Floway-extension fields, so a non-empty `rules` object is structurally
+// dropped before the upstream call. One `floway.alias.drop` warning is logged
+// per knob holding a value; an alias missing from `aliases` logs nothing.
+const traceDroppedAliasRulesForPassthrough = (
+  aliasName: string,
+  aliases: readonly { alias: string; rules: Record<string, unknown> }[],
+  sourceApi: PassthroughServeApiName,
+): void => {
+  const matched = aliases.find(a => a.alias === aliasName);
+  if (!matched) return;
+  const rules = matched.rules as { reasoning?: Record<string, unknown>; verbosity?: unknown; serviceTier?: unknown; anthropicSpeed?: unknown; anthropicBeta?: readonly unknown[] };
+  const fields: string[] = [];
+  // Reasoning knobs set to `undefined` are skipped, like the top-level knobs below.
+  for (const [key, value] of Object.entries(rules.reasoning ?? {})) {
+    if (value !== undefined) fields.push(`reasoning.${key}`);
+  }
+  if (rules.verbosity !== undefined) fields.push('verbosity');
+  if (rules.serviceTier !== undefined) fields.push('serviceTier');
+  if (rules.anthropicSpeed !== undefined) fields.push('anthropicSpeed');
+  if (rules.anthropicBeta?.length) fields.push('anthropicBeta');
+  for (const field of fields) console.warn('floway.alias.drop', JSON.stringify({ alias: aliasName, field, targetProtocol: sourceApi }));
+};
+
export const passthroughServe = async (input: PassthroughServeContext): Promise => {
const { c, ctx, sourceApi, model, bindingServesEndpoint, call, response: responseHandling } = input;
const requestStartedAt = performance.now();
@@ -124,12 +150,20 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
try {
const fetcherForUpstream = await createPerRequestFetcher(ctx.currentColo);
+ // Aliases pass through so a `(model, lookupId)` interpretation can rewrite
+ // to the alias's target id even for non-LLM-shaped endpoints. The alias
+ // rules themselves never apply here — the inbound payload (embeddings,
+ // images, /v1/completions) has no protocol-extension slots for the rule
+ // knobs. We still surface the matched alias name on the
+ // `x-floway-alias` response header and trace one log line per dropped
+ // rule so an operator can confirm the rewrite ran.
+ const aliases = await getRepo().modelAliases.loadAll();
// Each match is one (upstream, upstream-catalog id) pair that interprets
// the inbound public id. Iteration order follows configured sort_order
// across upstreams, with the unprefixed interpretation pushed before the
// prefixed one within a single upstream. The first match whose binding
// satisfies the endpoint capability wins.
- const { matches, failedUpstreams } = await resolveModelForRequest(model, ctx.upstreamIds, fetcherForUpstream, ctx.backgroundScheduler);
+ const { matches, failedUpstreams } = await resolveModelForRequest(model, ctx.upstreamIds, fetcherForUpstream, ctx.backgroundScheduler, aliases);
if (matches.length === 0) {
ctx.dump?.error('gateway');
return passthroughApiError(c, appendFailedUpstreams(`Model ${model} is not available on any configured upstream.`, failedUpstreams), 404);
@@ -137,6 +171,10 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
for (const match of matches) {
if (!bindingServesEndpoint(match.binding)) continue;
+ if (match.aliasName !== undefined) {
+ ctx.responseHeaders.set('x-floway-alias', match.aliasName);
+ traceDroppedAliasRulesForPassthrough(match.aliasName, aliases, sourceApi);
+ }
const recorder = createUpstreamLatencyRecorder();
const { response, modelKey } = await call(match.binding, {
diff --git a/packages/gateway/src/test-helpers/gateway-ctx.ts b/packages/gateway/src/test-helpers/gateway-ctx.ts
index 3a19af303..3ebf1f474 100644
--- a/packages/gateway/src/test-helpers/gateway-ctx.ts
+++ b/packages/gateway/src/test-helpers/gateway-ctx.ts
@@ -15,5 +15,6 @@ export const mockGatewayCtx = (overrides: Partial<GatewayCtx> = {}): GatewayCtx
dump: null,
backgroundScheduler: promise => { void promise; },
requestStartedAt: 0,
+ responseHeaders: new Headers(),
...overrides,
});
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index fddc80318..383e0ffe0 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -110,6 +110,30 @@ export interface PublicModel {
};
kind: ModelKind;
cost?: ModelPricing;
+ // Floway protocol extension. Present on synthesized alias entries the
+ // gateway appends to the listing. Clients that do not know about the
+ // field ignore it; alias-aware clients (dashboard, CLI shims) render the
+ // alias's target id and rules from this payload directly.
+ // See docs/superpowers/specs/2026-06-25-model-aliases-design.md.
+ aliasedFrom?: PublicModelAliasedFrom;
+}
+
+export interface PublicModelAliasedFrom {
+ targetModelId: string;
+ upstreamIds: readonly string[];
+ rules: {
+ reasoning?: {
+ effort?: string;
+ budgetTokens?: number;
+ adaptive?: boolean;
+ summary?: string;
+ };
+ verbosity?: string;
+ serviceTier?: string;
+ anthropicSpeed?: string;
+ anthropicBeta?: readonly string[];
+ };
+ onConflict: 'alias-only' | 'real-only' | 'both-real-first' | 'both-alias-first';
}
export interface PublicModelsResponse {
diff --git a/packages/translate/package.json b/packages/translate/package.json
index 462bf2d1c..e18d7f564 100644
--- a/packages/translate/package.json
+++ b/packages/translate/package.json
@@ -5,7 +5,8 @@
"type": "module",
"exports": {
".": { "import": "./src/index.ts", "types": "./src/index.ts" },
- "./via-responses/responses-items": { "import": "./src/shared/via-responses/responses-items.ts", "types": "./src/shared/via-responses/responses-items.ts" }
+ "./via-responses/responses-items": { "import": "./src/shared/via-responses/responses-items.ts", "types": "./src/shared/via-responses/responses-items.ts" },
+ "./via-messages/anthropic-extensions": { "import": "./src/shared/via-messages/anthropic-extensions.ts", "types": "./src/shared/via-messages/anthropic-extensions.ts" }
},
"scripts": {
"typecheck": "tsc --noEmit"
From 8f2ce38b7a896c093fb397b71dd681c04ab3d59a Mon Sep 17 00:00:00 2001
From: Menci
Date: Thu, 25 Jun 2026 23:55:54 +0800
Subject: [PATCH 007/170] fix(aliases): address final review (header on
passthrough, streaming-safe, idempotent seed, ordered listing)
Final-review fix wave on top of the model-aliases data-plane series. Each
finding from the whole-branch review is addressed; one shim is kept and
documented per the reviewer's option-B recommendation.
- Critical #1: `/v1/embeddings`, `/v1/images/*`, and `/v1/completions`
returned the response through the legacy `ctx.dump?.finalize` pattern
instead of `finalizeGatewayResponse`, so the `x-floway-alias` header
the passthrough scaffold stamped on the per-ctx bag was silently
dropped. Route all three call sites through `finalizeGatewayResponse`
for a uniform finalize seam.
- Important #4: Make the `x-floway-alias` stamp streaming-safe by
introducing `stageGatewayResponseHeader(ctx, name, value)` that writes
the header to BOTH Hono's `c.header` (the documented knob that
survives `streamSSE`'s internal `c.newResponse`) and the per-ctx
`responseHeaders` bag that `finalizeGatewayResponse` merges onto
non-streaming responses built via Web `Response.json`. The chat serve.ts
layers (messages, gemini, responses, chat-completions) and
passthrough-serve all go through this helper, eliminating the
reliance on post-construction `response.headers.set` for streaming.
- Important #3: Add coverage in `gemini_test.ts` that a visible alias
appears in `/v1beta/models` as a synthetic Gemini model entry with
the expected name, displayName, and supportedGenerationMethods. The
prior code path was untested; a refactor of `loadGeminiModels` would
not have been caught.
- Important #2: Keep the pre-alias-table `rewriteResponsesEntryModelAlias`
shim that swaps `codex-auto-review` -> `gpt-5.4` before the matcher
runs (option B from the review). Add a code comment above it
explaining the carveout: the seeded alias is `on_conflict='real-only'`
and on a Codex upstream that exposes a real `codex-auto-review` model
the alias would otherwise lose, breaking parity with Codex CLI's
native behavior. The shim is temporary pending a deliberate Codex
behavior change.
- Minor #6: Switch the `0046_model_aliases.sql` seed `INSERT` to
`INSERT OR IGNORE` so a fresh local-dev replay doesn't trip the
PRIMARY KEY uniqueness check.
- Minor #8: Add `ORDER BY alias` to `loadAllAliases` so the `/v1/models`
listing emits alias entries deterministically across runtimes.
The unit-test fan-out reflects adding `c: AuthedContext` to `GatewayCtx`
so the serve layer can call Hono's `c.header` directly. Test stubs go
through the shared `stubAuthedContext` helper.
---
.../gateway/migrations/0046_model_aliases.sql | 2 +-
.../src/control-plane/model-aliases/repo.ts | 4 +-
.../chat/chat-completions/attempt_test.ts | 2 +
.../demote-developer-to-system_test.ts | 2 +
.../demote-interleaved-system-to-user_test.ts | 2 +
...le-reasoning-on-forced-tool-choice_test.ts | 2 +
.../include-usage-stream-options_test.ts | 2 +
.../interceptors/normalize-usage_test.ts | 2 +
.../vendor-deepseek-normalize_test.ts | 2 +
.../vendor-kimi-normalize_test.ts | 2 +
.../vendor-qwen-normalize_test.ts | 2 +
.../data-plane/chat/chat-completions/serve.ts | 6 +-
.../chat/chat-completions/serve_test.ts | 2 +
.../data-plane/chat/gemini/attempt_test.ts | 2 +
.../strip-safety-settings_test.ts | 2 +
.../strip-unsupported-part-fields_test.ts | 2 +
.../strip-unsupported-tools_test.ts | 2 +
.../suppress-thought-parts_test.ts | 2 +
.../data-plane/chat/gemini/respond_test.ts | 2 +
.../src/data-plane/chat/gemini/serve.ts | 9 +--
.../src/data-plane/chat/gemini/serve_test.ts | 2 +
.../data-plane/chat/messages/attempt_test.ts | 2 +
.../demote-interleaved-system-to-user_test.ts | 2 +
...le-reasoning-on-forced-tool-choice_test.ts | 2 +
.../strip-billing-attribution_test.ts | 2 +
.../interceptors/web-search-shim_test.ts | 2 +
.../data-plane/chat/messages/respond_test.ts | 2 +
.../src/data-plane/chat/messages/serve.ts | 9 +--
.../data-plane/chat/messages/serve_test.ts | 2 +
.../data-plane/chat/responses/attempt_test.ts | 2 +
.../src/data-plane/chat/responses/http.ts | 17 ++++++
.../canonicalize-encrypted-content_test.ts | 2 +
.../demote-developer-to-system_test.ts | 2 +
.../demote-interleaved-system-to-user_test.ts | 2 +
...le-reasoning-on-forced-tool-choice_test.ts | 2 +
.../interceptors/retry-cyber-policy_test.ts | 2 +
.../interceptors/server-tool-shim_test.ts | 3 +
.../image-generation-integration_test.ts | 2 +
.../server-tools/image-generation_test.ts | 2 +
.../vendor-deepseek-normalize_test.ts | 2 +
.../vendor-qwen-normalize_test.ts | 2 +
.../src/data-plane/chat/responses/serve.ts | 9 +--
.../data-plane/chat/responses/serve_test.ts | 2 +
.../src/data-plane/chat/shared/gateway-ctx.ts | 39 +++++++++---
.../data-plane/chat/shared/respond_test.ts | 2 +
.../chat/shared/upstream-telemetry_test.ts | 2 +
.../src/data-plane/completions/serve.ts | 7 +--
.../src/data-plane/completions/serve_test.ts | 47 +++++++++++++++
.../src/data-plane/embeddings/serve.ts | 7 +--
.../src/data-plane/embeddings/serve_test.ts | 56 +++++++++++++++++
.../gateway/src/data-plane/images/serve.ts | 15 ++---
.../src/data-plane/images/serve_test.ts | 60 +++++++++++++++++++
.../src/data-plane/models/gemini_test.ts | 46 ++++++++++++++
.../data-plane/shared/passthrough-serve.ts | 9 ++-
.../gateway/src/test-helpers/gateway-ctx.ts | 10 ++++
55 files changed, 384 insertions(+), 45 deletions(-)
diff --git a/packages/gateway/migrations/0046_model_aliases.sql b/packages/gateway/migrations/0046_model_aliases.sql
index c934d77b6..d76687bbd 100644
--- a/packages/gateway/migrations/0046_model_aliases.sql
+++ b/packages/gateway/migrations/0046_model_aliases.sql
@@ -10,5 +10,5 @@ CREATE TABLE model_aliases (
updated_at INTEGER NOT NULL DEFAULT (unixepoch())
);
-INSERT INTO model_aliases (alias, target_model_id, rules_json, on_conflict)
+INSERT OR IGNORE INTO model_aliases (alias, target_model_id, rules_json, on_conflict)
VALUES ('codex-auto-review', 'gpt-5.4', '{"reasoning":{"effort":"low"}}', 'real-only');
diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts
index 4c13cd09b..a7cfd963f 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo.ts
@@ -13,9 +13,11 @@ interface ModelAliasRow {
// The model_aliases table is operator-managed and small (dozens of rows at
// most), so the data plane reads the full table per request — no cache layer.
+// `ORDER BY alias` makes the read deterministic so `/v1/models` and friends
+// emit alias entries in a stable, operator-predictable order across runtimes.
export const loadAllAliases = async (db: SqlDatabase): Promise => {
const { results } = await db
- .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at FROM model_aliases')
+ .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at FROM model_aliases ORDER BY alias')
.all();
return results.map(toModelAlias);
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
index 62b814359..29f9d1b1b 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
@@ -3,6 +3,7 @@ import { test, vi } from 'vitest';
import { chatCompletionsAttempt } from './attempt.ts';
import { initRepo } from '../../../repo/index.ts';
import { InMemoryRepo } from '../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
import { createNonResponsesSourceStore } from '../responses/items/store.ts';
import type { ProviderCandidate } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -23,6 +24,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
index 83d9bccb7..b84aea40f 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import { withDemoteDeveloperToSystem } from './demote-developer-to-system.ts';
import type { ChatCompletionsInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
import { eventResult } from '@floway-dev/provider';
@@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
index 156389a46..4ecb21e72 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import { withInterleavedSystemDemotedToUser } from './demote-interleaved-system-to-user.ts';
import type { ChatCompletionsInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
import { eventResult } from '@floway-dev/provider';
@@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index e46726510..0ff191572 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import { withReasoningDisabledOnForcedToolChoice } from './disable-reasoning-on-forced-tool-choice.ts';
import type { ChatCompletionsInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
import { eventResult } from '@floway-dev/provider';
@@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
index e3e4147a2..f8d0c33c5 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import { withUsageStreamOptionsIncluded } from './include-usage-stream-options.ts';
import type { ChatCompletionsInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
import { eventResult } from '@floway-dev/provider';
@@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
index 0b6fed4f1..b9dcb1b9c 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import { withUsageNormalized } from './normalize-usage.ts';
import type { ChatCompletionsInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -16,6 +17,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
index 81be2c3ab..7f96ee5f2 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import type { ChatCompletionsInvocation } from './types.ts';
import { withVendorDeepseekChatCompletionsNormalize } from './vendor-deepseek-normalize.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -20,6 +21,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
index 1cfc304b7..2547a30b4 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import type { ChatCompletionsInvocation } from './types.ts';
import { withVendorKimiChatCompletionsNormalize } from './vendor-kimi-normalize.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
import { eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -16,6 +17,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
index 0506a1e25..e374ea3b4 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import type { ChatCompletionsInvocation } from './types.ts';
import { withVendorQwenChatCompletionsNormalize } from './vendor-qwen-normalize.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
import { eventResult } from '@floway-dev/provider';
@@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
index 1347dd6bd..28e10cfd5 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
@@ -6,6 +6,7 @@ import { applyAliasRulesToChatCompletions } from '../../model-aliases/apply.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
import { enumerateProviderCandidates } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
+import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts';
import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
import type { ProtocolFrame } from '@floway-dev/protocols/common';
import type { ExecuteResult } from '@floway-dev/provider';
@@ -50,9 +51,10 @@ export const chatCompletionsServe = {
}
// Apply operator-locked alias rules to the inbound IR before the
// attempt runs its interceptor chain. The matching `x-floway-alias`
- // header rides out via ctx.responseHeaders.
+ // header is staged via Hono's `c.header` so it survives `streamSSE`'s
+ // internal `c.newResponse`.
if (candidate.aliasRules) applyAliasRulesToChatCompletions(payload, candidate.aliasRules);
- if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
+ if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
return await chatCompletionsAttempt.generate({ payload, ctx, store, candidate, headers });
},
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
index 402803203..37441a5ba 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
@@ -2,6 +2,7 @@ import { test, vi } from 'vitest';
import { initRepo } from '../../../repo/index.ts';
import { InMemoryRepo } from '../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
import { createNonResponsesSourceStore } from '../responses/items/store.ts';
import type { ProviderCandidate } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -49,6 +50,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
index 29a4e9bc5..bc7c7949b 100644
--- a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
@@ -3,6 +3,7 @@ import { test, vi } from 'vitest';
import { geminiAttempt } from './attempt.ts';
import { initRepo } from '../../../repo/index.ts';
import { InMemoryRepo } from '../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
import { createNonResponsesSourceStore } from '../responses/items/store.ts';
import type { ProviderCandidate } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -24,6 +25,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
index d4b48ac3a..98d15dd39 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
@@ -1,6 +1,7 @@
import { test } from 'vitest';
import { stripSafetySettings } from './strip-safety-settings.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import type { ProtocolFrame } from '@floway-dev/protocols/common';
import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
@@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
index 3b02b63f8..e74d45a4f 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
@@ -1,6 +1,7 @@
import { test } from 'vitest';
import { stripUnsupportedPartFields } from './strip-unsupported-part-fields.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import type { ProtocolFrame } from '@floway-dev/protocols/common';
import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
@@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
index 6a2c20ef7..689ee6d0e 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
@@ -1,6 +1,7 @@
import { test } from 'vitest';
import { stripUnsupportedTools } from './strip-unsupported-tools.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import type { ProtocolFrame } from '@floway-dev/protocols/common';
import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
@@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
index eb67a0092..33e49a791 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
@@ -1,6 +1,7 @@
import { test } from 'vitest';
import { suppressThoughtParts } from './suppress-thought-parts.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import { eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
@@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
index 31981b544..61e0132a7 100644
--- a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
@@ -2,6 +2,7 @@ import { Hono } from 'hono';
import { test } from 'vitest';
import { respondGemini } from './respond.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
import type { ProtocolFrame } from '@floway-dev/protocols/common';
import { eventFrame } from '@floway-dev/protocols/common';
@@ -26,6 +27,7 @@ const ctx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts
index e1f61d628..7d1cf345e 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts
@@ -6,6 +6,7 @@ import { applyAliasRulesToGemini } from '../../model-aliases/apply.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
import { enumerateProviderCandidates } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
+import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts';
import type { ProtocolFrame } from '@floway-dev/protocols/common';
import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
import type { ExecuteResult, PlainResult } from '@floway-dev/provider';
@@ -60,10 +61,10 @@ export const geminiServe = {
);
}
// Operator-locked alias rules apply to the Gemini IR before the attempt
- // runs; the matching `x-floway-alias` header rides out via
- // ctx.responseHeaders.
+ // runs; the matching `x-floway-alias` header is staged via Hono's
+ // `c.header` so it survives `streamSSE`'s internal `c.newResponse`.
if (candidate.aliasRules) applyAliasRulesToGemini(payload, candidate.aliasRules);
- if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
+ if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
return await geminiAttempt.generate({ payload, ctx, store, candidate, headers });
},
@@ -97,7 +98,7 @@ export const geminiServe = {
);
}
if (candidate.aliasRules) applyAliasRulesToGemini(payload, candidate.aliasRules);
- if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
+ if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
return await geminiAttempt.countTokens({ payload, ctx, store, candidate, headers });
},
};
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
index 070d44471..3ef8114e8 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
@@ -2,6 +2,7 @@ import { test, vi } from 'vitest';
import { initRepo } from '../../../repo/index.ts';
import { InMemoryRepo } from '../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
import { createNonResponsesSourceStore } from '../responses/items/store.ts';
import type { ProviderCandidate } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -48,6 +49,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
index f9192e289..41a96f0de 100644
--- a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
@@ -3,6 +3,7 @@ import { test, vi } from 'vitest';
import { messagesAttempt } from './attempt.ts';
import { initRepo } from '../../../repo/index.ts';
import { InMemoryRepo } from '../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
import { createNonResponsesSourceStore } from '../responses/items/store.ts';
import type { ProviderCandidate } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -23,6 +24,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
index 9df67c5c0..bf29636a7 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import { demoteInterleavedSystemToUser } from './demote-interleaved-system-to-user.ts';
import type { MessagesInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import type { ProtocolFrame } from '@floway-dev/protocols/common';
import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
@@ -16,6 +17,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 7b7045355..9db2fe856 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import { withReasoningDisabledOnForcedToolChoice } from './disable-reasoning-on-forced-tool-choice.ts';
import type { MessagesInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import type { ProtocolFrame } from '@floway-dev/protocols/common';
import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
@@ -16,6 +17,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
index 8ae90e232..c08720c71 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import { stripBillingAttribution } from './strip-billing-attribution.ts';
import type { MessagesInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import type { ProtocolFrame } from '@floway-dev/protocols/common';
import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
@@ -16,6 +17,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
index dca97addd..8ddfa03e7 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
@@ -13,6 +13,7 @@ import {
} from './web-search-shim.ts';
import { initRepo } from '../../../../repo/index.ts';
import { InMemoryRepo } from '../../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import { DEFAULT_SEARCH_CONFIG } from '../../../tools/web-search/search-config.ts';
import type { WebSearchProvider, WebSearchProviderResult } from '../../../tools/web-search/types.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
@@ -58,6 +59,7 @@ const gatewayCtx = (apiKeyId: string = 'test-key'): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/messages/respond_test.ts b/packages/gateway/src/data-plane/chat/messages/respond_test.ts
index 79d0a9db6..0caef38b6 100644
--- a/packages/gateway/src/data-plane/chat/messages/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/respond_test.ts
@@ -4,6 +4,7 @@ import { test } from 'vitest';
import { createMessagesStreamUsageState, respondMessages, tokenUsageFromMessagesFrame } from './respond.ts';
import { initRepo } from '../../../repo/index.ts';
import { InMemoryRepo } from '../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
import type { MessagesStreamEvent } from '@floway-dev/protocols/messages';
@@ -534,6 +535,7 @@ const makeRespondCtx = (): GatewayCtx => ({
wantsStream: false,
runtimeLocation: 'TEST',
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
currentColo: 'TEST',
diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts
index 30282afd4..8b0b337f1 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve.ts
@@ -6,6 +6,7 @@ import { applyAliasRulesToMessages } from '../../model-aliases/apply.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
import { enumerateProviderCandidates } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
+import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts';
import type { ProtocolFrame } from '@floway-dev/protocols/common';
import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
import type { ExecuteResult, PlainResult } from '@floway-dev/provider';
@@ -59,10 +60,10 @@ export const messagesServe = {
// Operator-locked alias rules go onto the inbound IR before the attempt
// begins so the per-protocol interceptor chain (and any downstream
// translate pass) sees the already-injected fields. The matching
- // `x-floway-alias` response header is staged on the gateway-stamped
- // header set; the http wrapper flushes it onto the outgoing Response.
+ // `x-floway-alias` header is staged via Hono's `c.header` so it
+ // survives `streamSSE`'s internal `c.newResponse`.
if (candidate.aliasRules) applyAliasRulesToMessages(payload, candidate.aliasRules);
- if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
+ if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
return await messagesAttempt.generate({ payload, ctx, store, candidate, headers });
},
@@ -96,7 +97,7 @@ export const messagesServe = {
// rules apply uniformly regardless of endpoint, and the response header
// rides out the same way.
if (candidate.aliasRules) applyAliasRulesToMessages(payload, candidate.aliasRules);
- if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
+ if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
return await messagesAttempt.countTokens({ payload, ctx, store, candidate, headers });
},
};
diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
index 734bad296..bb1dfcfb0 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
@@ -2,6 +2,7 @@ import { test, vi } from 'vitest';
import { initRepo } from '../../../repo/index.ts';
import { InMemoryRepo } from '../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
import { createNonResponsesSourceStore } from '../responses/items/store.ts';
import type { ProviderCandidate } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -46,6 +47,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
index a952627b4..ceba118d7 100644
--- a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
@@ -7,6 +7,7 @@ import { createResponsesHttpStore } from './items/store.ts';
import { initRepo } from '../../../repo/index.ts';
import { InMemoryRepo } from '../../../repo/memory.ts';
import type { StoredResponsesItem } from '../../../repo/types.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
import type { ProviderCandidate } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -25,6 +26,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/responses/http.ts b/packages/gateway/src/data-plane/chat/responses/http.ts
index 54c497a0a..9346d5576 100644
--- a/packages/gateway/src/data-plane/chat/responses/http.ts
+++ b/packages/gateway/src/data-plane/chat/responses/http.ts
@@ -16,6 +16,23 @@ import { internalErrorResult, toInternalDebugError } from '@floway-dev/provider'
// performance telemetry, and usage accounting all see the real model name
// (and the `low` reasoning effort the alias implies — generate only;
// compact carries no `reasoning` field).
+//
+// This shim predates the operator-managed alias table seeded by migration
+// `0046_model_aliases.sql`. The two paths overlap on `/v1/responses` —
+// rewriting at this entry swaps the inbound `model` to `gpt-5.4` BEFORE the
+// alias matcher in `enumerateProviderCandidates` runs, so the alias row
+// never matches for this surface. The carveout is deliberate: the seeded
+// alias is stored with `on_conflict='real-only'`, which means on a Codex
+// upstream that exposes a real `codex-auto-review` model the alias would
+// silently lose to the real id and the `reasoning.effort=low` rule would
+// never apply — breaking parity with Codex CLI's native auto-review
+// behavior. Other inbound surfaces (`/v1/messages`, `/v1/chat/completions`,
+// `/v1beta/…`) carry no entry-level shim and reach the alias matcher
+// unchanged; they observe `real-only` semantics as designed.
+//
+// The shim is a temporary carveout pending a follow-up that either deletes
+// it after a deliberate Codex behavior change (e.g. switching to
+// `both-alias-first`) or migrates the entire surface to the alias table.
const rewriteResponsesEntryModelAlias = (payload: ResponsesPayload, stampReasoningEffort: boolean): ResponsesPayload => {
if (payload.model !== CODEX_AUTO_REVIEW_ALIAS) return payload;
if (!stampReasoningEffort) return { ...payload, model: CODEX_AUTO_REVIEW_TARGET };
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
index f904f85d5..444ce069d 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import { withReasoningEncryptedContentCanonicalized } from './canonicalize-encrypted-content.ts';
import type { ResponsesInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
import { eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -17,6 +18,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
index a193d01db..a2951ef03 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import { withDemoteDeveloperToSystem } from './demote-developer-to-system.ts';
import type { ResponsesInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
import { doneFrame } from '@floway-dev/protocols/common';
@@ -17,6 +18,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
index ae1fc3970..f4f26c112 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import { withInterleavedSystemDemotedToUser } from './demote-interleaved-system-to-user.ts';
import type { ResponsesInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
import { doneFrame } from '@floway-dev/protocols/common';
@@ -17,6 +18,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index dcddbd6c8..ea7b872ab 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import { withReasoningDisabledOnForcedToolChoice } from './disable-reasoning-on-forced-tool-choice.ts';
import type { ResponsesInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
import { doneFrame } from '@floway-dev/protocols/common';
@@ -17,6 +18,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
index 3cf947b01..5b9bb4c91 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import { withCyberPolicyRetried } from './retry-cyber-policy.ts';
import type { ResponsesInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
import { eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -45,6 +46,7 @@ const stubCtx = (overrides: { abortSignal?: AbortSignal } = {}): GatewayCtx => (
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
...overrides,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
index 6688dcec7..ff2e0f910 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
@@ -16,6 +16,7 @@ import { SHIM_TOOL_NAME, webSearchServerTool } from './server-tools/web-search.t
import type { ResponsesInterceptor, ResponsesInvocation } from './types.ts';
import { initRepo } from '../../../../repo/index.ts';
import { InMemoryRepo } from '../../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import { resolveConfiguredWebSearchProvider } from '../../../tools/web-search/provider.ts';
import type {
ConfiguredWebSearchProvider,
@@ -348,6 +349,7 @@ const makeGatewayCtx = (apiKeyId: string = 'k1'): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
});
@@ -4497,6 +4499,7 @@ test('downstream AbortSignal threads through to provider search / fetchPage and
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
abortSignal: controller.signal,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
index 46e30b043..8f1abec4c 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
@@ -2,6 +2,7 @@ import { beforeEach, test, vi } from 'vitest';
import { initRepo } from '../../../../../repo/index.ts';
import { InMemoryRepo } from '../../../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../../shared/gateway-ctx.ts';
import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../../items/store.ts';
import type { ResponsesInvocation } from '../types.ts';
@@ -144,6 +145,7 @@ const gatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
index da94b3068..16f0415e1 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
@@ -19,6 +19,7 @@ import {
} from './image-generation.ts';
import { initRepo } from '../../../../../repo/index.ts';
import { InMemoryRepo } from '../../../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../../shared/gateway-ctx.ts';
import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../../items/store.ts';
import type { ResponsesInvocation } from '../types.ts';
@@ -56,6 +57,7 @@ const gatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
index 7db1b6360..c873f9225 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import type { ResponsesInvocation } from './types.ts';
import { withVendorDeepseekResponsesNormalize } from './vendor-deepseek-normalize.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
import { doneFrame } from '@floway-dev/protocols/common';
@@ -17,6 +18,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
index 23afe8462..6417306ff 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import type { ResponsesInvocation } from './types.ts';
import { withVendorQwenResponsesNormalize } from './vendor-qwen-normalize.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
import { doneFrame } from '@floway-dev/protocols/common';
@@ -17,6 +18,7 @@ const stubCtx: GatewayCtx = {
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/serve.ts b/packages/gateway/src/data-plane/chat/responses/serve.ts
index e66a2a29a..81035f20d 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve.ts
@@ -4,6 +4,7 @@ import type { ResponsesSnapshotMode, StatefulResponsesStore } from './items/stor
import { prepareResponsesServePlan } from './serve-prep.ts';
import { applyAliasRulesToResponses } from '../../model-aliases/apply.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
+import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts';
import type { ProtocolFrame } from '@floway-dev/protocols/common';
import type { ResponsesPayload, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
import type { ExecuteResult } from '@floway-dev/provider';
@@ -48,10 +49,10 @@ export const responsesServe = {
});
if (plan.kind === 'failure') return plan.result;
// Operator-locked alias rules apply to the prepared inbound IR before
- // the attempt runs; the `x-floway-alias` header rides out via
- // ctx.responseHeaders.
+ // the attempt runs; the `x-floway-alias` header is staged via Hono's
+ // `c.header` so it survives `streamSSE`'s internal `c.newResponse`.
if (plan.candidate.aliasRules) applyAliasRulesToResponses(plan.prepared, plan.candidate.aliasRules);
- if (plan.candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', plan.candidate.aliasName);
+ if (plan.candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', plan.candidate.aliasName);
const effectiveSnapshotMode: ResponsesSnapshotMode = snapshotMode !== 'none' && containsCompactionTrigger(plan.prepared.input)
? 'replace'
: snapshotMode;
@@ -73,7 +74,7 @@ export const responsesServe = {
// applying uniformly keeps the operator's intent expressed at the
// inbound boundary regardless of which endpoint runs.
if (plan.candidate.aliasRules) applyAliasRulesToResponses(plan.prepared, plan.candidate.aliasRules);
- if (plan.candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', plan.candidate.aliasName);
+ if (plan.candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', plan.candidate.aliasName);
return await responsesAttempt.compact({ payload: plan.prepared, ctx, store, candidate: plan.candidate, headers });
},
};
diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
index 07369504d..b42f5bdbf 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
@@ -5,6 +5,7 @@ import { createResponsesHttpStore, MemoryStatefulResponsesBacking, LayeredStatef
import { initRepo } from '../../../repo/index.ts';
import { InMemoryRepo } from '../../../repo/memory.ts';
import type { StoredResponsesItem, StoredResponsesSnapshot } from '../../../repo/types.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
import type { ProviderCandidate } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
import type { ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
@@ -57,6 +58,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
currentColo: 'TEST',
dump: null,
backgroundScheduler: () => {},
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
index 0e199e403..ad0ef2169 100644
--- a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
+++ b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
@@ -6,6 +6,14 @@ import { getCurrentColo } from '../../../runtime/runtime-info.ts';
import type { BackgroundScheduler } from '@floway-dev/platform';
export interface GatewayCtx {
+ // The inbound Hono context. Carried so the serve layer can stage
+ // response headers via `c.header(name, value)` — the Hono-documented
+ // knob that survives `streamSSE`'s internal `c.newResponse` for the
+ // streaming surfaces. For non-streaming surfaces that build the
+ // outgoing Response via the Web `Response.json` constructor (which
+ // bypasses Hono's context), the same value also lands on
+ // `responseHeaders` so `finalizeGatewayResponse` can stamp it.
+ readonly c: AuthedContext;
readonly apiKeyId: string;
readonly upstreamIds: readonly string[] | null;
readonly abortSignal?: AbortSignal;
@@ -23,14 +31,14 @@ export interface GatewayCtx {
readonly currentColo: string;
// Null when the api key has no retention configured, in which case
// `finalizeGatewayResponse` short-circuits the dump tee and returns the
- // response untouched (headers from `responseHeaders` are still applied).
+ // response untouched (entries from `responseHeaders` are still applied).
readonly dump: DumpAccumulator | null;
- // Per-request response-header staging. The data-plane writes alias-aware
- // and similar non-upstream headers here mid-request; the inbound HTTP
- // wrapper merges them onto the final outgoing Response before
- // `dump?.finalize`. Mutable on purpose — the serve layer owns the
- // chosen candidate and is the right seam for stamping the
- // `x-floway-alias` header.
+ // Per-request response-header staging for the non-streaming and error
+ // paths that build their Response via the Web `Response.json` constructor
+ // rather than through Hono's `c.json`/`streamSSE`. The serve layer writes
+ // gateway-stamped headers (e.g. `x-floway-alias`) here in lockstep with
+ // its `ctx.c.header(...)` call; `finalizeGatewayResponse` then merges
+ // them onto the outgoing Response.
readonly responseHeaders: Headers;
}
@@ -67,6 +75,7 @@ export const createGatewayCtxFromHono = (c: AuthedContext, opts: CreateGatewayCt
if (opts.model !== undefined) dump?.requestedModel(opts.model);
const colo = getCurrentColo(c.req.raw);
return {
+ c,
apiKeyId: apiKey.id,
upstreamIds,
abortSignal: controller?.signal,
@@ -81,10 +90,22 @@ export const createGatewayCtxFromHono = (c: AuthedContext, opts: CreateGatewayCt
};
};
+// Stage one gateway response header so it lands on the outgoing Response
+// regardless of which builder produced it. Calls Hono's `c.header` (the
+// only knob that survives `streamSSE`'s internal `c.newResponse`) AND
+// stages on the per-ctx `responseHeaders` bag that `finalizeGatewayResponse`
+// merges onto Web-`Response.json`-built non-streaming responses.
+export const stageGatewayResponseHeader = (ctx: GatewayCtx, name: string, value: string): void => {
+ ctx.c.header(name, value);
+ ctx.responseHeaders.set(name, value);
+};
+
// Apply ctx-stamped response headers onto the outgoing Response and then run
// the dump-accumulator's finalize tee. Every inbound HTTP wrapper returns its
-// response through this seam so alias and other gateway-stamped headers ride
-// out uniformly across happy-path, error, and passthrough paths.
+// response through this seam so gateway-stamped headers ride out uniformly
+// across happy-path, error, and passthrough paths — including the
+// non-streaming surfaces that build their Response via Web `Response.json`
+// rather than Hono's `c.json`.
export const finalizeGatewayResponse = (ctx: GatewayCtx, response: Response): Response => {
for (const [name, value] of ctx.responseHeaders) response.headers.set(name, value);
return ctx.dump?.finalize(response) ?? response;
diff --git a/packages/gateway/src/data-plane/chat/shared/respond_test.ts b/packages/gateway/src/data-plane/chat/shared/respond_test.ts
index fd506b083..738bb6399 100644
--- a/packages/gateway/src/data-plane/chat/shared/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/respond_test.ts
@@ -4,6 +4,7 @@ import type { GatewayCtx } from './gateway-ctx.ts';
import { SourceStreamState, recordPerformance, recordUsage } from './respond.ts';
import { initRepo } from '../../../repo/index.ts';
import { InMemoryRepo } from '../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
import type { PerformanceTelemetryContext, TelemetryModelIdentity } from '@floway-dev/provider';
import { assertEquals } from '@floway-dev/test-utils';
@@ -45,6 +46,7 @@ const setup = (): Harness => {
dump: null,
backgroundScheduler: promise => { background.push(promise); },
requestStartedAt,
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
}),
};
diff --git a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
index 374cab4fe..d3e9ad8b0 100644
--- a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
@@ -4,6 +4,7 @@ import type { GatewayCtx } from './gateway-ctx.ts';
import { withUpstreamTelemetry } from './upstream-telemetry.ts';
import { initRepo } from '../../../repo/index.ts';
import { InMemoryRepo } from '../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
import type { MessagesStreamEvent } from '@floway-dev/protocols/messages';
import type { PerformanceTelemetryContext } from '@floway-dev/provider';
@@ -24,6 +25,7 @@ const baseCtx = (overrides: Partial = {}): GatewayCtx => {
apiKeyId: 'key_1',
upstreamIds: null,
wantsStream: true,
+ c: stubAuthedContext(),
responseHeaders: new Headers(),
requestStartedAt: 0,
runtimeLocation: 'TEST',
diff --git a/packages/gateway/src/data-plane/completions/serve.ts b/packages/gateway/src/data-plane/completions/serve.ts
index 2ab560c4b..d72ed42c2 100644
--- a/packages/gateway/src/data-plane/completions/serve.ts
+++ b/packages/gateway/src/data-plane/completions/serve.ts
@@ -10,7 +10,7 @@ import type { Context } from 'hono';
import { tokenUsageFromCompletionsUsage } from './usage.ts';
import type { TokenUsage } from '../../repo/types.ts';
-import { createGatewayCtxFromHono } from '../chat/shared/gateway-ctx.ts';
+import { createGatewayCtxFromHono, finalizeGatewayResponse } from '../chat/shared/gateway-ctx.ts';
import { readRequestBody } from '../chat/shared/request-body.ts';
import { passthroughApiError, passthroughServe } from '../shared/passthrough-serve.ts';
import { isOpenAIUsageOnlyEventShape, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -65,8 +65,7 @@ export const completions = async (c: Context): Promise => {
});
if (request.type === 'invalid') {
ctx.dump?.error('gateway');
- const response = passthroughApiError(c, request.message, 400);
- return (ctx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(ctx, passthroughApiError(c, request.message, 400));
}
ctx.dump?.requestedModel(request.model);
@@ -115,5 +114,5 @@ export const completions = async (c: Context): Promise => {
},
},
});
- return (ctx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(ctx, response);
};
diff --git a/packages/gateway/src/data-plane/completions/serve_test.ts b/packages/gateway/src/data-plane/completions/serve_test.ts
index 1cd2263bb..1dd90c6ed 100644
--- a/packages/gateway/src/data-plane/completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/completions/serve_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
import { initDumpBroker, initDumpStore } from '../../dump/registry.ts';
import { installDumpStubs } from '../../dump/test-fixtures.ts';
+import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
import { buildCustomUpstreamRecord, flushAsyncWork, requestApp, setupAppTest } from '../../test-helpers.ts';
import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
import { assertEquals, assertExists, jsonResponse, withMockedFetch } from '@floway-dev/test-utils';
@@ -397,3 +398,49 @@ test('/v1/completions streaming records usage row, request_total+upstream_succes
assertEquals(frames[3]?.type, 'done');
}
});
+
+// Alias header coverage for /v1/completions: the matched alias name rides
+// out on `x-floway-alias`. This test drives the non-streaming path
+// (passthrough's `json` branch); the streaming path stamps the same header
+// via Hono's `c.header` before `streamSSE` builds the response.
+test('/v1/completions stamps x-floway-alias when the request hits an aliased model', async () => {
+ const { apiKey, repo } = await setupAppTest();
+ await registerCompletionsUpstream(repo);
+ (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+ {
+ alias: 'completions-alias',
+ targetModelId: 'davinci-002',
+ upstreamIds: [],
+ rules: {},
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ createdAt: 0,
+ },
+ ]);
+
+ await withMockedFetch(
+ request => {
+ const url = new URL(request.url);
+ if (url.hostname === 'passthrough.example.com' && url.pathname === '/v1/completions') {
+ return jsonResponse({
+ id: 'cmpl_resp',
+ object: 'text_completion',
+ created: 1,
+ model: 'davinci-002',
+ choices: [{ index: 0, text: ' world', finish_reason: 'stop' }],
+ usage: { prompt_tokens: 5, completion_tokens: 1, total_tokens: 6 },
+ });
+ }
+ throw new Error(`Unhandled fetch ${request.url}`);
+ },
+ async () => {
+ const response = await requestApp('/v1/completions', {
+ method: 'POST',
+ headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
+ body: JSON.stringify({ model: 'completions-alias', prompt: 'hello' }),
+ });
+ assertEquals(response.status, 200);
+ assertEquals(response.headers.get('x-floway-alias'), 'completions-alias');
+ },
+ );
+});
diff --git a/packages/gateway/src/data-plane/embeddings/serve.ts b/packages/gateway/src/data-plane/embeddings/serve.ts
index 6262546e5..9c33e6736 100644
--- a/packages/gateway/src/data-plane/embeddings/serve.ts
+++ b/packages/gateway/src/data-plane/embeddings/serve.ts
@@ -3,7 +3,7 @@
import type { Context } from 'hono';
-import { createGatewayCtxFromHono } from '../chat/shared/gateway-ctx.ts';
+import { createGatewayCtxFromHono, finalizeGatewayResponse } from '../chat/shared/gateway-ctx.ts';
import { readRequestBody } from '../chat/shared/request-body.ts';
import { passthroughApiError, passthroughServe } from '../shared/passthrough-serve.ts';
import { tokenUsageFromEmbeddingsBody } from '../shared/telemetry/usage.ts';
@@ -49,8 +49,7 @@ export const embeddings = async (c: Context): Promise => {
const request = prepareEmbeddingsRequest(requestBody.bytes);
if (request.type === 'invalid') {
ctx.dump?.error('gateway');
- const response = passthroughApiError(c, request.message, 400);
- return (ctx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(ctx, passthroughApiError(c, request.message, 400));
}
ctx.dump?.requestedModel(request.model);
@@ -66,5 +65,5 @@ export const embeddings = async (c: Context): Promise => {
},
response: { format: 'json', extractBilling: tokenUsageFromEmbeddingsBody },
});
- return (ctx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(ctx, response);
};
diff --git a/packages/gateway/src/data-plane/embeddings/serve_test.ts b/packages/gateway/src/data-plane/embeddings/serve_test.ts
index bf86dc9a7..c6c44f61b 100644
--- a/packages/gateway/src/data-plane/embeddings/serve_test.ts
+++ b/packages/gateway/src/data-plane/embeddings/serve_test.ts
@@ -1,5 +1,6 @@
import { test } from 'vitest';
+import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
import { buildCustomUpstreamRecord, copilotModels, flushAsyncWork, requestApp, setupAppTest } from '../../test-helpers.ts';
import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
import { jsonResponse, withMockedFetch, assertEquals, assertExists } from '@floway-dev/test-utils';
@@ -495,3 +496,58 @@ test('/v1/embeddings rejects malformed body at the provider-independent boundary
},
);
});
+
+// Critical alias header coverage for the passthrough surface: the matched
+// alias name must ride out on `x-floway-alias` so downstream observers can
+// tell a real-model hit from an alias-routed one. Goes through Hono's
+// `c.header` in `passthroughServe`, mirroring the chat path.
+test('/v1/embeddings stamps x-floway-alias when the request hits an aliased model', async () => {
+ const { apiKey, repo } = await setupAppTest();
+ (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+ {
+ alias: 'embed-alias',
+ targetModelId: 'text-embedding-real',
+ upstreamIds: [],
+ rules: {},
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ createdAt: 0,
+ },
+ ]);
+
+ await withMockedFetch(
+ request => {
+ const url = new URL(request.url);
+ if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+ if (url.pathname === '/copilot_internal/v2/token') {
+ return jsonResponse({
+ token: 'copilot-access-token',
+ expires_at: 4102444800,
+ refresh_in: 3600,
+ endpoints: { api: 'https://api.individual.githubcopilot.com' },
+ });
+ }
+ if (url.pathname === '/models') {
+ return jsonResponse(copilotModels([{ id: 'text-embedding-real', supported_endpoints: ['/embeddings'] }]));
+ }
+ if (url.pathname === '/embeddings') {
+ return jsonResponse({
+ object: 'list',
+ model: 'text-embedding-real',
+ data: [{ object: 'embedding', index: 0, embedding: [0.1] }],
+ usage: { prompt_tokens: 1, total_tokens: 1 },
+ });
+ }
+ throw new Error(`Unhandled fetch ${request.url}`);
+ },
+ async () => {
+ const response = await requestApp('/v1/embeddings', {
+ method: 'POST',
+ headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
+ body: JSON.stringify({ model: 'embed-alias', input: 'hello' }),
+ });
+ assertEquals(response.status, 200);
+ assertEquals(response.headers.get('x-floway-alias'), 'embed-alias');
+ },
+ );
+});
diff --git a/packages/gateway/src/data-plane/images/serve.ts b/packages/gateway/src/data-plane/images/serve.ts
index 405b29b03..58f8a7a25 100644
--- a/packages/gateway/src/data-plane/images/serve.ts
+++ b/packages/gateway/src/data-plane/images/serve.ts
@@ -10,7 +10,7 @@
import type { Context } from 'hono';
-import { createGatewayCtxFromHono } from '../chat/shared/gateway-ctx.ts';
+import { createGatewayCtxFromHono, finalizeGatewayResponse } from '../chat/shared/gateway-ctx.ts';
import { readRequestBody } from '../chat/shared/request-body.ts';
import { passthroughApiError, passthroughServe } from '../shared/passthrough-serve.ts';
import { tokenUsageFromImagesBody } from '../shared/telemetry/usage.ts';
@@ -48,8 +48,7 @@ export const imagesGenerations = async (c: Context): Promise => {
const request = prepareImagesGenerationsRequest(requestBody.bytes);
if (request.type === 'invalid') {
ctx.dump?.error('gateway');
- const response = passthroughApiError(c, request.message, 400);
- return (ctx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(ctx, passthroughApiError(c, request.message, 400));
}
ctx.dump?.requestedModel(request.model);
@@ -65,7 +64,7 @@ export const imagesGenerations = async (c: Context): Promise => {
},
response: { format: 'json', extractBilling: tokenUsageFromImagesBody },
});
- return (ctx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(ctx, response);
};
export const imagesEdits = async (c: Context): Promise => {
@@ -82,15 +81,13 @@ export const imagesEdits = async (c: Context): Promise => {
// parser's error text. The wording is enough for a client to know
// they sent the wrong content type or a malformed body.
ctx.dump?.error('gateway');
- const response = passthroughApiError(c, 'Image edits request body must be a valid multipart/form-data payload.', 400);
- return (ctx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(ctx, passthroughApiError(c, 'Image edits request body must be a valid multipart/form-data payload.', 400));
}
const modelRaw = form.get('model');
if (typeof modelRaw !== 'string' || modelRaw.length === 0) {
ctx.dump?.error('gateway');
- const response = passthroughApiError(c, 'Image edits request body must include a model field.', 400);
- return (ctx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(ctx, passthroughApiError(c, 'Image edits request body must include a model field.', 400));
}
ctx.dump?.requestedModel(modelRaw);
@@ -115,5 +112,5 @@ export const imagesEdits = async (c: Context): Promise => {
},
response: { format: 'json', extractBilling: tokenUsageFromImagesBody },
});
- return (ctx.dump?.finalize(response) ?? response);
+ return finalizeGatewayResponse(ctx, response);
};
diff --git a/packages/gateway/src/data-plane/images/serve_test.ts b/packages/gateway/src/data-plane/images/serve_test.ts
index 85b5f1adf..f241ad89d 100644
--- a/packages/gateway/src/data-plane/images/serve_test.ts
+++ b/packages/gateway/src/data-plane/images/serve_test.ts
@@ -1,5 +1,6 @@
import { test } from 'vitest';
+import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
import { buildCustomUpstreamRecord, copilotModels, flushAsyncWork, requestApp, setupAppTest } from '../../test-helpers.ts';
import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
import { jsonResponse, withMockedFetch, assertEquals, assertExists } from '@floway-dev/test-utils';
@@ -233,3 +234,62 @@ test('/v1/images/edits forwards a multipart request through an Azure model and r
const usageRows = await repo.usage.listAll();
assertEquals(usageRows.some(row => row.model === 'gpt-image-2' && row.tokens.input === 7 && row.tokens.output === 11), true);
});
+
+// Alias header coverage for /v1/images/generations: an alias whose target is
+// an image-generation model must surface its name on `x-floway-alias` for
+// downstream observability.
+test('/v1/images/generations stamps x-floway-alias when the request hits an aliased model', async () => {
+ const { apiKey, repo } = await setupAppTest();
+ clearInProcessCopilotTokenCache();
+ (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+ {
+ alias: 'image-alias',
+ targetModelId: 'gpt-image-2',
+ upstreamIds: [],
+ rules: {},
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ createdAt: 0,
+ },
+ ]);
+ await repo.upstreams.save(buildCustomUpstreamRecord({
+ id: 'up_images',
+ name: 'Custom Image Provider',
+ sortOrder: 100,
+ config: {
+ baseUrl: 'https://images.example.com',
+ authStyle: 'bearer',
+ apiKey: 'sk-images',
+ endpoints: {},
+ },
+ }));
+
+ await withMockedFetch(
+ request => {
+ const url = new URL(request.url);
+ if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+ if (url.pathname === '/copilot_internal/v2/token') {
+ return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+ }
+ if (url.hostname === 'api.individual.githubcopilot.com' && url.pathname === '/models') {
+ return jsonResponse(copilotModels([{ id: 'copilot-chat', supported_endpoints: ['/chat/completions'] }]));
+ }
+ if (url.hostname === 'images.example.com' && url.pathname === '/v1/models') {
+ return jsonResponse({ data: [{ id: 'gpt-image-2' }] });
+ }
+ if (url.hostname === 'images.example.com' && url.pathname === '/v1/images/generations') {
+ return jsonResponse({ data: [{ b64_json: 'aGVsbG8=' }] });
+ }
+ throw new Error(`Unhandled fetch ${request.url}`);
+ },
+ async () => {
+ const response = await requestApp('/v1/images/generations', {
+ method: 'POST',
+ headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
+ body: JSON.stringify({ model: 'image-alias', prompt: 'hi' }),
+ });
+ assertEquals(response.status, 200);
+ assertEquals(response.headers.get('x-floway-alias'), 'image-alias');
+ },
+ );
+});
diff --git a/packages/gateway/src/data-plane/models/gemini_test.ts b/packages/gateway/src/data-plane/models/gemini_test.ts
index 7f4cedaec..0327bae33 100644
--- a/packages/gateway/src/data-plane/models/gemini_test.ts
+++ b/packages/gateway/src/data-plane/models/gemini_test.ts
@@ -1,5 +1,6 @@
import { test } from 'vitest';
+import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
import { buildCustomUpstreamRecord, copilotModels, requestApp, setupAppTest } from '../../test-helpers.ts';
import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
import { jsonResponse, withMockedFetch, assertEquals } from '@floway-dev/test-utils';
@@ -408,3 +409,48 @@ test('/v1beta/models hides malformed upstream response bodies', async () => {
},
);
});
+
+// Gemini's `Model` resource is closed (no `aliasedFrom` extension), so the
+// `/v1beta/models` surface advertises an alias entry as a synthetic Gemini
+// model whose `name` is `models/` + the alias id and whose `displayName` is
+// the alias id itself (not the target's display name). This test guards the
+// synthetic shape — name, displayName, supportedGenerationMethods — so a
+// future refactor of `loadGeminiModels` cannot silently drop alias entries.
+test('/v1beta/models appends visible aliases as synthetic Gemini model entries', async () => {
+ const { repo, apiKey } = await setupAppTest();
+
+ (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+ {
+ alias: 'codex-auto-review',
+ targetModelId: 'gpt-gemini-list',
+ upstreamIds: [],
+ rules: { reasoning: { effort: 'low' } },
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ createdAt: 1_700_000_000,
+ },
+ ]);
+
+ await withMockedFetch(
+ request => {
+ const url = new URL(request.url);
+ if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+ if (url.pathname === '/copilot_internal/v2/token') {
+ return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+ }
+ if (url.pathname === '/models') {
+ return jsonResponse(copilotModels([{ id: 'gpt-gemini-list', display_name: 'GPT Gemini List' }]));
+ }
+ throw new Error(`Unhandled fetch ${request.url}`);
+ },
+ async () => {
+ const response = await requestApp('/v1beta/models', { headers: { 'x-api-key': apiKey.key } });
+ assertEquals(response.status, 200);
+ const body = await response.json() as { models: Array<{ name: string; displayName: string; supportedGenerationMethods: string[] }> };
+ const aliasEntry = body.models.find(m => m.name === 'models/codex-auto-review');
+ if (!aliasEntry) throw new Error('expected codex-auto-review alias entry');
+ assertEquals(aliasEntry.displayName, 'codex-auto-review');
+ assertEquals(aliasEntry.supportedGenerationMethods, ['generateContent', 'streamGenerateContent', 'countTokens']);
+ },
+ );
+});
diff --git a/packages/gateway/src/data-plane/shared/passthrough-serve.ts b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
index 6c917fb86..b566b582d 100644
--- a/packages/gateway/src/data-plane/shared/passthrough-serve.ts
+++ b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
@@ -24,6 +24,7 @@ import type { AuthedContext } from '../../middleware/auth.ts';
import { getRepo } from '../../repo/index.ts';
import type { TokenUsage } from '../../repo/types.ts';
import type { GatewayCtx } from '../chat/shared/gateway-ctx.ts';
+import { stageGatewayResponseHeader } from '../chat/shared/gateway-ctx.ts';
import { type StreamCompletion, writeSSEFrames } from '../chat/shared/stream/sse.ts';
import { resolveModelForRequest } from '../providers/registry.ts';
import type { BackgroundScheduler } from '@floway-dev/platform';
@@ -155,8 +156,10 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
// rules themselves never apply here — the inbound payload (embeddings,
// images, /v1/completions) has no protocol-extension slots for the rule
// knobs. We still surface the matched alias name on the
- // `x-floway-alias` response header and trace one log line per dropped
- // rule so an operator can confirm the rewrite ran.
+ // `x-floway-alias` response header (staged via Hono's `c.header` so it
+ // survives `streamSSE`'s internal `c.newResponse` on the streaming
+ // `/v1/completions` path) and trace one log line per dropped rule so an
+ // operator can confirm the rewrite ran.
const aliases = await getRepo().modelAliases.loadAll();
// Each match is one (upstream, upstream-catalog id) pair that interprets
// the inbound public id. Iteration order follows configured sort_order
@@ -172,7 +175,7 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
for (const match of matches) {
if (!bindingServesEndpoint(match.binding)) continue;
if (match.aliasName !== undefined) {
- ctx.responseHeaders.set('x-floway-alias', match.aliasName);
+ stageGatewayResponseHeader(ctx, 'x-floway-alias', match.aliasName);
traceDroppedAliasRulesForPassthrough(match.aliasName, aliases, sourceApi);
}
diff --git a/packages/gateway/src/test-helpers/gateway-ctx.ts b/packages/gateway/src/test-helpers/gateway-ctx.ts
index 3ebf1f474..047981ed1 100644
--- a/packages/gateway/src/test-helpers/gateway-ctx.ts
+++ b/packages/gateway/src/test-helpers/gateway-ctx.ts
@@ -1,4 +1,13 @@
import type { GatewayCtx } from '../data-plane/chat/shared/gateway-ctx.ts';
+import type { AuthedContext } from '../middleware/auth.ts';
+
+// Minimal stub for the Hono `c` carried on `GatewayCtx`. Only `c.header`
+// is touched by the serve layer (to stamp `x-floway-alias`); unit tests
+// that don't exercise the alias branch never call it. Integration tests
+// that need real Hono behavior build the ctx via `createGatewayCtxFromHono`
+// against a real `makeApp()` request rather than going through this stub.
+export const stubAuthedContext = (): AuthedContext =>
+ ({ header: () => {} } as unknown as AuthedContext);
// Shared minimal GatewayCtx for tests that exercise serve / respond /
// interceptor code in isolation. Defaults satisfy every required field; pass
@@ -7,6 +16,7 @@ import type { GatewayCtx } from '../data-plane/chat/shared/gateway-ctx.ts';
// construct one and spread `{ abortSignal: controller.signal,
// downstreamAbortController: controller }` into the overrides.
export const mockGatewayCtx = (overrides: Partial = {}): GatewayCtx => ({
+ c: stubAuthedContext(),
apiKeyId: 'key_test',
upstreamIds: null,
wantsStream: false,
From 17a7877c5fdcda16064bf9d972675583b5c69acb Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 01:38:34 +0800
Subject: [PATCH 008/170] revert(translate): restore pre-extension native field
translation on *-via-messages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Task 3 (`e1891e1d`) added synthesis of `thinking.display` from Responses-native
`reasoning.summary` and Gemini-native `thinkingConfig.includeThoughts`, plus a
new native-to-native `service_tier` carry on Responses → Messages. These are
NATIVE fields with translation behavior the prior pairs had already decided;
the alias work should not have reshaped that contract.
Revert the native-field paths in:
- responses-via-messages: drop `reasoning.summary` → `thinking.display` and
`service_tier` → `service_tier` propagation. Keep the new extension-field
carries (`thinking_budget`, `adaptive_thinking`, `anthropic_speed`).
- gemini-via-messages: drop `thinkingConfig.includeThoughts` →
`thinking.display` propagation. Keep `generationConfig.serviceTier`,
`verbosity`, and top-level `anthropicSpeed` extension carries.
Tests that asserted the new native-field synthesis are removed; the existing
extension-field tests stay untouched.
---
.../src/gemini-via-messages/request.ts | 8 ------
.../src/gemini-via-messages/request_test.ts | 16 -----------
.../src/responses-via-messages/request.ts | 28 ++++++-------------
.../responses-via-messages/request_test.ts | 23 ---------------
4 files changed, 8 insertions(+), 67 deletions(-)
diff --git a/packages/translate/src/gemini-via-messages/request.ts b/packages/translate/src/gemini-via-messages/request.ts
index 91fbe0493..c109da54d 100644
--- a/packages/translate/src/gemini-via-messages/request.ts
+++ b/packages/translate/src/gemini-via-messages/request.ts
@@ -161,14 +161,6 @@ const applyThinkingConfig = (request: MessagesPayload, thinkingConfig?: GeminiTh
}
}
- // `includeThoughts` materializes onto `thinking.display`: true → summarized
- // (Anthropic redacts to a single-block summary), false → omitted (no
- // thinking surface at all). Skip when the source did not express either.
- if (thinkingConfig.includeThoughts !== undefined && request.thinking?.type !== 'disabled') {
- const display = thinkingConfig.includeThoughts === true ? ('summarized' as const) : ('omitted' as const);
- request.thinking = request.thinking ? { ...request.thinking, display } : { type: 'enabled', display };
- }
-
const effort = geminiThinkingLevelEffort(thinkingConfig);
// Spread to merge with any output_config fields a sibling helper has
// already written (e.g. structured-output `format` from
diff --git a/packages/translate/src/gemini-via-messages/request_test.ts b/packages/translate/src/gemini-via-messages/request_test.ts
index b10339a49..555c12aa4 100644
--- a/packages/translate/src/gemini-via-messages/request_test.ts
+++ b/packages/translate/src/gemini-via-messages/request_test.ts
@@ -428,22 +428,6 @@ test('buildTargetRequest emits generationConfig.serviceTier onto Messages servic
assertEquals(result.service_tier, 'priority');
});
-test('buildTargetRequest maps includeThoughts onto thinking.display (true → summarized, false → omitted)', () => {
- const summarized = buildTargetRequest(
- { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { includeThoughts: true } } },
- 'claude-test',
- noOptions,
- );
- const omitted = buildTargetRequest(
- { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { includeThoughts: false } } },
- 'claude-test',
- noOptions,
- );
-
- assertEquals(summarized.thinking, { type: 'enabled', display: 'summarized' });
- assertEquals(omitted.thinking, { type: 'enabled', display: 'omitted' });
-});
-
test('buildTargetRequest drops verbosity extension on Messages (no slot)', () => {
const result = buildTargetRequest(
{ contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { verbosity: 'low' } },
diff --git a/packages/translate/src/responses-via-messages/request.ts b/packages/translate/src/responses-via-messages/request.ts
index 504ca45fa..f16acb936 100644
--- a/packages/translate/src/responses-via-messages/request.ts
+++ b/packages/translate/src/responses-via-messages/request.ts
@@ -1,7 +1,7 @@
import { parseToolArgumentsObject } from '../shared/messages/tool-arguments.ts';
import { responsesReasoningToMessagesUpstreamBlock } from '../shared/messages-and-responses/reasoning.ts';
import { buildCustomToolInputSchema } from '../shared/responses-via/custom-tool-wrap.ts';
-import { buildMessagesThinkingFromExtensions, mapSummaryToAnthropicDisplay } from '../shared/via-messages/anthropic-extensions.ts';
+import { buildMessagesThinkingFromExtensions } from '../shared/via-messages/anthropic-extensions.ts';
import { applyLastMessageCacheBreakpoint, applyLastToolCacheBreakpoint, EPHEMERAL_CACHE_CONTROL } from '../shared/via-messages/cache-breakpoints.ts';
import { fetchRemoteImage, type RemoteImageLoader, resolveImageUrlToMessagesImage } from '../shared/via-messages/remote-images.ts';
import {
@@ -332,29 +332,18 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op
if (formatSchema) outputConfig.format = { type: 'json_schema', schema: formatSchema };
const hasOutputConfig = Object.keys(outputConfig).length > 0;
- // Native Responses → Messages: `reasoning.summary` materializes onto the
- // Messages-native `thinking.display`. Extension-driven thinking
- // (`thinking_budget`, `adaptive_thinking`) takes precedence over the
- // summary-only fallback because the alias write-side validator pins
- // facets one-at-a-time; when neither extension is set and summary is the
- // only signal, we synthesize `thinking.{type:'enabled', display}` so the
- // display reaches the wire.
+ // Extension-driven thinking (`thinking_budget`, `adaptive_thinking`) wins
+ // over the native `effort === 'none'` disable, so the alias write-side
+ // facets that target the structured thinking slot survive the legacy
+ // disable shortcut. Native `reasoning.summary` and `service_tier` do not
+ // surface onto Messages — the Responses-native vocabulary keeps its
+ // pre-existing translation contract and rides the upstream sanitizer.
const extensionThinking = buildMessagesThinkingFromExtensions({
thinkingBudget: payload.thinking_budget,
adaptiveThinking: payload.adaptive_thinking,
});
const disabledThinking = effort === 'none' ? { type: 'disabled' as const } : undefined;
- const summaryDisplay = payload.reasoning?.summary !== undefined ? mapSummaryToAnthropicDisplay(payload.reasoning.summary) : undefined;
- const fallbackDisplayThinking =
- !extensionThinking && !disabledThinking && summaryDisplay !== undefined
- ? { type: 'enabled' as const, display: summaryDisplay as NonNullable['display'] }
- : undefined;
- const thinkingFromExtensions = extensionThinking
- ? summaryDisplay !== undefined
- ? { ...extensionThinking, display: summaryDisplay as NonNullable['display'] }
- : extensionThinking
- : undefined;
- const thinking = thinkingFromExtensions ?? disabledThinking ?? fallbackDisplayThinking;
+ const thinking = extensionThinking ?? disabledThinking;
// Responses `metadata` is intentionally omitted on the Messages path;
// not coerced into Anthropic metadata.user_id, prompt-cache, or safety
@@ -372,7 +361,6 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op
...(thinking ? { thinking } : {}),
...(hasOutputConfig ? { output_config: outputConfig } : {}),
...(payload.anthropic_speed != null ? { speed: payload.anthropic_speed } : {}),
- ...(payload.service_tier != null ? { service_tier: payload.service_tier } : {}),
};
return { target, customToolNames };
diff --git a/packages/translate/src/responses-via-messages/request_test.ts b/packages/translate/src/responses-via-messages/request_test.ts
index f36ff89f6..57951048c 100644
--- a/packages/translate/src/responses-via-messages/request_test.ts
+++ b/packages/translate/src/responses-via-messages/request_test.ts
@@ -664,35 +664,12 @@ test('translateResponsesToMessages emits adaptive_thinking onto thinking.{adapti
assertEquals(result.target.thinking, { type: 'adaptive' });
});
-test('translateResponsesToMessages maps reasoning.summary onto thinking.display (concise|detailed → summarized, omitted → omitted)', async () => {
- const concise = await translateResponsesToMessages(minimalResponsesPayload({ reasoning: { effort: 'high', summary: 'concise' } }));
- const detailed = await translateResponsesToMessages(minimalResponsesPayload({ reasoning: { effort: 'high', summary: 'detailed' } }));
- const omitted = await translateResponsesToMessages(minimalResponsesPayload({ reasoning: { effort: 'high', summary: 'omitted' } }));
-
- assertEquals(concise.target.thinking, { type: 'enabled', display: 'summarized' });
- assertEquals(detailed.target.thinking, { type: 'enabled', display: 'summarized' });
- assertEquals(omitted.target.thinking, { type: 'enabled', display: 'omitted' });
-});
-
test('translateResponsesToMessages emits anthropic_speed onto speed', async () => {
const result = await translateResponsesToMessages(minimalResponsesPayload({ anthropic_speed: 'fast' }));
assertEquals(result.target.speed, 'fast');
});
-test('translateResponsesToMessages forwards service_tier verbatim', async () => {
- const result = await translateResponsesToMessages(minimalResponsesPayload({ service_tier: 'priority' }));
- assertEquals(result.target.service_tier, 'priority');
-});
-
test('translateResponsesToMessages leaves anthropic_beta as inbound residue for the gateway header pass', async () => {
const result = await translateResponsesToMessages(minimalResponsesPayload({ anthropic_beta: ['fast-mode-2026-02-01'] }));
assertEquals('anthropic_beta' in result.target, false);
});
-
-test('translateResponsesToMessages emission stack: budget + summary writes display onto the budget-driven block', async () => {
- const result = await translateResponsesToMessages(minimalResponsesPayload({
- thinking_budget: 2048,
- reasoning: { effort: 'medium', summary: 'concise' },
- }));
- assertEquals(result.target.thinking, { type: 'enabled', budget_tokens: 2048, display: 'summarized' });
-});
From 6d13258431e2f2bb5508739f44d1082a38642e02 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 01:38:52 +0800
Subject: [PATCH 009/170] feat(aliases): drop responses entry shim; enumerate
aliases per upstream + form
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Two follow-up changes to the alias data-plane:
1. Remove the `/v1/responses` entry-level `codex-auto-review → gpt-5.4`
rewrite shim. The seed alias in `0046_model_aliases.sql` now routes
`codex-auto-review` everywhere through the normal matcher. On a Codex
upstream that exposes a real `codex-auto-review`, `on_conflict=real-only`
lets the real id win — Codex CLI callers wanting the previous shim
behaviour must set `effort=low` themselves or pick a different
`onConflict`. All other inbound surfaces are unchanged.
2. List aliases per-upstream and per-addressable-form in `/v1/models` and the
Gemini `/v1beta/models` listing, instead of one synthetic entry per alias.
Each visible alias now emits one entry per (provider, listed form) pair
whose raw catalog can resolve the target, so dual-listed upstreams emit
both `codex-auto-review` and `<prefix>/codex-auto-review`. Aliases whose
target is not reachable from any upstream produce zero entries; the
previous "no silent hide" rule no longer fits a per-upstream model.
A new `display_name` column on `model_aliases` (migration `0047`) carries
an operator-set label; the listing composes it as `${upstream}: ${alias
displayName}` when set, or `${upstream}: ${target displayName}${rules
summary}` otherwise. The rules-summary formatter and display-name
composer live in `control-plane/model-aliases/display.ts` and are
covered by unit tests.
The shared per-upstream alias emission helper sits in
`data-plane/models/alias-listing.ts` and is reused by both the OpenAI and
Gemini listings. `getModelsForListing` exposes the per-upstream raw
catalog alongside the merged public model list so we collect catalogs
once per request even when many aliases need them.
---
.../0047_model_aliases_display_name.sql | 3 +
.../control-plane/model-aliases/display.ts | 39 ++++
.../model-aliases/display_test.ts | 75 +++++++
.../src/control-plane/model-aliases/repo.ts | 4 +-
.../control-plane/model-aliases/repo_test.ts | 23 +++
.../src/control-plane/model-aliases/types.ts | 5 +
.../src/data-plane/chat/responses/http.ts | 41 +---
.../data-plane/chat/responses/http_test.ts | 60 ------
.../src/data-plane/models/alias-listing.ts | 52 +++++
.../gateway/src/data-plane/models/gemini.ts | 39 ++--
.../src/data-plane/models/gemini_test.ts | 2 +-
.../gateway/src/data-plane/models/load.ts | 51 +++--
.../src/data-plane/models/serve_test.ts | 184 +++++++++++++++++-
.../src/data-plane/providers/registry.ts | 37 +++-
14 files changed, 472 insertions(+), 143 deletions(-)
create mode 100644 packages/gateway/migrations/0047_model_aliases_display_name.sql
create mode 100644 packages/gateway/src/control-plane/model-aliases/display.ts
create mode 100644 packages/gateway/src/control-plane/model-aliases/display_test.ts
create mode 100644 packages/gateway/src/data-plane/models/alias-listing.ts
diff --git a/packages/gateway/migrations/0047_model_aliases_display_name.sql b/packages/gateway/migrations/0047_model_aliases_display_name.sql
new file mode 100644
index 000000000..9d21ed9a1
--- /dev/null
+++ b/packages/gateway/migrations/0047_model_aliases_display_name.sql
@@ -0,0 +1,3 @@
+ALTER TABLE model_aliases ADD COLUMN display_name TEXT;
+
+UPDATE model_aliases SET display_name = 'Codex Auto Review' WHERE alias = 'codex-auto-review';
diff --git a/packages/gateway/src/control-plane/model-aliases/display.ts b/packages/gateway/src/control-plane/model-aliases/display.ts
new file mode 100644
index 000000000..576c823a8
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/display.ts
@@ -0,0 +1,39 @@
+import type { ModelAliasRules } from './types.ts';
+
+// Render the rule set as a parenthesized, comma-joined string so the
+// `/v1/models` listing can suffix it onto the target model's display name when
+// the operator did not supply an explicit alias `displayName`. Empty rules
+// produce an empty string (no parentheses); the join order is fixed across
+// fields so a given rule set always renders the same way.
+//
+// `anthropicBeta` is sorted at format time so two operators carrying the same
+// token set in different orders see the same label.
+export const formatAliasRulesSummary = (rules: ModelAliasRules): string => {
+ const parts: string[] = [];
+ if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`);
+ if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`);
+ if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning');
+ if (rules.reasoning?.summary !== undefined) parts.push(`${rules.reasoning.summary} summary`);
+ if (rules.verbosity !== undefined) parts.push(`${rules.verbosity} verbosity`);
+ if (rules.serviceTier !== undefined) parts.push(`${rules.serviceTier} tier`);
+ if (rules.anthropicSpeed !== undefined) parts.push(`${rules.anthropicSpeed} speed`);
+ if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
+ parts.push(rules.anthropicBeta.toSorted().join('/'));
+ }
+ return parts.length > 0 ? ` (${parts.join(', ')})` : '';
+};
+
+// Compose the final per-entry display name shown in `/v1/models`. The
+// upstream name always leads so an operator scanning the listing sees which
+// upstream each row belongs to before reading the alias-specific part.
+export const composeAliasDisplayName = (input: {
+ upstreamDisplayName: string;
+ aliasDisplayName?: string;
+ targetDisplayName: string;
+ rules: ModelAliasRules;
+}): string => {
+ if (input.aliasDisplayName !== undefined) {
+ return `${input.upstreamDisplayName}: ${input.aliasDisplayName}`;
+ }
+ return `${input.upstreamDisplayName}: ${input.targetDisplayName}${formatAliasRulesSummary(input.rules)}`;
+};
diff --git a/packages/gateway/src/control-plane/model-aliases/display_test.ts b/packages/gateway/src/control-plane/model-aliases/display_test.ts
new file mode 100644
index 000000000..7ba7700d0
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/display_test.ts
@@ -0,0 +1,75 @@
+import { describe, expect, test } from 'vitest';
+
+import { composeAliasDisplayName, formatAliasRulesSummary } from './display.ts';
+
+describe('formatAliasRulesSummary', () => {
+ test('returns empty string when no rules are set', () => {
+ expect(formatAliasRulesSummary({})).toBe('');
+ });
+
+ test('formats each rule field with its canonical suffix', () => {
+ expect(formatAliasRulesSummary({ reasoning: { effort: 'high' } })).toBe(' (high effort)');
+ expect(formatAliasRulesSummary({ reasoning: { budgetTokens: 4096 } })).toBe(' (4096tk reasoning)');
+ expect(formatAliasRulesSummary({ reasoning: { adaptive: true } })).toBe(' (adaptive reasoning)');
+ expect(formatAliasRulesSummary({ reasoning: { summary: 'detailed' } })).toBe(' (detailed summary)');
+ expect(formatAliasRulesSummary({ verbosity: 'low' })).toBe(' (low verbosity)');
+ expect(formatAliasRulesSummary({ serviceTier: 'priority' })).toBe(' (priority tier)');
+ expect(formatAliasRulesSummary({ anthropicSpeed: 'fast' })).toBe(' (fast speed)');
+ });
+
+ test('sorts anthropicBeta tokens and joins with slashes', () => {
+ expect(formatAliasRulesSummary({ anthropicBeta: ['extended-thinking', 'fast-mode-2026-02-01'] })).toBe(
+ ' (extended-thinking/fast-mode-2026-02-01)',
+ );
+ expect(formatAliasRulesSummary({ anthropicBeta: ['fast-mode-2026-02-01', 'extended-thinking'] })).toBe(
+ ' (extended-thinking/fast-mode-2026-02-01)',
+ );
+ });
+
+ test('drops anthropicBeta when the token list is empty', () => {
+ expect(formatAliasRulesSummary({ anthropicBeta: [] })).toBe('');
+ });
+
+ test('joins multiple fields with comma in deterministic order', () => {
+ expect(
+ formatAliasRulesSummary({
+ reasoning: { effort: 'low', summary: 'concise' },
+ verbosity: 'high',
+ anthropicSpeed: 'fast',
+ }),
+ ).toBe(' (low effort, concise summary, high verbosity, fast speed)');
+ });
+});
+
+describe('composeAliasDisplayName', () => {
+ test('uses alias displayName when set, suppressing the rules summary', () => {
+ expect(
+ composeAliasDisplayName({
+ upstreamDisplayName: 'Azure',
+ aliasDisplayName: 'Codex Auto Review',
+ targetDisplayName: 'GPT-5.4',
+ rules: { reasoning: { effort: 'low' } },
+ }),
+ ).toBe('Azure: Codex Auto Review');
+ });
+
+ test('falls back to target displayName with rules suffix when alias displayName is missing', () => {
+ expect(
+ composeAliasDisplayName({
+ upstreamDisplayName: 'Azure',
+ targetDisplayName: 'GPT-5.4',
+ rules: { reasoning: { effort: 'low' } },
+ }),
+ ).toBe('Azure: GPT-5.4 (low effort)');
+ });
+
+ test('omits the rules suffix when rules are empty', () => {
+ expect(
+ composeAliasDisplayName({
+ upstreamDisplayName: 'Azure',
+ targetDisplayName: 'GPT-5.4',
+ rules: {},
+ }),
+ ).toBe('Azure: GPT-5.4');
+ });
+});
diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts
index a7cfd963f..3718b5fd5 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo.ts
@@ -8,6 +8,7 @@ interface ModelAliasRow {
rules_json: string;
visible_in_models_list: number;
on_conflict: OnConflict;
+ display_name: string | null;
created_at: number;
}
@@ -17,7 +18,7 @@ interface ModelAliasRow {
// emit alias entries in a stable, operator-predictable order across runtimes.
export const loadAllAliases = async (db: SqlDatabase): Promise => {
const { results } = await db
- .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at FROM model_aliases ORDER BY alias')
+ .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at FROM model_aliases ORDER BY alias')
.all();
return results.map(toModelAlias);
};
@@ -29,6 +30,7 @@ const toModelAlias = (row: ModelAliasRow): ModelAlias => ({
rules: parseJsonField(row.alias, 'rules_json', row.rules_json),
visibleInModelsList: row.visible_in_models_list === 1,
onConflict: row.on_conflict,
+ ...(row.display_name !== null ? { displayName: row.display_name } : {}),
createdAt: row.created_at,
});
diff --git a/packages/gateway/src/control-plane/model-aliases/repo_test.ts b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
index ff1efa046..5f1e4fa6d 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo_test.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
@@ -22,6 +22,7 @@ test('loadAllAliases reads the seed row from a freshly migrated database', async
rules: { reasoning: { effort: 'low' } },
visibleInModelsList: true,
onConflict: 'real-only',
+ displayName: 'Codex Auto Review',
});
});
@@ -72,6 +73,28 @@ test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_mo
});
});
+test('loadAllAliases reads display_name and omits the field when SQL stored NULL', async () => {
+ const db = await createSqliteTestDb();
+ await db.exec('DELETE FROM model_aliases');
+ await db
+ .prepare(
+ 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
+ )
+ .bind('with-label', 'gpt-5.4', '[]', '{}', 1, 'real-only', 'Pretty Label', 1_700_000_000)
+ .run();
+ await db
+ .prepare(
+ 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
+ )
+ .bind('no-label', 'gpt-5.4', '[]', '{}', 1, 'real-only', null, 1_700_000_001)
+ .run();
+
+ const byAlias = new Map((await loadAllAliases(db)).map(entry => [entry.alias, entry]));
+ assertEquals(byAlias.get('with-label')?.displayName, 'Pretty Label');
+ // SQL NULL becomes undefined on the typed row so callers can branch on `=== undefined`.
+ assertEquals('displayName' in (byAlias.get('no-label') ?? {}), false);
+});
+
test('loadAllAliases surfaces malformed rules_json as a descriptive error', async () => {
const db = await createSqliteTestDb();
await db.exec('DELETE FROM model_aliases');
diff --git a/packages/gateway/src/control-plane/model-aliases/types.ts b/packages/gateway/src/control-plane/model-aliases/types.ts
index 7594ceff6..3170b4b47 100644
--- a/packages/gateway/src/control-plane/model-aliases/types.ts
+++ b/packages/gateway/src/control-plane/model-aliases/types.ts
@@ -23,6 +23,11 @@ export type ModelAlias = {
readonly rules: ModelAliasRules;
readonly visibleInModelsList: boolean;
readonly onConflict: OnConflict;
+ // Operator-set human-readable label shown after the upstream display name in
+ // `/v1/models` entries (e.g. "Azure: Codex Auto Review"). When unset, the
+ // listing falls back to the alias target's display name plus a rules-summary
+ // suffix; see `control-plane/model-aliases/display.ts`.
+ readonly displayName?: string;
// Unix epoch seconds stamped at row insertion. Surfaced on the
// `/v1/models` synthesized alias entry so callers see when an alias was
// declared, matching the `created` semantics of the real entries.
diff --git a/packages/gateway/src/data-plane/chat/responses/http.ts b/packages/gateway/src/data-plane/chat/responses/http.ts
index 9346d5576..671e935ed 100644
--- a/packages/gateway/src/data-plane/chat/responses/http.ts
+++ b/packages/gateway/src/data-plane/chat/responses/http.ts
@@ -3,7 +3,6 @@ import { respondResponses } from './respond.ts';
import { PreviousResponseNotFoundError } from './serve-prep.ts';
import { responsesServe } from './serve.ts';
import type { AuthedContext } from '../../../middleware/auth.ts';
-import { CODEX_AUTO_REVIEW_ALIAS, CODEX_AUTO_REVIEW_TARGET } from '../../codex/auto-review-alias.ts';
import { inboundHeadersForUpstream } from '../../shared/inbound-headers.ts';
import { createGatewayCtxFromHono, finalizeGatewayResponse, type GatewayCtx } from '../shared/gateway-ctx.ts';
import { readRequestBody, type RequestBody } from '../shared/request-body.ts';
@@ -11,38 +10,6 @@ import { providerModelsUnavailableResponse } from '../shared/upstream-models-err
import type { ResponsesPayload } from '@floway-dev/protocols/responses';
import { internalErrorResult, toInternalDebugError } from '@floway-dev/provider';
-// Codex sends auto-review requests over the Responses wire API as a
-// `codex-auto-review` model id; rewrite at the entry so downstream routing,
-// performance telemetry, and usage accounting all see the real model name
-// (and the `low` reasoning effort the alias implies — generate only;
-// compact carries no `reasoning` field).
-//
-// This shim predates the operator-managed alias table seeded by migration
-// `0046_model_aliases.sql`. The two paths overlap on `/v1/responses` —
-// rewriting at this entry swaps the inbound `model` to `gpt-5.4` BEFORE the
-// alias matcher in `enumerateProviderCandidates` runs, so the alias row
-// never matches for this surface. The carveout is deliberate: the seeded
-// alias is stored with `on_conflict='real-only'`, which means on a Codex
-// upstream that exposes a real `codex-auto-review` model the alias would
-// silently lose to the real id and the `reasoning.effort=low` rule would
-// never apply — breaking parity with Codex CLI's native auto-review
-// behavior. Other inbound surfaces (`/v1/messages`, `/v1/chat/completions`,
-// `/v1beta/…`) carry no entry-level shim and reach the alias matcher
-// unchanged; they observe `real-only` semantics as designed.
-//
-// The shim is a temporary carveout pending a follow-up that either deletes
-// it after a deliberate Codex behavior change (e.g. switching to
-// `both-alias-first`) or migrates the entire surface to the alias table.
-const rewriteResponsesEntryModelAlias = (payload: ResponsesPayload, stampReasoningEffort: boolean): ResponsesPayload => {
- if (payload.model !== CODEX_AUTO_REVIEW_ALIAS) return payload;
- if (!stampReasoningEffort) return { ...payload, model: CODEX_AUTO_REVIEW_TARGET };
- return {
- ...payload,
- model: CODEX_AUTO_REVIEW_TARGET,
- reasoning: { ...(payload.reasoning ?? {}), effort: 'low' },
- };
-};
-
// OpenAI's verbatim previous_response_not_found envelope. Codex compares this
// body byte-for-byte against upstream — see the cross-references on
// `PreviousResponseNotFoundError` in serve-prep.ts.
@@ -77,15 +44,15 @@ const respondWithInternalError = async (c: AuthedContext, error: unknown, reques
return finalizeGatewayResponse(effectiveCtx, response);
};
-const parsePayload = (requestBody: RequestBody, stampReasoningEffort: boolean): ResponsesPayload =>
- rewriteResponsesEntryModelAlias(JSON.parse(new TextDecoder().decode(requestBody.bytes)) as ResponsesPayload, stampReasoningEffort);
+const parsePayload = (requestBody: RequestBody): ResponsesPayload =>
+ JSON.parse(new TextDecoder().decode(requestBody.bytes)) as ResponsesPayload;
export const responsesHttp = {
generate: async (c: AuthedContext): Promise => {
const requestBody = await readRequestBody(c);
let ctx: GatewayCtx | undefined;
try {
- const payload = parsePayload(requestBody, true);
+ const payload = parsePayload(requestBody);
const wantsStream = payload.stream === true;
ctx = createGatewayCtxFromHono(c, { wantsStream, requestBody, model: payload.model });
const store = createResponsesHttpStore(ctx.apiKeyId, payload.store ?? undefined);
@@ -106,7 +73,7 @@ export const responsesHttp = {
const requestBody = await readRequestBody(c);
let ctx: GatewayCtx | undefined;
try {
- const payload = parsePayload(requestBody, false);
+ const payload = parsePayload(requestBody);
ctx = createGatewayCtxFromHono(c, { wantsStream: false, requestBody, model: payload.model });
const store = createResponsesHttpStore(ctx.apiKeyId, payload.store ?? undefined);
const result = await responsesServe.compact({ payload, ctx, store, headers: inboundHeadersForUpstream(c) });
diff --git a/packages/gateway/src/data-plane/chat/responses/http_test.ts b/packages/gateway/src/data-plane/chat/responses/http_test.ts
index d0e78cdaf..7425b640b 100644
--- a/packages/gateway/src/data-plane/chat/responses/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/http_test.ts
@@ -284,63 +284,3 @@ test('POST /v1/responses renders a routing-unavailable 400 when a forcing item n
const body = await response.json() as { error: { code: string } };
assertEquals(body.error.code, 'responses_item_routing_unavailable');
});
-
-test('POST /v1/responses rewrites the codex-auto-review alias before routing', async () => {
- installRepo();
- seenModels.length = 0;
- const observedBodies: { reasoning?: { effort?: string } }[] = [];
- const callResponses = vi.fn(async (_model: unknown, body: unknown): Promise> => {
- observedBodies.push(body as { reasoning?: { effort?: string } });
- return {
- ok: true,
- events: makeProviderEvents([completedEvent()]),
- modelKey: 'test-model-key',
- headers: new Headers(),
- };
- });
- queueCandidates([makeCandidate({ callResponses })]);
-
- const response = await makeApp().request('/v1/responses', {
- method: 'POST',
- headers: new Headers({ 'content-type': 'application/json' }),
- body: JSON.stringify({ model: 'codex-auto-review', input: 'hello' }),
- });
-
- assertEquals(response.status, 200);
- assertEquals(seenModels, ['gpt-5.4']);
- const observed = observedBodies[0];
- if (observed === undefined) throw new Error('expected callResponses to receive a body');
- assertEquals(observed.reasoning?.effort, 'low');
-});
-
-test('POST /v1/responses/compact rewrites the codex-auto-review alias to gpt-5.4 with no reasoning field', async () => {
- installRepo();
- seenModels.length = 0;
- const observedBodies: { reasoning?: unknown }[] = [];
- const compactionItem = { type: 'compaction' as const, id: 'cmp_1', encrypted_content: 'ENC' };
- const compactionResult: ResponsesResult = {
- ...makeResponsesResult(),
- object: 'response.compaction',
- output: [compactionItem] as unknown as ResponsesResult['output'],
- };
- const callResponsesCompact = vi.fn(async (_model: unknown, body: unknown) => {
- observedBodies.push(body as { reasoning?: unknown });
- return { ok: true as const, result: compactionResult, modelKey: 'test-model-key' };
- });
- queueCandidates([makeCandidate({ callResponsesCompact })]);
-
- const response = await makeApp().request('/v1/responses/compact', {
- method: 'POST',
- headers: new Headers({ 'content-type': 'application/json' }),
- body: JSON.stringify({
- model: 'codex-auto-review',
- input: [{ type: 'message', role: 'user', content: 'kept' }],
- }),
- });
-
- assertEquals(response.status, 200);
- assertEquals(seenModels, ['gpt-5.4']);
- const observed = observedBodies[0];
- if (observed === undefined) throw new Error('expected callResponsesCompact to receive a body');
- assertEquals(observed.reasoning, undefined);
-});
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
new file mode 100644
index 000000000..e41400111
--- /dev/null
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -0,0 +1,52 @@
+import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
+import type { ModelProviderInstance, UpstreamModel } from '@floway-dev/provider';
+
+// One emission slot for an alias: a (provider, addressable form) pair where
+// the provider's raw catalog carries the alias target id, plus the matched
+// UpstreamModel so the synthesized listing entry can borrow the target's
+// limits, owner, and cost without re-querying.
+export interface AliasListingEmission {
+ provider: ModelProviderInstance;
+ form: 'unprefixed' | 'prefixed';
+ target: UpstreamModel;
+}
+
+// Per-upstream alias enumeration shared by `/v1/models` and the Gemini
+// `/models` listings. An alias with empty `upstreamIds` matches every
+// reachable provider; a non-empty list narrows the candidate set. Per
+// provider, the alias emits one entry per `listed` form when its target sits
+// in the upstream's raw catalog. Upstreams that do not carry the target — or
+// whose operator disabled the target — drop the alias entirely for that row.
+export const aliasListingEmissions = (
+ alias: ModelAlias,
+ providers: readonly ModelProviderInstance[],
+ rawCatalogs: ReadonlyMap,
+): AliasListingEmission[] => {
+ const out: AliasListingEmission[] = [];
+ const upstreamFilter = alias.upstreamIds.length > 0 ? new Set(alias.upstreamIds) : null;
+ for (const provider of providers) {
+ if (upstreamFilter !== null && !upstreamFilter.has(provider.upstream)) continue;
+ const catalog = rawCatalogs.get(provider.upstream);
+ if (catalog === undefined) continue;
+ const disabled = new Set(provider.disabledPublicModelIds);
+ const target = catalog.find(m => m.id === alias.targetModelId && !disabled.has(m.id));
+ if (target === undefined) continue;
+ const cfg = provider.modelPrefix;
+ if (cfg === null) {
+ out.push({ provider, form: 'unprefixed', target });
+ } else {
+ for (const form of cfg.listed) {
+ out.push({ provider, form, target });
+ }
+ }
+ }
+ return out;
+};
+
+// The public id form an alias emission carries on the wire. Bare alias name
+// for the unprefixed form; provider prefix + alias name for the prefixed
+// form. Mirrors how real models are surfaced in the same listing pass.
+export const aliasPublicId = (alias: ModelAlias, emission: AliasListingEmission): string => {
+ const cfg = emission.provider.modelPrefix;
+ return emission.form === 'prefixed' && cfg !== null ? `${cfg.prefix}${alias.alias}` : alias.alias;
+};
diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts
index 33dbefa40..0d08e4445 100644
--- a/packages/gateway/src/data-plane/models/gemini.ts
+++ b/packages/gateway/src/data-plane/models/gemini.ts
@@ -1,6 +1,8 @@
import type { Context } from 'hono';
+import { aliasListingEmissions, aliasPublicId } from './alias-listing.ts';
import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts';
+import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts';
import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
import { createPerRequestFetcher } from '../../dial/per-request.ts';
import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
@@ -8,7 +10,7 @@ import { getRepo } from '../../repo/index.ts';
import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
import { getCurrentColo } from '../../runtime/runtime-info.ts';
import { geminiStatusForHttpStatus } from '../chat/gemini/errors.ts';
-import { getInternalModels } from '../providers/registry.ts';
+import { getModelsForListing } from '../providers/registry.ts';
import type { BackgroundScheduler } from '@floway-dev/platform';
import type { ModelPricing } from '@floway-dev/protocols/common';
import { ProviderModelsUnavailableError } from '@floway-dev/provider';
@@ -70,25 +72,32 @@ const loadGeminiModels = async (
scheduler: BackgroundScheduler,
aliases: readonly ModelAlias[],
): Promise => {
- const models = await getInternalModels(upstreamFilter, fetcherForUpstream, scheduler);
+ const { models, providers, rawCatalogs } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler);
// Only chat models are representable in the Gemini /models shape.
const realChatEntries = models.filter(model => model.kind === 'chat').map(toGeminiModel);
- // Visible aliases append in `loadAllAliases` order; the Gemini surface
- // carries no `aliasedFrom` extension (Gemini's `Model` resource is closed)
- // so the entry advertises the alias id plus the target's display fields.
- const byId = new Map(models.map(m => [m.id, m]));
+ // Per-upstream alias enumeration mirrors `/v1/models`. Each emission becomes
+ // one Gemini Model entry whose id and displayName reflect that specific
+ // (provider, addressable form) pair; emissions whose target is not a chat
+ // model are skipped below, since only chat models fit the Gemini shape.
const aliasEntries: GeminiModel[] = [];
for (const alias of aliases) {
if (!alias.visibleInModelsList) continue;
- const target = byId.get(alias.targetModelId);
- if (target && target.kind !== 'chat') continue;
- aliasEntries.push(toGeminiModel({
- ...(target ?? {} as InternalModel),
- id: alias.alias,
- display_name: alias.alias,
- kind: 'chat',
- limits: target?.limits ?? {},
- }));
+ for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) {
+ if (emission.target.kind !== 'chat') continue;
+ const targetDisplayName = emission.target.display_name ?? emission.target.id;
+ aliasEntries.push(toGeminiModel({
+ ...emission.target,
+ id: aliasPublicId(alias, emission),
+ display_name: composeAliasDisplayName({
+ upstreamDisplayName: emission.provider.name,
+ aliasDisplayName: alias.displayName,
+ targetDisplayName,
+ rules: alias.rules,
+ }),
+ kind: 'chat',
+ limits: emission.target.limits ?? {},
+ }));
+ }
}
return [...realChatEntries, ...aliasEntries];
};
diff --git a/packages/gateway/src/data-plane/models/gemini_test.ts b/packages/gateway/src/data-plane/models/gemini_test.ts
index 0327bae33..6a2d9a887 100644
--- a/packages/gateway/src/data-plane/models/gemini_test.ts
+++ b/packages/gateway/src/data-plane/models/gemini_test.ts
@@ -449,7 +449,7 @@ test('/v1beta/models appends visible aliases as synthetic Gemini model entries',
const body = await response.json() as { models: Array<{ name: string; displayName: string; supportedGenerationMethods: string[] }> };
const aliasEntry = body.models.find(m => m.name === 'models/codex-auto-review');
if (!aliasEntry) throw new Error('expected codex-auto-review alias entry');
- assertEquals(aliasEntry.displayName, 'codex-auto-review');
+ assertEquals(aliasEntry.displayName, 'GitHub Copilot (tester): GPT Gemini List (low effort)');
assertEquals(aliasEntry.supportedGenerationMethods, ['generateContent', 'streamGenerateContent', 'countTokens']);
},
);
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index 585b5d638..569a601e2 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -1,5 +1,7 @@
+import { aliasListingEmissions, aliasPublicId, type AliasListingEmission } from './alias-listing.ts';
+import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts';
import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
-import { getInternalModels } from '../providers/registry.ts';
+import { getModelsForListing } from '../providers/registry.ts';
import type { BackgroundScheduler } from '@floway-dev/platform';
import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
import type { Fetcher, InternalModel } from '@floway-dev/provider';
@@ -22,20 +24,21 @@ export const toPublicModel = (model: InternalModel): PublicModel => {
return info;
};
-// Synthesize one PublicModel for each visible alias, appended after the real
-// entries. The owner falls back to the alias-target's `owned_by` on whichever
-// real entry resolves it; if the target isn't present on any reachable
-// upstream, the entry still appears (operator-declared; the listing reflects
-// operator intent) with a `floway` owner so the row is unambiguous.
-export const toPublicModelFromAlias = (alias: ModelAlias, byId: ReadonlyMap): PublicModel => {
- const target = byId.get(alias.targetModelId);
+const publicModelForAliasEmission = (alias: ModelAlias, emission: AliasListingEmission): PublicModel => {
+ const { provider, target } = emission;
+ const targetDisplayName = target.display_name ?? target.id;
const info: PublicModel = {
- id: alias.alias,
+ id: aliasPublicId(alias, emission),
object: 'model',
type: 'model',
- display_name: alias.alias,
- limits: target?.limits ? { ...target.limits } : {},
- kind: target?.kind ?? 'chat',
+ display_name: composeAliasDisplayName({
+ upstreamDisplayName: provider.name,
+ aliasDisplayName: alias.displayName,
+ targetDisplayName,
+ rules: alias.rules,
+ }),
+ limits: target.limits ? { ...target.limits } : {},
+ kind: target.kind,
created: alias.createdAt,
created_at: new Date(alias.createdAt * 1000).toISOString(),
aliasedFrom: {
@@ -45,7 +48,8 @@ export const toPublicModelFromAlias = (alias: ModelAlias, byId: ReadonlyMap => {
- const internal = await getInternalModels(upstreamFilter, fetcherForUpstream, scheduler);
- const realEntries = internal.map(toPublicModel);
- const byId = new Map(internal.map(m => [m.id, m]));
- // Visible aliases append in `loadAllAliases` order, after every real entry.
- // The spec's no-silent-hide policy keeps disabled-target aliases visible —
- // the user-facing failure on call is the canonical signal, not the
- // listing.
- const aliasEntries = aliases.filter(a => a.visibleInModelsList).map(a => toPublicModelFromAlias(a, byId));
+ const { models, providers, rawCatalogs } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler);
+ const realEntries = models.map(toPublicModel);
+ // Per-upstream alias enumeration: for each visible alias, emit one entry per
+ // (provider, addressable form) pair where the provider can resolve the
+ // alias's target. Upstreams that do not carry the target produce no entry —
+ // the alias listing is strictly anchored to "can be served from here".
+ const aliasEntries: PublicModel[] = [];
+ for (const alias of aliases) {
+ if (!alias.visibleInModelsList) continue;
+ for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) {
+ aliasEntries.push(publicModelForAliasEmission(alias, emission));
+ }
+ }
const data = [...realEntries, ...aliasEntries];
return {
object: 'list',
diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index 855eca5fa..17647565b 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -711,7 +711,7 @@ test('/v1/models omits aliases marked visibleInModelsList=false', async () => {
);
});
-test('/v1/models lists an alias whose target is not present on any upstream (no silent hide)', async () => {
+test('/v1/models omits an alias whose target is not in any reachable upstream catalog', async () => {
const { repo, apiKey } = await setupAppTest();
(repo.modelAliases as MemoryModelAliasesRepo).setAll([
@@ -755,12 +755,182 @@ test('/v1/models lists an alias whose target is not present on any upstream (no
},
async () => {
const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
- const body = await response.json() as { data: Array<{ id: string; aliasedFrom?: { targetModelId: string }; owned_by?: string }> };
- const orphan = body.data.find(m => m.id === 'orphan-alias');
- if (!orphan) throw new Error('expected orphan-alias entry');
- assertEquals(orphan.aliasedFrom?.targetModelId, 'never-resolves');
- // No matching real entry → owner falls back to the alias's primary upstream id.
- assertEquals(orphan.owned_by, 'up_oai');
+ const body = await response.json() as { data: Array<{ id: string }> };
+ // Per-upstream alias enumeration: an alias whose target cannot be served
+ // by any reachable upstream produces zero entries — there is no surface
+ // form to attach the alias to. A request for `orphan-alias` still
+ // returns the canonical user-facing model-missing error.
+ assertEquals(body.data.map(m => m.id).includes('orphan-alias'), false);
+ },
+ );
+});
+
+test('/v1/models emits the alias on each reachable upstream + listed form, with display_name composed from the upstream label', async () => {
+ const { repo, apiKey } = await setupAppTest();
+
+ (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+ {
+ alias: 'codex-auto-review',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: [],
+ rules: { reasoning: { effort: 'low' } },
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ displayName: 'Codex Auto Review',
+ createdAt: 1_700_000_000,
+ },
+ ]);
+
+ await repo.upstreams.save(buildCustomUpstreamRecord({
+ id: 'up_azure',
+ name: 'Azure',
+ sortOrder: 100,
+ config: {
+ baseUrl: 'https://azure.example.com',
+ authStyle: 'bearer',
+ apiKey: 'sk-azure',
+ endpoints: { chatCompletions: {} },
+ },
+ modelPrefix: { prefix: 'azure/', addressable: ['unprefixed', 'prefixed'], listed: ['unprefixed', 'prefixed'] },
+ }));
+
+ await withMockedFetch(
+ request => {
+ const url = new URL(request.url);
+ if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+ if (url.pathname === '/copilot_internal/v2/token') {
+ return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+ }
+ if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([]));
+ if (url.pathname === '/v1/models' && url.hostname === 'azure.example.com') {
+ return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4', display_name: 'GPT-5.4' }] });
+ }
+ throw new Error(`Unhandled fetch ${request.url}`);
+ },
+ async () => {
+ const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
+ const body = await response.json() as { data: Array<{ id: string; display_name: string; aliasedFrom?: unknown }> };
+ // Both the bare and the prefixed form appear because the upstream's `listed` setting includes both.
+ const bare = body.data.find(m => m.id === 'codex-auto-review');
+ const prefixed = body.data.find(m => m.id === 'azure/codex-auto-review');
+ if (!bare || !prefixed) throw new Error('expected both bare and prefixed alias entries');
+ assertEquals(bare.display_name, 'Azure: Codex Auto Review');
+ assertEquals(prefixed.display_name, 'Azure: Codex Auto Review');
+ },
+ );
+});
+
+test('/v1/models falls back to target display_name + rules summary when the alias has no displayName', async () => {
+ const { repo, apiKey } = await setupAppTest();
+
+ (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+ {
+ alias: 'codex-auto-review',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: [],
+ rules: { reasoning: { effort: 'low' } },
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ createdAt: 1_700_000_000,
+ },
+ ]);
+
+ await repo.upstreams.save(buildCustomUpstreamRecord({
+ id: 'up_azure',
+ name: 'Azure',
+ sortOrder: 100,
+ config: {
+ baseUrl: 'https://azure.example.com',
+ authStyle: 'bearer',
+ apiKey: 'sk-azure',
+ endpoints: { chatCompletions: {} },
+ },
+ }));
+
+ await withMockedFetch(
+ request => {
+ const url = new URL(request.url);
+ if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+ if (url.pathname === '/copilot_internal/v2/token') {
+ return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+ }
+ if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([]));
+ if (url.pathname === '/v1/models' && url.hostname === 'azure.example.com') {
+ return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4', display_name: 'GPT-5.4' }] });
+ }
+ throw new Error(`Unhandled fetch ${request.url}`);
+ },
+ async () => {
+ const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
+ const body = await response.json() as { data: Array<{ id: string; display_name: string }> };
+ const entry = body.data.find(m => m.id === 'codex-auto-review');
+ if (!entry) throw new Error('expected codex-auto-review alias entry');
+ assertEquals(entry.display_name, 'Azure: GPT-5.4 (low effort)');
+ },
+ );
+});
+
+test('/v1/models honours alias upstreamIds — only emits on the named upstream', async () => {
+ const { repo, apiKey } = await setupAppTest();
+
+ (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+ {
+ alias: 'codex-auto-review',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: ['up_azure'],
+ rules: {},
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ createdAt: 1_700_000_000,
+ },
+ ]);
+
+ await repo.upstreams.save(buildCustomUpstreamRecord({
+ id: 'up_azure',
+ name: 'Azure',
+ sortOrder: 100,
+ config: {
+ baseUrl: 'https://azure.example.com',
+ authStyle: 'bearer',
+ apiKey: 'sk-azure',
+ endpoints: { chatCompletions: {} },
+ },
+ }));
+ await repo.upstreams.save(buildCustomUpstreamRecord({
+ id: 'up_other',
+ name: 'Other',
+ sortOrder: 200,
+ config: {
+ baseUrl: 'https://other.example.com',
+ authStyle: 'bearer',
+ apiKey: 'sk-other',
+ endpoints: { chatCompletions: {} },
+ },
+ }));
+
+ await withMockedFetch(
+ request => {
+ const url = new URL(request.url);
+ if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+ if (url.pathname === '/copilot_internal/v2/token') {
+ return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+ }
+ if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([]));
+ // Both upstreams expose gpt-5.4 — but the alias is restricted to up_azure.
+ if (url.pathname === '/v1/models' && url.hostname === 'azure.example.com') {
+ return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
+ }
+ if (url.pathname === '/v1/models' && url.hostname === 'other.example.com') {
+ return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
+ }
+ throw new Error(`Unhandled fetch ${request.url}`);
+ },
+ async () => {
+ const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
+ const body = await response.json() as { data: Array<{ id: string; display_name: string }> };
+ const aliasRows = body.data.filter(m => m.id === 'codex-auto-review');
+ assertEquals(aliasRows.length, 1);
+ assertEquals(aliasRows[0].display_name, 'Azure: gpt-5.4');
},
);
});
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index e29df1c6a..c8556ca45 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -20,6 +20,10 @@ interface ProviderModelsResult {
// order as the input `providers` list so the model-missing renderer can
// surface a stable, dashboard-aligned list.
failedUpstreams: string[];
+ // Raw per-upstream catalogs collected during the fan-out. Aliases consume
+ // this to enumerate per-upstream entries by addressable form without paying
+ // a second round-trip.
+ rawCatalogs: Map;
}
const NO_UPSTREAM_CONFIGURED_MESSAGE = 'No upstream provider configured — connect GitHub Copilot or add a Custom/Azure upstream in the dashboard';
@@ -143,6 +147,7 @@ const collectProviderModels = async (
scheduler: BackgroundScheduler,
): Promise => {
const byId = new Map();
+ const rawCatalogs = new Map();
let sawSuccess = false;
let lastError: unknown = null;
const failedUpstreams: string[] = [];
@@ -174,6 +179,7 @@ const collectProviderModels = async (
}
sawSuccess = true;
const { instance, models: providedModels } = result.value;
+ rawCatalogs.set(instance.upstream, providedModels);
// Operator-disabled public model ids vanish entirely for this upstream:
// dropped before they reach the catalog map, so they appear in no /models
// listing and resolve to nothing for routing. The disable is per-upstream,
@@ -208,7 +214,7 @@ const collectProviderModels = async (
}
}
- return { models: [...byId.values()], sawSuccess, lastError, failedUpstreams };
+ return { models: [...byId.values()], sawSuccess, lastError, failedUpstreams, rawCatalogs };
};
// Public-facing model-id ordering, applied in getModels() to every list that
@@ -264,6 +270,35 @@ export const getModels = async (
return [];
};
+// Returns the merged public model list AND the per-upstream raw catalogs and
+// provider instances. Listing surfaces (`/v1/models`, Gemini `/models`) use the
+// extra channels to synthesize alias entries that reflect which upstreams can
+// actually serve each alias's target and in which addressable form. Computing
+// both off the same `collectProviderModels` pass keeps catalog fetches to one
+// round per upstream regardless of how many alias rows reference each target.
+export interface PublicModelsListing {
+ models: ResolvedModel[];
+ providers: readonly ModelProviderInstance[];
+ rawCatalogs: ReadonlyMap;
+}
+
+export const getModelsForListing = async (
+ upstreamFilter: readonly string[] | null,
+ fetcherForUpstream: (upstreamId: string) => Fetcher,
+ scheduler: BackgroundScheduler,
+): Promise => {
+ const providers = await listModelProviders(upstreamFilter);
+ if (providers.length === 0) {
+ throw new Error(NO_UPSTREAM_CONFIGURED_MESSAGE);
+ }
+
+ const { models, sawSuccess, lastError, rawCatalogs } = await collectProviderModels(providers, fetcherForUpstream, scheduler);
+
+ if (sawSuccess) return { models: models.sort((a, b) => compareModelIds(a.id, b.id)), providers, rawCatalogs };
+ if (lastError) throw lastError;
+ return { models: [], providers, rawCatalogs };
+};
+
export const getInternalModels = async (
upstreamFilter: readonly string[] | null,
fetcherForUpstream: (upstreamId: string) => Fetcher,
From fcb360afcb1cfc23982484dcae1d61c6f705fd84 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 01:51:45 +0800
Subject: [PATCH 010/170] revert(translate): restore pre-extension native field
translation on remaining pairs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Task 3 (`e1891e1d`) also reshaped NATIVE-field translation on the three
remaining pairs that the first revert wave (`17a7877c`) did not cover.
The alias work should only have added emission of the new Floway
extension fields; native-to-native handling on these pairs had been
decided in the prior contract and is restored here.
Revert the native-field paths in:
- gemini-via-responses: restore the pre-Task-3 `reasoning` block shape
where `includeThoughts: true` paired with a non-`none` effort produces
`summary: 'detailed'`; drop the `includeThoughts: false` → `summary: 'omitted'`
synthesis that Task 3 added. Keep `verbosity` and `serviceTier` extension carries
(Floway-only fields on Gemini IR).
- messages-via-responses: drop `thinking.display` → `reasoning.summary`
synthesis and the `service_tier` → `service_tier` native-to-native
propagation. Keep the `verbosity` extension carry under `text`. The
unused `mapAnthropicDisplayToSummary` helper is deleted.
- messages-via-chat-completions: drop the `service_tier` →
`service_tier` native-to-native propagation. Keep the `verbosity`
extension carry.
Tests that asserted the new native-field behavior are removed; the
extension-field tests stay untouched.
---
.../src/gemini-via-responses/request.ts | 18 ++++------
.../src/gemini-via-responses/request_test.ts | 20 -----------
.../messages-via-chat-completions/request.ts | 1 -
.../request_test.ts | 11 ------
.../src/messages-via-responses/request.ts | 16 ++-------
.../messages-via-responses/request_test.ts | 36 -------------------
.../shared/messages-via/reasoning-summary.ts | 21 -----------
7 files changed, 9 insertions(+), 114 deletions(-)
delete mode 100644 packages/translate/src/shared/messages-via/reasoning-summary.ts
diff --git a/packages/translate/src/gemini-via-responses/request.ts b/packages/translate/src/gemini-via-responses/request.ts
index df85e4729..36ceb37f0 100644
--- a/packages/translate/src/gemini-via-responses/request.ts
+++ b/packages/translate/src/gemini-via-responses/request.ts
@@ -152,18 +152,12 @@ const applyGenerationConfig = (request: ResponsesPayload, generationConfig?: Gem
if (generationConfig.serviceTier != null) request.service_tier = generationConfig.serviceTier;
const effort = geminiReasoningEffort(generationConfig.thinkingConfig);
- const summary =
- generationConfig.thinkingConfig?.includeThoughts === true
- ? ('detailed' as const)
- : generationConfig.thinkingConfig?.includeThoughts === false
- ? ('omitted' as const)
- : undefined;
- if (effort || summary !== undefined) {
- request.reasoning = {
- ...(effort ? { effort } : {}),
- ...(summary !== undefined && effort !== 'none' ? { summary } : {}),
- };
- }
+ if (!effort) return;
+
+ request.reasoning = {
+ effort,
+ ...(effort !== 'none' && generationConfig.thinkingConfig?.includeThoughts === true ? { summary: 'detailed' as const } : {}),
+ };
};
const buildTools = (payload: GeminiPayload): ResponsesTool[] | undefined => {
diff --git a/packages/translate/src/gemini-via-responses/request_test.ts b/packages/translate/src/gemini-via-responses/request_test.ts
index 66476f0d8..9d568605f 100644
--- a/packages/translate/src/gemini-via-responses/request_test.ts
+++ b/packages/translate/src/gemini-via-responses/request_test.ts
@@ -432,26 +432,6 @@ test('buildTargetRequest emits generationConfig.serviceTier onto Responses servi
assertEquals(result.service_tier, 'priority');
});
-test('buildTargetRequest maps includeThoughts onto reasoning.summary (true → detailed, false → omitted)', () => {
- const withSummary = buildTargetRequest(
- {
- contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
- generationConfig: { thinkingConfig: { thinkingLevel: 'high', includeThoughts: true } },
- },
- 'gpt-test',
- );
- const withoutSummary = buildTargetRequest(
- {
- contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
- generationConfig: { thinkingConfig: { thinkingLevel: 'high', includeThoughts: false } },
- },
- 'gpt-test',
- );
-
- assertEquals(withSummary.reasoning, { effort: 'high', summary: 'detailed' });
- assertEquals(withoutSummary.reasoning, { effort: 'high', summary: 'omitted' });
-});
-
test('buildTargetRequest drops top-level Anthropic extensions on Responses', () => {
const result = buildTargetRequest(
{
diff --git a/packages/translate/src/messages-via-chat-completions/request.ts b/packages/translate/src/messages-via-chat-completions/request.ts
index bd08e1b26..6ae2d8a95 100644
--- a/packages/translate/src/messages-via-chat-completions/request.ts
+++ b/packages/translate/src/messages-via-chat-completions/request.ts
@@ -291,7 +291,6 @@ export const translateMessagesToChatCompletions = (payload: MessagesPayload): Ch
tool_choice: translateMessagesToolChoice(payload.tool_choice, clientTools),
...(responseFormat ? { response_format: responseFormat } : {}),
...(payload.verbosity != null ? { verbosity: payload.verbosity } : {}),
- ...(payload.service_tier != null ? { service_tier: payload.service_tier } : {}),
};
};
diff --git a/packages/translate/src/messages-via-chat-completions/request_test.ts b/packages/translate/src/messages-via-chat-completions/request_test.ts
index 50f326860..861962bf8 100644
--- a/packages/translate/src/messages-via-chat-completions/request_test.ts
+++ b/packages/translate/src/messages-via-chat-completions/request_test.ts
@@ -494,17 +494,6 @@ test('translateMessagesToChatCompletions emits verbosity extension verbatim', ()
assertEquals(result.verbosity, 'low');
});
-test('translateMessagesToChatCompletions forwards service_tier verbatim', () => {
- const result = translateMessagesToChatCompletions({
- model: 'gpt-test',
- max_tokens: 256,
- messages: [{ role: 'user', content: 'hi' }],
- service_tier: 'priority',
- });
-
- assertEquals(result.service_tier, 'priority');
-});
-
test('translateMessagesToChatCompletions drops Anthropic-only knobs that have no Chat-completions slot', () => {
const result = translateMessagesToChatCompletions({
model: 'gpt-test',
diff --git a/packages/translate/src/messages-via-responses/request.ts b/packages/translate/src/messages-via-responses/request.ts
index b1c593443..1adfcaeda 100644
--- a/packages/translate/src/messages-via-responses/request.ts
+++ b/packages/translate/src/messages-via-responses/request.ts
@@ -1,7 +1,6 @@
import { openAiJsonSchemaCoreFromMessagesFormat } from '../shared/messages/structured-output.ts';
import { messagesReasoningBlockToResponsesReasoning } from '../shared/messages-and-responses/reasoning.ts';
import { resolveMessagesReasoningEffort } from '../shared/messages-via/reasoning-effort.ts';
-import { mapAnthropicDisplayToSummary } from '../shared/messages-via/reasoning-summary.ts';
import { normalizeMessagesToolInputSchema } from '../shared/messages-via/tool-schema.ts';
import {
type MessagesAssistantMessage,
@@ -208,15 +207,7 @@ export const translateMessagesToResponses = (payload: MessagesPayload): Response
// Responses upstream may reject it. Translation stays pairwise and leaves
// target-side validation to the selected upstream endpoint.
const effort = resolveMessagesReasoningEffort(payload);
- const display = payload.thinking?.display;
- const summary = display !== undefined ? mapAnthropicDisplayToSummary(display) : undefined;
- const reasoning =
- effort !== undefined || summary !== undefined
- ? {
- ...(effort !== undefined ? { effort } : {}),
- ...(summary !== undefined ? { summary } : {}),
- }
- : undefined;
+ const reasoning = effort ? { effort } : undefined;
const clientTools = getClientTools(payload.tools);
const instructions = translateSystemPrompt(payload.system);
const jsonSchema = openAiJsonSchemaCoreFromMessagesFormat(payload.output_config?.format);
@@ -225,8 +216,8 @@ export const translateMessagesToResponses = (payload: MessagesPayload): Response
const text = formatPart || verbosityPart ? { ...formatPart, ...verbosityPart } : undefined;
// Keep fallback semantics strict: do not synthesize `temperature: 1`,
- // `store: false`, or `parallel_tool_calls: true` when the Messages source
- // did not express those knobs.
+ // `store: false`, `parallel_tool_calls: true`, or `reasoning.summary` when the
+ // Messages source did not express those knobs.
return {
model: payload.model,
input: translateMessagesInput(payload.messages),
@@ -240,7 +231,6 @@ export const translateMessagesToResponses = (payload: MessagesPayload): Response
stream: true,
...(reasoning ? { reasoning } : {}),
...(text ? { text } : {}),
- ...(payload.service_tier != null ? { service_tier: payload.service_tier } : {}),
};
};
diff --git a/packages/translate/src/messages-via-responses/request_test.ts b/packages/translate/src/messages-via-responses/request_test.ts
index 2846f1d39..a773f72ef 100644
--- a/packages/translate/src/messages-via-responses/request_test.ts
+++ b/packages/translate/src/messages-via-responses/request_test.ts
@@ -529,42 +529,6 @@ test('translateMessagesToResponses co-emits verbosity with json_schema format un
assertEquals(result.text?.format?.type, 'json_schema');
});
-test('translateMessagesToResponses maps thinking.display onto reasoning.summary (summarized → concise, omitted → omitted, full → detailed)', () => {
- const summarized = translateMessagesToResponses({
- model: 'gpt-test',
- max_tokens: 256,
- messages: [{ role: 'user', content: 'hi' }],
- thinking: { type: 'enabled', display: 'summarized' },
- });
- const omitted = translateMessagesToResponses({
- model: 'gpt-test',
- max_tokens: 256,
- messages: [{ role: 'user', content: 'hi' }],
- thinking: { type: 'enabled', display: 'omitted' },
- });
- const full = translateMessagesToResponses({
- model: 'gpt-test',
- max_tokens: 256,
- messages: [{ role: 'user', content: 'hi' }],
- thinking: { type: 'enabled', display: 'full' },
- });
-
- assertEquals(summarized.reasoning?.summary, 'concise');
- assertEquals(omitted.reasoning?.summary, 'omitted');
- assertEquals(full.reasoning?.summary, 'detailed');
-});
-
-test('translateMessagesToResponses forwards service_tier verbatim', () => {
- const result = translateMessagesToResponses({
- model: 'gpt-test',
- max_tokens: 256,
- messages: [{ role: 'user', content: 'hi' }],
- service_tier: 'priority',
- });
-
- assertEquals(result.service_tier, 'priority');
-});
-
test('translateMessagesToResponses drops Anthropic-only mode knobs the Responses wire cannot express', () => {
const result = translateMessagesToResponses({
model: 'gpt-test',
diff --git a/packages/translate/src/shared/messages-via/reasoning-summary.ts b/packages/translate/src/shared/messages-via/reasoning-summary.ts
deleted file mode 100644
index 6d12bab9b..000000000
--- a/packages/translate/src/shared/messages-via/reasoning-summary.ts
+++ /dev/null
@@ -1,21 +0,0 @@
-import type { MessagesThinkingDisplay } from '@floway-dev/protocols/messages';
-
-// Reverse of via-messages/anthropic-extensions.ts mapSummaryToAnthropicDisplay.
-// Anthropic's `summarized` collapsed both `concise` and `detailed`; we pick
-// `concise` as the canonical reverse since it is Responses' more compact
-// summary mode and round-tripping through the gateway should not silently
-// inflate verbosity. Unknown operator-typed values pass through verbatim so
-// the Responses upstream gets the original spelling and decides for itself
-// whether to accept it.
-export const mapAnthropicDisplayToSummary = (display: MessagesThinkingDisplay | string): string | undefined => {
- switch (display) {
- case 'summarized':
- return 'concise';
- case 'omitted':
- return 'omitted';
- case 'full':
- return 'detailed';
- default:
- return display;
- }
-};
From 96afa8376b3d5d08d9a69f243a70ca381c6dfd35 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 02:04:58 +0800
Subject: [PATCH 011/170] refactor(aliases): split upstream-label prefix from
composeAliasDisplayName
The alias-local display name (operator-set displayName, or synthesized
target + rules summary) is independent of which addressable form the
entry surfaces under. The upstream-label prefix (`${upstream.name}: `)
belongs at the caller, mirroring the real-model path in
`registry.ts` where the synthesized prefix is added only on the
`prefixed` listing form.
Result: a bare alias listing (`codex-auto-review` on a no-prefix or
unprefixed-listed upstream) reads as `"Codex Auto Review"` or
`"GPT-5.4 (low effort)"` without an upstream label, matching how a
bare real model renders. The prefixed form (`azure/codex-auto-review`)
keeps the `"Azure: Codex Auto Review"` shape unchanged.
---
.../src/control-plane/model-aliases/display.ts | 15 +++++++--------
.../control-plane/model-aliases/display_test.ts | 9 +++------
packages/gateway/src/data-plane/models/gemini.ts | 13 ++++++-------
.../gateway/src/data-plane/models/gemini_test.ts | 2 +-
packages/gateway/src/data-plane/models/load.ts | 15 +++++++--------
.../gateway/src/data-plane/models/serve_test.ts | 8 ++++----
6 files changed, 28 insertions(+), 34 deletions(-)
diff --git a/packages/gateway/src/control-plane/model-aliases/display.ts b/packages/gateway/src/control-plane/model-aliases/display.ts
index 576c823a8..cb82bc75f 100644
--- a/packages/gateway/src/control-plane/model-aliases/display.ts
+++ b/packages/gateway/src/control-plane/model-aliases/display.ts
@@ -23,17 +23,16 @@ export const formatAliasRulesSummary = (rules: ModelAliasRules): string => {
return parts.length > 0 ? ` (${parts.join(', ')})` : '';
};
-// Compose the final per-entry display name shown in `/v1/models`. The
-// upstream name always leads so an operator scanning the listing sees which
-// upstream each row belongs to before reading the alias-specific part.
+// Compose the alias-local display name — what the operator named the alias
+// (when set) or a synthesized target + rules summary. Independent of which
+// upstream is surfacing the alias; the prefixed listing form prepends the
+// upstream display name at the call site, mirroring the real-model path in
+// `registry.ts`.
export const composeAliasDisplayName = (input: {
- upstreamDisplayName: string;
aliasDisplayName?: string;
targetDisplayName: string;
rules: ModelAliasRules;
}): string => {
- if (input.aliasDisplayName !== undefined) {
- return `${input.upstreamDisplayName}: ${input.aliasDisplayName}`;
- }
- return `${input.upstreamDisplayName}: ${input.targetDisplayName}${formatAliasRulesSummary(input.rules)}`;
+ if (input.aliasDisplayName !== undefined) return input.aliasDisplayName;
+ return `${input.targetDisplayName}${formatAliasRulesSummary(input.rules)}`;
};
diff --git a/packages/gateway/src/control-plane/model-aliases/display_test.ts b/packages/gateway/src/control-plane/model-aliases/display_test.ts
index 7ba7700d0..d45a1b339 100644
--- a/packages/gateway/src/control-plane/model-aliases/display_test.ts
+++ b/packages/gateway/src/control-plane/model-aliases/display_test.ts
@@ -45,31 +45,28 @@ describe('composeAliasDisplayName', () => {
test('uses alias displayName when set, suppressing the rules summary', () => {
expect(
composeAliasDisplayName({
- upstreamDisplayName: 'Azure',
aliasDisplayName: 'Codex Auto Review',
targetDisplayName: 'GPT-5.4',
rules: { reasoning: { effort: 'low' } },
}),
- ).toBe('Azure: Codex Auto Review');
+ ).toBe('Codex Auto Review');
});
test('falls back to target displayName with rules suffix when alias displayName is missing', () => {
expect(
composeAliasDisplayName({
- upstreamDisplayName: 'Azure',
targetDisplayName: 'GPT-5.4',
rules: { reasoning: { effort: 'low' } },
}),
- ).toBe('Azure: GPT-5.4 (low effort)');
+ ).toBe('GPT-5.4 (low effort)');
});
test('omits the rules suffix when rules are empty', () => {
expect(
composeAliasDisplayName({
- upstreamDisplayName: 'Azure',
targetDisplayName: 'GPT-5.4',
rules: {},
}),
- ).toBe('Azure: GPT-5.4');
+ ).toBe('GPT-5.4');
});
});
diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts
index 0d08e4445..3d3b5ab2c 100644
--- a/packages/gateway/src/data-plane/models/gemini.ts
+++ b/packages/gateway/src/data-plane/models/gemini.ts
@@ -84,16 +84,15 @@ const loadGeminiModels = async (
if (!alias.visibleInModelsList) continue;
for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) {
if (emission.target.kind !== 'chat') continue;
- const targetDisplayName = emission.target.display_name ?? emission.target.id;
+ const aliasLocalName = composeAliasDisplayName({
+ aliasDisplayName: alias.displayName,
+ targetDisplayName: emission.target.display_name ?? emission.target.id,
+ rules: alias.rules,
+ });
aliasEntries.push(toGeminiModel({
...emission.target,
id: aliasPublicId(alias, emission),
- display_name: composeAliasDisplayName({
- upstreamDisplayName: emission.provider.name,
- aliasDisplayName: alias.displayName,
- targetDisplayName,
- rules: alias.rules,
- }),
+ display_name: emission.form === 'prefixed' ? `${emission.provider.name}: ${aliasLocalName}` : aliasLocalName,
kind: 'chat',
limits: emission.target.limits ?? {},
}));
diff --git a/packages/gateway/src/data-plane/models/gemini_test.ts b/packages/gateway/src/data-plane/models/gemini_test.ts
index 6a2d9a887..584054328 100644
--- a/packages/gateway/src/data-plane/models/gemini_test.ts
+++ b/packages/gateway/src/data-plane/models/gemini_test.ts
@@ -449,7 +449,7 @@ test('/v1beta/models appends visible aliases as synthetic Gemini model entries',
const body = await response.json() as { models: Array<{ name: string; displayName: string; supportedGenerationMethods: string[] }> };
const aliasEntry = body.models.find(m => m.name === 'models/codex-auto-review');
if (!aliasEntry) throw new Error('expected codex-auto-review alias entry');
- assertEquals(aliasEntry.displayName, 'GitHub Copilot (tester): GPT Gemini List (low effort)');
+ assertEquals(aliasEntry.displayName, 'GPT Gemini List (low effort)');
assertEquals(aliasEntry.supportedGenerationMethods, ['generateContent', 'streamGenerateContent', 'countTokens']);
},
);
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index 569a601e2..2d054dc88 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -25,18 +25,17 @@ export const toPublicModel = (model: InternalModel): PublicModel => {
};
const publicModelForAliasEmission = (alias: ModelAlias, emission: AliasListingEmission): PublicModel => {
- const { provider, target } = emission;
- const targetDisplayName = target.display_name ?? target.id;
+ const { provider, target, form } = emission;
+ const aliasLocalName = composeAliasDisplayName({
+ aliasDisplayName: alias.displayName,
+ targetDisplayName: target.display_name ?? target.id,
+ rules: alias.rules,
+ });
const info: PublicModel = {
id: aliasPublicId(alias, emission),
object: 'model',
type: 'model',
- display_name: composeAliasDisplayName({
- upstreamDisplayName: provider.name,
- aliasDisplayName: alias.displayName,
- targetDisplayName,
- rules: alias.rules,
- }),
+ display_name: form === 'prefixed' ? `${provider.name}: ${aliasLocalName}` : aliasLocalName,
limits: target.limits ? { ...target.limits } : {},
kind: target.kind,
created: alias.createdAt,
diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index 17647565b..f3c45e313 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -765,7 +765,7 @@ test('/v1/models omits an alias whose target is not in any reachable upstream ca
);
});
-test('/v1/models emits the alias on each reachable upstream + listed form, with display_name composed from the upstream label', async () => {
+test('/v1/models emits the alias on each reachable upstream + listed form; prefixed entries carry the upstream label, unprefixed entries do not', async () => {
const { repo, apiKey } = await setupAppTest();
(repo.modelAliases as MemoryModelAliasesRepo).setAll([
@@ -814,7 +814,7 @@ test('/v1/models emits the alias on each reachable upstream + listed form, with
const bare = body.data.find(m => m.id === 'codex-auto-review');
const prefixed = body.data.find(m => m.id === 'azure/codex-auto-review');
if (!bare || !prefixed) throw new Error('expected both bare and prefixed alias entries');
- assertEquals(bare.display_name, 'Azure: Codex Auto Review');
+ assertEquals(bare.display_name, 'Codex Auto Review');
assertEquals(prefixed.display_name, 'Azure: Codex Auto Review');
},
);
@@ -865,7 +865,7 @@ test('/v1/models falls back to target display_name + rules summary when the alia
const body = await response.json() as { data: Array<{ id: string; display_name: string }> };
const entry = body.data.find(m => m.id === 'codex-auto-review');
if (!entry) throw new Error('expected codex-auto-review alias entry');
- assertEquals(entry.display_name, 'Azure: GPT-5.4 (low effort)');
+ assertEquals(entry.display_name, 'GPT-5.4 (low effort)');
},
);
});
@@ -930,7 +930,7 @@ test('/v1/models honours alias upstreamIds — only emits on the named upstream'
const body = await response.json() as { data: Array<{ id: string; display_name: string }> };
const aliasRows = body.data.filter(m => m.id === 'codex-auto-review');
assertEquals(aliasRows.length, 1);
- assertEquals(aliasRows[0].display_name, 'Azure: gpt-5.4');
+ assertEquals(aliasRows[0].display_name, 'gpt-5.4');
},
);
});
From 6e3a6d7a554d0ca806d5ebd31c79950454794bfd Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 02:44:34 +0800
Subject: [PATCH 012/170] refactor(aliases): synthesize alias listing entries
inside getModelsForListing
Of the three listing endpoints, two (/v1/models data plane and
/v1beta/models Gemini) each independently looped over aliases and
re-built the per-emission entry, while the third (/api/models control
plane) did not handle aliases at all. Move the fan-out to a single
synthesizeListedAliases() called once inside getModelsForListing(); the
function returns ListedModel[] (ResolvedModel + optional aliasedFrom)
that every surface mapper consumes uniformly.
Side effect: the control-plane /api/models was previously alias-blind,
because the dashboard hit getModels() instead of the listing function.
Now it goes through the shared path and the dashboard Models page
surfaces alias rows with their aliasedFrom provenance.
---
.../src/control-plane/models/routes.ts | 22 +++--
.../src/control-plane/models/routes_test.ts | 41 +++++++++
.../src/data-plane/models/alias-listing.ts | 85 +++++++++++++++++--
.../gateway/src/data-plane/models/gemini.ts | 38 ++-------
.../gateway/src/data-plane/models/load.ts | 54 ++----------
.../src/data-plane/providers/registry.ts | 21 +++--
6 files changed, 168 insertions(+), 93 deletions(-)
diff --git a/packages/gateway/src/control-plane/models/routes.ts b/packages/gateway/src/control-plane/models/routes.ts
index 3146a3edc..9db27e7f4 100644
--- a/packages/gateway/src/control-plane/models/routes.ts
+++ b/packages/gateway/src/control-plane/models/routes.ts
@@ -1,21 +1,25 @@
import type { Context } from 'hono';
+import type { ListedModel } from '../../data-plane/models/alias-listing.ts';
import { toPublicModel } from '../../data-plane/models/load.ts';
import { MODEL_LISTING_FAILURE_MESSAGE } from '../../data-plane/models/shared.ts';
-import { getModels } from '../../data-plane/providers/registry.ts';
+import { getModelsForListing } from '../../data-plane/providers/registry.ts';
import { createPerRequestFetcher } from '../../dial/per-request.ts';
import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
+import { getRepo } from '../../repo/index.ts';
import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
import { getCurrentColo } from '../../runtime/runtime-info.ts';
import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
import { ProviderModelsUnavailableError } from '@floway-dev/provider';
-import type { ResolvedModel, UpstreamProviderKind } from '@floway-dev/provider';
+import type { UpstreamProviderKind } from '@floway-dev/provider';
// Same DTO as the public /models endpoint, plus one dashboard-only field:
// `upstreams` lists every provider binding for this model as { kind, id, name }
// triples. A single model id can be served by mixed provider kinds (e.g. one
// azure deployment + one custom upstream both expose `gpt-5.5`), so a flat
-// `provider`/`upstream_ids` split would misrepresent that.
+// `provider`/`upstream_ids` split would misrepresent that. Alias entries
+// carry a single binding (the upstream that resolves their target) and the
+// `aliasedFrom` provenance flows through `toPublicModel`.
interface ControlPlaneModel extends PublicModel {
upstreams: { kind: UpstreamProviderKind; id: string; name: string }[];
}
@@ -24,7 +28,7 @@ interface ControlPlaneModelsResponse extends Omit
data: ControlPlaneModel[];
}
-const toControlPlaneModel = (model: ResolvedModel): ControlPlaneModel => ({
+const toControlPlaneModel = (model: ListedModel): ControlPlaneModel => ({
...toPublicModel(model),
upstreams: model.providers.map(binding => ({ kind: binding.providerKind, id: binding.upstream, name: binding.upstreamName })),
});
@@ -35,8 +39,16 @@ export const controlPlaneModels = async (c: Context) => {
// like the data-plane /models endpoint. On a session request there is no
// API key, so this resolves to the user's per-user upstream cap: a user who
// has had an upstream removed must not see its models in the Models tab.
+ // Aliases come from the same repo singleton the data plane uses, so the
+ // dashboard sees exactly the alias rows the runtime would honour.
const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
- const models = await getModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c));
+ const aliases = await getRepo().modelAliases.loadAll();
+ const { models } = await getModelsForListing(
+ effectiveUpstreamIdsFromContext(c),
+ fetcherForUpstream,
+ backgroundSchedulerFromContext(c),
+ aliases,
+ );
const data = models.map(toControlPlaneModel);
const response: ControlPlaneModelsResponse = {
object: 'list',
diff --git a/packages/gateway/src/control-plane/models/routes_test.ts b/packages/gateway/src/control-plane/models/routes_test.ts
index 7c611c31a..0be710c36 100644
--- a/packages/gateway/src/control-plane/models/routes_test.ts
+++ b/packages/gateway/src/control-plane/models/routes_test.ts
@@ -1,5 +1,6 @@
import { test } from 'vitest';
+import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
import { buildCustomUpstreamRecord, copilotModels, requestApp, setupAppTest } from '../../test-helpers.ts';
import type { UpstreamRecord } from '@floway-dev/provider';
import { assertEquals, jsonResponse, withMockedFetch } from '@floway-dev/test-utils';
@@ -113,3 +114,43 @@ test('/api/models is scoped to the caller\'s effective upstreams — a removed u
assertEquals(ids.includes('azure-public'), false);
});
});
+
+test('/api/models appends visible alias entries with aliasedFrom alongside real catalog rows', async () => {
+ const { apiKey, repo } = await setupAppTest();
+ await repo.upstreams.save(buildCustomUpstreamRecord({ id: 'up_custom_models', sortOrder: 100 }));
+
+ (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+ {
+ alias: 'codex-auto-review',
+ displayName: 'Codex Auto Review',
+ targetModelId: 'custom-model',
+ upstreamIds: [],
+ rules: { reasoning: { effort: 'low' } },
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ createdAt: 1_700_000_000,
+ },
+ {
+ alias: 'hidden-alias',
+ targetModelId: 'custom-model',
+ upstreamIds: [],
+ rules: {},
+ visibleInModelsList: false,
+ onConflict: 'real-only',
+ createdAt: 1_700_000_001,
+ },
+ ]);
+
+ await withMockedFetch(modelsFetchHandler, async () => {
+ const response = await requestApp('/api/models', { headers: { 'x-api-key': apiKey.key } });
+ assertEquals(response.status, 200);
+ const body = (await response.json()) as { data: Array<{ id: string; display_name: string; upstreams: Array<{ kind: string; id: string; name: string }>; aliasedFrom?: { targetModelId: string; rules: Record } }> };
+ const aliasEntry = body.data.find(model => model.id === 'codex-auto-review');
+ if (!aliasEntry) throw new Error('expected codex-auto-review alias entry on /api/models');
+ assertEquals(aliasEntry.display_name, 'Codex Auto Review');
+ assertEquals(aliasEntry.upstreams, [{ kind: 'custom', id: 'up_custom_models', name: 'Custom Provider' }]);
+ assertEquals(aliasEntry.aliasedFrom?.targetModelId, 'custom-model');
+ assertEquals(aliasEntry.aliasedFrom?.rules, { reasoning: { effort: 'low' } });
+ assertEquals(body.data.some(model => model.id === 'hidden-alias'), false);
+ });
+});
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index e41400111..2f1880fe5 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -1,23 +1,33 @@
+import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts';
import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
-import type { ModelProviderInstance, UpstreamModel } from '@floway-dev/provider';
+import type { PublicModel } from '@floway-dev/protocols/common';
+import type { ModelProviderInstance, ProviderModelRecord, ResolvedModel, UpstreamModel } from '@floway-dev/provider';
// One emission slot for an alias: a (provider, addressable form) pair where
// the provider's raw catalog carries the alias target id, plus the matched
// UpstreamModel so the synthesized listing entry can borrow the target's
// limits, owner, and cost without re-querying.
-export interface AliasListingEmission {
+interface AliasListingEmission {
provider: ModelProviderInstance;
form: 'unprefixed' | 'prefixed';
target: UpstreamModel;
}
-// Per-upstream alias enumeration shared by `/v1/models` and the Gemini
-// `/models` listings. An alias with empty `upstreamIds` matches every
-// reachable provider; a non-empty list narrows the candidate set. Per
+// A `ResolvedModel` that may carry an `aliasedFrom` provenance — what
+// `getModelsForListing` returns when alias entries have been interleaved into
+// the catalog. Each listing endpoint's mapper (`toPublicModel`,
+// `toControlPlaneModel`, `toGeminiModel`) reads the same shape, so the alias
+// fan-out happens exactly once instead of being re-implemented per surface.
+export type ListedModel = ResolvedModel & {
+ readonly aliasedFrom?: NonNullable;
+};
+
+// Per-upstream alias enumeration. An alias with empty `upstreamIds` matches
+// every reachable provider; a non-empty list narrows the candidate set. Per
// provider, the alias emits one entry per `listed` form when its target sits
// in the upstream's raw catalog. Upstreams that do not carry the target — or
// whose operator disabled the target — drop the alias entirely for that row.
-export const aliasListingEmissions = (
+const aliasListingEmissions = (
alias: ModelAlias,
providers: readonly ModelProviderInstance[],
rawCatalogs: ReadonlyMap,
@@ -46,7 +56,68 @@ export const aliasListingEmissions = (
// The public id form an alias emission carries on the wire. Bare alias name
// for the unprefixed form; provider prefix + alias name for the prefixed
// form. Mirrors how real models are surfaced in the same listing pass.
-export const aliasPublicId = (alias: ModelAlias, emission: AliasListingEmission): string => {
+const aliasPublicId = (alias: ModelAlias, emission: AliasListingEmission): string => {
const cfg = emission.provider.modelPrefix;
return emission.form === 'prefixed' && cfg !== null ? `${cfg.prefix}${alias.alias}` : alias.alias;
};
+
+// Turn an alias emission into a `ListedModel` that walks the same listing
+// pipeline as real catalog entries. The synthesized `providers` array carries
+// a single binding pointing at the alias's target on this upstream, so the
+// dashboard's per-binding view renders correctly without alias-specific
+// branching. `aliasedFrom` rides out as the public protocol extension.
+//
+// Display name: the alias-local part (operator displayName, or
+// `${target.display_name} (rules summary)`) lives by itself for the
+// `unprefixed` listing form; the `prefixed` form mirrors the real-model path
+// in `registry.ts` and prepends `${provider.name}: ` so the upstream is
+// visible at a glance.
+const aliasEmissionToListedModel = (alias: ModelAlias, emission: AliasListingEmission): ListedModel => {
+ const { provider, target, form } = emission;
+ const aliasLocalName = composeAliasDisplayName({
+ aliasDisplayName: alias.displayName,
+ targetDisplayName: target.display_name ?? target.id,
+ rules: alias.rules,
+ });
+ const record: ProviderModelRecord = {
+ upstream: provider.upstream,
+ upstreamName: provider.name,
+ providerKind: provider.providerKind,
+ provider: provider.provider,
+ upstreamModel: target,
+ enabledFlags: target.enabledFlags,
+ supportsResponsesItemReference: provider.supportsResponsesItemReference,
+ };
+ const { providerData: _providerData, endpoints, id: _targetId, display_name: _targetDisplay, created: _targetCreated, ...rest } = target;
+ return {
+ ...rest,
+ id: aliasPublicId(alias, emission),
+ display_name: form === 'prefixed' ? `${provider.name}: ${aliasLocalName}` : aliasLocalName,
+ created: alias.createdAt,
+ endpoints: { ...endpoints },
+ providers: [record],
+ aliasedFrom: {
+ targetModelId: alias.targetModelId,
+ upstreamIds: alias.upstreamIds,
+ rules: alias.rules,
+ onConflict: alias.onConflict,
+ },
+ };
+};
+
+// Single-pass alias fan-out used by every listing surface. Visibility filter
+// honoured here; per-surface callers just map ListedModel → their own DTO.
+export const synthesizeListedAliases = (
+ aliases: readonly ModelAlias[],
+ providers: readonly ModelProviderInstance[],
+ rawCatalogs: ReadonlyMap,
+): ListedModel[] => {
+ const out: ListedModel[] = [];
+ for (const alias of aliases) {
+ if (!alias.visibleInModelsList) continue;
+ for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) {
+ out.push(aliasEmissionToListedModel(alias, emission));
+ }
+ }
+ return out;
+};
diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts
index 3d3b5ab2c..f01579f9a 100644
--- a/packages/gateway/src/data-plane/models/gemini.ts
+++ b/packages/gateway/src/data-plane/models/gemini.ts
@@ -1,8 +1,6 @@
import type { Context } from 'hono';
-import { aliasListingEmissions, aliasPublicId } from './alias-listing.ts';
import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts';
-import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts';
import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
import { createPerRequestFetcher } from '../../dial/per-request.ts';
import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
@@ -34,6 +32,10 @@ interface GeminiModel {
cost?: ModelPricing;
}
+// Gemini's Model resource is closed (no `aliasedFrom` extension), so an alias
+// arrives here through `getModelsForListing` looking like any other chat
+// model — `id`, `display_name`, `limits`, `cost` already finalized by
+// `synthesizeListedAliases` — and the mapper has no alias-specific branch.
const toGeminiModel = (model: InternalModel): GeminiModel => {
const limits = model.limits;
const inputTokenLimit = limits.max_prompt_tokens ?? limits.max_context_window_tokens;
@@ -72,33 +74,11 @@ const loadGeminiModels = async (
scheduler: BackgroundScheduler,
aliases: readonly ModelAlias[],
): Promise => {
- const { models, providers, rawCatalogs } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler);
- // Only chat models are representable in the Gemini /models shape.
- const realChatEntries = models.filter(model => model.kind === 'chat').map(toGeminiModel);
- // Per-upstream alias enumeration mirrors `/v1/models`. Each emission becomes
- // one Gemini Model entry whose id and displayName reflect that specific
- // (provider, addressable form) pair; targets of the wrong kind never reach
- // here because they were already filtered out of the catalog walk.
- const aliasEntries: GeminiModel[] = [];
- for (const alias of aliases) {
- if (!alias.visibleInModelsList) continue;
- for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) {
- if (emission.target.kind !== 'chat') continue;
- const aliasLocalName = composeAliasDisplayName({
- aliasDisplayName: alias.displayName,
- targetDisplayName: emission.target.display_name ?? emission.target.id,
- rules: alias.rules,
- });
- aliasEntries.push(toGeminiModel({
- ...emission.target,
- id: aliasPublicId(alias, emission),
- display_name: emission.form === 'prefixed' ? `${emission.provider.name}: ${aliasLocalName}` : aliasLocalName,
- kind: 'chat',
- limits: emission.target.limits ?? {},
- }));
- }
- }
- return [...realChatEntries, ...aliasEntries];
+ const { models } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler, aliases);
+ // Only chat models are representable in the Gemini /models shape — alias
+ // entries whose target is non-chat fall out of this filter just like real
+ // non-chat catalog entries do.
+ return models.filter(model => model.kind === 'chat').map(toGeminiModel);
};
export const serveGeminiModels = async (c: Context): Promise => {
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index 2d054dc88..5f2a32d64 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -1,12 +1,15 @@
-import { aliasListingEmissions, aliasPublicId, type AliasListingEmission } from './alias-listing.ts';
-import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts';
+import type { ListedModel } from './alias-listing.ts';
import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
import { getModelsForListing } from '../providers/registry.ts';
import type { BackgroundScheduler } from '@floway-dev/platform';
import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
import type { Fetcher, InternalModel } from '@floway-dev/provider';
-export const toPublicModel = (model: InternalModel): PublicModel => {
+// Maps a single listed catalog entry (real or alias) to the wire DTO. Alias
+// entries arrive with `aliasedFrom` pre-populated by
+// `synthesizeListedAliases`; this mapper just rides it through so every
+// listing surface sees the same provenance field.
+export const toPublicModel = (model: InternalModel & { aliasedFrom?: ListedModel['aliasedFrom'] }): PublicModel => {
const info: PublicModel = {
id: model.id,
object: 'model',
@@ -21,34 +24,7 @@ export const toPublicModel = (model: InternalModel): PublicModel => {
info.created_at = new Date(model.created * 1000).toISOString();
}
if (model.cost) info.cost = model.cost;
- return info;
-};
-
-const publicModelForAliasEmission = (alias: ModelAlias, emission: AliasListingEmission): PublicModel => {
- const { provider, target, form } = emission;
- const aliasLocalName = composeAliasDisplayName({
- aliasDisplayName: alias.displayName,
- targetDisplayName: target.display_name ?? target.id,
- rules: alias.rules,
- });
- const info: PublicModel = {
- id: aliasPublicId(alias, emission),
- object: 'model',
- type: 'model',
- display_name: form === 'prefixed' ? `${provider.name}: ${aliasLocalName}` : aliasLocalName,
- limits: target.limits ? { ...target.limits } : {},
- kind: target.kind,
- created: alias.createdAt,
- created_at: new Date(alias.createdAt * 1000).toISOString(),
- aliasedFrom: {
- targetModelId: alias.targetModelId,
- upstreamIds: alias.upstreamIds,
- rules: alias.rules,
- onConflict: alias.onConflict,
- },
- };
- info.owned_by = target.owned_by ?? provider.upstream;
- if (target.cost) info.cost = target.cost;
+ if (model.aliasedFrom) info.aliasedFrom = model.aliasedFrom;
return info;
};
@@ -58,20 +34,8 @@ export const loadModels = async (
scheduler: BackgroundScheduler,
aliases: readonly ModelAlias[],
): Promise => {
- const { models, providers, rawCatalogs } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler);
- const realEntries = models.map(toPublicModel);
- // Per-upstream alias enumeration: for each visible alias, emit one entry per
- // (provider, addressable form) pair where the provider can resolve the
- // alias's target. Upstreams that do not carry the target produce no entry —
- // the alias listing is strictly anchored to "can be served from here".
- const aliasEntries: PublicModel[] = [];
- for (const alias of aliases) {
- if (!alias.visibleInModelsList) continue;
- for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) {
- aliasEntries.push(publicModelForAliasEmission(alias, emission));
- }
- }
- const data = [...realEntries, ...aliasEntries];
+ const { models } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler, aliases);
+ const data = models.map(toPublicModel);
return {
object: 'list',
has_more: false,
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index c8556ca45..e6dcb214f 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -2,6 +2,7 @@ import { fetchUpstreamModelsCached } from './models-cache.ts';
import type { ModelAlias, ModelAliasRules } from '../../control-plane/model-aliases/types.ts';
import { getRepo } from '../../repo/index.ts';
import { matchAlias } from '../model-aliases/match.ts';
+import { synthesizeListedAliases, type ListedModel } from '../models/alias-listing.ts';
import type { BackgroundScheduler } from '@floway-dev/platform';
import { type ModelEndpointKey, type ModelEndpoints, kindForEndpoints } from '@floway-dev/protocols/common';
import type { InternalModel, ModelProviderInstance, ProviderModelRecord, ResolvedModel, Fetcher, UpstreamModel, UpstreamProviderKind, UpstreamRecord } from '@floway-dev/provider';
@@ -271,13 +272,14 @@ export const getModels = async (
};
// Returns the merged public model list AND the per-upstream raw catalogs and
-// provider instances. Listing surfaces (`/v1/models`, Gemini `/models`) use the
-// extra channels to synthesize alias entries that reflect which upstreams can
-// actually serve each alias's target and in which addressable form. Computing
-// both off the same `collectProviderModels` pass keeps catalog fetches to one
-// round per upstream regardless of how many alias rows reference each target.
+// provider instances. Listing surfaces (`/v1/models`, `/api/models`, Gemini
+// `/models`) use the same call so alias entries — synthesized once via
+// `synthesizeListedAliases` against the same `(providers, rawCatalogs)` pair —
+// are interleaved into the catalog before it returns. Per-surface mappers
+// then walk one uniform `ListedModel[]` instead of re-implementing alias
+// fan-out three times.
export interface PublicModelsListing {
- models: ResolvedModel[];
+ models: ListedModel[];
providers: readonly ModelProviderInstance[];
rawCatalogs: ReadonlyMap;
}
@@ -286,6 +288,7 @@ export const getModelsForListing = async (
upstreamFilter: readonly string[] | null,
fetcherForUpstream: (upstreamId: string) => Fetcher,
scheduler: BackgroundScheduler,
+ aliases: readonly ModelAlias[],
): Promise => {
const providers = await listModelProviders(upstreamFilter);
if (providers.length === 0) {
@@ -294,7 +297,11 @@ export const getModelsForListing = async (
const { models, sawSuccess, lastError, rawCatalogs } = await collectProviderModels(providers, fetcherForUpstream, scheduler);
- if (sawSuccess) return { models: models.sort((a, b) => compareModelIds(a.id, b.id)), providers, rawCatalogs };
+ if (sawSuccess) {
+ const real = models.sort((a, b) => compareModelIds(a.id, b.id));
+ const aliasEntries = synthesizeListedAliases(aliases, providers, rawCatalogs);
+ return { models: [...real, ...aliasEntries], providers, rawCatalogs };
+ }
if (lastError) throw lastError;
return { models: [], providers, rawCatalogs };
};
From 06c789b29b6180d16fc79721873064b352d85566 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 02:56:30 +0800
Subject: [PATCH 013/170] fix(aliases): dedupe alias listing emissions whose
public id collides
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Two no-prefix upstreams both serving the alias target produced two
identical `codex-auto-review` rows in /v1/models and /api/models —
visible in the dashboard Models list as duplicate cards.
mergeIntoCatalog dedupes real models the same way; alias entries now go
through the equivalent union (endpoints OR-ed, kind re-derived, provider
bindings concatenated) so a single alias surfaces as one row whose
`upstreams` field carries every backing binding.
---
.../src/data-plane/models/alias-listing.ts | 28 ++++++--
.../src/data-plane/models/serve_test.ts | 65 +++++++++++++++++++
.../src/data-plane/providers/registry.ts | 2 +-
3 files changed, 89 insertions(+), 6 deletions(-)
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index 2f1880fe5..d1de04b09 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -1,6 +1,7 @@
import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts';
import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
-import type { PublicModel } from '@floway-dev/protocols/common';
+import { unionEndpoints } from '../providers/registry.ts';
+import { kindForEndpoints, type PublicModel } from '@floway-dev/protocols/common';
import type { ModelProviderInstance, ProviderModelRecord, ResolvedModel, UpstreamModel } from '@floway-dev/provider';
// One emission slot for an alias: a (provider, addressable form) pair where
@@ -106,18 +107,35 @@ const aliasEmissionToListedModel = (alias: ModelAlias, emission: AliasListingEmi
};
// Single-pass alias fan-out used by every listing surface. Visibility filter
-// honoured here; per-surface callers just map ListedModel → their own DTO.
+// honoured here. Emissions whose synthesized public id collides — two
+// no-prefix upstreams both serving the alias target, or two prefix-aliased
+// upstreams sharing a prefix — merge into one row with the bindings
+// appended, mirroring how `mergeIntoCatalog` collapses duplicate real-model
+// ids; the dashboard then renders a single alias row whose `upstreams` lists
+// every backing binding instead of N identical rows.
export const synthesizeListedAliases = (
aliases: readonly ModelAlias[],
providers: readonly ModelProviderInstance[],
rawCatalogs: ReadonlyMap,
): ListedModel[] => {
- const out: ListedModel[] = [];
+ const byId = new Map();
for (const alias of aliases) {
if (!alias.visibleInModelsList) continue;
for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) {
- out.push(aliasEmissionToListedModel(alias, emission));
+ const next = aliasEmissionToListedModel(alias, emission);
+ const existing = byId.get(next.id);
+ if (existing === undefined) {
+ byId.set(next.id, next);
+ continue;
+ }
+ const endpoints = unionEndpoints(existing.endpoints, next.endpoints);
+ byId.set(next.id, {
+ ...existing,
+ endpoints,
+ kind: kindForEndpoints(endpoints),
+ providers: [...existing.providers, ...next.providers],
+ });
}
}
- return out;
+ return [...byId.values()];
};
diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index f3c45e313..cc5227967 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -934,3 +934,68 @@ test('/v1/models honours alias upstreamIds — only emits on the named upstream'
},
);
});
+
+test('/v1/models merges alias emissions whose synthesized public id collides — one row, multiple backing upstreams', async () => {
+ const { repo, apiKey } = await setupAppTest();
+
+ (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+ {
+ alias: 'codex-auto-review',
+ displayName: 'Codex Auto Review',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: [],
+ rules: { reasoning: { effort: 'low' } },
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ createdAt: 1_700_000_000,
+ },
+ ]);
+
+ // Two no-prefix upstreams both serve gpt-5.4 — without dedupe, the alias
+ // would emit two `codex-auto-review` rows. With dedupe, the dashboard sees
+ // one row whose `upstreams` field lists both bindings, exactly like real
+ // models that exist on multiple upstreams.
+ await repo.upstreams.save(buildCustomUpstreamRecord({
+ id: 'up_alpha',
+ name: 'Alpha',
+ sortOrder: 100,
+ config: {
+ baseUrl: 'https://alpha.example.com',
+ authStyle: 'bearer',
+ apiKey: 'sk-alpha',
+ endpoints: { chatCompletions: {} },
+ },
+ }));
+ await repo.upstreams.save(buildCustomUpstreamRecord({
+ id: 'up_beta',
+ name: 'Beta',
+ sortOrder: 200,
+ config: {
+ baseUrl: 'https://beta.example.com',
+ authStyle: 'bearer',
+ apiKey: 'sk-beta',
+ endpoints: { chatCompletions: {} },
+ },
+ }));
+
+ await withMockedFetch(
+ request => {
+ const url = new URL(request.url);
+ if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+ if (url.pathname === '/copilot_internal/v2/token') {
+ return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+ }
+ if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([]));
+ if (url.pathname === '/v1/models' && (url.hostname === 'alpha.example.com' || url.hostname === 'beta.example.com')) {
+ return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
+ }
+ throw new Error(`Unhandled fetch ${request.url}`);
+ },
+ async () => {
+ const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
+ const body = await response.json() as { data: Array<{ id: string }> };
+ const rows = body.data.filter(m => m.id === 'codex-auto-review');
+ assertEquals(rows.length, 1);
+ },
+ );
+});
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index e6dcb214f..53619a613 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -89,7 +89,7 @@ export const listModelProviders = async (
// Merge two capability maps: a key present in either side is present in the
// result, and its sub-capability flags are OR-ed so a sub-cap advertised by
// either provider survives.
-const unionEndpoints = (a: ModelEndpoints, b: ModelEndpoints): ModelEndpoints => {
+export const unionEndpoints = (a: ModelEndpoints, b: ModelEndpoints): ModelEndpoints => {
const result: ModelEndpoints = { ...a };
for (const key of Object.keys(b) as ModelEndpointKey[]) {
const merged = { ...result[key], ...b[key] };
From e118cd1b0f056b3145abaea627d10b7da2be6735 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 03:07:47 +0800
Subject: [PATCH 014/170] feat(web): render alias rule badges on dashboard
model rows
Each rule field on an alias entry's aliasedFrom now appears as its own
badge appended after the existing context/prompt/output badges, so the
seed codex-auto-review shows "low effort" next to its upstream pills.
Per-field labels move into a shared formatAliasRuleBadges helper in
@floway-dev/protocols/common; the gateway's formatAliasRulesSummary
derives from it (same wording, joined with commas, wrapped in parens
when used as the synthesized display-name suffix). Dashboard and
gateway therefore stay in lockstep on rule labels without parallel
formatters drifting.
---
apps/web/src/api/types.ts | 4 ++-
.../src/components/models/ModelInfoBar.vue | 7 +++++
.../control-plane/model-aliases/display.ts | 26 +++++--------------
packages/protocols/src/common/models.ts | 22 ++++++++++++++++
4 files changed, 39 insertions(+), 20 deletions(-)
diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts
index 1dd0c2c03..4ff2f417a 100644
--- a/apps/web/src/api/types.ts
+++ b/apps/web/src/api/types.ts
@@ -6,10 +6,11 @@ import type {
ModelEndpoints,
ModelKind,
ModelPricing,
+ PublicModelAliasedFrom,
} from '@floway-dev/protocols/common';
import type { AddressableForm, ModelPrefixConfig } from '@floway-dev/provider/model-prefix';
-export type { BillingDimension, ModelEndpointKey, ModelEndpoints, ModelKind, ModelPricing };
+export type { BillingDimension, ModelEndpointKey, ModelEndpoints, ModelKind, ModelPricing, PublicModelAliasedFrom };
export type { AddressableForm, ModelPrefixConfig };
export type UpstreamProviderKind = 'custom' | 'azure' | 'copilot' | 'codex' | 'claude-code' | 'ollama';
@@ -326,6 +327,7 @@ export interface PublicModel {
endpoints?: Record;
cost?: ModelPricing;
kind?: ModelKind;
+ aliasedFrom?: PublicModelAliasedFrom;
}
export interface ControlPlaneModel extends PublicModel {
diff --git a/apps/web/src/components/models/ModelInfoBar.vue b/apps/web/src/components/models/ModelInfoBar.vue
index f8bf98b6e..b66b53a70 100644
--- a/apps/web/src/components/models/ModelInfoBar.vue
+++ b/apps/web/src/components/models/ModelInfoBar.vue
@@ -1,6 +1,7 @@
@@ -45,11 +54,10 @@ const formatTokenLimit = (n: number) => {
output: {{ formatTokenLimit(model.limits.max_output_tokens) }}
{{ badge }}
+ v-for="badge in (model.aliasedFrom ? aliasBadges(model.aliasedFrom) : [])"
+ :key="`${badge.label}:${badge.value ?? ''}`"
+ class="text-[10px] font-mono px-2 py-0.5 rounded-full border border-white/[0.08] text-gray-400"
+ >{{ badge.label }}{{ badge.value !== undefined ? `: ${badge.value}` : '' }}
diff --git a/packages/gateway/src/control-plane/model-aliases/display.ts b/packages/gateway/src/control-plane/model-aliases/display.ts
index 1fc9e6181..80b567488 100644
--- a/packages/gateway/src/control-plane/model-aliases/display.ts
+++ b/packages/gateway/src/control-plane/model-aliases/display.ts
@@ -1,13 +1,26 @@
import type { ModelAliasRules } from './types.ts';
-import { formatAliasRuleBadges } from '@floway-dev/protocols/common';
// Render the closed rule set as a parenthesized suffix the gateway appends to
// the target model's display name when the operator did not supply an
-// explicit alias `displayName`. The per-rule labels come from the protocol's
-// shared `formatAliasRuleBadges` so the dashboard's per-badge view and this
-// inline suffix always agree on wording and order.
+// explicit alias `displayName`. The wording stays compact (`value label`,
+// joined with commas) because the suffix has to fit alongside the target
+// name in narrow listings — the dashboard's per-badge view uses
+// `formatAliasRuleBadges` for the self-describing `label: value` form.
+//
+// `anthropicBeta` is sorted at format time so two operators carrying the same
+// token set in different orders see the same label.
export const formatAliasRulesSummary = (rules: ModelAliasRules): string => {
- const parts = formatAliasRuleBadges(rules);
+ const parts: string[] = [];
+ if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`);
+ if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`);
+ if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning');
+ if (rules.reasoning?.summary !== undefined) parts.push(`${rules.reasoning.summary} summary`);
+ if (rules.verbosity !== undefined) parts.push(`${rules.verbosity} verbosity`);
+ if (rules.serviceTier !== undefined) parts.push(`${rules.serviceTier} tier`);
+ if (rules.anthropicSpeed !== undefined) parts.push(`${rules.anthropicSpeed} speed`);
+ if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
+ parts.push([...rules.anthropicBeta].sort().join('/'));
+ }
return parts.length > 0 ? ` (${parts.join(', ')})` : '';
};
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index c05904787..7b058edde 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -136,26 +136,32 @@ export interface PublicModelAliasedFrom {
onConflict: 'alias-only' | 'real-only' | 'both-real-first' | 'both-alias-first';
}
-// Per-rule short labels for the closed knob set an alias may lock. Returned
-// in the deterministic order the dashboard and the synthesized display name
-// both render, so the order an operator sees stays stable across surfaces
-// regardless of how the JSON key order arrived. Each entry is meant to render
-// as its own badge in the dashboard `/models` row and is joined with `, ` to
-// build the parenthesized rules summary the gateway appends when an alias has
-// no explicit `displayName`.
-export const formatAliasRuleBadges = (rules: PublicModelAliasedFrom['rules']): string[] => {
- const parts: string[] = [];
- if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`);
- if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`);
- if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning');
- if (rules.reasoning?.summary !== undefined) parts.push(`${rules.reasoning.summary} summary`);
- if (rules.verbosity !== undefined) parts.push(`${rules.verbosity} verbosity`);
- if (rules.serviceTier !== undefined) parts.push(`${rules.serviceTier} tier`);
- if (rules.anthropicSpeed !== undefined) parts.push(`${rules.anthropicSpeed} speed`);
+// One badge per rule field on an alias, in a `${label}` / `${label}: ${value}`
+// shape the dashboard renders inline next to the model row. Returned in a
+// deterministic order so the badge sequence stays stable across surfaces and
+// across JSON key arrivals. Boolean toggles render label-only (no colon);
+// every other field renders as `${label}: ${value}`. The gateway's
+// `formatAliasRulesSummary` uses its own labels for the parenthesized
+// display-name suffix — the two surfaces deliberately diverge so the suffix
+// stays compact while the badge view stays self-describing.
+export interface AliasRuleBadge {
+ label: string;
+ value?: string;
+}
+
+export const formatAliasRuleBadges = (rules: PublicModelAliasedFrom['rules']): AliasRuleBadge[] => {
+ const out: AliasRuleBadge[] = [];
+ if (rules.reasoning?.effort !== undefined) out.push({ label: 'effort', value: rules.reasoning.effort });
+ if (rules.reasoning?.budgetTokens !== undefined) out.push({ label: 'reasoning budget', value: `${rules.reasoning.budgetTokens}tk` });
+ if (rules.reasoning?.adaptive === true) out.push({ label: 'adaptive reasoning' });
+ if (rules.reasoning?.summary !== undefined) out.push({ label: 'reasoning summary', value: rules.reasoning.summary });
+ if (rules.verbosity !== undefined) out.push({ label: 'verbosity', value: rules.verbosity });
+ if (rules.serviceTier !== undefined) out.push({ label: 'service tier', value: rules.serviceTier });
+ if (rules.anthropicSpeed !== undefined) out.push({ label: 'speed', value: rules.anthropicSpeed });
if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
- parts.push([...rules.anthropicBeta].sort().join('/'));
+ out.push({ label: 'anthropic beta', value: [...rules.anthropicBeta].sort().join('/') });
}
- return parts;
+ return out;
};
export interface PublicModelsResponse {
From ca6fac5e9b559c09bc484b1c25084eaf04bd240d Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 03:29:05 +0800
Subject: [PATCH 016/170] chore(aliases): strip uncommitted-spec references
from comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Comments must not reference in-progress design docs that live under
docs/superpowers/ (gitignored). Strip the "See docs/..." tails from the
JSDocs on the protocol-extension fields and from the apply.ts header; the
preceding sentences already document the translation contract.
---
packages/gateway/src/data-plane/model-aliases/apply.ts | 3 +--
packages/protocols/src/chat-completions/index.ts | 10 +++++-----
packages/protocols/src/common/models.ts | 1 -
packages/protocols/src/extensions/index.ts | 3 +--
packages/protocols/src/gemini/index.ts | 8 ++++----
packages/protocols/src/messages/index.ts | 2 +-
packages/protocols/src/responses/index.ts | 8 ++++----
.../src/chat-completions-via-messages/request.ts | 2 +-
packages/translate/src/shared/gemini-via/gemini.ts | 2 --
9 files changed, 17 insertions(+), 22 deletions(-)
diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts
index 9a2bb5950..bd7503141 100644
--- a/packages/gateway/src/data-plane/model-aliases/apply.ts
+++ b/packages/gateway/src/data-plane/model-aliases/apply.ts
@@ -8,8 +8,7 @@ import { mapSummaryToAnthropicDisplay } from '@floway-dev/translate/via-messages
// Each function writes the alias rules into the inbound IR's slot best suited
// to the host protocol: native when the protocol can express the concept,
// extension otherwise. Writes overwrite any user-supplied value — aliases are
-// operator-locked per Goal 3. Mapping table is the single source of truth in
-// docs/superpowers/specs/2026-06-25-model-aliases-design.md.
+// operator-locked.
export const applyAliasRulesToChatCompletions = (payload: ChatCompletionsPayload, rules: ModelAliasRules): void => {
// reasoning.effort is native; budget/adaptive/summary ride on extension slots
diff --git a/packages/protocols/src/chat-completions/index.ts b/packages/protocols/src/chat-completions/index.ts
index 8804fd449..072348261 100644
--- a/packages/protocols/src/chat-completions/index.ts
+++ b/packages/protocols/src/chat-completions/index.ts
@@ -30,15 +30,15 @@ export interface ChatCompletionsPayload {
tool_choice?: 'none' | 'auto' | 'required' | { type: 'function'; function: { name: string } } | null;
/** Request usage stats in streaming responses */
stream_options?: { include_usage: boolean } | null;
- /** Floway protocol extension. Translated to Anthropic `thinking.budget_tokens` / Gemini `thinkingConfig.thinkingBudget` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ /** Floway protocol extension. Translated to Anthropic `thinking.budget_tokens` / Gemini `thinkingConfig.thinkingBudget` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. */
thinking_budget?: number;
- /** Floway protocol extension. Translated to Anthropic `thinking.type: "adaptive"` / Gemini `thinkingConfig.thinkingBudget: -1` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ /** Floway protocol extension. Translated to Anthropic `thinking.type: "adaptive"` / Gemini `thinkingConfig.thinkingBudget: -1` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. */
adaptive_thinking?: boolean;
- /** Floway protocol extension. Translated to OpenAI Responses `reasoning.summary` / Anthropic `thinking.display` / Gemini `thinkingConfig.includeThoughts` when routed to those upstreams; dropped on OpenAI Chat targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ /** Floway protocol extension. Translated to OpenAI Responses `reasoning.summary` / Anthropic `thinking.display` / Gemini `thinkingConfig.includeThoughts` when routed to those upstreams; dropped on OpenAI Chat targets. */
reasoning_summary?: string;
- /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. */
anthropic_speed?: string;
- /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. */
anthropic_beta?: readonly string[];
}
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index 7b058edde..278a6254b 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -114,7 +114,6 @@ export interface PublicModel {
// gateway appends to the listing. Clients that do not know about the
// field ignore it; alias-aware clients (dashboard, CLI shims) render the
// alias's target id and rules from this payload directly.
- // See docs/superpowers/specs/2026-06-25-model-aliases-design.md.
aliasedFrom?: PublicModelAliasedFrom;
}
diff --git a/packages/protocols/src/extensions/index.ts b/packages/protocols/src/extensions/index.ts
index b6579ce2b..3f2c67750 100644
--- a/packages/protocols/src/extensions/index.ts
+++ b/packages/protocols/src/extensions/index.ts
@@ -2,8 +2,7 @@
* Closed enumeration of Floway protocol extension fields that the gateway
* adds to each inbound IR on top of the host protocol's own schema. The
* per-upstream sanitizer in the gateway reads this manifest to strip any
- * extension residue before the upstream HTTP call. See
- * docs/superpowers/specs/2026-06-25-model-aliases-design.md.
+ * extension residue before the upstream HTTP call.
*/
export const FLOWAY_EXTENSION_FIELDS = {
chatCompletions: ['thinking_budget', 'adaptive_thinking', 'reasoning_summary', 'anthropic_speed', 'anthropic_beta'] as const,
diff --git a/packages/protocols/src/gemini/index.ts b/packages/protocols/src/gemini/index.ts
index c3e7e646a..d3786aad5 100644
--- a/packages/protocols/src/gemini/index.ts
+++ b/packages/protocols/src/gemini/index.ts
@@ -6,9 +6,9 @@ export interface GeminiPayload {
generationConfig?: GeminiGenerationConfig;
safetySettings?: GeminiSafetySetting[];
cachedContent?: string;
- /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. */
anthropicSpeed?: string;
- /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. */
anthropicBeta?: readonly string[];
}
@@ -42,9 +42,9 @@ export interface GeminiGenerationConfig {
responseMimeType?: string;
responseSchema?: unknown;
thinkingConfig?: GeminiThinkingConfig;
- /** Floway protocol extension. Translated to OpenAI Chat `verbosity` / Responses `text.verbosity` when routed to those upstreams; dropped on Anthropic Messages and Gemini targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ /** Floway protocol extension. Translated to OpenAI Chat `verbosity` / Responses `text.verbosity` when routed to those upstreams; dropped on Anthropic Messages and Gemini targets. */
verbosity?: string;
- /** Floway protocol extension. Translated to OpenAI Chat `service_tier` / Responses `service_tier` / Anthropic `service_tier` when routed to those upstreams; dropped on Gemini targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ /** Floway protocol extension. Translated to OpenAI Chat `service_tier` / Responses `service_tier` / Anthropic `service_tier` when routed to those upstreams; dropped on Gemini targets. */
serviceTier?: string;
}
diff --git a/packages/protocols/src/messages/index.ts b/packages/protocols/src/messages/index.ts
index 663dcef24..43ed05197 100644
--- a/packages/protocols/src/messages/index.ts
+++ b/packages/protocols/src/messages/index.ts
@@ -56,7 +56,7 @@ export interface MessagesPayload {
// protocol layer because the gateway treats `speed: 'fast'` as the canonical
// client signal regardless of which upstream serves it.
speed?: 'standard' | 'fast' | (string & {});
- /** Floway protocol extension. Translated to OpenAI Chat `verbosity` / Responses `text.verbosity` when routed to those upstreams; dropped on Anthropic Messages and Gemini targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ /** Floway protocol extension. Translated to OpenAI Chat `verbosity` / Responses `text.verbosity` when routed to those upstreams; dropped on Anthropic Messages and Gemini targets. */
verbosity?: string;
}
diff --git a/packages/protocols/src/responses/index.ts b/packages/protocols/src/responses/index.ts
index 39af5e148..2669fe41d 100644
--- a/packages/protocols/src/responses/index.ts
+++ b/packages/protocols/src/responses/index.ts
@@ -37,13 +37,13 @@ export interface ResponsesPayload {
prompt_cache_key?: string | null;
safety_identifier?: string | null;
service_tier?: 'default' | 'auto' | 'flex' | 'priority' | 'scale' | (string & {}) | null;
- /** Floway protocol extension. Translated to Anthropic `thinking.budget_tokens` / Gemini `thinkingConfig.thinkingBudget` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ /** Floway protocol extension. Translated to Anthropic `thinking.budget_tokens` / Gemini `thinkingConfig.thinkingBudget` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. */
thinking_budget?: number;
- /** Floway protocol extension. Translated to Anthropic `thinking.type: "adaptive"` / Gemini `thinkingConfig.thinkingBudget: -1` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ /** Floway protocol extension. Translated to Anthropic `thinking.type: "adaptive"` / Gemini `thinkingConfig.thinkingBudget: -1` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. */
adaptive_thinking?: boolean;
- /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. */
anthropic_speed?: string;
- /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+ /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. */
anthropic_beta?: readonly string[];
}
diff --git a/packages/translate/src/chat-completions-via-messages/request.ts b/packages/translate/src/chat-completions-via-messages/request.ts
index 5e83a230b..f4ff25285 100644
--- a/packages/translate/src/chat-completions-via-messages/request.ts
+++ b/packages/translate/src/chat-completions-via-messages/request.ts
@@ -193,7 +193,7 @@ export const translateChatCompletionsToMessages = async (payload: ChatCompletion
// slots. `anthropic_beta` is body-side residue that the per-upstream
// sanitizer strips after translation; the gateway-side rule-apply pass owns
// moving its value onto the outbound `anthropic-beta` header before the
- // upstream call. See docs/superpowers/specs/2026-06-25-model-aliases-design.md.
+ // upstream call.
const thinking = buildMessagesThinkingFromExtensions({
thinkingBudget: payload.thinking_budget,
adaptiveThinking: payload.adaptive_thinking,
diff --git a/packages/translate/src/shared/gemini-via/gemini.ts b/packages/translate/src/shared/gemini-via/gemini.ts
index 99d8b4872..0c98c09e3 100644
--- a/packages/translate/src/shared/gemini-via/gemini.ts
+++ b/packages/translate/src/shared/gemini-via/gemini.ts
@@ -120,8 +120,6 @@ export const geminiFunctionResponsePart = (part: GeminiPart, ids: GeminiToolCall
// Reasoning effort is freeform on the inbound IRs (per Goal 2: never gate
// operator-typed values), but the gateway publishes a canonical closed set so
// translate-side mappers can normalize without rewriting unknown values.
-// References:
-// - docs/superpowers/specs/2026-06-25-model-aliases-design.md (Translate Layer)
export type ReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' | 'max';
export const geminiThinkingLevelEffort = (thinkingConfig?: GeminiThinkingConfig): ReasoningEffort | undefined => {
From 322e5abdc47aab3da735ae951589f9e7ebda1d71 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 03:29:06 +0800
Subject: [PATCH 017/170] fix(translate): propagate native service_tier on
responses-via-messages
Both Responses and Messages carry native service_tier; the translator
silently dropped it, so an alias serviceTier rule landing on a Responses
inbound that routed to a Messages upstream vanished. Spread it onto the
target alongside the other native fields.
---
packages/translate/src/responses-via-messages/request.ts | 8 +++++---
1 file changed, 5 insertions(+), 3 deletions(-)
diff --git a/packages/translate/src/responses-via-messages/request.ts b/packages/translate/src/responses-via-messages/request.ts
index f16acb936..9bfd8bc51 100644
--- a/packages/translate/src/responses-via-messages/request.ts
+++ b/packages/translate/src/responses-via-messages/request.ts
@@ -335,9 +335,10 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op
// Extension-driven thinking (`thinking_budget`, `adaptive_thinking`) wins
// over the native `effort === 'none'` disable, so the alias write-side
// facets that target the structured thinking slot survive the legacy
- // disable shortcut. Native `reasoning.summary` and `service_tier` do not
- // surface onto Messages — the Responses-native vocabulary keeps its
- // pre-existing translation contract and rides the upstream sanitizer.
+ // disable shortcut. Native `reasoning.summary` does not surface onto
+ // Messages — the Responses-native vocabulary keeps its pre-existing
+ // translation contract and rides the upstream sanitizer. `service_tier`
+ // is native on both protocols and propagates verbatim.
const extensionThinking = buildMessagesThinkingFromExtensions({
thinkingBudget: payload.thinking_budget,
adaptiveThinking: payload.adaptive_thinking,
@@ -360,6 +361,7 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op
tool_choice: translateToolChoice(payload.tool_choice),
...(thinking ? { thinking } : {}),
...(hasOutputConfig ? { output_config: outputConfig } : {}),
+ ...(payload.service_tier != null ? { service_tier: payload.service_tier } : {}),
...(payload.anthropic_speed != null ? { speed: payload.anthropic_speed } : {}),
};
From 9f38cef6263e9980d5dfc2ee887cdaef24773846 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 03:35:51 +0800
Subject: [PATCH 018/170] refactor(aliases): inline AliasMatchResult
single-field wrapper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
matchAlias returns the alias directly; the sole caller (pushInterpretation
in registry.ts) was already destructuring the wrapper away. Both the
review and cleanup passes converged on this — remove the indirection.
---
packages/gateway/src/data-plane/model-aliases/match.ts | 8 ++------
.../gateway/src/data-plane/model-aliases/match_test.ts | 4 ++--
packages/gateway/src/data-plane/providers/registry.ts | 5 ++---
3 files changed, 6 insertions(+), 11 deletions(-)
diff --git a/packages/gateway/src/data-plane/model-aliases/match.ts b/packages/gateway/src/data-plane/model-aliases/match.ts
index f297d1a50..edb31d071 100644
--- a/packages/gateway/src/data-plane/model-aliases/match.ts
+++ b/packages/gateway/src/data-plane/model-aliases/match.ts
@@ -1,9 +1,5 @@
import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
-export interface AliasMatchResult {
- readonly alias: ModelAlias;
-}
-
// Lookup an alias for the (post-prefix-strip) lookupId against the upstream's
// id. An empty `upstreamIds` filter on the alias means "match any upstream";
// a non-empty filter must include the upstream's id.
@@ -11,9 +7,9 @@ export const matchAlias = (
lookupId: string,
upstreamId: string,
aliases: readonly ModelAlias[],
-): AliasMatchResult | undefined => {
+): ModelAlias | undefined => {
const hit = aliases.find(a => a.alias === lookupId);
if (!hit) return undefined;
if (hit.upstreamIds.length > 0 && !hit.upstreamIds.includes(upstreamId)) return undefined;
- return { alias: hit };
+ return hit;
};
diff --git a/packages/gateway/src/data-plane/model-aliases/match_test.ts b/packages/gateway/src/data-plane/model-aliases/match_test.ts
index 7252078c9..b3fbf5596 100644
--- a/packages/gateway/src/data-plane/model-aliases/match_test.ts
+++ b/packages/gateway/src/data-plane/model-aliases/match_test.ts
@@ -17,7 +17,7 @@ const make = (overrides: Partial<ModelAlias>): ModelAlias => ({
describe('matchAlias', () => {
test('matches by exact lookupId when alias has no upstream filter', () => {
const aliases = [make({ alias: 'codex-auto-review', targetModelId: 'gpt-5.4' })];
- expect(matchAlias('codex-auto-review', 'up-1', aliases)?.alias.alias).toBe('codex-auto-review');
+ expect(matchAlias('codex-auto-review', 'up-1', aliases)?.alias).toBe('codex-auto-review');
});
test('does not match when lookupId differs', () => {
@@ -46,7 +46,7 @@ describe('matchAlias', () => {
make({ alias: 'a', targetModelId: 'first', rules: { reasoning: { effort: 'low' } } }),
make({ alias: 'a', targetModelId: 'second' }),
];
- expect(matchAlias('a', 'up-x', aliases)?.alias).toEqual(aliases[0]);
+ expect(matchAlias('a', 'up-x', aliases)).toEqual(aliases[0]);
});
test('returns undefined for an empty alias list', () => {
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index 53619a613..3645a041b 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -393,12 +393,11 @@ const pushInterpretation = (
lookupId: string,
aliases: readonly ModelAlias[],
): void => {
- const hit = matchAlias(lookupId, provider.upstream, aliases);
- if (!hit) {
+ const alias = matchAlias(lookupId, provider.upstream, aliases);
+ if (!alias) {
out.push({ provider, lookupId });
return;
}
- const { alias } = hit;
const aliasInterp: ModelInterpretation = {
provider,
lookupId: alias.targetModelId,
From 1fd7bba13523bdeb8761436dd4cc6a1aef8c6917 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 03:56:20 +0800
Subject: [PATCH 019/170] refactor(aliases): inline single-use display/listing
helpers
formatAliasRulesSummary was only consumed by composeAliasDisplayName in
the same file; the standalone export existed so the test could import it
directly (anti-test-bending). aliasPublicId was a 2-line ternary used
exactly once inside aliasEmissionToListedModel. Both now live at their
call site; tests target the surviving public entry.
---
.../control-plane/model-aliases/display.ts | 44 +++++----
.../model-aliases/display_test.ts | 90 +++++++++----------
.../src/data-plane/models/alias-listing.ts | 16 ++--
3 files changed, 72 insertions(+), 78 deletions(-)
diff --git a/packages/gateway/src/control-plane/model-aliases/display.ts b/packages/gateway/src/control-plane/model-aliases/display.ts
index 80b567488..57ecd0eec 100644
--- a/packages/gateway/src/control-plane/model-aliases/display.ts
+++ b/packages/gateway/src/control-plane/model-aliases/display.ts
@@ -1,16 +1,25 @@
import type { ModelAliasRules } from './types.ts';
-// Render the closed rule set as a parenthesized suffix the gateway appends to
-// the target model's display name when the operator did not supply an
-// explicit alias `displayName`. The wording stays compact (`value label`,
-// joined with commas) because the suffix has to fit alongside the target
-// name in narrow listings — the dashboard's per-badge view uses
-// `formatAliasRuleBadges` for the self-describing `label: value` form.
+// Compose the alias-local display name — what the operator named the alias
+// (when set) or a synthesized target + rules summary. Independent of which
+// upstream is surfacing the alias; the prefixed listing form prepends the
+// upstream display name at the call site, mirroring the real-model path in
+// `registry.ts`.
//
-// `anthropicBeta` is sorted at format time so two operators carrying the same
-// token set in different orders see the same label.
-export const formatAliasRulesSummary = (rules: ModelAliasRules): string => {
+// The synthesized form's parenthesized rules suffix uses the compact
+// `value label` wording so it fits alongside the target name in narrow
+// listings — the dashboard's per-badge view uses `formatAliasRuleBadges`
+// for the self-describing `label: value` form. `anthropicBeta` tokens are
+// sorted so two operators carrying the same set in different orders see
+// the same label.
+export const composeAliasDisplayName = (input: {
+ aliasDisplayName?: string;
+ targetDisplayName: string;
+ rules: ModelAliasRules;
+}): string => {
+ if (input.aliasDisplayName !== undefined) return input.aliasDisplayName;
const parts: string[] = [];
+ const { rules } = input;
if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`);
if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`);
if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning');
@@ -21,19 +30,6 @@ export const formatAliasRulesSummary = (rules: ModelAliasRules): string => {
if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
parts.push([...rules.anthropicBeta].sort().join('/'));
}
- return parts.length > 0 ? ` (${parts.join(', ')})` : '';
-};
-
-// Compose the alias-local display name — what the operator named the alias
-// (when set) or a synthesized target + rules summary. Independent of which
-// upstream is surfacing the alias; the prefixed listing form prepends the
-// upstream display name at the call site, mirroring the real-model path in
-// `registry.ts`.
-export const composeAliasDisplayName = (input: {
- aliasDisplayName?: string;
- targetDisplayName: string;
- rules: ModelAliasRules;
-}): string => {
- if (input.aliasDisplayName !== undefined) return input.aliasDisplayName;
- return `${input.targetDisplayName}${formatAliasRulesSummary(input.rules)}`;
+ const suffix = parts.length > 0 ? ` (${parts.join(', ')})` : '';
+ return `${input.targetDisplayName}${suffix}`;
};
diff --git a/packages/gateway/src/control-plane/model-aliases/display_test.ts b/packages/gateway/src/control-plane/model-aliases/display_test.ts
index d45a1b339..a9eec7070 100644
--- a/packages/gateway/src/control-plane/model-aliases/display_test.ts
+++ b/packages/gateway/src/control-plane/model-aliases/display_test.ts
@@ -1,72 +1,72 @@
import { describe, expect, test } from 'vitest';
-import { composeAliasDisplayName, formatAliasRulesSummary } from './display.ts';
+import { composeAliasDisplayName } from './display.ts';
-describe('formatAliasRulesSummary', () => {
- test('returns empty string when no rules are set', () => {
- expect(formatAliasRulesSummary({})).toBe('');
- });
-
- test('formats each rule field with its canonical suffix', () => {
- expect(formatAliasRulesSummary({ reasoning: { effort: 'high' } })).toBe(' (high effort)');
- expect(formatAliasRulesSummary({ reasoning: { budgetTokens: 4096 } })).toBe(' (4096tk reasoning)');
- expect(formatAliasRulesSummary({ reasoning: { adaptive: true } })).toBe(' (adaptive reasoning)');
- expect(formatAliasRulesSummary({ reasoning: { summary: 'detailed' } })).toBe(' (detailed summary)');
- expect(formatAliasRulesSummary({ verbosity: 'low' })).toBe(' (low verbosity)');
- expect(formatAliasRulesSummary({ serviceTier: 'priority' })).toBe(' (priority tier)');
- expect(formatAliasRulesSummary({ anthropicSpeed: 'fast' })).toBe(' (fast speed)');
+describe('composeAliasDisplayName', () => {
+ test('uses alias displayName when set, suppressing the rules summary', () => {
+ expect(
+ composeAliasDisplayName({
+ aliasDisplayName: 'Codex Auto Review',
+ targetDisplayName: 'GPT-5.4',
+ rules: { reasoning: { effort: 'low' } },
+ }),
+ ).toBe('Codex Auto Review');
});
- test('sorts anthropicBeta tokens and joins with slashes', () => {
- expect(formatAliasRulesSummary({ anthropicBeta: ['extended-thinking', 'fast-mode-2026-02-01'] })).toBe(
- ' (extended-thinking/fast-mode-2026-02-01)',
- );
- expect(formatAliasRulesSummary({ anthropicBeta: ['fast-mode-2026-02-01', 'extended-thinking'] })).toBe(
- ' (extended-thinking/fast-mode-2026-02-01)',
- );
+ test('omits the rules suffix when rules are empty', () => {
+ expect(
+ composeAliasDisplayName({
+ targetDisplayName: 'GPT-5.4',
+ rules: {},
+ }),
+ ).toBe('GPT-5.4');
});
- test('drops anthropicBeta when the token list is empty', () => {
- expect(formatAliasRulesSummary({ anthropicBeta: [] })).toBe('');
+ test('formats each rule field with its canonical suffix when alias displayName is missing', () => {
+ const target = 'GPT-5.4';
+ expect(composeAliasDisplayName({ targetDisplayName: target, rules: { reasoning: { effort: 'high' } } })).toBe('GPT-5.4 (high effort)');
+ expect(composeAliasDisplayName({ targetDisplayName: target, rules: { reasoning: { budgetTokens: 4096 } } })).toBe('GPT-5.4 (4096tk reasoning)');
+ expect(composeAliasDisplayName({ targetDisplayName: target, rules: { reasoning: { adaptive: true } } })).toBe('GPT-5.4 (adaptive reasoning)');
+ expect(composeAliasDisplayName({ targetDisplayName: target, rules: { reasoning: { summary: 'detailed' } } })).toBe('GPT-5.4 (detailed summary)');
+ expect(composeAliasDisplayName({ targetDisplayName: target, rules: { verbosity: 'low' } })).toBe('GPT-5.4 (low verbosity)');
+ expect(composeAliasDisplayName({ targetDisplayName: target, rules: { serviceTier: 'priority' } })).toBe('GPT-5.4 (priority tier)');
+ expect(composeAliasDisplayName({ targetDisplayName: target, rules: { anthropicSpeed: 'fast' } })).toBe('GPT-5.4 (fast speed)');
});
- test('joins multiple fields with comma in deterministic order', () => {
+ test('sorts anthropicBeta tokens and joins with slashes', () => {
expect(
- formatAliasRulesSummary({
- reasoning: { effort: 'low', summary: 'concise' },
- verbosity: 'high',
- anthropicSpeed: 'fast',
+ composeAliasDisplayName({
+ targetDisplayName: 'Claude',
+ rules: { anthropicBeta: ['extended-thinking', 'fast-mode-2026-02-01'] },
}),
- ).toBe(' (low effort, concise summary, high verbosity, fast speed)');
- });
-});
-
-describe('composeAliasDisplayName', () => {
- test('uses alias displayName when set, suppressing the rules summary', () => {
+ ).toBe('Claude (extended-thinking/fast-mode-2026-02-01)');
expect(
composeAliasDisplayName({
- aliasDisplayName: 'Codex Auto Review',
- targetDisplayName: 'GPT-5.4',
- rules: { reasoning: { effort: 'low' } },
+ targetDisplayName: 'Claude',
+ rules: { anthropicBeta: ['fast-mode-2026-02-01', 'extended-thinking'] },
}),
- ).toBe('Codex Auto Review');
+ ).toBe('Claude (extended-thinking/fast-mode-2026-02-01)');
});
- test('falls back to target displayName with rules suffix when alias displayName is missing', () => {
+ test('drops anthropicBeta when the token list is empty', () => {
expect(
composeAliasDisplayName({
- targetDisplayName: 'GPT-5.4',
- rules: { reasoning: { effort: 'low' } },
+ targetDisplayName: 'Claude',
+ rules: { anthropicBeta: [] },
}),
- ).toBe('GPT-5.4 (low effort)');
+ ).toBe('Claude');
});
- test('omits the rules suffix when rules are empty', () => {
+ test('joins multiple fields with comma in deterministic order', () => {
expect(
composeAliasDisplayName({
targetDisplayName: 'GPT-5.4',
- rules: {},
+ rules: {
+ reasoning: { effort: 'low', summary: 'concise' },
+ verbosity: 'high',
+ anthropicSpeed: 'fast',
+ },
}),
- ).toBe('GPT-5.4');
+ ).toBe('GPT-5.4 (low effort, concise summary, high verbosity, fast speed)');
});
});
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index d1de04b09..e9762ed35 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -54,14 +54,6 @@ const aliasListingEmissions = (
return out;
};
-// The public id form an alias emission carries on the wire. Bare alias name
-// for the unprefixed form; provider prefix + alias name for the prefixed
-// form. Mirrors how real models are surfaced in the same listing pass.
-const aliasPublicId = (alias: ModelAlias, emission: AliasListingEmission): string => {
- const cfg = emission.provider.modelPrefix;
- return emission.form === 'prefixed' && cfg !== null ? `${cfg.prefix}${alias.alias}` : alias.alias;
-};
-
// Turn an alias emission into a `ListedModel` that walks the same listing
// pipeline as real catalog entries. The synthesized `providers` array carries
// a single binding pointing at the alias's target on this upstream, so the
@@ -73,6 +65,10 @@ const aliasPublicId = (alias: ModelAlias, emission: AliasListingEmission): strin
// `unprefixed` listing form; the `prefixed` form mirrors the real-model path
// in `registry.ts` and prepends `${provider.name}: ` so the upstream is
// visible at a glance.
+//
+// Public id: bare alias name for the unprefixed form; provider prefix + alias
+// name for the prefixed form. Mirrors how real models are surfaced in the
+// same listing pass.
const aliasEmissionToListedModel = (alias: ModelAlias, emission: AliasListingEmission): ListedModel => {
const { provider, target, form } = emission;
const aliasLocalName = composeAliasDisplayName({
@@ -80,6 +76,8 @@ const aliasEmissionToListedModel = (alias: ModelAlias, emission: AliasListingEmi
targetDisplayName: target.display_name ?? target.id,
rules: alias.rules,
});
+ const cfg = provider.modelPrefix;
+ const publicId = form === 'prefixed' && cfg !== null ? `${cfg.prefix}${alias.alias}` : alias.alias;
const record: ProviderModelRecord = {
upstream: provider.upstream,
upstreamName: provider.name,
@@ -92,7 +90,7 @@ const aliasEmissionToListedModel = (alias: ModelAlias, emission: AliasListingEmi
const { providerData: _providerData, endpoints, id: _targetId, display_name: _targetDisplay, created: _targetCreated, ...rest } = target;
return {
...rest,
- id: aliasPublicId(alias, emission),
+ id: publicId,
display_name: form === 'prefixed' ? `${provider.name}: ${aliasLocalName}` : aliasLocalName,
created: alias.createdAt,
endpoints: { ...endpoints },
From 7d4a3e34da80afb2cf03fdecb26bddf533774285 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 03:56:20 +0800
Subject: [PATCH 020/170] refactor(aliases): collapse passthrough drop-trace
into the shared sanitize helper
The passthrough serve path was re-emitting the floway.alias.drop log
shape that chat/shared/sanitize.ts already owns, and re-finding the
matched alias by name to walk its rules. The matched alias's
ModelAliasRules now ride through resolveModelForRequest as aliasRules,
alongside aliasName, so passthrough has the rules in hand; the rules
walker moves into sanitize.ts as traceAllRulesDropped, and passthrough
builds its trace context with createSanitizeTraceCtx, so both surfaces
emit identical trace lines.
---
.../src/data-plane/chat/shared/sanitize.ts | 23 ++++++++++++++
.../src/data-plane/providers/registry.ts | 6 +++-
.../data-plane/shared/passthrough-serve.ts | 30 +++----------------
3 files changed, 32 insertions(+), 27 deletions(-)
diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize.ts b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
index 832f8f41d..8f8b878e1 100644
--- a/packages/gateway/src/data-plane/chat/shared/sanitize.ts
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
@@ -1,3 +1,4 @@
+import type { ModelAliasRules } from '../../../control-plane/model-aliases/types.ts';
import { FLOWAY_EXTENSION_FIELDS } from '@floway-dev/protocols/extensions';
export interface SanitizeTraceCtx {
@@ -49,3 +50,25 @@ export const sanitizeForGeminiUpstream = (body: Record, trace?:
stripKeys(generationConfig as Record, FLOWAY_EXTENSION_FIELDS.gemini.generationConfig, 'gemini', trace, 'generationConfig.');
}
};
+
+// Walks the alias rules object and emits one trace line per non-empty rule
+// field. Used by inbound surfaces that have no protocol-extension slots for
+// the rules in the first place (embeddings, images, /v1/completions) — the
+// rules are structurally dropped before the upstream call, and this helper
+// gives the operator the same `floway.alias.drop` signal the chat
+// sanitizers produce when they strip extension residue.
+export const traceAllRulesDropped = (
+ rules: ModelAliasRules,
+ targetProtocol: string,
+ trace: SanitizeTraceCtx,
+): void => {
+ if (rules.reasoning) {
+ for (const key of Object.keys(rules.reasoning)) {
+ trace.emit({ alias: trace.aliasName, field: `reasoning.${key}`, targetProtocol });
+ }
+ }
+ if (rules.verbosity !== undefined) trace.emit({ alias: trace.aliasName, field: 'verbosity', targetProtocol });
+ if (rules.serviceTier !== undefined) trace.emit({ alias: trace.aliasName, field: 'serviceTier', targetProtocol });
+ if (rules.anthropicSpeed !== undefined) trace.emit({ alias: trace.aliasName, field: 'anthropicSpeed', targetProtocol });
+ if (rules.anthropicBeta?.length) trace.emit({ alias: trace.aliasName, field: 'anthropicBeta', targetProtocol });
+};
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index 3645a041b..1264eed0d 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -332,6 +332,10 @@ export interface ProviderModelResolution {
// this onto the `x-floway-alias` response header so alias-served calls are
// observable without enabling any extra mode.
aliasName?: string;
+ // Operator-locked rules carried alongside `aliasName`. Set in lockstep so
+ // passthrough callers can trace the dropped rule fields without re-finding
+ // the matched alias by name.
+ aliasRules?: ModelAliasRules;
}
export interface ModelInterpretation {
@@ -525,7 +529,7 @@ export const resolveModelForRequest = async (
// `x-floway-alias` header without re-deriving the match.
const matches: ProviderModelResolution[] = resolutions.map(r =>
r.interpretation.aliasName !== undefined
- ? { ...r.resolved, aliasName: r.interpretation.aliasName }
+ ? { ...r.resolved, aliasName: r.interpretation.aliasName, aliasRules: r.interpretation.aliasRules }
: r.resolved);
return { matches, failedUpstreams };
};
diff --git a/packages/gateway/src/data-plane/shared/passthrough-serve.ts b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
index b566b582d..31b162e9a 100644
--- a/packages/gateway/src/data-plane/shared/passthrough-serve.ts
+++ b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
@@ -23,6 +23,7 @@ import { createPerRequestFetcher } from '../../dial/per-request.ts';
import type { AuthedContext } from '../../middleware/auth.ts';
import { getRepo } from '../../repo/index.ts';
import type { TokenUsage } from '../../repo/types.ts';
+import { createSanitizeTraceCtx, traceAllRulesDropped } from '../chat/shared/sanitize.ts';
import type { GatewayCtx } from '../chat/shared/gateway-ctx.ts';
import { stageGatewayResponseHeader } from '../chat/shared/gateway-ctx.ts';
import { type StreamCompletion, writeSSEFrames } from '../chat/shared/stream/sse.ts';
@@ -119,31 +120,6 @@ interface PassthroughServeContext {
export const passthroughApiError = (c: Context, message: string, status: ContentfulStatusCode): Response =>
c.json({ error: { message, type: 'api_error' } }, status);
-// Emit one trace line per rule field present on the matched alias when the
-// inbound endpoint has no slot for the rule. The passthrough endpoints
-// (embeddings, images, /v1/completions) carry no Floway-extension fields
-// so a non-empty `rules` object is structurally dropped before the upstream
-// call; emitting one trace line per knob gives an operator the same signal
-// the chat sanitizers do.
-const traceDroppedAliasRulesForPassthrough = (
- aliasName: string,
- aliases: readonly { alias: string; rules: Record }[],
- sourceApi: PassthroughServeApiName,
-): void => {
- const matched = aliases.find(a => a.alias === aliasName);
- if (!matched) return;
- const rules = matched.rules as { reasoning?: Record; verbosity?: unknown; serviceTier?: unknown; anthropicSpeed?: unknown; anthropicBeta?: readonly unknown[] };
- const fields: string[] = [];
- if (rules.reasoning) for (const key of Object.keys(rules.reasoning)) fields.push(`reasoning.${key}`);
- if (rules.verbosity !== undefined) fields.push('verbosity');
- if (rules.serviceTier !== undefined) fields.push('serviceTier');
- if (rules.anthropicSpeed !== undefined) fields.push('anthropicSpeed');
- if (rules.anthropicBeta?.length) fields.push('anthropicBeta');
- for (const field of fields) {
- console.warn('floway.alias.drop', JSON.stringify({ alias: aliasName, field, targetProtocol: sourceApi }));
- }
-};
-
export const passthroughServe = async (input: PassthroughServeContext): Promise => {
const { c, ctx, sourceApi, model, bindingServesEndpoint, call, response: responseHandling } = input;
const requestStartedAt = performance.now();
@@ -176,7 +152,9 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
if (!bindingServesEndpoint(match.binding)) continue;
if (match.aliasName !== undefined) {
stageGatewayResponseHeader(ctx, 'x-floway-alias', match.aliasName);
- traceDroppedAliasRulesForPassthrough(match.aliasName, aliases, sourceApi);
+ if (match.aliasRules) {
+ traceAllRulesDropped(match.aliasRules, sourceApi, createSanitizeTraceCtx(match.aliasName));
+ }
}
const recorder = createUpstreamLatencyRecorder();
From e79d27ba7523800afed921da82bfb3dfb7a8ddd2 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 03:56:20 +0800
Subject: [PATCH 021/170] docs(aliases): note summary='auto' is an explicit
no-op on Messages target
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
mapSummaryToAnthropicDisplay('auto') returns undefined, so the apply
step has always left a user-supplied thinking.display untouched in
that case. The comment now spells out that this is intentional —
'auto' means "defer to upstream default", and operator-locked
overwrite applies to every other summary value.
---
packages/gateway/src/data-plane/model-aliases/apply.ts | 5 +++++
1 file changed, 5 insertions(+)
diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts
index bd7503141..283845982 100644
--- a/packages/gateway/src/data-plane/model-aliases/apply.ts
+++ b/packages/gateway/src/data-plane/model-aliases/apply.ts
@@ -56,6 +56,11 @@ export const applyAliasRulesToMessages = (payload: MessagesPayload, rules: Model
}
if (rules.reasoning?.summary !== undefined) {
const display = mapSummaryToAnthropicDisplay(rules.reasoning.summary);
+ // summary='auto' maps to undefined and is an explicit no-op on the
+ // Messages path — the operator chose "let upstream default decide", so
+ // we neither synthesize a thinking block nor overwrite a user-supplied
+ // thinking.display. Every other summary value enforces operator-locked
+ // overwrite.
if (display !== undefined) {
// When no prior thinking branch ran (no effort/budget/adaptive in this
// rule), synthesize `thinking: {type:'enabled', display}` so the
From 7122dc7bc435b9f4c144880d7827cc983fc1edf0 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 04:04:18 +0800
Subject: [PATCH 022/170] feat(aliases): drop redundant anthropicSpeed alias
knob
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The cross-protocol service_tier↔speed:'fast' bridge that #114 added to the
*-via-messages and messages-via-* translators makes the alias-extension
knob anthropicSpeed redundant — operators who want speed: 'fast' on a
Messages upstream can set serviceTier: 'fast' on the alias and the bridge
handles the wire-level conversion in both directions.
Removed before any external client relies on it (the alias schema is not
yet public — PR is still open):
- ModelAliasRules.anthropicSpeed plus the matching PublicModelAliasedFrom
field on /v1/models.
- The anthropic_speed Chat / Responses extension fields, the top-level
anthropicSpeed Gemini field, and their entries in
FLOWAY_EXTENSION_FIELDS.
- The four applyAliasRules* branches that wrote the knob into each
inbound IR's natural slot, plus the matching emit branches in the
chat-completions-via-messages, responses-via-messages, and
gemini-via-messages translators.
- The trace-helper and display/badge formatters that surfaced the field.
- All tests asserting either side of the now-removed contract.
anthropicBeta is unrelated (Anthropic beta header tokens) and is kept
intact. The native Messages `speed` field is also untouched — callers
hitting the Messages inbound directly still control it.
---
.../control-plane/model-aliases/display.ts | 1 -
.../model-aliases/display_test.ts | 5 ++---
.../src/control-plane/model-aliases/types.ts | 1 -
.../src/data-plane/chat/shared/sanitize.ts | 1 -
.../data-plane/chat/shared/sanitize_test.ts | 8 +++----
.../src/data-plane/model-aliases/apply.ts | 16 ++++++--------
.../data-plane/model-aliases/apply_test.ts | 21 ++++++-------------
.../protocols/src/chat-completions/index.ts | 2 --
packages/protocols/src/common/models.ts | 2 --
packages/protocols/src/extensions/index.ts | 6 +++---
packages/protocols/src/gemini/index.ts | 2 --
packages/protocols/src/responses/index.ts | 2 --
.../chat-completions-via-messages/request.ts | 5 +----
.../request_test.ts | 11 ----------
.../request_test.ts | 2 --
.../request_test.ts | 6 +-----
.../src/gemini-via-messages/request.ts | 5 -----
.../src/gemini-via-messages/request_test.ts | 10 ---------
.../src/gemini-via-responses/request_test.ts | 3 ---
.../request_test.ts | 1 -
.../messages-via-responses/request_test.ts | 1 -
.../request_test.ts | 2 --
.../src/responses-via-messages/request.ts | 5 +----
.../responses-via-messages/request_test.ts | 5 -----
24 files changed, 24 insertions(+), 99 deletions(-)
diff --git a/packages/gateway/src/control-plane/model-aliases/display.ts b/packages/gateway/src/control-plane/model-aliases/display.ts
index 57ecd0eec..f831273d8 100644
--- a/packages/gateway/src/control-plane/model-aliases/display.ts
+++ b/packages/gateway/src/control-plane/model-aliases/display.ts
@@ -26,7 +26,6 @@ export const composeAliasDisplayName = (input: {
if (rules.reasoning?.summary !== undefined) parts.push(`${rules.reasoning.summary} summary`);
if (rules.verbosity !== undefined) parts.push(`${rules.verbosity} verbosity`);
if (rules.serviceTier !== undefined) parts.push(`${rules.serviceTier} tier`);
- if (rules.anthropicSpeed !== undefined) parts.push(`${rules.anthropicSpeed} speed`);
if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
parts.push([...rules.anthropicBeta].sort().join('/'));
}
diff --git a/packages/gateway/src/control-plane/model-aliases/display_test.ts b/packages/gateway/src/control-plane/model-aliases/display_test.ts
index a9eec7070..40dbd2fec 100644
--- a/packages/gateway/src/control-plane/model-aliases/display_test.ts
+++ b/packages/gateway/src/control-plane/model-aliases/display_test.ts
@@ -30,7 +30,6 @@ describe('composeAliasDisplayName', () => {
expect(composeAliasDisplayName({ targetDisplayName: target, rules: { reasoning: { summary: 'detailed' } } })).toBe('GPT-5.4 (detailed summary)');
expect(composeAliasDisplayName({ targetDisplayName: target, rules: { verbosity: 'low' } })).toBe('GPT-5.4 (low verbosity)');
expect(composeAliasDisplayName({ targetDisplayName: target, rules: { serviceTier: 'priority' } })).toBe('GPT-5.4 (priority tier)');
- expect(composeAliasDisplayName({ targetDisplayName: target, rules: { anthropicSpeed: 'fast' } })).toBe('GPT-5.4 (fast speed)');
});
test('sorts anthropicBeta tokens and joins with slashes', () => {
@@ -64,9 +63,9 @@ describe('composeAliasDisplayName', () => {
rules: {
reasoning: { effort: 'low', summary: 'concise' },
verbosity: 'high',
- anthropicSpeed: 'fast',
+ serviceTier: 'flex',
},
}),
- ).toBe('GPT-5.4 (low effort, concise summary, high verbosity, fast speed)');
+ ).toBe('GPT-5.4 (low effort, concise summary, high verbosity, flex tier)');
});
});
diff --git a/packages/gateway/src/control-plane/model-aliases/types.ts b/packages/gateway/src/control-plane/model-aliases/types.ts
index 3170b4b47..e7d9f64bd 100644
--- a/packages/gateway/src/control-plane/model-aliases/types.ts
+++ b/packages/gateway/src/control-plane/model-aliases/types.ts
@@ -10,7 +10,6 @@ export type ModelAliasRules = {
};
readonly verbosity?: string;
readonly serviceTier?: string;
- readonly anthropicSpeed?: string;
readonly anthropicBeta?: readonly string[];
};
diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize.ts b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
index 8f8b878e1..57a6959dc 100644
--- a/packages/gateway/src/data-plane/chat/shared/sanitize.ts
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
@@ -69,6 +69,5 @@ export const traceAllRulesDropped = (
}
if (rules.verbosity !== undefined) trace.emit({ alias: trace.aliasName, field: 'verbosity', targetProtocol });
if (rules.serviceTier !== undefined) trace.emit({ alias: trace.aliasName, field: 'serviceTier', targetProtocol });
- if (rules.anthropicSpeed !== undefined) trace.emit({ alias: trace.aliasName, field: 'anthropicSpeed', targetProtocol });
if (rules.anthropicBeta?.length) trace.emit({ alias: trace.aliasName, field: 'anthropicBeta', targetProtocol });
};
diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
index eebcd5d06..da2e72d0d 100644
--- a/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
@@ -30,7 +30,7 @@ test('sanitizeForMessagesUpstream strips verbosity and emits one trace line', ()
test('sanitizeForChatCompletionsUpstream strips Floway extensions and leaves native fields', () => {
const body: Record = {
thinking_budget: 4096,
- anthropic_speed: 'fast',
+ anthropic_beta: ['ctx-1m'],
reasoning_effort: 'high',
model: 'x',
};
@@ -40,7 +40,7 @@ test('sanitizeForChatCompletionsUpstream strips Floway extensions and leaves nat
assertEquals(lines.length, 2);
assertEquals(lines.every(l => l.alias === 'alias-1' && l.targetProtocol === 'chat-completions'), true);
const droppedFields = lines.map(l => l.field).sort();
- assertEquals(droppedFields, ['anthropic_speed', 'thinking_budget']);
+ assertEquals(droppedFields, ['anthropic_beta', 'thinking_budget']);
});
test('sanitizeForResponsesUpstream strips extensions without a trace context', () => {
@@ -52,14 +52,14 @@ test('sanitizeForResponsesUpstream strips extensions without a trace context', (
test('sanitizeForGeminiUpstream walks top-level and generationConfig', () => {
const body: Record = {
generationConfig: { verbosity: 'low', thinkingConfig: { thinkingBudget: 100 } },
- anthropicSpeed: 'fast',
+ anthropicBeta: ['ctx-1m'],
};
const { ctx, lines } = makeTrace('alias-g');
sanitizeForGeminiUpstream(body, ctx);
assertEquals(body, { generationConfig: { thinkingConfig: { thinkingBudget: 100 } } });
assertEquals(lines.length, 2);
const droppedFields = lines.map(l => l.field).sort();
- assertEquals(droppedFields, ['anthropicSpeed', 'generationConfig.verbosity']);
+ assertEquals(droppedFields, ['anthropicBeta', 'generationConfig.verbosity']);
assertEquals(lines.every(l => l.alias === 'alias-g' && l.targetProtocol === 'gemini'), true);
});
diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts
index 283845982..e90e2fbed 100644
--- a/packages/gateway/src/data-plane/model-aliases/apply.ts
+++ b/packages/gateway/src/data-plane/model-aliases/apply.ts
@@ -19,26 +19,24 @@ export const applyAliasRulesToChatCompletions = (payload: ChatCompletionsPayload
if (rules.reasoning?.summary !== undefined) payload.reasoning_summary = rules.reasoning.summary;
if (rules.verbosity !== undefined) payload.verbosity = rules.verbosity;
if (rules.serviceTier !== undefined) payload.service_tier = rules.serviceTier;
- if (rules.anthropicSpeed !== undefined) payload.anthropic_speed = rules.anthropicSpeed;
if (rules.anthropicBeta?.length) payload.anthropic_beta = [...rules.anthropicBeta];
};
export const applyAliasRulesToResponses = (payload: ResponsesPayload, rules: ModelAliasRules): void => {
// reasoning.{effort, summary} and text.verbosity / service_tier are native;
- // budget/adaptive ride on extension slots; the two anthropic_* knobs only
- // matter when this Responses inbound lands on a Messages upstream.
+ // budget/adaptive ride on extension slots; anthropic_beta only matters when
+ // this Responses inbound lands on a Messages upstream.
if (rules.reasoning?.effort !== undefined) payload.reasoning = { ...payload.reasoning, effort: rules.reasoning.effort };
if (rules.reasoning?.summary !== undefined) payload.reasoning = { ...payload.reasoning, summary: rules.reasoning.summary };
if (rules.reasoning?.budgetTokens !== undefined) payload.thinking_budget = rules.reasoning.budgetTokens;
if (rules.reasoning?.adaptive === true) payload.adaptive_thinking = true;
if (rules.verbosity !== undefined) payload.text = { ...payload.text, verbosity: rules.verbosity };
if (rules.serviceTier !== undefined) payload.service_tier = rules.serviceTier;
- if (rules.anthropicSpeed !== undefined) payload.anthropic_speed = rules.anthropicSpeed;
if (rules.anthropicBeta?.length) payload.anthropic_beta = [...rules.anthropicBeta];
};
export const applyAliasRulesToMessages = (payload: MessagesPayload, rules: ModelAliasRules): void => {
- // Anthropic has natives for effort, thinking, speed, and service_tier; only
+ // Anthropic has natives for effort, thinking, and service_tier; only
// verbosity is a Floway extension on this inbound. anthropic_beta is the
// wire header — the attempt layer reads `candidate.aliasRules.anthropicBeta`
// and merges via mergeAnthropicBetaTokens, so we do not stamp the body here.
@@ -72,14 +70,13 @@ export const applyAliasRulesToMessages = (payload: MessagesPayload, rules: Model
}
if (rules.verbosity !== undefined) payload.verbosity = rules.verbosity;
if (rules.serviceTier !== undefined) payload.service_tier = rules.serviceTier;
- if (rules.anthropicSpeed !== undefined) payload.speed = rules.anthropicSpeed;
};
export const applyAliasRulesToGemini = (payload: GeminiPayload, rules: ModelAliasRules): void => {
// All four reasoning knobs ride on the native thinkingConfig; verbosity and
- // serviceTier ride on extension slots under generationConfig; the
- // anthropic_* knobs ride on top-level extension slots so the existing
- // gemini-via-messages translator picks them up there.
+ // serviceTier ride on extension slots under generationConfig; anthropicBeta
+ // rides on a top-level extension slot so the existing gemini-via-messages
+ // translator picks it up there.
const hasThinking = rules.reasoning?.effort !== undefined
|| rules.reasoning?.budgetTokens !== undefined
|| rules.reasoning?.adaptive === true
@@ -104,6 +101,5 @@ export const applyAliasRulesToGemini = (payload: GeminiPayload, rules: ModelAlia
if (rules.serviceTier !== undefined) generationConfig.serviceTier = rules.serviceTier;
payload.generationConfig = generationConfig;
}
- if (rules.anthropicSpeed !== undefined) payload.anthropicSpeed = rules.anthropicSpeed;
if (rules.anthropicBeta?.length) payload.anthropicBeta = [...rules.anthropicBeta];
};
diff --git a/packages/gateway/src/data-plane/model-aliases/apply_test.ts b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
index e05f40c91..3dfba9d45 100644
--- a/packages/gateway/src/data-plane/model-aliases/apply_test.ts
+++ b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
@@ -43,14 +43,13 @@ describe('applyAliasRulesToChatCompletions', () => {
expect(payload.reasoning_summary).toBe('detailed');
});
- test('writes verbosity, serviceTier, anthropicSpeed, anthropicBeta to their slots', () => {
+ test('writes verbosity, serviceTier, anthropicBeta to their slots', () => {
const payload = cc();
applyAliasRulesToChatCompletions(payload, {
- verbosity: 'low', serviceTier: 'flex', anthropicSpeed: 'fast', anthropicBeta: ['ctx-1m'],
+ verbosity: 'low', serviceTier: 'flex', anthropicBeta: ['ctx-1m'],
});
expect(payload.verbosity).toBe('low');
expect(payload.service_tier).toBe('flex');
- expect(payload.anthropic_speed).toBe('fast');
expect(payload.anthropic_beta).toEqual(['ctx-1m']);
});
@@ -100,10 +99,9 @@ describe('applyAliasRulesToResponses', () => {
expect(payload.service_tier).toBe('flex');
});
- test('writes anthropicSpeed / anthropicBeta to extension slots', () => {
+ test('writes anthropicBeta to extension slot', () => {
const payload = resp();
- applyAliasRulesToResponses(payload, { anthropicSpeed: 'fast', anthropicBeta: ['ctx-1m'] });
- expect(payload.anthropic_speed).toBe('fast');
+ applyAliasRulesToResponses(payload, { anthropicBeta: ['ctx-1m'] });
expect(payload.anthropic_beta).toEqual(['ctx-1m']);
});
});
@@ -133,12 +131,6 @@ describe('applyAliasRulesToMessages', () => {
expect(payload.thinking).toEqual({ type: 'enabled', budget_tokens: 1024, display: 'summarized' });
});
- test('writes anthropicSpeed to native speed', () => {
- const payload = msg();
- applyAliasRulesToMessages(payload, { anthropicSpeed: 'fast' });
- expect(payload.speed).toBe('fast');
- });
-
test('writes serviceTier to native service_tier', () => {
const payload = msg();
applyAliasRulesToMessages(payload, { serviceTier: 'priority' });
@@ -203,10 +195,9 @@ describe('applyAliasRulesToGemini', () => {
expect(payload.generationConfig?.serviceTier).toBe('flex');
});
- test('writes anthropicSpeed / anthropicBeta to top-level extension slots', () => {
+ test('writes anthropicBeta to top-level extension slot', () => {
const payload = gem();
- applyAliasRulesToGemini(payload, { anthropicSpeed: 'fast', anthropicBeta: ['ctx-1m'] });
- expect(payload.anthropicSpeed).toBe('fast');
+ applyAliasRulesToGemini(payload, { anthropicBeta: ['ctx-1m'] });
expect(payload.anthropicBeta).toEqual(['ctx-1m']);
});
diff --git a/packages/protocols/src/chat-completions/index.ts b/packages/protocols/src/chat-completions/index.ts
index 072348261..fc8a76d7f 100644
--- a/packages/protocols/src/chat-completions/index.ts
+++ b/packages/protocols/src/chat-completions/index.ts
@@ -36,8 +36,6 @@ export interface ChatCompletionsPayload {
adaptive_thinking?: boolean;
/** Floway protocol extension. Translated to OpenAI Responses `reasoning.summary` / Anthropic `thinking.display` / Gemini `thinkingConfig.includeThoughts` when routed to those upstreams; dropped on OpenAI Chat targets. */
reasoning_summary?: string;
- /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. */
- anthropic_speed?: string;
/** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. */
anthropic_beta?: readonly string[];
}
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index 278a6254b..97c9283a2 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -129,7 +129,6 @@ export interface PublicModelAliasedFrom {
};
verbosity?: string;
serviceTier?: string;
- anthropicSpeed?: string;
anthropicBeta?: readonly string[];
};
onConflict: 'alias-only' | 'real-only' | 'both-real-first' | 'both-alias-first';
@@ -156,7 +155,6 @@ export const formatAliasRuleBadges = (rules: PublicModelAliasedFrom['rules']): A
if (rules.reasoning?.summary !== undefined) out.push({ label: 'reasoning summary', value: rules.reasoning.summary });
if (rules.verbosity !== undefined) out.push({ label: 'verbosity', value: rules.verbosity });
if (rules.serviceTier !== undefined) out.push({ label: 'service tier', value: rules.serviceTier });
- if (rules.anthropicSpeed !== undefined) out.push({ label: 'speed', value: rules.anthropicSpeed });
if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
out.push({ label: 'anthropic beta', value: [...rules.anthropicBeta].sort().join('/') });
}
diff --git a/packages/protocols/src/extensions/index.ts b/packages/protocols/src/extensions/index.ts
index 3f2c67750..00edf50f1 100644
--- a/packages/protocols/src/extensions/index.ts
+++ b/packages/protocols/src/extensions/index.ts
@@ -5,11 +5,11 @@
* extension residue before the upstream HTTP call.
*/
export const FLOWAY_EXTENSION_FIELDS = {
- chatCompletions: ['thinking_budget', 'adaptive_thinking', 'reasoning_summary', 'anthropic_speed', 'anthropic_beta'] as const,
- responses: ['thinking_budget', 'adaptive_thinking', 'anthropic_speed', 'anthropic_beta'] as const,
+ chatCompletions: ['thinking_budget', 'adaptive_thinking', 'reasoning_summary', 'anthropic_beta'] as const,
+ responses: ['thinking_budget', 'adaptive_thinking', 'anthropic_beta'] as const,
messages: ['verbosity'] as const,
gemini: {
- topLevel: ['anthropicSpeed', 'anthropicBeta'] as const,
+ topLevel: ['anthropicBeta'] as const,
generationConfig: ['verbosity', 'serviceTier'] as const,
},
} as const;
diff --git a/packages/protocols/src/gemini/index.ts b/packages/protocols/src/gemini/index.ts
index d3786aad5..f8878eda2 100644
--- a/packages/protocols/src/gemini/index.ts
+++ b/packages/protocols/src/gemini/index.ts
@@ -6,8 +6,6 @@ export interface GeminiPayload {
generationConfig?: GeminiGenerationConfig;
safetySettings?: GeminiSafetySetting[];
cachedContent?: string;
- /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. */
- anthropicSpeed?: string;
/** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. */
anthropicBeta?: readonly string[];
}
diff --git a/packages/protocols/src/responses/index.ts b/packages/protocols/src/responses/index.ts
index 2669fe41d..53cf084af 100644
--- a/packages/protocols/src/responses/index.ts
+++ b/packages/protocols/src/responses/index.ts
@@ -41,8 +41,6 @@ export interface ResponsesPayload {
thinking_budget?: number;
/** Floway protocol extension. Translated to Anthropic `thinking.type: "adaptive"` / Gemini `thinkingConfig.thinkingBudget: -1` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. */
adaptive_thinking?: boolean;
- /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. */
- anthropic_speed?: string;
/** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. */
anthropic_beta?: readonly string[];
}
diff --git a/packages/translate/src/chat-completions-via-messages/request.ts b/packages/translate/src/chat-completions-via-messages/request.ts
index eb6647861..d85c8e301 100644
--- a/packages/translate/src/chat-completions-via-messages/request.ts
+++ b/packages/translate/src/chat-completions-via-messages/request.ts
@@ -202,9 +202,7 @@ export const translateChatCompletionsToMessages = async (payload: ChatCompletion
// `service_tier: 'fast'` from the Chat Completions caller maps to
// Anthropic's `speed: 'fast'`; all other defined service_tier values
- // pass through as `service_tier` on the Messages wire. An explicit
- // `anthropic_speed` from the alias-extension layer rides through
- // independently and may co-set `speed`.
+ // pass through as `service_tier` on the Messages wire.
const serviceTierFields: Partial =
payload.service_tier === 'fast'
? { speed: 'fast' }
@@ -230,7 +228,6 @@ export const translateChatCompletionsToMessages = async (payload: ChatCompletion
...(payload.tool_choice != null ? { tool_choice: translateChatCompletionsToolChoice(payload.tool_choice) } : {}),
...(hasOutputConfig ? { output_config: outputConfig } : {}),
...(thinking ? { thinking } : {}),
- ...(payload.anthropic_speed != null ? { speed: payload.anthropic_speed } : {}),
...serviceTierFields,
};
};
diff --git a/packages/translate/src/chat-completions-via-messages/request_test.ts b/packages/translate/src/chat-completions-via-messages/request_test.ts
index 01bfc0788..9dede4c21 100644
--- a/packages/translate/src/chat-completions-via-messages/request_test.ts
+++ b/packages/translate/src/chat-completions-via-messages/request_test.ts
@@ -1261,17 +1261,6 @@ test('translateChatCompletionsToMessages merges reasoning_summary onto budget-dr
assertEquals(result.thinking, { type: 'enabled', budget_tokens: 2048, display: 'summarized' });
});
-test('translateChatCompletionsToMessages emits anthropic_speed onto Messages speed', async () => {
- const result = await translateChatCompletionsToMessages(
- mkPayload({
- messages: [{ role: 'user', content: 'hi' }],
- anthropic_speed: 'fast',
- }),
- );
-
- assertEquals(result.speed, 'fast');
-});
-
test('translateChatCompletionsToMessages forwards service_tier verbatim', async () => {
const result = await translateChatCompletionsToMessages(
mkPayload({
diff --git a/packages/translate/src/chat-completions-via-responses/request_test.ts b/packages/translate/src/chat-completions-via-responses/request_test.ts
index 137df562f..70af2527d 100644
--- a/packages/translate/src/chat-completions-via-responses/request_test.ts
+++ b/packages/translate/src/chat-completions-via-responses/request_test.ts
@@ -461,7 +461,6 @@ test('translateChatCompletionsToResponses leaves Messages-only extensions as inb
messages: [{ role: 'user', content: 'hi' }],
thinking_budget: 4096,
adaptive_thinking: true,
- anthropic_speed: 'fast',
anthropic_beta: ['fast-mode-2026-02-01'],
});
@@ -469,7 +468,6 @@ test('translateChatCompletionsToResponses leaves Messages-only extensions as inb
// residue. Translate must not invent a target field.
assertEquals('thinking_budget' in result, false);
assertEquals('adaptive_thinking' in result, false);
- assertEquals('anthropic_speed' in result, false);
assertEquals('anthropic_beta' in result, false);
});
diff --git a/packages/translate/src/gemini-via-chat-completions/request_test.ts b/packages/translate/src/gemini-via-chat-completions/request_test.ts
index 45f98c146..debd6b707 100644
--- a/packages/translate/src/gemini-via-chat-completions/request_test.ts
+++ b/packages/translate/src/gemini-via-chat-completions/request_test.ts
@@ -498,19 +498,15 @@ test('buildTargetRequest emits generationConfig.serviceTier onto Chat service_ti
assertEquals(result.service_tier, 'priority');
});
-test('buildTargetRequest drops top-level Anthropic extensions (anthropicSpeed, anthropicBeta) on Chat', () => {
+test('buildTargetRequest drops top-level Anthropic extensions (anthropicBeta) on Chat', () => {
const result = buildTargetRequest(
{
contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
- anthropicSpeed: 'fast',
anthropicBeta: ['fast-mode-2026-02-01'],
},
'gpt-test',
);
- assertEquals('anthropicSpeed' in result, false);
- assertEquals('anthropic_speed' in result, false);
- assertEquals('speed' in result, false);
assertEquals('anthropicBeta' in result, false);
assertEquals('anthropic_beta' in result, false);
});
diff --git a/packages/translate/src/gemini-via-messages/request.ts b/packages/translate/src/gemini-via-messages/request.ts
index c109da54d..f8a0fa0c1 100644
--- a/packages/translate/src/gemini-via-messages/request.ts
+++ b/packages/translate/src/gemini-via-messages/request.ts
@@ -265,11 +265,6 @@ export const buildTargetRequest = (
applyGenerationConfig(request, payload.generationConfig, fallbackMaxOutputTokens);
- // Top-level Gemini Floway extensions: `anthropicSpeed` is the only one
- // with a Messages-natural slot. `anthropicBeta` is header-bound at the
- // gateway boundary (Task 5) since translate functions do not own headers.
- if (payload.anthropicSpeed != null) request.speed = payload.anthropicSpeed;
-
const tools = buildTools(payload);
if (tools) request.tools = tools;
applyLastToolCacheBreakpoint(request.tools);
diff --git a/packages/translate/src/gemini-via-messages/request_test.ts b/packages/translate/src/gemini-via-messages/request_test.ts
index 555c12aa4..870d3f388 100644
--- a/packages/translate/src/gemini-via-messages/request_test.ts
+++ b/packages/translate/src/gemini-via-messages/request_test.ts
@@ -408,16 +408,6 @@ test('buildTargetRequest rejects a part with no recognized content field', () =>
// ── Floway extension emission ──
-test('buildTargetRequest emits top-level anthropicSpeed onto Messages speed', () => {
- const result = buildTargetRequest(
- { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], anthropicSpeed: 'fast' },
- 'claude-test',
- noOptions,
- );
-
- assertEquals(result.speed, 'fast');
-});
-
test('buildTargetRequest emits generationConfig.serviceTier onto Messages service_tier', () => {
const result = buildTargetRequest(
{ contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { serviceTier: 'priority' } },
diff --git a/packages/translate/src/gemini-via-responses/request_test.ts b/packages/translate/src/gemini-via-responses/request_test.ts
index 9d568605f..747bfed02 100644
--- a/packages/translate/src/gemini-via-responses/request_test.ts
+++ b/packages/translate/src/gemini-via-responses/request_test.ts
@@ -436,13 +436,10 @@ test('buildTargetRequest drops top-level Anthropic extensions on Responses', ()
const result = buildTargetRequest(
{
contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
- anthropicSpeed: 'fast',
anthropicBeta: ['fast-mode-2026-02-01'],
},
'gpt-test',
);
- assertEquals('anthropicSpeed' in result, false);
- assertEquals('anthropic_speed' in result, false);
assertEquals('anthropicBeta' in result, false);
});
diff --git a/packages/translate/src/messages-via-chat-completions/request_test.ts b/packages/translate/src/messages-via-chat-completions/request_test.ts
index 9692cb1ea..4cc6f81bb 100644
--- a/packages/translate/src/messages-via-chat-completions/request_test.ts
+++ b/packages/translate/src/messages-via-chat-completions/request_test.ts
@@ -509,7 +509,6 @@ test('translateMessagesToChatCompletions drops Anthropic-only knobs that have no
assertEquals(result.reasoning_effort, 'medium');
assertEquals('thinking_budget' in result, false);
assertEquals('reasoning_summary' in result, false);
- assertEquals('anthropic_speed' in result, false);
});
test('translateMessagesToChatCompletions does not emit verbosity when the extension is unset', () => {
diff --git a/packages/translate/src/messages-via-responses/request_test.ts b/packages/translate/src/messages-via-responses/request_test.ts
index 3ee95b4e6..4c32d1b44 100644
--- a/packages/translate/src/messages-via-responses/request_test.ts
+++ b/packages/translate/src/messages-via-responses/request_test.ts
@@ -542,7 +542,6 @@ test('translateMessagesToResponses drops Anthropic-only mode knobs the Responses
// bridge test below and is intentionally excluded here.
assertEquals('thinking_budget' in result, false);
assertEquals('adaptive_thinking' in result, false);
- assertEquals('anthropic_speed' in result, false);
assertEquals('anthropic_beta' in result, false);
});
diff --git a/packages/translate/src/responses-via-chat-completions/request_test.ts b/packages/translate/src/responses-via-chat-completions/request_test.ts
index 448222f45..7ebfb6e15 100644
--- a/packages/translate/src/responses-via-chat-completions/request_test.ts
+++ b/packages/translate/src/responses-via-chat-completions/request_test.ts
@@ -1486,13 +1486,11 @@ test('translateResponsesToChatCompletions leaves Messages-only extensions as inb
input: [{ type: 'message', role: 'user', content: 'hi' }],
thinking_budget: 4096,
adaptive_thinking: true,
- anthropic_speed: 'fast',
anthropic_beta: ['fast-mode-2026-02-01'],
});
assertEquals('thinking_budget' in result.target, false);
assertEquals('adaptive_thinking' in result.target, false);
- assertEquals('anthropic_speed' in result.target, false);
assertEquals('anthropic_beta' in result.target, false);
});
diff --git a/packages/translate/src/responses-via-messages/request.ts b/packages/translate/src/responses-via-messages/request.ts
index fa608d971..a1896a1f3 100644
--- a/packages/translate/src/responses-via-messages/request.ts
+++ b/packages/translate/src/responses-via-messages/request.ts
@@ -348,9 +348,7 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op
// `service_tier: 'fast'` from the Responses caller maps to Anthropic's
// `speed: 'fast'`; all other defined service_tier values pass through as
// `service_tier` on the Messages wire (Anthropic accepts 'auto',
- // 'standard_only', and future literals). An explicit `anthropic_speed`
- // from the alias-extension layer rides through independently and may
- // co-set `speed`.
+ // 'standard_only', and future literals).
const serviceTierFields: Partial =
payload.service_tier === 'fast'
? { speed: 'fast' }
@@ -373,7 +371,6 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op
tool_choice: translateToolChoice(payload.tool_choice),
...(thinking ? { thinking } : {}),
...(hasOutputConfig ? { output_config: outputConfig } : {}),
- ...(payload.anthropic_speed != null ? { speed: payload.anthropic_speed } : {}),
...serviceTierFields,
};
diff --git a/packages/translate/src/responses-via-messages/request_test.ts b/packages/translate/src/responses-via-messages/request_test.ts
index c8dbd8bcb..291bddad5 100644
--- a/packages/translate/src/responses-via-messages/request_test.ts
+++ b/packages/translate/src/responses-via-messages/request_test.ts
@@ -709,11 +709,6 @@ test('translateResponsesToMessages emits adaptive_thinking onto thinking.{adapti
assertEquals(result.target.thinking, { type: 'adaptive' });
});
-test('translateResponsesToMessages emits anthropic_speed onto speed', async () => {
- const result = await translateResponsesToMessages(minimalResponsesPayload({ anthropic_speed: 'fast' }));
- assertEquals(result.target.speed, 'fast');
-});
-
test('translateResponsesToMessages leaves anthropic_beta as inbound residue for the gateway header pass', async () => {
const result = await translateResponsesToMessages(minimalResponsesPayload({ anthropic_beta: ['fast-mode-2026-02-01'] }));
assertEquals('anthropic_beta' in result.target, false);
From 8824e820d23ff0e493114b4ed31538250d8d9a4a Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 04:13:35 +0800
Subject: [PATCH 023/170] feat(aliases): extend ModelAliasesRepo with
save/create/delete
Mirror the proxies repo's CRUD shape: `create` rejects PK collisions with
a typed `{ reason: 'duplicate' }` so the route layer can map to 409 without
driver-specific error parsing, `save` upserts in place (preserving the
existing row's createdAt on conflict), `delete` returns whether the row
was removed. `getByAlias` is the targeted lookup the PATCH handler uses to
merge a partial body against the persisted row.
In-memory impl now sorts loadAll by alias to match the SQL `ORDER BY alias`
contract; the Map keyed by alias keeps PK semantics 1:1 with SQLite.
---
.../src/control-plane/model-aliases/repo.ts | 66 ++++++++++++++++++-
packages/gateway/src/repo/memory.ts | 36 ++++++++--
packages/gateway/src/repo/sql.ts | 18 ++++-
packages/gateway/src/repo/types.ts | 13 +++-
4 files changed, 124 insertions(+), 9 deletions(-)
diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts
index 3718b5fd5..1eaae7d16 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo.ts
@@ -12,17 +12,81 @@ interface ModelAliasRow {
created_at: number;
}
+const ALIAS_COLUMNS = 'alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at';
+
// The model_aliases table is operator-managed and small (dozens of rows at
// most), so the data plane reads the full table per request — no cache layer.
// `ORDER BY alias` makes the read deterministic so `/v1/models` and friends
// emit alias entries in a stable, operator-predictable order across runtimes.
export const loadAllAliases = async (db: SqlDatabase): Promise => {
const { results } = await db
- .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at FROM model_aliases ORDER BY alias')
+ .prepare(`SELECT ${ALIAS_COLUMNS} FROM model_aliases ORDER BY alias`)
.all();
return results.map(toModelAlias);
};
+export const getAliasByName = async (db: SqlDatabase, alias: string): Promise => {
+ const row = await db
+ .prepare(`SELECT ${ALIAS_COLUMNS} FROM model_aliases WHERE alias = ?`)
+ .bind(alias)
+ .first();
+ return row ? toModelAlias(row) : null;
+};
+
+// Plain INSERT — reports a PK collision through the `duplicate` return so the
+// route layer can map it to 409 without parsing driver-specific error strings.
+// The collision is pre-checked with a SELECT instead of catching the throw,
+// because the constraint-error shape differs between node:sqlite and D1; a
+// concurrent insert landing between the two statements still throws.
+export const insertAlias = async (db: SqlDatabase, alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }> => {
+ const existing = await db
+ .prepare('SELECT 1 FROM model_aliases WHERE alias = ?')
+ .bind(alias.alias)
+ .first<{ 1: number }>();
+ if (existing) return { ok: false, reason: 'duplicate' };
+ await db
+ .prepare(`INSERT INTO model_aliases (${ALIAS_COLUMNS}) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`)
+ .bind(...bindValues(alias))
+ .run();
+ return { ok: true };
+};
+
+// UPSERT — on conflict the row is overwritten in place, but `created_at`
+// is preserved (the row's first INSERT wins, matching how `proxies.save`
+// keeps the original creation timestamp on a re-save).
+export const saveAlias = async (db: SqlDatabase, alias: ModelAlias): Promise => {
+ await db
+ .prepare(
+ `INSERT INTO model_aliases (${ALIAS_COLUMNS}) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+ ON CONFLICT (alias) DO UPDATE SET
+ target_model_id = excluded.target_model_id,
+ upstream_ids_json = excluded.upstream_ids_json,
+ rules_json = excluded.rules_json,
+ visible_in_models_list = excluded.visible_in_models_list,
+ on_conflict = excluded.on_conflict,
+ display_name = excluded.display_name,
+ updated_at = unixepoch()`,
+ )
+ .bind(...bindValues(alias))
+ .run();
+};
+
+export const deleteAlias = async (db: SqlDatabase, alias: string): Promise<{ deleted: boolean }> => {
+ const result = await db.prepare('DELETE FROM model_aliases WHERE alias = ?').bind(alias).run();
+ return { deleted: (result.meta.changes ?? 0) > 0 };
+};
+
+const bindValues = (alias: ModelAlias): unknown[] => [
+ alias.alias,
+ alias.targetModelId,
+ JSON.stringify(alias.upstreamIds),
+ JSON.stringify(alias.rules),
+ alias.visibleInModelsList ? 1 : 0,
+ alias.onConflict,
+ alias.displayName ?? null,
+ alias.createdAt,
+];
+
const toModelAlias = (row: ModelAliasRow): ModelAlias => ({
alias: row.alias,
targetModelId: row.target_model_id,
diff --git a/packages/gateway/src/repo/memory.ts b/packages/gateway/src/repo/memory.ts
index 85f01b621..c424426ee 100644
--- a/packages/gateway/src/repo/memory.ts
+++ b/packages/gateway/src/repo/memory.ts
@@ -918,17 +918,41 @@ export class InMemoryRepo implements Repo {
}
}
-// Test-only in-memory backing for the alias table. The list starts empty
-// and can be reseeded via `setAll` so tests exercising alias-resolution
-// behavior do not depend on a live SQL database.
+// Test-only in-memory backing for the alias table. Mirrors SqlModelAliasesRepo:
+// `loadAll` returns rows sorted by alias, `create` rejects PK collisions,
+// `save` upserts in place. `setAll` is the test seam: tests that pre-populate
+// the table for read-only data-plane assertions reach for it directly.
export class MemoryModelAliasesRepo implements ModelAliasesRepo {
- private rows: readonly ModelAlias[] = [];
+ private rows = new Map();
loadAll(): Promise {
- return Promise.resolve(this.rows);
+ return Promise.resolve([...this.rows.values()].sort((a, b) => a.alias.localeCompare(b.alias)));
+ }
+
+ getByAlias(alias: string): Promise {
+ return Promise.resolve(this.rows.get(alias) ?? null);
+ }
+
+ create(alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }> {
+ if (this.rows.has(alias.alias)) return Promise.resolve({ ok: false, reason: 'duplicate' });
+ this.rows.set(alias.alias, alias);
+ return Promise.resolve({ ok: true });
+ }
+
+ save(alias: ModelAlias): Promise {
+ // Preserve the original row's createdAt on an upsert so re-saves do not
+ // overwrite the local deployment's first-seen timestamp.
+ const existing = this.rows.get(alias.alias);
+ const preserved = existing ? { ...alias, createdAt: existing.createdAt } : alias;
+ this.rows.set(preserved.alias, preserved);
+ return Promise.resolve();
+ }
+
+ delete(alias: string): Promise<{ deleted: boolean }> {
+ return Promise.resolve({ deleted: this.rows.delete(alias) });
}
setAll(rows: readonly ModelAlias[]): void {
- this.rows = rows;
+ this.rows = new Map(rows.map(row => [row.alias, row]));
}
}
diff --git a/packages/gateway/src/repo/sql.ts b/packages/gateway/src/repo/sql.ts
index 109b35024..044f79aae 100644
--- a/packages/gateway/src/repo/sql.ts
+++ b/packages/gateway/src/repo/sql.ts
@@ -35,7 +35,7 @@ import type {
UsersRepo,
} from './types.ts';
import { serializeStoredConfig, serializeStoredState } from './upstream-json.ts';
-import { loadAllAliases } from '../control-plane/model-aliases/repo.ts';
+import { deleteAlias, getAliasByName, insertAlias, loadAllAliases, saveAlias } from '../control-plane/model-aliases/repo.ts';
import type { ModelAlias } from '../control-plane/model-aliases/types.ts';
import { latencyBucketForMs } from '../shared/performance-histogram.ts';
import { generateSessionToken } from '../shared/session-tokens.ts';
@@ -1628,4 +1628,20 @@ class SqlModelAliasesRepo implements ModelAliasesRepo {
loadAll(): Promise {
return loadAllAliases(this.db);
}
+
+ getByAlias(alias: string): Promise {
+ return getAliasByName(this.db, alias);
+ }
+
+ create(alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }> {
+ return insertAlias(this.db, alias);
+ }
+
+ save(alias: ModelAlias): Promise {
+ return saveAlias(this.db, alias);
+ }
+
+ delete(alias: string): Promise<{ deleted: boolean }> {
+ return deleteAlias(this.db, alias);
+ }
}
diff --git a/packages/gateway/src/repo/types.ts b/packages/gateway/src/repo/types.ts
index d282aaa98..8ed620849 100644
--- a/packages/gateway/src/repo/types.ts
+++ b/packages/gateway/src/repo/types.ts
@@ -337,7 +337,18 @@ export interface Repo {
}
// Operator-managed alias table; small (dozens of rows at most) and read
-// per request, so the repo deliberately exposes only a full-table fetch.
+// per request, so the repo deliberately exposes only a full-table fetch
+// plus the keyed lookup and mutations the control-plane CRUD needs.
export interface ModelAliasesRepo {
loadAll(): Promise;
+ getByAlias(alias: string): Promise;
+ // INSERT-only — fails with `duplicate` on PK conflict so the route layer
+ // surfaces 409 to the dashboard instead of silently overwriting an
+ // existing row.
+ create(alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }>;
+ // UPSERT semantics — used by the PATCH handler and by import/restore flows
+ // that need to land a row regardless of whether it already exists.
+ save(alias: ModelAlias): Promise;
+ // Returns whether a row was actually removed; routes treat false as 404.
+ delete(alias: string): Promise<{ deleted: boolean }>;
}
From 9abf5c28f5708c90460873cbf0d49f1dcc82b291 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 04:15:34 +0800
Subject: [PATCH 024/170] feat(aliases): add REST CRUD endpoints under
/api/aliases
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Operator-managed alias rows previously had no admin surface — the only
write path was a hand-edited migration. Wire admin-only CRUD next to the
existing model-aliases code:
GET /api/aliases list, sorted by alias
POST /api/aliases create; 409 on PK conflict
PATCH /api/aliases/:alias partial update; 404 when missing
DELETE /api/aliases/:alias idempotent-shaped 204/404
The Zod schemas mirror the closed rule knob set (reasoning effort /
budgetTokens / adaptive / summary, verbosity, serviceTier,
anthropicBeta[]) under `.strict()` so an unknown rule key is a 400 — but
each value stays freeform: a newly-introduced upstream-side enum ships
through without a gateway code change (Goal 2). Alias names are bounded
by the same `[A-Za-z0-9_.:/-]+` grammar the real model ids already use.
PATCH propagates the absent/null distinction for `displayName` so the
operator can clear an operator-set label back to the synthesized
fallback without dropping into a separate "reset" route.
---
.../src/control-plane/model-aliases/routes.ts | 80 ++++++
.../model-aliases/routes_test.ts | 267 ++++++++++++++++++
.../control-plane/model-aliases/serialize.ts | 31 ++
packages/gateway/src/control-plane/routes.ts | 8 +-
packages/gateway/src/control-plane/schemas.ts | 70 +++++
5 files changed, 455 insertions(+), 1 deletion(-)
create mode 100644 packages/gateway/src/control-plane/model-aliases/routes.ts
create mode 100644 packages/gateway/src/control-plane/model-aliases/routes_test.ts
create mode 100644 packages/gateway/src/control-plane/model-aliases/serialize.ts
diff --git a/packages/gateway/src/control-plane/model-aliases/routes.ts b/packages/gateway/src/control-plane/model-aliases/routes.ts
new file mode 100644
index 000000000..961a0da73
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/routes.ts
@@ -0,0 +1,80 @@
+import type { Context } from 'hono';
+
+import { aliasToJson } from './serialize.ts';
+import type { ModelAlias, ModelAliasRules } from './types.ts';
+import { type CtxWithJson } from '../../middleware/zod-validator.ts';
+import { getRepo } from '../../repo/index.ts';
+import type { createAliasBody, updateAliasBody } from '../schemas.ts';
+
+export const listAliases = async (c: Context) => {
+ const aliases = await getRepo().modelAliases.loadAll();
+ return c.json(aliases.map(aliasToJson));
+};
+
+export const createAlias = async (c: CtxWithJson) => {
+ const body = c.req.valid('json');
+ const record: ModelAlias = {
+ alias: body.alias,
+ targetModelId: body.targetModelId,
+ upstreamIds: body.upstreamIds,
+ rules: body.rules,
+ visibleInModelsList: body.visibleInModelsList,
+ // `real-only` is the safe default: an alias whose target id collides with
+ // a real model id stays hidden until the operator opts the alias into one
+ // of the surfacing modes. Matches the migration's column default.
+ onConflict: body.onConflict ?? 'real-only',
+ ...(body.displayName !== undefined ? { displayName: body.displayName } : {}),
+ createdAt: Math.floor(Date.now() / 1000),
+ };
+
+ const result = await getRepo().modelAliases.create(record);
+ if (!result.ok) {
+ return c.json({ error: { type: 'conflict', message: `Alias "${body.alias}" already exists` } }, 409);
+ }
+
+ return c.json(aliasToJson(record), 201);
+};
+
+export const updateAlias = async (c: CtxWithJson) => {
+ const aliasName = c.req.param('alias') ?? '';
+ const body = c.req.valid('json');
+
+ const repo = getRepo();
+ const existing = await repo.modelAliases.getByAlias(aliasName);
+ if (!existing) return c.json({ error: 'Alias not found' }, 404);
+
+ // Field-by-field merge so an absent field preserves the existing value.
+ // `displayName` accepts an explicit null to clear the operator-set label
+ // back to the synthesized fallback; `nextDisplayName` checks undefined and
+ // null separately to keep the absent / null distinction `??` would collapse.
+ const merged: ModelAlias = {
+ alias: existing.alias,
+ targetModelId: body.targetModelId ?? existing.targetModelId,
+ upstreamIds: body.upstreamIds ?? existing.upstreamIds,
+ rules: body.rules ?? existing.rules,
+ visibleInModelsList: body.visibleInModelsList ?? existing.visibleInModelsList,
+ onConflict: body.onConflict ?? existing.onConflict,
+ createdAt: existing.createdAt,
+ ...nextDisplayName(existing, body.displayName),
+ };
+
+ await repo.modelAliases.save(merged);
+ return c.json(aliasToJson(merged));
+};
+
+const nextDisplayName = (existing: ModelAlias, patch: string | null | undefined): { displayName?: string } => {
+ if (patch === undefined) return existing.displayName !== undefined ? { displayName: existing.displayName } : {};
+ if (patch === null) return {};
+ return { displayName: patch };
+};
+
+export const deleteAlias = async (c: Context) => {
+ const aliasName = c.req.param('alias') ?? '';
+ const { deleted } = await getRepo().modelAliases.delete(aliasName);
+ if (!deleted) return c.json({ error: 'Alias not found' }, 404);
+ return c.body(null, 204);
+};
+
+// Re-export so the routes module can wire the type-level `Rules` carrier
+// through the RPC client without consumers having to chase the alias subtree.
+export type { ModelAliasRules };
diff --git a/packages/gateway/src/control-plane/model-aliases/routes_test.ts b/packages/gateway/src/control-plane/model-aliases/routes_test.ts
new file mode 100644
index 000000000..4143ed1cf
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/routes_test.ts
@@ -0,0 +1,267 @@
+import { test } from 'vitest';
+
+import type { SerializedModelAlias } from './serialize.ts';
+import { requestApp, setupAppTest } from '../../test-helpers.ts';
+import { assertEquals } from '@floway-dev/test-utils';
+
+const authedGet = (adminSession: string): RequestInit => ({
+ method: 'GET',
+ headers: { 'x-floway-session': adminSession },
+});
+
+const authedJson = (adminSession: string, method: 'POST' | 'PATCH' | 'DELETE', body?: unknown): RequestInit => ({
+ method,
+ headers: {
+ 'content-type': 'application/json',
+ 'x-floway-session': adminSession,
+ },
+ ...(body === undefined ? {} : { body: JSON.stringify(body) }),
+});
+
+const baseCreate = (overrides: Record<string, unknown> = {}) => ({
+ alias: 'opus-xhigh',
+ targetModelId: 'claude-opus-4-6',
+ upstreamIds: [],
+ rules: { reasoning: { effort: 'xhigh' } },
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ ...overrides, // spread last so caller-supplied fields replace the defaults above
+});
+
+test('GET /api/aliases returns rows sorted by alias', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.save({
+ alias: 'zzz-late',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: [],
+ rules: {},
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ createdAt: 1_700_000_001,
+ });
+ await repo.modelAliases.save({
+ alias: 'aaa-early',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: [],
+ rules: {},
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ createdAt: 1_700_000_000,
+ });
+
+ const resp = await requestApp('/api/aliases', authedGet(adminSession));
+ assertEquals(resp.status, 200);
+ const list = (await resp.json()) as SerializedModelAlias[];
+ assertEquals(list.map(a => a.alias), ['aaa-early', 'zzz-late']);
+});
+
+test('POST /api/aliases creates a row and echoes the serialized shape', async () => {
+ const { repo, adminSession } = await setupAppTest();
+
+ const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({
+ displayName: 'Opus Extra-High',
+ upstreamIds: ['up_a', 'up_b'],
+ rules: { reasoning: { effort: 'xhigh' }, anthropicBeta: ['fine-grained-tool-streaming'] },
+ })));
+ assertEquals(resp.status, 201);
+ const created = (await resp.json()) as SerializedModelAlias;
+ assertEquals(created.alias, 'opus-xhigh');
+ assertEquals(created.target_model_id, 'claude-opus-4-6');
+ assertEquals(created.upstream_ids, ['up_a', 'up_b']);
+ assertEquals(created.rules, { reasoning: { effort: 'xhigh' }, anthropicBeta: ['fine-grained-tool-streaming'] });
+ assertEquals(created.visible_in_models_list, true);
+ assertEquals(created.on_conflict, 'real-only');
+ assertEquals(created.display_name, 'Opus Extra-High');
+ assertEquals(typeof created.created_at, 'number');
+
+ const stored = await repo.modelAliases.getByAlias('opus-xhigh');
+ assertEquals(stored?.targetModelId, 'claude-opus-4-6');
+ assertEquals(stored?.displayName, 'Opus Extra-High');
+});
+
+test('POST /api/aliases defaults onConflict to real-only when omitted', async () => {
+ const { adminSession } = await setupAppTest();
+
+ const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', {
+ alias: 'no-onconflict',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: [],
+ rules: {},
+ visibleInModelsList: true,
+ }));
+ assertEquals(resp.status, 201);
+ const created = (await resp.json()) as SerializedModelAlias;
+ assertEquals(created.on_conflict, 'real-only');
+});
+
+test('POST /api/aliases returns 409 on duplicate alias', async () => {
+ const { adminSession } = await setupAppTest();
+
+ const first = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate()));
+ assertEquals(first.status, 201);
+
+ const dup = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate()));
+ assertEquals(dup.status, 409);
+ const body = (await dup.json()) as { error: { type: string; message: string } };
+ assertEquals(body.error.type, 'conflict');
+});
+
+test('POST /api/aliases rejects an empty alias name with 400', async () => {
+ const { adminSession } = await setupAppTest();
+ const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({ alias: '' })));
+ assertEquals(resp.status, 400);
+});
+
+test('POST /api/aliases rejects an alias containing whitespace with 400', async () => {
+ const { adminSession } = await setupAppTest();
+ const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({ alias: 'has space' })));
+ assertEquals(resp.status, 400);
+});
+
+test('POST /api/aliases rejects an unknown rule key with 400', async () => {
+ const { adminSession } = await setupAppTest();
+ const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({
+ rules: { reasoning: { effort: 'high' }, mysteryKnob: true } as unknown as Record<string, unknown>,
+ })));
+ assertEquals(resp.status, 400); // the rules schema is `.strict()`, so the extra key fails validation
+});
+
+test('POST /api/aliases rejects an invalid onConflict value with 400', async () => {
+ const { adminSession } = await setupAppTest();
+ const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({ onConflict: 'mystery-mode' })));
+ assertEquals(resp.status, 400);
+});
+
+test('POST /api/aliases requires admin auth (non-admin api key returns 403)', async () => {
+ const { adminSession, apiKey } = await setupAppTest();
+
+ // Sanity: the admin call succeeds so the failure below pins the auth gate,
+ // not a request-shape mistake shared by both calls.
+ const adminResp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate()));
+ assertEquals(adminResp.status, 201);
+
+ const userResp = await requestApp('/api/aliases', {
+ method: 'POST',
+ headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
+ body: JSON.stringify(baseCreate({ alias: 'other' })),
+ });
+ assertEquals(userResp.status, 403);
+});
+
+test('PATCH /api/aliases/:alias merges a partial body and preserves untouched fields', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.save({
+ alias: 'opus-xhigh',
+ targetModelId: 'claude-opus-4-6',
+ upstreamIds: ['up_a'],
+ rules: { reasoning: { effort: 'xhigh' } },
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ displayName: 'Existing Label',
+ createdAt: 1_700_000_000,
+ });
+
+ const resp = await requestApp('/api/aliases/opus-xhigh', authedJson(adminSession, 'PATCH', {
+ rules: { reasoning: { effort: 'medium' }, serviceTier: 'priority' },
+ }));
+ assertEquals(resp.status, 200);
+ const updated = (await resp.json()) as SerializedModelAlias;
+ // Patched fields took effect.
+ assertEquals(updated.rules, { reasoning: { effort: 'medium' }, serviceTier: 'priority' });
+ // Untouched fields preserved verbatim.
+ assertEquals(updated.target_model_id, 'claude-opus-4-6');
+ assertEquals(updated.upstream_ids, ['up_a']);
+ assertEquals(updated.visible_in_models_list, true);
+ assertEquals(updated.display_name, 'Existing Label');
+ assertEquals(updated.created_at, 1_700_000_000);
+});
+
+test('PATCH /api/aliases/:alias accepts displayName=null to clear the label', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.save({
+ alias: 'opus-xhigh',
+ targetModelId: 'claude-opus-4-6',
+ upstreamIds: [],
+ rules: {},
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ displayName: 'Existing Label',
+ createdAt: 1_700_000_000,
+ });
+
+ const resp = await requestApp('/api/aliases/opus-xhigh', authedJson(adminSession, 'PATCH', { displayName: null }));
+ assertEquals(resp.status, 200);
+ const updated = (await resp.json()) as SerializedModelAlias;
+ assertEquals(updated.display_name, null);
+
+ const stored = await repo.modelAliases.getByAlias('opus-xhigh');
+ assertEquals(stored?.displayName, undefined);
+});
+
+test('PATCH /api/aliases/:alias returns 404 when the alias does not exist', async () => {
+ const { adminSession } = await setupAppTest();
+ const resp = await requestApp('/api/aliases/nope', authedJson(adminSession, 'PATCH', { visibleInModelsList: false }));
+ assertEquals(resp.status, 404);
+});
+
+test('PATCH /api/aliases/:alias requires admin auth', async () => {
+ const { repo, apiKey } = await setupAppTest();
+ await repo.modelAliases.save({
+ alias: 'opus-xhigh',
+ targetModelId: 'claude-opus-4-6',
+ upstreamIds: [],
+ rules: {},
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ createdAt: 1_700_000_000,
+ });
+
+ const userResp = await requestApp('/api/aliases/opus-xhigh', { // api-key credentials, no admin session header
+ method: 'PATCH',
+ headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
+ body: JSON.stringify({ visibleInModelsList: false }),
+ });
+ assertEquals(userResp.status, 403);
+});
+
+test('DELETE /api/aliases/:alias returns 204 on success and removes the row', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.save({
+ alias: 'doomed',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: [],
+ rules: {},
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ createdAt: 1_700_000_000,
+ });
+
+ const resp = await requestApp('/api/aliases/doomed', authedJson(adminSession, 'DELETE'));
+ assertEquals(resp.status, 204);
+ assertEquals(await repo.modelAliases.getByAlias('doomed'), null);
+});
+
+test('DELETE /api/aliases/:alias returns 404 when the alias does not exist', async () => {
+ const { adminSession } = await setupAppTest();
+ const resp = await requestApp('/api/aliases/nope', authedJson(adminSession, 'DELETE'));
+ assertEquals(resp.status, 404);
+});
+
+test('DELETE /api/aliases/:alias requires admin auth', async () => {
+ const { repo, apiKey } = await setupAppTest();
+ await repo.modelAliases.save({
+ alias: 'doomed',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: [],
+ rules: {},
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ createdAt: 1_700_000_000,
+ });
+
+ const resp = await requestApp('/api/aliases/doomed', {
+ method: 'DELETE',
+ headers: { 'x-api-key': apiKey.key },
+ });
+ assertEquals(resp.status, 403);
+});
diff --git a/packages/gateway/src/control-plane/model-aliases/serialize.ts b/packages/gateway/src/control-plane/model-aliases/serialize.ts
new file mode 100644
index 000000000..5889997cc
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/serialize.ts
@@ -0,0 +1,31 @@
+// Wire-format projection for the operator-managed model_aliases rows. The
+// dashboard reads the same shape it sends back for create/update; the few
+// snake_cased fields (`visible_in_models_list`, `on_conflict`, `created_at`,
+// `display_name`) follow the rest of the control-plane HTTP surface.
+
+import type { ModelAlias, ModelAliasRules, OnConflict } from './types.ts';
+
+export interface SerializedModelAlias {
+ alias: string;
+ target_model_id: string;
+ upstream_ids: string[];
+ rules: ModelAliasRules;
+ visible_in_models_list: boolean;
+ on_conflict: OnConflict;
+ display_name: string | null;
+ created_at: number;
+}
+
+export const aliasToJson = (alias: ModelAlias): SerializedModelAlias => ({
+ alias: alias.alias,
+ target_model_id: alias.targetModelId,
+ // Defensive copy: the readonly arrays inside ModelAlias are shared with
+ // callers, and JSON serialization would otherwise expose the same backing
+ // array used by `loadAll`.
+ upstream_ids: [...alias.upstreamIds],
+ rules: alias.rules,
+ visible_in_models_list: alias.visibleInModelsList,
+ on_conflict: alias.onConflict,
+ display_name: alias.displayName ?? null,
+ created_at: alias.createdAt,
+});
diff --git a/packages/gateway/src/control-plane/routes.ts b/packages/gateway/src/control-plane/routes.ts
index 94b5f06ff..35ceb71de 100644
--- a/packages/gateway/src/control-plane/routes.ts
+++ b/packages/gateway/src/control-plane/routes.ts
@@ -5,10 +5,11 @@ import { authLogin, authLogout, authMe } from './auth/routes.ts';
import { copilotQuota } from './copilot-quota/routes.ts';
import { exportData, importData } from './data-transfer/routes.ts';
import { dumpRoutes } from './dump.ts';
+import { createAlias, deleteAlias, listAliases, updateAlias } from './model-aliases/routes.ts';
import { controlPlaneModels } from './models/routes.ts';
import { performanceOverview, performanceTelemetry } from './performance/routes.ts';
import { createProxy, deleteProxy, listAllBackoffs, listProxies, listProxyBackoffs, resetProxyBackoffs, testProxy, updateProxy } from './proxies/routes.ts';
-import { authLoginBody, changeOwnPasswordBody, claudeCodeAuthorizeUrlBody, claudeCodeImportBody, claudeCodeProbeQuotaBody, claudeCodeRefreshNowBody, claudeCodeReimportBody, claudeCodeSetupTokenImportBody, claudeCodeSetupTokenReimportBody, codexAuthorizeUrlBody, codexImportBody, codexRefreshNowBody, codexReimportBody, copilotAuthPollBody, createKeyBody, createProxyBody, createUpstreamBody, createUserBody, exportQuery, fetchModelsBody, importBody, performanceQuery, resetBackoffBody, searchConfigSchema, searchUsageQuery, testProxyBody, tokenUsageQuery, updateKeyBody, updateProxyBody, updateUpstreamBody, updateUserBody } from './schemas.ts';
+import { authLoginBody, changeOwnPasswordBody, claudeCodeAuthorizeUrlBody, claudeCodeImportBody, claudeCodeProbeQuotaBody, claudeCodeRefreshNowBody, claudeCodeReimportBody, claudeCodeSetupTokenImportBody, claudeCodeSetupTokenReimportBody, codexAuthorizeUrlBody, codexImportBody, codexRefreshNowBody, codexReimportBody, copilotAuthPollBody, createAliasBody, createKeyBody, createProxyBody, createUpstreamBody, createUserBody, exportQuery, fetchModelsBody, importBody, performanceQuery, resetBackoffBody, searchConfigSchema, searchUsageQuery, testProxyBody, tokenUsageQuery, updateAliasBody, updateKeyBody, updateProxyBody, updateUpstreamBody, updateUserBody } from './schemas.ts';
import { getSearchConfigRoute, putSearchConfigRoute, testSearchConfigRoute } from './search-config/routes.ts';
import { searchUsage } from './search-usage/routes.ts';
import { tokenUsage } from './token-usage/routes.ts';
@@ -100,6 +101,11 @@ export const controlPlaneRoutes = new Hono<{ Variables: AuthVars }>()
.get('/search-config', getSearchConfigRoute)
.put('/search-config', zValidator('json', searchConfigSchema), putSearchConfigRoute)
.post('/search-config/test', zValidator('json', searchConfigSchema), testSearchConfigRoute)
+ // Model aliases.
+ .get('/aliases', listAliases)
+ .post('/aliases', zValidator('json', createAliasBody), createAlias)
+ .patch('/aliases/:alias', zValidator('json', updateAliasBody), updateAlias)
+ .delete('/aliases/:alias', deleteAlias)
.get('/export', zValidator('query', exportQuery), exportData)
.post('/import', zValidator('json', importBody), importData));
diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
index f718539ee..60f5df604 100644
--- a/packages/gateway/src/control-plane/schemas.ts
+++ b/packages/gateway/src/control-plane/schemas.ts
@@ -635,3 +635,73 @@ export const performanceQuery = z.object({
bucket: z.enum(['hour', '4h', '8h', 'day', 'all']).optional(),
timezone_offset_minutes: z.string().optional(),
});
+
+// --- model aliases ---
+//
+// Operator-managed alias rows wired through `/api/aliases`. The schemas
+// describe the request bodies the dashboard sends; deeper invariants
+// (the alias's target model exists in the catalog, the upstream ids
+// resolve, etc.) are intentionally NOT enforced here — an alias is allowed
+// to point at a target that is currently absent so an operator can pre-
+// stage the row before the upstream is registered, mirroring how the
+// catalog tolerates pending references.
+
+// Mirror the public model-id grammar: letters, digits, `_ . : - / `. Matches
+// the surface ids the dashboard already accepts in the models picker and the
+// `/v1/models` listing, so an alias name is interchangeable with a real id at
+// the request boundary.
+export const MODEL_ALIAS_PATTERN = /^[A-Za-z0-9_.:\-/]+$/;
+
+const aliasNameSchema = z.string().min(1).regex(MODEL_ALIAS_PATTERN, 'alias must be 1+ chars of [A-Za-z0-9_.:-/]');
+
+// Rule field values pass through to the upstream verbatim — the gateway
+// deliberately does not enum-gate operator input here. The Goal-2 contract
+// is that a freshly added enum upstream-side ships through without a
+// gateway code change, so we validate shape (non-empty string, in-range
+// number) but never set membership.
+const aliasReasoningSchema = z.object({
+ effort: z.string().min(1).optional(),
+ budgetTokens: z.number().int().nonnegative().optional(),
+ adaptive: z.boolean().optional(),
+ summary: z.string().min(1).optional(),
+}).strict().refine(
+ r => r.effort !== undefined || r.budgetTokens !== undefined || r.adaptive !== undefined || r.summary !== undefined,
+ { message: 'reasoning must declare at least one of effort, budgetTokens, adaptive, summary' },
+);
+
+const aliasRulesSchema = z.object({
+ reasoning: aliasReasoningSchema.optional(),
+ verbosity: z.string().min(1).optional(),
+ serviceTier: z.string().min(1).optional(),
+ // Each beta header token is a non-empty string. Empty arrays are accepted
+ // (the dashboard sends `[]` when the operator clears every tag) and are
+ // semantically equivalent to omitting the field.
+ anthropicBeta: z.array(z.string().min(1)).optional(),
+}).strict();
+
+const onConflictSchema = z.enum(['alias-only', 'real-only', 'both-real-first', 'both-alias-first']);
+const upstreamIdsSchema = z.array(z.string().min(1));
+
+export const createAliasBody = z.object({
+ alias: aliasNameSchema,
+ targetModelId: z.string().min(1),
+ upstreamIds: upstreamIdsSchema,
+ rules: aliasRulesSchema,
+ visibleInModelsList: z.boolean(),
+ // Defaults to `'real-only'` server-side when omitted so the dashboard's
+ // "create" form does not have to ship a default — the route layer fills it.
+ onConflict: onConflictSchema.optional(),
+ displayName: z.string().min(1).optional(),
+});
+
+// PATCH accepts a partial shape. `displayName` is nullable so the operator
+// can clear an existing label back to the synthesized fallback; absent vs.
+// null is meaningful, and the handler branches on undefined vs. null.
+export const updateAliasBody = z.object({
+ targetModelId: z.string().min(1).optional(),
+ upstreamIds: upstreamIdsSchema.optional(),
+ rules: aliasRulesSchema.optional(),
+ visibleInModelsList: z.boolean().optional(),
+ onConflict: onConflictSchema.optional(),
+ displayName: z.string().min(1).nullable().optional(),
+});
From 26d3dd9b987365f57d10f863e2fc40803c6a9640 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 04:23:39 +0800
Subject: [PATCH 025/170] feat(web): alias-list settings card + edit dialog
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Adds the dashboard surface for the new /api/aliases CRUD endpoints:
- useModelAliases composable mirrors the proxies-store pattern (module-
scoped cache, error / loading refs, load()).
- AliasesSettingsCard slots into the Settings page directly under
ProxiesSettingsCard, sharing the glass-card styling and animate-in
delay ordering.
- AliasRow surfaces the alias id, optional display name, target model,
rule badges (sourced from formatAliasRuleBadges so the badge order
matches the rest of the dashboard), and an `on_conflict` chip.
- AliasEditDialog is a single modal for both create and edit. Reasoning
is rendered as a None / Effort / Budget / Adaptive radio + a separate
summary input so the mutually-exclusive wire shape is visible at a
glance. Suggestion hints come from the target model's chat.reasoning
metadata when it matches a real catalog entry, but every value field
stays freeform — Goal 2.
Co-located component-level smoke tests use @vue/test-utils (newly added
as a devDep) plus happy-dom. The dialog tests stub the api client, the
two stores, and reka-ui's portaling Dialog so the form mounts inline
where assertions can reach the inputs and read the posted JSON.
The gateway package's exports map gains two new type-only subpaths
(`./control-plane/model-aliases/serialize`, `./control-plane/model-aliases/types`)
so apps/web can pull `SerializedModelAlias` and `ModelAliasRules` as the
source-of-truth types without crossing the existing deep-import ban.
---
apps/web/package.json | 1 +
apps/web/src/api/types.ts | 2 +
.../components/alias-edit/AliasEditDialog.vue | 349 ++++++++++++++++++
.../alias-edit/AliasEditDialog_test.ts | 167 +++++++++
apps/web/src/components/settings/AliasRow.vue | 90 +++++
.../src/components/settings/AliasRow_test.ts | 128 +++++++
.../settings/AliasesSettingsCard.vue | 65 ++++
apps/web/src/composables/useModelAliases.ts | 29 ++
apps/web/src/pages/dashboard/settings.vue | 31 +-
apps/web/vitest.config.ts | 5 +
packages/gateway/package.json | 2 +
pnpm-lock.yaml | 234 ++++++++++++
12 files changed, 1100 insertions(+), 3 deletions(-)
create mode 100644 apps/web/src/components/alias-edit/AliasEditDialog.vue
create mode 100644 apps/web/src/components/alias-edit/AliasEditDialog_test.ts
create mode 100644 apps/web/src/components/settings/AliasRow.vue
create mode 100644 apps/web/src/components/settings/AliasRow_test.ts
create mode 100644 apps/web/src/components/settings/AliasesSettingsCard.vue
create mode 100644 apps/web/src/composables/useModelAliases.ts
diff --git a/apps/web/package.json b/apps/web/package.json
index 2bab9e262..75a4ca003 100644
--- a/apps/web/package.json
+++ b/apps/web/package.json
@@ -42,6 +42,7 @@
"@unocss/preset-wind3": "^66.5.10",
"@unocss/reset": "^66.5.10",
"@vitejs/plugin-vue": "^6.0.1",
+ "@vue/test-utils": "^2.4.11",
"happy-dom": "^20.0.0",
"unocss": "^66.5.10",
"unocss-preset-animations": "^1.3.0",
diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts
index 8f11835e2..66216b812 100644
--- a/apps/web/src/api/types.ts
+++ b/apps/web/src/api/types.ts
@@ -305,6 +305,8 @@ export interface FlagDef {
// than redeclaring the shape) makes any future field rename a compile error
// here instead of a runtime mismatch the next time someone refreshes the page.
export type { SerializedProxyRecord as ProxyRecord, SerializedBackoffRow as BackoffRow } from '@floway-dev/gateway/control-plane/proxies/serialize';
+export type { SerializedModelAlias as ModelAlias } from '@floway-dev/gateway/control-plane/model-aliases/serialize';
+export type { ModelAliasRules, OnConflict as ModelAliasOnConflict } from '@floway-dev/gateway/control-plane/model-aliases/types';
// 409 body returned by DELETE /api/proxies/:id when the row is referenced
// by an upstream's fallback list.
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
new file mode 100644
index 000000000..e247137b0
--- /dev/null
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -0,0 +1,349 @@
+
+
+
+
+
+
+ {{ saveError }}
+
+
+
+
+
Alias name
+
+
Alias names are the primary key and cannot be changed; delete and recreate to rename.
+
+
+
+ Display name (optional)
+
+
+
+
+
+ Target model id
+
+
+ {{ opt.label }}
+
+
+
+
+ Upstreams (leave empty to allow any upstream that serves the target)
+
+
+
+
+
+ On conflict
+
+
+
+
+
+ Visible in /v1/models
+
+
+
+
+ Reasoning
+
+
+
+ {{ opt.label }}
+
+
+
+
+
Effort
+
+
+
+
+
Target supports: {{ effortSuggestions.join(', ') }}
+
+
+
+
Budget tokens
+
+
+ Target range:
+ min {{ budgetMin }}
+ ,
+ max {{ budgetMax }}
+
+
+
+
+ Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.
+
+
+
+ Reasoning summary (optional)
+
+
+
+
+
+
+
+
+
+
+ Anthropic beta headers (comma- or Enter-separated tokens)
+
+
+
+
+ Save
+ Cancel
+
+
+
+
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
new file mode 100644
index 000000000..976a24a18
--- /dev/null
+++ b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
@@ -0,0 +1,167 @@
+// @vitest-environment happy-dom
+
+import { mount } from '@vue/test-utils';
+import { afterEach, beforeEach, expect, test, vi } from 'vitest';
+import { defineComponent, h, ref } from 'vue';
+
+import type { ModelAlias } from '../../api/types.ts';
+
+// Module-level mocks for the api client + every store the dialog imports.
+// The dialog stays as-is; we substitute the dependencies so the component
+// renders and submits without any real HTTP. callApi stays the real helper;
+// the $post / $patch endpoint stubs are spies so tests can read what was posted.
+const createAliasMock = vi.fn(async (_args: { json: unknown }) => new Response(JSON.stringify({}), { status: 201, headers: { 'content-type': 'application/json' } }));
+const patchAliasMock = vi.fn(async (_args: { param: { alias: string }; json: unknown }) => new Response(JSON.stringify({}), { status: 200, headers: { 'content-type': 'application/json' } }));
+
+vi.mock('../../api/client.ts', async () => {
+ const { callApi: realCallApi } = await vi.importActual('../../api/client.ts');
+ return {
+ useApi: () => ({
+ api: {
+ aliases: Object.assign(
+ { $post: (args: { json: unknown }) => createAliasMock(args) },
+ { ':alias': { $patch: (args: { param: { alias: string }; json: unknown }) => patchAliasMock(args) } },
+ ),
+ },
+ }),
+ callApi: realCallApi,
+ };
+});
+
+vi.mock('../../composables/useModels.ts', () => ({
+ useModelsStore: () => ({
+ models: {
+ value: [
+ { id: 'gpt-5.4', display_name: 'GPT-5.4', object: 'model', type: 'model', limits: {}, kind: 'chat', chat: { reasoning: { effort: { supported: ['low', 'medium', 'high'], default: 'medium' }, budget_tokens: { min: 1024, max: 8192 }, adaptive: true } } },
+ { id: 'claude-opus-4-6', display_name: 'Claude Opus 4.6', object: 'model', type: 'model', limits: {}, kind: 'chat' },
+ ],
+ },
+ loading: { value: false },
+ error: { value: null },
+ load: vi.fn(async () => undefined),
+ }),
+}));
+
+vi.mock('../../composables/useUpstreams.ts', () => ({
+ useUpstreamsStore: () => ({
+ upstreams: {
+ value: [
+ { id: 'up_oai', name: 'OpenAI' },
+ { id: 'up_anth', name: 'Anthropic' },
+ ],
+ },
+ loading: { value: false },
+ load: vi.fn(async () => undefined),
+ }),
+}));
+
+// reka-ui's Dialog mounts via Teleport into document.body and renders a
+// portal — we stub it down to a passthrough so happy-dom mounts the slot
+// content inline where assertions can reach it.
+vi.mock('@floway-dev/ui', async () => {
+ const real = await vi.importActual('@floway-dev/ui');
+ const Passthrough = defineComponent({ name: 'Passthrough', setup(_props, { slots }) { return () => h('div', slots.default?.()); } });
+ return { ...real, Dialog: Passthrough };
+});
+
+beforeEach(() => {
+ createAliasMock.mockClear();
+ patchAliasMock.mockClear();
+});
+
+afterEach(() => {
+ vi.clearAllMocks();
+});
+
+test('AliasEditDialog (create mode) posts a payload matching the form state', async () => {
+ const { default: AliasEditDialog } = await import('./AliasEditDialog.vue');
+ const open = ref(true);
+
+ const wrapper = mount(defineComponent({
+ components: { AliasEditDialog },
+ setup() { return { open }; },
+ template: ' ',
+ }));
+
+ // Fill the form: alias name + target id are the only required fields for
+ // the create-mode happy path. Everything else uses its default.
+ const aliasInput = wrapper.find('input[placeholder="codex-auto-review"]');
+ expect(aliasInput.exists()).toBe(true);
+ await aliasInput.setValue('opus-fast');
+
+ const targetInput = wrapper.find('input[placeholder="gpt-5.4"]');
+ expect(targetInput.exists()).toBe(true);
+ await targetInput.setValue('claude-opus-4-6');
+
+ // Click Save.
+ const saveBtn = wrapper.findAll('button').find(b => b.text() === 'Save');
+ expect(saveBtn).toBeDefined();
+ await saveBtn!.trigger('click');
+ // Flush microtasks so the async save completes.
+ await new Promise(r => setTimeout(r, 0));
+
+ expect(createAliasMock).toHaveBeenCalledTimes(1);
+ const args = createAliasMock.mock.calls[0]![0];
+ expect(args.json).toMatchObject({
+ alias: 'opus-fast',
+ targetModelId: 'claude-opus-4-6',
+ upstreamIds: [],
+ rules: {},
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ });
+});
+
+test('AliasEditDialog (edit mode) pre-fills the form and PATCHes every editable field', async () => {
+ const { default: AliasEditDialog } = await import('./AliasEditDialog.vue');
+ const open = ref(true);
+ const record: ModelAlias = {
+ alias: 'opus-xhigh',
+ target_model_id: 'claude-opus-4-6',
+ upstream_ids: ['up_anth'],
+ rules: { reasoning: { effort: 'xhigh' } },
+ visible_in_models_list: true,
+ on_conflict: 'real-only',
+ display_name: 'Opus XHigh',
+ created_at: 1_700_000_000,
+ };
+
+ const wrapper = mount(defineComponent({
+ components: { AliasEditDialog },
+ setup() { return { open, record }; },
+ template: ' ',
+ }));
+
+ // Alias name input should be disabled in edit mode (PK is immutable).
+ const aliasInput = wrapper.find('input[placeholder="codex-auto-review"]');
+ expect(aliasInput.exists()).toBe(true);
+ expect((aliasInput.element as HTMLInputElement).disabled).toBe(true);
+ expect((aliasInput.element as HTMLInputElement).value).toBe('opus-xhigh');
+
+ // Display name pre-filled.
+ const displayInput = wrapper.find('input[placeholder="Codex Auto Review"]');
+ expect((displayInput.element as HTMLInputElement).value).toBe('Opus XHigh');
+
+ // Target id pre-filled.
+ const targetInput = wrapper.find('input[placeholder="gpt-5.4"]');
+ expect((targetInput.element as HTMLInputElement).value).toBe('claude-opus-4-6');
+
+ // Change one field and submit; PATCH carries the merged shape (every editable
+ // field, not just the diff — the route layer merges against the stored row).
+ await targetInput.setValue('gpt-5.4');
+ const saveBtn = wrapper.findAll('button').find(b => b.text() === 'Save');
+ await saveBtn!.trigger('click');
+ await new Promise(r => setTimeout(r, 0));
+
+ expect(patchAliasMock).toHaveBeenCalledTimes(1);
+ const args = patchAliasMock.mock.calls[0]![0];
+ expect(args.param.alias).toBe('opus-xhigh');
+ expect(args.json).toMatchObject({
+ targetModelId: 'gpt-5.4',
+ upstreamIds: ['up_anth'],
+ rules: { reasoning: { effort: 'xhigh' } },
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ displayName: 'Opus XHigh',
+ });
+});
diff --git a/apps/web/src/components/settings/AliasRow.vue b/apps/web/src/components/settings/AliasRow.vue
new file mode 100644
index 000000000..1f8a684d1
--- /dev/null
+++ b/apps/web/src/components/settings/AliasRow.vue
@@ -0,0 +1,90 @@
+
+
+
+
+
{{ alias.on_conflict }}
+
+
+ {{ labelText }}
+ {{ alias.alias }}
+ → {{ alias.target_model_id }}
+
+
+
+
+ {{ badge.label }}: {{ badge.value }}
+
+
+
+
hidden
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
diff --git a/apps/web/src/components/settings/AliasRow_test.ts b/apps/web/src/components/settings/AliasRow_test.ts
new file mode 100644
index 000000000..cef3a9639
--- /dev/null
+++ b/apps/web/src/components/settings/AliasRow_test.ts
@@ -0,0 +1,128 @@
+// @vitest-environment happy-dom
+
+import { mount } from '@vue/test-utils';
+import { beforeEach, describe, expect, test, vi } from 'vitest';
+import { defineComponent } from 'vue';
+
+import AliasRow from './AliasRow.vue';
+import type { ModelAlias } from '../../api/types.ts';
+
+const baseAlias: ModelAlias = {
+ alias: 'opus-xhigh',
+ target_model_id: 'claude-opus-4-6',
+ upstream_ids: [],
+ rules: { reasoning: { effort: 'xhigh' } },
+ visible_in_models_list: true,
+ on_conflict: 'real-only',
+ display_name: 'Opus XHigh',
+ created_at: 1_700_000_000,
+};
+
+describe('AliasRow', () => {
+ beforeEach(() => {
+ vi.restoreAllMocks();
+ });
+
+ test('renders the display name, alias id, and target', () => {
+ const wrapper = mount(AliasRow, { props: { alias: baseAlias } });
+ expect(wrapper.text()).toContain('Opus XHigh');
+ expect(wrapper.text()).toContain('opus-xhigh');
+ expect(wrapper.text()).toContain('claude-opus-4-6');
+ });
+
+ test('falls back to alias name when display_name is null', () => {
+ const wrapper = mount(AliasRow, { props: { alias: { ...baseAlias, display_name: null } } });
+ // alias id appears twice (label fallback + the small font-mono id), so the
+ // contains-check alone can't isolate the label; we also assert the display name is absent.
+ expect(wrapper.text()).toContain('opus-xhigh');
+ expect(wrapper.text()).not.toContain('Opus XHigh');
+ });
+
+ test('emits edit and delete on the matching button clicks', async () => {
+ const wrapper = mount(AliasRow, { props: { alias: baseAlias } });
+ await wrapper.find('[aria-label="Edit alias"]').trigger('click');
+ await wrapper.find('[aria-label="Delete alias"]').trigger('click');
+ expect(wrapper.emitted('edit')).toHaveLength(1);
+ expect(wrapper.emitted('delete')).toHaveLength(1);
+ });
+
+ test('shows a "hidden" badge when visible_in_models_list is false', () => {
+ const wrapper = mount(AliasRow, { props: { alias: { ...baseAlias, visible_in_models_list: false } } });
+ expect(wrapper.text()).toContain('hidden');
+ });
+
+ test('renders one rule badge per active rule field', () => {
+ const wrapper = mount(AliasRow, {
+ props: {
+ alias: {
+ ...baseAlias,
+ rules: { reasoning: { effort: 'high' }, verbosity: 'low', serviceTier: 'priority' },
+ },
+ },
+ });
+ // formatAliasRuleBadges drives the order: effort, verbosity, service tier.
+ const text = wrapper.text();
+ expect(text).toContain('effort: high');
+ expect(text).toContain('verbosity: low');
+ expect(text).toContain('service tier: priority');
+ });
+});
+
+// Bare-component smoke test for the card. We mock the composable so the
+// card renders deterministically without an HTTP round-trip; the stub
+// substitutes the same shape useModelAliases exposes.
+describe('AliasesSettingsCard', () => {
+ test('renders empty state when the store has no aliases', async () => {
+ vi.resetModules();
+ vi.doMock('../../composables/useModelAliases.ts', () => ({
+ useModelAliases: () => ({
+ aliases: { value: [] },
+ loading: { value: false },
+ error: { value: null },
+ load: vi.fn(),
+ }),
+ }));
+ vi.doMock('../../api/client.ts', () => ({
+ useApi: () => ({ api: { aliases: { ':alias': { $delete: vi.fn() } } } }),
+ callApi: vi.fn(),
+ }));
+ const { default: AliasesSettingsCard } = await import('./AliasesSettingsCard.vue');
+ const wrapper = mount(AliasesSettingsCard);
+ expect(wrapper.text()).toContain('No aliases configured');
+ });
+
+ test('renders one AliasRow per alias the store holds', async () => {
+ vi.resetModules();
+ const rows: ModelAlias[] = [
+ { ...baseAlias, alias: 'a-one' },
+ { ...baseAlias, alias: 'b-two', display_name: null },
+ ];
+ vi.doMock('../../composables/useModelAliases.ts', () => ({
+ useModelAliases: () => ({
+ aliases: { value: rows },
+ loading: { value: false },
+ error: { value: null },
+ load: vi.fn(),
+ }),
+ }));
+ vi.doMock('../../api/client.ts', () => ({
+ useApi: () => ({ api: { aliases: { ':alias': { $delete: vi.fn() } } } }),
+ callApi: vi.fn(),
+ }));
+ const { default: AliasesSettingsCard } = await import('./AliasesSettingsCard.vue');
+ const wrapper = mount(AliasesSettingsCard);
+ // Each row exposes its delete button by aria-label, so the count is a
+ // reliable proxy for "one AliasRow rendered per alias".
+ expect(wrapper.findAll('[aria-label="Delete alias"]').length).toBe(rows.length);
+ expect(wrapper.text()).toContain('a-one');
+ expect(wrapper.text()).toContain('b-two');
+ });
+});
+
+// Sanity: mounting a trivial inline component confirms the test harness itself
+// (Vue plugin, happy-dom, @vue/test-utils) works, so a failure in the suites
+// above points at the components rather than at the environment.
+test('the test harness can mount a trivial component', () => {
+ const wrapper = mount(defineComponent({ template: 'ok ' }));
+ expect(wrapper.text()).toBe('ok');
+});
diff --git a/apps/web/src/components/settings/AliasesSettingsCard.vue b/apps/web/src/components/settings/AliasesSettingsCard.vue
new file mode 100644
index 000000000..da71b374f
--- /dev/null
+++ b/apps/web/src/components/settings/AliasesSettingsCard.vue
@@ -0,0 +1,65 @@
+
+
+
+
+
+
+
Aliases
+
+ Synthesized model ids that pin a target model plus a request-time rule overlay.
+ Surfaced in /v1/models per the conflict policy.
+
+
+
Add Alias
+
+
+
+ Failed to load aliases: {{ aliasesStore.error.value }}
+
+
+
+ No aliases configured. Add one to expose a model id with locked reasoning, service tier, or other rule overrides.
+
+
+
+
+
+
+
diff --git a/apps/web/src/composables/useModelAliases.ts b/apps/web/src/composables/useModelAliases.ts
new file mode 100644
index 000000000..880c771f9
--- /dev/null
+++ b/apps/web/src/composables/useModelAliases.ts
@@ -0,0 +1,29 @@
+import { ref, shallowRef } from 'vue';
+
+import { callApi, useApi } from '../api/client.ts';
+import type { ModelAlias } from '../api/types.ts';
+
+// Module-scoped state so every caller shares the same aliases/loading/error
+// refs — mirrors the proxies store pattern. load() does not dedupe in-flight
+// requests: each call issues its own fetch and overwrites the shared refs.
+const aliases = shallowRef(null);
+const loading = ref(false);
+const error = ref(null);
+
+export const useModelAliases = () => {
+ const api = useApi();
+
+ const load = async () => {
+ loading.value = true;
+ error.value = null;
+ const { data, error: err } = await callApi(() => api.api.aliases.$get());
+ loading.value = false;
+ if (err) {
+ error.value = err.message;
+ return;
+ }
+ aliases.value = data;
+ };
+
+ return { aliases, loading, error, load };
+};
diff --git a/apps/web/src/pages/dashboard/settings.vue b/apps/web/src/pages/dashboard/settings.vue
index 83fedc993..636754fee 100644
--- a/apps/web/src/pages/dashboard/settings.vue
+++ b/apps/web/src/pages/dashboard/settings.vue
@@ -4,14 +4,17 @@ import { ref, watch } from 'vue';
import { useRouter } from 'vue-router';
import { callApi, useApi } from '../../api/client.ts';
-import type { ProxyRecord, SearchConfig, UpstreamProviderKind, UpstreamRecord } from '../../api/types.ts';
+import type { ModelAlias, ProxyRecord, SearchConfig, UpstreamProviderKind, UpstreamRecord } from '../../api/types.ts';
+import AliasEditDialog from '../../components/alias-edit/AliasEditDialog.vue';
import ProxyEditDialog from '../../components/proxy-edit/ProxyEditDialog.vue';
+import AliasesSettingsCard from '../../components/settings/AliasesSettingsCard.vue';
import ApiEndpointsSection from '../../components/settings/ApiEndpointsSection.vue';
import ExportSection from '../../components/settings/ExportSection.vue';
import ImportSection from '../../components/settings/ImportSection.vue';
import ProxiesSettingsCard from '../../components/settings/ProxiesSettingsCard.vue';
import SearchConfigSection from '../../components/settings/SearchConfigSection.vue';
import UpstreamsSettingsCard from '../../components/settings/UpstreamsSettingsCard.vue';
+import { useModelAliases } from '../../composables/useModelAliases.ts';
import { useModelsStore } from '../../composables/useModels.ts';
import { useProxiesStore } from '../../composables/useProxies.ts';
import { useRuntimeInfo } from '../../composables/useRuntimeInfo.ts';
@@ -31,6 +34,7 @@ export const useSettingsPageData = defineBasicLoader(async () => {
useUpstreamsStore().load(),
useModelsStore().load(),
useProxiesStore().load(),
+ useModelAliases().load(),
useRuntimeInfo().load(),
]);
return {
@@ -49,6 +53,8 @@ const { upstreams, loading: storeLoading, load } = useUpstreamsStore();
const modelsStore = useModelsStore();
const proxiesStore = useProxiesStore();
const { load: loadProxies } = proxiesStore;
+const aliasesStore = useModelAliases();
+const { load: loadAliases } = aliasesStore;
const settingsData = useSettingsPageData();
// Local working copy the child reorders via v-model:ordered; reloadAll
@@ -59,10 +65,10 @@ watch(upstreams, list => {
}, { immediate: true });
const reloadAll = async () => {
- await Promise.all([load(), modelsStore.load(), loadProxies()]);
+ await Promise.all([load(), modelsStore.load(), loadProxies(), loadAliases()]);
};
-// Proxy editor is hosted as a modal — v-if drives the unmount on close
+// Proxy + alias editors are hosted as modals — v-if drives the unmount on close
// so the next open boots from a fresh script setup (no manual reset).
const proxyDialogOpen = ref(false);
const proxyDialogRecord = ref(null);
@@ -70,6 +76,13 @@ const openProxyDialog = (record: ProxyRecord | null): void => {
proxyDialogRecord.value = record;
proxyDialogOpen.value = true;
};
+
+const aliasDialogOpen = ref(false);
+const aliasDialogRecord = ref(null);
+const openAliasDialog = (record: ModelAlias | null): void => {
+ aliasDialogRecord.value = record;
+ aliasDialogOpen.value = true;
+};
@@ -89,6 +102,11 @@ const openProxyDialog = (record: ProxyRecord | null): void => {
@edit="(record: ProxyRecord) => openProxyDialog(record)"
@changed="reloadAll"
/>
+ openAliasDialog(null)"
+ @edit="(record: ModelAlias) => openAliasDialog(record)"
+ @changed="reloadAll"
+ />
{
:record="proxyDialogRecord"
@saved="reloadAll"
/>
+
+
diff --git a/apps/web/vitest.config.ts b/apps/web/vitest.config.ts
index 4b522bfd9..20fd5dbbc 100644
--- a/apps/web/vitest.config.ts
+++ b/apps/web/vitest.config.ts
@@ -1,6 +1,11 @@
+import Vue from '@vitejs/plugin-vue';
import { defineConfig } from 'vitest/config';
export default defineConfig({
+ // The Vue plugin is required for any test that mounts an SFC; logic-only
+ // tests don't need it, but adding it here is cheap and lets component
+ // tests (AliasRow, AliasEditDialog, ...) live next to the rest.
+ plugins: [Vue()],
test: {
// happy-dom provides DOM + EventSource for the dump-subscription
// composable's tests. Node-env worked while the composable accepted
diff --git a/packages/gateway/package.json b/packages/gateway/package.json
index 0e763374d..99ce27a73 100644
--- a/packages/gateway/package.json
+++ b/packages/gateway/package.json
@@ -11,6 +11,8 @@
"types": "./src/runtime/channel-broker-contract.ts"
},
"./control-plane/proxies/serialize": { "types": "./src/control-plane/proxies/serialize.ts" },
+ "./control-plane/model-aliases/serialize": { "types": "./src/control-plane/model-aliases/serialize.ts" },
+ "./control-plane/model-aliases/types": { "types": "./src/control-plane/model-aliases/types.ts" },
"./control-plane/pricing/types": { "types": "./src/control-plane/pricing/types.ts" },
"./data-plane/tools/web-search/types": {
"import": "./src/data-plane/tools/web-search/types.ts",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index bf3ef7f58..e1273e234 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -223,6 +223,9 @@ importers:
'@vitejs/plugin-vue':
specifier: ^6.0.1
version: 6.0.7(vite@7.3.1(@types/node@22.19.19)(jiti@2.6.1)(tsx@4.22.4)(yaml@2.9.0))(vue@3.5.34(typescript@5.9.3))
+ '@vue/test-utils':
+ specifier: ^2.4.11
+ version: 2.4.11(@vue/compiler-dom@3.5.34)(@vue/server-renderer@3.5.34(vue@3.5.34(typescript@5.9.3)))(vue@3.5.34(typescript@5.9.3))
happy-dom:
specifier: ^20.0.0
version: 20.10.6
@@ -1278,6 +1281,10 @@ packages:
'@internationalized/number@3.6.6':
resolution: {integrity: sha512-iFgmQaXHE0vytNfpLZWOC2mEJCBRzcUxt53Xf/yCXG93lRvqas237i3r7X4RKMwO3txiyZD4mQjKAByFv6UGSQ==}
+ '@isaacs/cliui@8.0.2':
+ resolution: {integrity: sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==}
+ engines: {node: '>=12'}
+
'@jridgewell/gen-mapping@0.3.13':
resolution: {integrity: sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==}
@@ -1330,6 +1337,9 @@ packages:
resolution: {integrity: sha512-IYqDGiTXab6FniAgnSdZwgWbomxpy9FtYvLKs7wCUs2a8RkITG+DFGO1DM9cr+E3/RgADRpFjrKVaJ1z6sjtEg==}
engines: {node: '>= 20.19.0'}
+ '@one-ini/wasm@0.1.1':
+ resolution: {integrity: sha512-XuySG1E38YScSJoMlqovLru4KTUNSjgVTIjyh7qMX6aNN5HY5Ct5LhRJdxO79JtTzKfzV/bnWpz+zquYrISsvw==}
+
'@oxc-parser/binding-android-arm-eabi@0.131.0':
resolution: {integrity: sha512-t2xicr9pfzkSRYx5aPqZqlLaayIwJTqgQ81Jor31Xep2nGyL2Aq3d0K5wOfeR7VevaSdxaS9dzSQP9xDwn8fDg==}
engines: {node: ^20.19.0 || >=22.12.0}
@@ -1489,6 +1499,10 @@ packages:
resolution: {integrity: sha512-C2Xj8FZ0uHWeCXXqX5B4/gVFQmtSkiuOolzAgutjTfseNOHT3pUjljDZsTSxXFGgio54bCzVFqmEOUrIVk8RDA==}
engines: {node: '>=20.0.0'}
+ '@pkgjs/parseargs@0.11.0':
+ resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==}
+ engines: {node: '>=14'}
+
'@polka/url@1.0.0-next.29':
resolution: {integrity: sha512-wwQAWhWSuHaag8c4q/KN/vCoeOJYshAIvMQwD4GpSb3OiZklFfvAgmj0VCBBImRpuF/aFgIRzllXlVX93Jevww==}
@@ -2064,6 +2078,16 @@ packages:
'@vue/shared@3.5.34':
resolution: {integrity: sha512-24uqU4OIiX29ryC3MeWid/Xf2fa2EFRUVLb77nRhk+UrTVrh/XiGtFAFmJBAtBRbjwNdsPRP+jj/OL27Eg1NDA==}
+ '@vue/test-utils@2.4.11':
+ resolution: {integrity: sha512-GDqaqZsA6m2E5vNzej0aYiIb6BX8xV9pNSbbbXKOfEYwg7ZNblVX8suyqmUBThq8VIrgAJNxn+z72hVtUeiWHA==}
+ peerDependencies:
+ '@vue/compiler-dom': 3.x
+ '@vue/server-renderer': 3.x
+ vue: 3.x
+ peerDependenciesMeta:
+ '@vue/server-renderer':
+ optional: true
+
'@vueuse/core@14.3.0':
resolution: {integrity: sha512-aHfz47g0ZhMtTVHmIzMVpJy8ePhhOy68GY5bv110+5DVtZ+W7BsOx+m61UNQqfrWyPztIHIanWa3E2tib3NFIw==}
peerDependencies:
@@ -2119,6 +2143,10 @@ packages:
peerDependencies:
vue: ^3.5.0
+ abbrev@2.0.0:
+ resolution: {integrity: sha512-6/mh1E2u2YgEsCHdY0Yx5oW+61gZU+1vXaoiHHrpKeuRNNgFvS+/jrwHiQhB5apAf5oB7UB7E19ol2R2LKH8hQ==}
+ engines: {node: ^14.17.0 || ^16.13.0 || >=18.0.0}
+
acorn-jsx@5.3.2:
resolution: {integrity: sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==}
peerDependencies:
@@ -2142,10 +2170,18 @@ packages:
resolution: {integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==}
engines: {node: '>=8'}
+ ansi-regex@6.2.2:
+ resolution: {integrity: sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==}
+ engines: {node: '>=12'}
+
ansi-styles@4.3.0:
resolution: {integrity: sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==}
engines: {node: '>=8'}
+ ansi-styles@6.2.3:
+ resolution: {integrity: sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==}
+ engines: {node: '>=12'}
+
argparse@2.0.1:
resolution: {integrity: sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==}
@@ -2295,6 +2331,10 @@ packages:
colorette@2.0.20:
resolution: {integrity: sha512-IfEDxwoWIjkeXL1eXcDiow4UbKjhLdq6/EuSVR9GMN7KVH3r9gQ83e73hsz1Nd1T3ijd5xv1wcWRYO+D6kCI2w==}
+ commander@10.0.1:
+ resolution: {integrity: sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==}
+ engines: {node: '>=14'}
+
concat-map@0.0.1:
resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==}
@@ -2309,6 +2349,9 @@ packages:
confbox@0.2.4:
resolution: {integrity: sha512-ysOGlgTFbN2/Y6Cg3Iye8YKulHw+R2fNXHrgSmXISQdMnomY6eNDprVdW9R5xBguEqI954+S6709UyiO7B+6OQ==}
+ config-chain@1.1.13:
+ resolution: {integrity: sha512-qj+f8APARXHrM0hraqXYb2/bOVSV4PvJQlNZ/DVj0QrmNM2q2euizkeuVckQ57J+W0mRH6Hvi+k50M4Jul2VRQ==}
+
consola@3.4.2:
resolution: {integrity: sha512-5IKcdX0nnYavi6G7TtOhwkYzyjfJlatbjMjuLSfE2kYT5pMDOilZ4OvMhi637CcDICTmz3wARPoyhqyX1Y+XvA==}
engines: {node: ^14.18.0 || >=16.10.0}
@@ -2414,9 +2457,20 @@ packages:
duplexer@0.1.2:
resolution: {integrity: sha512-jtD6YG370ZCIi/9GTaJKQxWTZD045+4R4hTk/x1UyoqadyJ9x9CgSi1RlVDQF8U2sxLLSnFkCaMihqljHIWgMg==}
+ eastasianwidth@0.2.0:
+ resolution: {integrity: sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==}
+
+ editorconfig@1.0.7:
+ resolution: {integrity: sha512-e0GOtq/aTQhVdNyDU9e02+wz9oDDM+SIOQxWME2QRjzRX5yyLAuHDE+0aE8vHb9XRC8XD37eO2u57+F09JqFhw==}
+ engines: {node: '>=14'}
+ hasBin: true
+
emoji-regex@8.0.0:
resolution: {integrity: sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==}
+ emoji-regex@9.2.2:
+ resolution: {integrity: sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==}
+
entities@7.0.1:
resolution: {integrity: sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==}
engines: {node: '>=0.12'}
@@ -2635,6 +2689,10 @@ packages:
resolution: {integrity: sha512-dKx12eRCVIzqCxFGplyFKJMPvLEWgmNtUrpTiJIR5u97zEhRG8ySrtboPHZXx7daLxQVrl643cTzbab2tkQjxg==}
engines: {node: '>= 0.4'}
+ foreground-child@3.3.1:
+ resolution: {integrity: sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==}
+ engines: {node: '>=14'}
+
fsevents@2.3.3:
resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==}
engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
@@ -2677,6 +2735,11 @@ packages:
resolution: {integrity: sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==}
engines: {node: '>=10.13.0'}
+ glob@10.5.0:
+ resolution: {integrity: sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==}
+ deprecated: Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me
+ hasBin: true
+
globals@14.0.0:
resolution: {integrity: sha512-oahGvuMGQlPw/ivIYBjVSrWAfWLBeku5tpPE2fOPLi+WHffIWbuh2tCjhyQhTBPMf5E9jDEH4FOmTYgYwbKwtQ==}
engines: {node: '>=18'}
@@ -2767,6 +2830,9 @@ packages:
resolution: {integrity: sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==}
engines: {node: '>=0.8.19'}
+ ini@1.3.8:
+ resolution: {integrity: sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==}
+
internal-slot@1.1.0:
resolution: {integrity: sha512-4gd7VpWNQNB4UKKCFFVcp1AVv+FMOgs9NKzjHKusc8jTMhd5eL1NqQqOpE0KzMds804/yHlglp3uxgluOqAPLw==}
engines: {node: '>= 0.4'}
@@ -2887,10 +2953,21 @@ packages:
isexe@2.0.0:
resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==}
+ jackspeak@3.4.3:
+ resolution: {integrity: sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==}
+
jiti@2.6.1:
resolution: {integrity: sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ==}
hasBin: true
+ js-beautify@1.15.4:
+ resolution: {integrity: sha512-9/KXeZUKKJwqCXUdBxFJ3vPh467OCckSBmYDwSK/EtV090K+iMJ7zx2S3HLVDIWFQdqMIsZWbnaGiba18aWhaA==}
+ engines: {node: '>=14'}
+ hasBin: true
+
+ js-cookie@3.0.8:
+ resolution: {integrity: sha512-yeJd4aNAdYZQjaon2bpD/Gb0B/omw7HQOsynXXcOiWVCacbBcPlgn8S/d1X6blFSaHao7ozqtW7NZW19xpCtIw==}
+
js-yaml@4.1.1:
resolution: {integrity: sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==}
hasBin: true
@@ -2947,6 +3024,9 @@ packages:
lodash.merge@4.6.2:
resolution: {integrity: sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==}
+ lru-cache@10.4.3:
+ resolution: {integrity: sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==}
+
magic-regexp@0.10.0:
resolution: {integrity: sha512-Uly1Bu4lO1hwHUW0CQeSWuRtzCMNO00CmXtS8N6fyvB3B979GOEEeAkiTUDsmbYLAbvpUS/Kt5c4ibosAzVyVg==}
@@ -2982,6 +3062,10 @@ packages:
minimist@1.2.8:
resolution: {integrity: sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==}
+ minipass@7.1.3:
+ resolution: {integrity: sha512-tEBHqDnIoM/1rXME1zgka9g6Q2lcoCkxHLuc7ODJ5BxbP5d4c2Z5cGgtXAku59200Cx7diuHTOYfSBD8n6mm8A==}
+ engines: {node: '>=16 || 14 >=14.17'}
+
mitt@3.0.1:
resolution: {integrity: sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==}
@@ -3018,6 +3102,11 @@ packages:
node-fetch-native@1.6.7:
resolution: {integrity: sha512-g9yhqoedzIUm0nTnTqAQvueMPVOuIY16bqgAJJC8XOOubYFNwz6IER9qs0Gq2Xd0+CecCKFjtdDTMA4u4xG06Q==}
+ nopt@7.2.1:
+ resolution: {integrity: sha512-taM24ViiimT/XntxbPyJQzCG+p4EKOpgD3mxFwW38mGjVUrfERQOeY4EDHjdnptttfHuHQXFx+lTP08Q+mLa/w==}
+ engines: {node: ^14.17.0 || ^16.13.0 || >=18.0.0}
+ hasBin: true
+
nprogress@0.2.0:
resolution: {integrity: sha512-I19aIingLgR1fmhftnbWWO3dXc0hSxqHQHQb3H8m+K3TnEn/iSeTZZOyvKXWqQESMwuUVnatlCnZdLBZZt2VSA==}
@@ -3089,6 +3178,9 @@ packages:
resolution: {integrity: sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==}
engines: {node: '>=10'}
+ package-json-from-dist@1.0.1:
+ resolution: {integrity: sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==}
+
package-manager-detector@1.6.0:
resolution: {integrity: sha512-61A5ThoTiDG/C8s8UMZwSorAGwMJ0ERVGj2OjoW5pAalsNOg15+iQiPzrLJ4jhZ1HJzmC2PIHT2oEiH3R5fzNA==}
@@ -3110,6 +3202,10 @@ packages:
path-parse@1.0.7:
resolution: {integrity: sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==}
+ path-scurry@1.11.1:
+ resolution: {integrity: sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==}
+ engines: {node: '>=16 || 14 >=14.18'}
+
path-to-regexp@6.3.0:
resolution: {integrity: sha512-Yhpw4T9C6hPpgPeA28us07OJeqZ5EzQTkbfwuhsUg0c237RomFoETJgmp2sa3F/41gfLE6G5cqcYwznmeEeOlQ==}
@@ -3164,6 +3260,9 @@ packages:
resolution: {integrity: sha512-DEvV2ZF2r2/63V+tK8hQvrR2ZGn10srHbXviTlcv7Kpzw8jWiNTqbVgjO3IY8RxrrOUF8VPMQQFysYYYv0YZxw==}
engines: {node: '>=6'}
+ proto-list@1.2.4:
+ resolution: {integrity: sha512-vtK/94akxsTMhe0/cbfpR+syPuszcuwhqVjJq26CuNDgFGj682oRBXOP5MJpv2r7JtE8MsiepGIqvvOTBwn2vA==}
+
punycode@2.3.1:
resolution: {integrity: sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==}
engines: {node: '>=6'}
@@ -3311,6 +3410,10 @@ packages:
siginfo@2.0.0:
resolution: {integrity: sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==}
+ signal-exit@4.1.0:
+ resolution: {integrity: sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==}
+ engines: {node: '>=14'}
+
simple-swizzle@0.2.4:
resolution: {integrity: sha512-nAu1WFPQSMNr2Zn9PGSZK9AGn4t/y97lEm+MXTtUDwfP0ksAIX4nO+6ruD9Jwut4C49SB1Ws+fbXsm/yScWOHw==}
@@ -3350,6 +3453,10 @@ packages:
resolution: {integrity: sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==}
engines: {node: '>=8'}
+ string-width@5.1.2:
+ resolution: {integrity: sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==}
+ engines: {node: '>=12'}
+
string.prototype.trim@1.2.10:
resolution: {integrity: sha512-Rs66F0P/1kedk5lyYyH9uBzuiI/kNRmwJAR9quK6VOtIpZ2G+hMZd+HQbbv25MgCA6gEffoMZYxlTod4WcdrKA==}
engines: {node: '>= 0.4'}
@@ -3366,6 +3473,10 @@ packages:
resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==}
engines: {node: '>=8'}
+ strip-ansi@7.2.0:
+ resolution: {integrity: sha512-yDPMNjp4WyfYBkHnjIRLfca1i6KMyGCtsVgoKe/z1+6vukgaENdgGBZt+ZmKPc4gavvEZ5OgHfHdrazhgNyG7w==}
+ engines: {node: '>=12'}
+
strip-bom@3.0.0:
resolution: {integrity: sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA==}
engines: {node: '>=4'}
@@ -3623,6 +3734,9 @@ packages:
vscode-uri@3.1.0:
resolution: {integrity: sha512-/BpdSx+yCQGnCvecbyXdxHDkuk55/G3xwnC0GqY4gmQ3j+A+g8kzzgB4Nk/SINjqn6+waqw3EgbVF2QKExkRxQ==}
+ vue-component-type-helpers@3.3.5:
+ resolution: {integrity: sha512-Fe1jyPJoUGpJOYKOri44jduR7My4yYINOMJISuMAbmrs+L5LbIDUc8NTWZYY3EJLK0yPLuCmcd5zoCsE4k2/KA==}
+
vue-demi@0.14.10:
resolution: {integrity: sha512-nMZBOwuzabUO0nLgIcc6rycZEebF6eeUfaiQx9+WSk8e29IbLvPU9feI6tqW4kTo3hvoYAJkMh8n8D0fuISphg==}
engines: {node: '>=12'}
@@ -3715,6 +3829,10 @@ packages:
resolution: {integrity: sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==}
engines: {node: '>=10'}
+ wrap-ansi@8.1.0:
+ resolution: {integrity: sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==}
+ engines: {node: '>=12'}
+
ws@8.18.0:
resolution: {integrity: sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==}
engines: {node: '>=10.0.0'}
@@ -4295,6 +4413,15 @@ snapshots:
dependencies:
'@swc/helpers': 0.5.21
+ '@isaacs/cliui@8.0.2':
+ dependencies:
+ string-width: 5.1.2
+ string-width-cjs: string-width@4.2.3
+ strip-ansi: 7.2.0
+ strip-ansi-cjs: strip-ansi@6.0.1
+ wrap-ansi: 8.1.0
+ wrap-ansi-cjs: wrap-ansi@7.0.0
+
'@jridgewell/gen-mapping@0.3.13':
dependencies:
'@jridgewell/sourcemap-codec': 1.5.5
@@ -4342,6 +4469,8 @@ snapshots:
'@noble/hashes@2.2.0': {}
+ '@one-ini/wasm@0.1.1': {}
+
'@oxc-parser/binding-android-arm-eabi@0.131.0':
optional: true
@@ -4502,6 +4631,9 @@ snapshots:
tslib: 2.8.1
tsyringe: 4.10.0
+ '@pkgjs/parseargs@0.11.0':
+ optional: true
+
'@polka/url@1.0.0-next.29': {}
'@poppinss/colors@4.1.6':
@@ -5144,6 +5276,15 @@ snapshots:
'@vue/shared@3.5.34': {}
+ '@vue/test-utils@2.4.11(@vue/compiler-dom@3.5.34)(@vue/server-renderer@3.5.34(vue@3.5.34(typescript@5.9.3)))(vue@3.5.34(typescript@5.9.3))':
+ dependencies:
+ '@vue/compiler-dom': 3.5.34
+ js-beautify: 1.15.4
+ vue: 3.5.34(typescript@5.9.3)
+ vue-component-type-helpers: 3.3.5
+ optionalDependencies:
+ '@vue/server-renderer': 3.5.34(vue@3.5.34(typescript@5.9.3))
+
'@vueuse/core@14.3.0(vue@3.5.34(typescript@5.9.3))':
dependencies:
'@types/web-bluetooth': 0.0.21
@@ -5166,6 +5307,8 @@ snapshots:
dependencies:
vue: 3.5.34(typescript@5.9.3)
+ abbrev@2.0.0: {}
+
acorn-jsx@5.3.2(acorn@8.16.0):
dependencies:
acorn: 8.16.0
@@ -5185,10 +5328,14 @@ snapshots:
ansi-regex@5.0.1: {}
+ ansi-regex@6.2.2: {}
+
ansi-styles@4.3.0:
dependencies:
color-convert: 2.0.1
+ ansi-styles@6.2.3: {}
+
argparse@2.0.1: {}
aria-hidden@1.2.6:
@@ -5361,6 +5508,8 @@ snapshots:
colorette@2.0.20: {}
+ commander@10.0.1: {}
+
concat-map@0.0.1: {}
concurrently@9.2.1:
@@ -5376,6 +5525,11 @@ snapshots:
confbox@0.2.4: {}
+ config-chain@1.1.13:
+ dependencies:
+ ini: 1.3.8
+ proto-list: 1.2.4
+
consola@3.4.2: {}
cookie@1.1.1: {}
@@ -5468,8 +5622,19 @@ snapshots:
duplexer@0.1.2: {}
+ eastasianwidth@0.2.0: {}
+
+ editorconfig@1.0.7:
+ dependencies:
+ '@one-ini/wasm': 0.1.1
+ commander: 10.0.1
+ minimatch: 9.0.9
+ semver: 7.7.4
+
emoji-regex@8.0.0: {}
+ emoji-regex@9.2.2: {}
+
entities@7.0.1: {}
error-stack-parser-es@1.0.5: {}
@@ -5812,6 +5977,11 @@ snapshots:
dependencies:
is-callable: 1.2.7
+ foreground-child@3.3.1:
+ dependencies:
+ cross-spawn: 7.0.6
+ signal-exit: 4.1.0
+
fsevents@2.3.3:
optional: true
@@ -5864,6 +6034,15 @@ snapshots:
dependencies:
is-glob: 4.0.3
+ glob@10.5.0:
+ dependencies:
+ foreground-child: 3.3.1
+ jackspeak: 3.4.3
+ minimatch: 9.0.9
+ minipass: 7.1.3
+ package-json-from-dist: 1.0.1
+ path-scurry: 1.11.1
+
globals@14.0.0: {}
globalthis@1.0.4:
@@ -5937,6 +6116,8 @@ snapshots:
imurmurhash@0.1.4: {}
+ ini@1.3.8: {}
+
internal-slot@1.1.0:
dependencies:
es-errors: 1.3.0
@@ -6063,8 +6244,24 @@ snapshots:
isexe@2.0.0: {}
+ jackspeak@3.4.3:
+ dependencies:
+ '@isaacs/cliui': 8.0.2
+ optionalDependencies:
+ '@pkgjs/parseargs': 0.11.0
+
jiti@2.6.1: {}
+ js-beautify@1.15.4:
+ dependencies:
+ config-chain: 1.1.13
+ editorconfig: 1.0.7
+ glob: 10.5.0
+ js-cookie: 3.0.8
+ nopt: 7.2.1
+
+ js-cookie@3.0.8: {}
+
js-yaml@4.1.1:
dependencies:
argparse: 2.0.1
@@ -6110,6 +6307,8 @@ snapshots:
lodash.merge@4.6.2: {}
+ lru-cache@10.4.3: {}
+
magic-regexp@0.10.0:
dependencies:
estree-walker: 3.0.3
@@ -6158,6 +6357,8 @@ snapshots:
minimist@1.2.8: {}
+ minipass@7.1.3: {}
+
mitt@3.0.1: {}
mlly@1.8.2:
@@ -6188,6 +6389,10 @@ snapshots:
node-fetch-native@1.6.7: {}
+ nopt@7.2.1:
+ dependencies:
+ abbrev: 2.0.0
+
nprogress@0.2.0: {}
nth-check@2.1.1:
@@ -6299,6 +6504,8 @@ snapshots:
dependencies:
p-limit: 3.1.0
+ package-json-from-dist@1.0.1: {}
+
package-manager-detector@1.6.0: {}
parent-module@1.0.1:
@@ -6313,6 +6520,11 @@ snapshots:
path-parse@1.0.7: {}
+ path-scurry@1.11.1:
+ dependencies:
+ lru-cache: 10.4.3
+ minipass: 7.1.3
+
path-to-regexp@6.3.0: {}
pathe@2.0.3: {}
@@ -6361,6 +6573,8 @@ snapshots:
prismjs@1.30.0: {}
+ proto-list@1.2.4: {}
+
punycode@2.3.1: {}
pvtsutils@1.3.6:
@@ -6611,6 +6825,8 @@ snapshots:
siginfo@2.0.0: {}
+ signal-exit@4.1.0: {}
+
simple-swizzle@0.2.4:
dependencies:
is-arrayish: 0.3.4
@@ -6646,6 +6862,12 @@ snapshots:
is-fullwidth-code-point: 3.0.0
strip-ansi: 6.0.1
+ string-width@5.1.2:
+ dependencies:
+ eastasianwidth: 0.2.0
+ emoji-regex: 9.2.2
+ strip-ansi: 7.2.0
+
string.prototype.trim@1.2.10:
dependencies:
call-bind: 1.0.9
@@ -6673,6 +6895,10 @@ snapshots:
dependencies:
ansi-regex: 5.0.1
+ strip-ansi@7.2.0:
+ dependencies:
+ ansi-regex: 6.2.2
+
strip-bom@3.0.0: {}
strip-json-comments@3.1.1: {}
@@ -6959,6 +7185,8 @@ snapshots:
vscode-uri@3.1.0: {}
+ vue-component-type-helpers@3.3.5: {}
+
vue-demi@0.14.10(vue@3.5.34(typescript@5.9.3)):
dependencies:
vue: 3.5.34(typescript@5.9.3)
@@ -7083,6 +7311,12 @@ snapshots:
string-width: 4.2.3
strip-ansi: 6.0.1
+ wrap-ansi@8.1.0:
+ dependencies:
+ ansi-styles: 6.2.3
+ string-width: 5.1.2
+ strip-ansi: 7.2.0
+
ws@8.18.0: {}
ws@8.21.0: {}
From c4355345fbb7245dbf3514aafb243c65e175d761 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 04:38:59 +0800
Subject: [PATCH 026/170] chore(aliases): drop dead helpers and stale comments
in control plane
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
routes.ts had a re-export of ModelAliasRules that no file imported; the
frontend pulls the type from packages/gateway/src/control-plane/model-aliases/types.ts
directly. The PATCH/DELETE param fallbacks (?? '') were dead — Hono only
dispatches the :alias routes when the segment is present, matching how
api-keys/users routes use param('id')!. In repo.ts, the verbose
explanatory prose is trimmed down to the one load-bearing fact. The
repo/types.ts comment 'save used by import/restore flows' was stale — only
PATCH calls it.
---
.../gateway/src/control-plane/model-aliases/repo.ts | 7 ++-----
.../gateway/src/control-plane/model-aliases/routes.ts | 10 +++-------
packages/gateway/src/repo/types.ts | 4 ++--
3 files changed, 7 insertions(+), 14 deletions(-)
diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts
index 1eaae7d16..69d4d4fd1 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo.ts
@@ -33,11 +33,8 @@ export const getAliasByName = async (db: SqlDatabase, alias: string): Promise => {
const existing = await db
.prepare('SELECT 1 FROM model_aliases WHERE alias = ?')
diff --git a/packages/gateway/src/control-plane/model-aliases/routes.ts b/packages/gateway/src/control-plane/model-aliases/routes.ts
index 961a0da73..b05608ec3 100644
--- a/packages/gateway/src/control-plane/model-aliases/routes.ts
+++ b/packages/gateway/src/control-plane/model-aliases/routes.ts
@@ -1,7 +1,7 @@
import type { Context } from 'hono';
import { aliasToJson } from './serialize.ts';
-import type { ModelAlias, ModelAliasRules } from './types.ts';
+import type { ModelAlias } from './types.ts';
import { type CtxWithJson } from '../../middleware/zod-validator.ts';
import { getRepo } from '../../repo/index.ts';
import type { createAliasBody, updateAliasBody } from '../schemas.ts';
@@ -36,7 +36,7 @@ export const createAlias = async (c: CtxWithJson) => {
};
export const updateAlias = async (c: CtxWithJson) => {
- const aliasName = c.req.param('alias') ?? '';
+ const aliasName = c.req.param('alias')!;
const body = c.req.valid('json');
const repo = getRepo();
@@ -69,12 +69,8 @@ const nextDisplayName = (existing: ModelAlias, patch: string | null | undefined)
};
export const deleteAlias = async (c: Context) => {
- const aliasName = c.req.param('alias') ?? '';
+ const aliasName = c.req.param('alias')!;
const { deleted } = await getRepo().modelAliases.delete(aliasName);
if (!deleted) return c.json({ error: 'Alias not found' }, 404);
return c.body(null, 204);
};
-
-// Re-export so the routes module can wire the type-level `Rules` carrier
-// through the RPC client without consumers having to chase the alias subtree.
-export type { ModelAliasRules };
diff --git a/packages/gateway/src/repo/types.ts b/packages/gateway/src/repo/types.ts
index 8ed620849..41190352a 100644
--- a/packages/gateway/src/repo/types.ts
+++ b/packages/gateway/src/repo/types.ts
@@ -346,8 +346,8 @@ export interface ModelAliasesRepo {
// surfaces 409 to the dashboard instead of silently overwriting an
// existing row.
create(alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }>;
- // UPSERT semantics — used by import/restore flows that need to land a row
- // regardless of whether it already exists.
+ // UPSERT — used by the PATCH update path; preserves created_at on re-save
+ // and bumps updated_at.
save(alias: ModelAlias): Promise;
// Returns whether a row was actually removed; routes treat false as 404.
delete(alias: string): Promise<{ deleted: boolean }>;
From 26f08e1b5438355f2459bf9c1b20b5b3f915d4b3 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 04:38:59 +0800
Subject: [PATCH 027/170] fix(aliases): make 0047 display_name seed idempotent
Guard the UPDATE with `AND display_name IS NULL` so a re-run against an
environment where the operator already renamed the seed doesn't wipe
their value. Migrations are tracked one-shot but defense in depth keeps
the local-dev replay path safe.
---
.../gateway/migrations/0047_model_aliases_display_name.sql | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/packages/gateway/migrations/0047_model_aliases_display_name.sql b/packages/gateway/migrations/0047_model_aliases_display_name.sql
index 9d21ed9a1..5fd9b2591 100644
--- a/packages/gateway/migrations/0047_model_aliases_display_name.sql
+++ b/packages/gateway/migrations/0047_model_aliases_display_name.sql
@@ -1,3 +1,4 @@
ALTER TABLE model_aliases ADD COLUMN display_name TEXT;
-UPDATE model_aliases SET display_name = 'Codex Auto Review' WHERE alias = 'codex-auto-review';
+UPDATE model_aliases SET display_name = 'Codex Auto Review'
+ WHERE alias = 'codex-auto-review' AND display_name IS NULL;
From e6a6862c722e5d8a262a6d487fc6920bb24969ec Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 04:38:59 +0800
Subject: [PATCH 028/170] fix(aliases): drop dead anthropicBeta body write on
Gemini apply path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
applyAliasRulesToGemini was writing payload.anthropicBeta but nothing
read it — gemini-via-messages doesn't reference the field, and the
Messages attempt reads candidate.aliasRules.anthropicBeta directly for
the outbound anthropic-beta header. The sanitizer would strip the body
field on its way to upstream regardless. Removed the write and the
matching test; the header path is unchanged.
Also corrected the Messages apply doc that claimed "the write-side
validator forbids" adaptive + budgetTokens — the schema accepts both
today; the dashboard's tagged radio is what enforces exclusivity, and
the apply step picks adaptive when both arrive raw.
---
.../gateway/src/data-plane/model-aliases/apply.ts | 15 +++++++--------
.../src/data-plane/model-aliases/apply_test.ts | 6 ------
2 files changed, 7 insertions(+), 14 deletions(-)
diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts
index e90e2fbed..0791c5f7b 100644
--- a/packages/gateway/src/data-plane/model-aliases/apply.ts
+++ b/packages/gateway/src/data-plane/model-aliases/apply.ts
@@ -43,10 +43,9 @@ export const applyAliasRulesToMessages = (payload: MessagesPayload, rules: Model
if (rules.reasoning?.effort !== undefined) {
payload.output_config = { ...payload.output_config, effort: rules.reasoning.effort };
}
- // Adaptive wins over budgetTokens when both arrive — the write-side
- // validator forbids the combination, but the apply step has to make a
- // choice if both slip through and the translate-layer policy is
- // adaptive-first.
+ // The dashboard's tagged radio enforces mutual exclusivity between
+ // adaptive and budgetTokens; if both arrive through the raw API the apply
+ // step picks adaptive (matches the translate-layer adaptive-first policy).
if (rules.reasoning?.adaptive === true) {
payload.thinking = { type: 'adaptive' };
} else if (rules.reasoning?.budgetTokens !== undefined) {
@@ -74,9 +73,10 @@ export const applyAliasRulesToMessages = (payload: MessagesPayload, rules: Model
export const applyAliasRulesToGemini = (payload: GeminiPayload, rules: ModelAliasRules): void => {
// All four reasoning knobs ride on the native thinkingConfig; verbosity and
- // serviceTier ride on extension slots under generationConfig; anthropicBeta
- // rides on a top-level extension slot so the existing gemini-via-messages
- // translator picks it up there.
+ // serviceTier ride on extension slots under generationConfig. anthropicBeta
+ // doesn't surface on Gemini-inbound bodies — the gemini-via-messages
+ // translator doesn't read it, and the Messages attempt reads it off the
+ // candidate's aliasRules directly when stamping the outbound header.
const hasThinking = rules.reasoning?.effort !== undefined
|| rules.reasoning?.budgetTokens !== undefined
|| rules.reasoning?.adaptive === true
@@ -101,5 +101,4 @@ export const applyAliasRulesToGemini = (payload: GeminiPayload, rules: ModelAlia
if (rules.serviceTier !== undefined) generationConfig.serviceTier = rules.serviceTier;
payload.generationConfig = generationConfig;
}
- if (rules.anthropicBeta?.length) payload.anthropicBeta = [...rules.anthropicBeta];
};
diff --git a/packages/gateway/src/data-plane/model-aliases/apply_test.ts b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
index 3dfba9d45..c62bac7f7 100644
--- a/packages/gateway/src/data-plane/model-aliases/apply_test.ts
+++ b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
@@ -195,12 +195,6 @@ describe('applyAliasRulesToGemini', () => {
expect(payload.generationConfig?.serviceTier).toBe('flex');
});
- test('writes anthropicBeta to top-level extension slot', () => {
- const payload = gem();
- applyAliasRulesToGemini(payload, { anthropicBeta: ['ctx-1m'] });
- expect(payload.anthropicBeta).toEqual(['ctx-1m']);
- });
-
test('preserves existing thinkingConfig entries when adding a new one', () => {
const payload = gem({ generationConfig: { thinkingConfig: { thinkingBudget: 1024 } } });
applyAliasRulesToGemini(payload, { reasoning: { summary: 'detailed' } });
From 784e19e3e07d4111a13893ee23e7cf00a8faeb6b Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 04:38:59 +0800
Subject: [PATCH 029/170] perf(web): drop duplicate /api/models reload on alias
save
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
AliasEditDialog awaited modelsStore.load() after emitting saved, but
settings.vue's reloadAll handler (wired to @saved) also loads models —
so the dashboard fired the same GET /api/models twice per alias write.
The emit alone is the contract; drop the duplicate.
---
apps/web/src/components/alias-edit/AliasEditDialog.vue | 5 -----
1 file changed, 5 deletions(-)
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index e247137b0..ce254521c 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -203,11 +203,6 @@ const save = async () => {
if (error) { saveError.value = error.message; return; }
}
emit('saved');
- // Also refresh the dashboard's /api/models cache so the new alias appears
- // in the catalog. The settings.vue reloadAll handler does this too, but a
- // direct call here keeps the modal-close semantics independent of the
- // parent's reload wiring.
- await modelsStore.load();
open.value = false;
} finally {
saving.value = false;
From 1a54f3963ff3973c00282fcf67e060abe877bfae Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 12:23:14 +0800
Subject: [PATCH 030/170] feat(aliases): allow PATCH to rename alias by
changing the PK
PR feedback: the alias name was treated as immutable because it is the
table PK. SQLite (and D1) do permit UPDATEing a PRIMARY KEY column, so
operators can now rename in place from a single PATCH instead of the
delete-and-recreate workaround.
Wire change: updateAliasBody gains an optional `alias` field. When the
body name differs from the path param, the handler runs a rename
codepath that 409s on collision and 404s on a missing source row. The
existing merged-save then proceeds against the new PK.
Repo change: ModelAliasesRepo grows a `rename(old, new)` method. The
SQL impl uses a pre-flight SELECT for the collision check (driver
error shape differs between node:sqlite and D1) plus
`UPDATE model_aliases SET alias = ? WHERE alias = ?`; `meta.changes`
distinguishes notFound from success. The Memory impl mirrors the
semantics for the test path. body.alias === path.alias collapses to a
no-op so the dashboard can always send the merged shape without
branching on whether a rename was requested.
---
.../src/control-plane/model-aliases/repo.ts | 20 +++++
.../control-plane/model-aliases/repo_test.ts | 52 ++++++++++-
.../src/control-plane/model-aliases/routes.ts | 13 ++-
.../model-aliases/routes_test.ts | 87 +++++++++++++++++++
packages/gateway/src/control-plane/schemas.ts | 9 +-
packages/gateway/src/repo/memory.ts | 10 +++
packages/gateway/src/repo/sql.ts | 6 +-
packages/gateway/src/repo/types.ts | 5 ++
8 files changed, 196 insertions(+), 6 deletions(-)
diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts
index 69d4d4fd1..f49c1e639 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo.ts
@@ -73,6 +73,26 @@ export const deleteAlias = async (db: SqlDatabase, alias: string): Promise<{ del
return { deleted: (result.meta.changes ?? 0) > 0 };
};
+// Updates the PK column in place. A pre-flight SELECT detects the destination
+// collision so the caller gets a structured `duplicate` reason instead of a
+// driver-specific SQLITE_CONSTRAINT thrown error (shape differs between
+// node:sqlite and D1). `meta.changes === 0` after the UPDATE means the source
+// row was gone — propagated as `notFound` for the 404 mapping.
+export const renameAlias = async (db: SqlDatabase, oldAlias: string, newAlias: string): Promise<{ ok: true } | { ok: false; reason: 'duplicate' | 'notFound' }> => {
+ if (oldAlias === newAlias) return { ok: true };
+ const conflict = await db
+ .prepare('SELECT 1 FROM model_aliases WHERE alias = ?')
+ .bind(newAlias)
+ .first<{ 1: number }>();
+ if (conflict) return { ok: false, reason: 'duplicate' };
+ const result = await db
+ .prepare('UPDATE model_aliases SET alias = ?, updated_at = unixepoch() WHERE alias = ?')
+ .bind(newAlias, oldAlias)
+ .run();
+ if ((result.meta.changes ?? 0) === 0) return { ok: false, reason: 'notFound' };
+ return { ok: true };
+};
+
const bindValues = (alias: ModelAlias): unknown[] => [
alias.alias,
alias.targetModelId,
diff --git a/packages/gateway/src/control-plane/model-aliases/repo_test.ts b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
index 5f1e4fa6d..32ba3aea0 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo_test.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
@@ -1,6 +1,6 @@
import { test } from 'vitest';
-import { loadAllAliases } from './repo.ts';
+import { loadAllAliases, renameAlias } from './repo.ts';
import { createSqliteTestDb } from '../../repo/test-sqlite.ts';
import { assertEquals, assertRejects } from '@floway-dev/test-utils';
@@ -120,3 +120,53 @@ test('loadAllAliases surfaces malformed upstream_ids_json as a descriptive error
await assertRejects(() => loadAllAliases(db), Error, 'Malformed model_aliases upstream_ids_json for bad-upstreams');
});
+
+test('renameAlias updates the PRIMARY KEY in place', async () => {
+ const db = await createSqliteTestDb();
+ await db.exec('DELETE FROM model_aliases');
+ await db
+ .prepare(
+ 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
+ )
+ .bind('source', 'gpt-5.4', '[]', '{}', 1, 'real-only', 'Source Label', 1_700_000_000)
+ .run();
+
+ const result = await renameAlias(db, 'source', 'renamed');
+ assertEquals(result, { ok: true });
+
+ const remaining = await loadAllAliases(db);
+ assertEquals(remaining.map(a => a.alias), ['renamed']);
+ // Preserved row payload — only the PK changed; createdAt and displayName intact.
+ assertEquals(remaining[0]!.displayName, 'Source Label');
+ assertEquals(remaining[0]!.createdAt, 1_700_000_000);
+});
+
+test('renameAlias returns notFound when the source row is missing', async () => {
+ const db = await createSqliteTestDb();
+ await db.exec('DELETE FROM model_aliases');
+ const result = await renameAlias(db, 'ghost', 'new-name');
+ assertEquals(result, { ok: false, reason: 'notFound' });
+});
+
+test('renameAlias returns duplicate when the destination row already exists', async () => {
+ const db = await createSqliteTestDb();
+ await db.exec('DELETE FROM model_aliases');
+ await db
+ .prepare(
+ 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)',
+ )
+ .bind('source', 'gpt-5.4', '[]', '{}', 1, 'real-only', 1_700_000_000)
+ .run();
+ await db
+ .prepare(
+ 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)',
+ )
+ .bind('taken', 'gpt-5.4', '[]', '{}', 1, 'real-only', 1_700_000_001)
+ .run();
+
+ const result = await renameAlias(db, 'source', 'taken');
+ assertEquals(result, { ok: false, reason: 'duplicate' });
+ // Both rows still present.
+ const remaining = (await loadAllAliases(db)).map(a => a.alias).sort();
+ assertEquals(remaining, ['source', 'taken']);
+});
diff --git a/packages/gateway/src/control-plane/model-aliases/routes.ts b/packages/gateway/src/control-plane/model-aliases/routes.ts
index b05608ec3..611d8913e 100644
--- a/packages/gateway/src/control-plane/model-aliases/routes.ts
+++ b/packages/gateway/src/control-plane/model-aliases/routes.ts
@@ -43,12 +43,23 @@ export const updateAlias = async (c: CtxWithJson) => {
const existing = await repo.modelAliases.getByAlias(aliasName);
if (!existing) return c.json({ error: 'Alias not found' }, 404);
+ // Rename runs first so the merged save below targets the row at its new
+ // PK. A no-op (alias unchanged or omitted) returns ok without touching
+ // the row.
+ const nextAlias = body.alias ?? existing.alias;
+ if (nextAlias !== existing.alias) {
+ const renamed = await repo.modelAliases.rename(existing.alias, nextAlias);
+ if (!renamed.ok) {
+ return c.json({ error: { type: 'conflict', message: `Alias "${nextAlias}" already exists` } }, 409);
+ }
+ }
+
// Field-by-field merge so an absent field preserves the existing value.
// `displayName` accepts an explicit null to clear the operator-set label
// back to the synthesized fallback; we use Object.hasOwn to keep the
// absent / null distinction that `??` would collapse.
const merged: ModelAlias = {
- alias: existing.alias,
+ alias: nextAlias,
targetModelId: body.targetModelId ?? existing.targetModelId,
upstreamIds: body.upstreamIds ?? existing.upstreamIds,
rules: body.rules ?? existing.rules,
diff --git a/packages/gateway/src/control-plane/model-aliases/routes_test.ts b/packages/gateway/src/control-plane/model-aliases/routes_test.ts
index 4143ed1cf..bf1cd766c 100644
--- a/packages/gateway/src/control-plane/model-aliases/routes_test.ts
+++ b/packages/gateway/src/control-plane/model-aliases/routes_test.ts
@@ -204,6 +204,93 @@ test('PATCH /api/aliases/:alias returns 404 when the alias does not exist', asyn
assertEquals(resp.status, 404);
});
+test('PATCH /api/aliases/:alias renames the row when body.alias differs from the path', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.save({
+ alias: 'old-name',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: ['up_a'],
+ rules: { reasoning: { effort: 'high' } },
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ displayName: 'Old Label',
+ createdAt: 1_700_000_000,
+ });
+
+ const resp = await requestApp('/api/aliases/old-name', authedJson(adminSession, 'PATCH', {
+ alias: 'new-name',
+ rules: { reasoning: { effort: 'medium' } },
+ }));
+ assertEquals(resp.status, 200);
+ const updated = (await resp.json()) as SerializedModelAlias;
+ // Response carries the new alias and the patched rules; preserved fields stay intact.
+ assertEquals(updated.alias, 'new-name');
+ assertEquals(updated.target_model_id, 'gpt-5.4');
+ assertEquals(updated.upstream_ids, ['up_a']);
+ assertEquals(updated.rules, { reasoning: { effort: 'medium' } });
+ assertEquals(updated.display_name, 'Old Label');
+ assertEquals(updated.created_at, 1_700_000_000);
+
+ // Repo state: old row gone, new row present.
+ assertEquals(await repo.modelAliases.getByAlias('old-name'), null);
+ const stored = await repo.modelAliases.getByAlias('new-name');
+ assertEquals(stored?.alias, 'new-name');
+ assertEquals(stored?.rules, { reasoning: { effort: 'medium' } });
+});
+
+test('PATCH /api/aliases/:alias returns 409 when body.alias collides with an existing row', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.save({
+ alias: 'source',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: [],
+ rules: {},
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ createdAt: 1_700_000_000,
+ });
+ await repo.modelAliases.save({
+ alias: 'taken',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: [],
+ rules: {},
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ createdAt: 1_700_000_001,
+ });
+
+ const resp = await requestApp('/api/aliases/source', authedJson(adminSession, 'PATCH', { alias: 'taken' }));
+ assertEquals(resp.status, 409);
+ const body = (await resp.json()) as { error: { type: string; message: string } };
+ assertEquals(body.error.type, 'conflict');
+
+ // Both rows untouched.
+ assertEquals((await repo.modelAliases.getByAlias('source'))?.alias, 'source');
+ assertEquals((await repo.modelAliases.getByAlias('taken'))?.alias, 'taken');
+});
+
+test('PATCH /api/aliases/:alias treats body.alias === path as a no-op rename', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.save({
+ alias: 'same-name',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: [],
+ rules: {},
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ createdAt: 1_700_000_000,
+ });
+
+ const resp = await requestApp('/api/aliases/same-name', authedJson(adminSession, 'PATCH', {
+ alias: 'same-name',
+ targetModelId: 'claude-opus-4-6',
+ }));
+ assertEquals(resp.status, 200);
+ const updated = (await resp.json()) as SerializedModelAlias;
+ assertEquals(updated.alias, 'same-name');
+ assertEquals(updated.target_model_id, 'claude-opus-4-6');
+});
+
test('PATCH /api/aliases/:alias requires admin auth', async () => {
const { repo, adminSession: _adminSession, apiKey } = await setupAppTest();
await repo.modelAliases.save({
diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
index 60f5df604..f81c5df42 100644
--- a/packages/gateway/src/control-plane/schemas.ts
+++ b/packages/gateway/src/control-plane/schemas.ts
@@ -694,10 +694,13 @@ export const createAliasBody = z.object({
displayName: z.string().min(1).optional(),
});
-// PATCH accepts a partial shape. `displayName` is nullable so the operator
-// can clear an existing label back to the synthesized fallback; absent vs.
-// null is meaningful and propagated through to the handler via Object.hasOwn.
+// PATCH accepts a partial shape. `alias` is the row's primary key — when
+// present and different from the path param, the handler renames the row
+// (409 on collision). `displayName` is nullable so the operator can clear
+// an existing label back to the synthesized fallback; absent vs. null is
+// meaningful and propagated through to the handler via Object.hasOwn.
export const updateAliasBody = z.object({
+ alias: aliasNameSchema.optional(),
targetModelId: z.string().min(1).optional(),
upstreamIds: upstreamIdsSchema.optional(),
rules: aliasRulesSchema.optional(),
diff --git a/packages/gateway/src/repo/memory.ts b/packages/gateway/src/repo/memory.ts
index c424426ee..d7492938f 100644
--- a/packages/gateway/src/repo/memory.ts
+++ b/packages/gateway/src/repo/memory.ts
@@ -948,6 +948,16 @@ export class MemoryModelAliasesRepo implements ModelAliasesRepo {
return Promise.resolve();
}
+ rename(oldAlias: string, newAlias: string): Promise<{ ok: true } | { ok: false; reason: 'duplicate' | 'notFound' }> {
+ if (oldAlias === newAlias) return Promise.resolve({ ok: true });
+ if (this.rows.has(newAlias)) return Promise.resolve({ ok: false, reason: 'duplicate' });
+ const existing = this.rows.get(oldAlias);
+ if (!existing) return Promise.resolve({ ok: false, reason: 'notFound' });
+ this.rows.delete(oldAlias);
+ this.rows.set(newAlias, { ...existing, alias: newAlias });
+ return Promise.resolve({ ok: true });
+ }
+
delete(alias: string): Promise<{ deleted: boolean }> {
return Promise.resolve({ deleted: this.rows.delete(alias) });
}
diff --git a/packages/gateway/src/repo/sql.ts b/packages/gateway/src/repo/sql.ts
index 044f79aae..75f814178 100644
--- a/packages/gateway/src/repo/sql.ts
+++ b/packages/gateway/src/repo/sql.ts
@@ -35,7 +35,7 @@ import type {
UsersRepo,
} from './types.ts';
import { serializeStoredConfig, serializeStoredState } from './upstream-json.ts';
-import { deleteAlias, getAliasByName, insertAlias, loadAllAliases, saveAlias } from '../control-plane/model-aliases/repo.ts';
+import { deleteAlias, getAliasByName, insertAlias, loadAllAliases, renameAlias, saveAlias } from '../control-plane/model-aliases/repo.ts';
import type { ModelAlias } from '../control-plane/model-aliases/types.ts';
import { latencyBucketForMs } from '../shared/performance-histogram.ts';
import { generateSessionToken } from '../shared/session-tokens.ts';
@@ -1641,6 +1641,10 @@ class SqlModelAliasesRepo implements ModelAliasesRepo {
return saveAlias(this.db, alias);
}
+ rename(oldAlias: string, newAlias: string): Promise<{ ok: true } | { ok: false; reason: 'duplicate' | 'notFound' }> {
+ return renameAlias(this.db, oldAlias, newAlias);
+ }
+
delete(alias: string): Promise<{ deleted: boolean }> {
return deleteAlias(this.db, alias);
}
diff --git a/packages/gateway/src/repo/types.ts b/packages/gateway/src/repo/types.ts
index 41190352a..7d10f90ca 100644
--- a/packages/gateway/src/repo/types.ts
+++ b/packages/gateway/src/repo/types.ts
@@ -349,6 +349,11 @@ export interface ModelAliasesRepo {
// UPSERT — used by the PATCH update path; preserves created_at on re-save
// and bumps updated_at.
save(alias: ModelAlias): Promise;
+ // Updates the PK in place. Returns `notFound` when the source row is
+ // missing, `duplicate` when the destination name already exists; the
+ // route layer maps those to 404 / 409. SQLite (and D1) permit UPDATEing
+ // a PRIMARY KEY column.
+ rename(oldAlias: string, newAlias: string): Promise<{ ok: true } | { ok: false; reason: 'duplicate' | 'notFound' }>;
// Returns whether a row was actually removed; routes treat false as 404.
delete(alias: string): Promise<{ deleted: boolean }>;
}
From 7763ad9a2941b029de5c7754861fc74dad29635d Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 12:23:24 +0800
Subject: [PATCH 031/170] fix(ui): dark theme combobox component with creatable
values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The alias edit dialog previously paired `<Input>` with HTML `<datalist>`
for its enum-ish fields (effort, summary, service tier, anthropic beta).
Browsers render the `<datalist>` popover with a white background and
dark text — jarring inside the dashboard's dark theme, and visually
inconsistent with the rest of the project's reka-ui-backed dropdowns.
New `<Combobox>` wraps reka-ui's `ComboboxRoot` with the same surface
palette as `Select.vue` and `TagCombobox.vue`. A synthesized
"Use '<query>'" row keeps free-form values selectable via keyboard:
reka's Combobox only registers items present in the DOM, so a typed
value with no matching suggestion needs an explicit committable row to
flow through the arrow-keys + Enter path. The component also watches
the typed query and writes it to the model on every keystroke so a
submit that fires without a prior selection still carries the latest
text — alias rule fields pass through to upstream verbatim and the
gateway intentionally does not enum-gate them.
---
packages/ui/src/Combobox.vue | 159 +++++++++++++++++++++++++++++++++++
packages/ui/src/index.ts | 1 +
2 files changed, 160 insertions(+)
create mode 100644 packages/ui/src/Combobox.vue
diff --git a/packages/ui/src/Combobox.vue b/packages/ui/src/Combobox.vue
new file mode 100644
index 000000000..6526888a4
--- /dev/null
+++ b/packages/ui/src/Combobox.vue
@@ -0,0 +1,159 @@
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+
+ {{ emptyText }}
+
+
+
+
+
+
+ Use "{{ trimmedQuery }} "
+
+
+
+
+
+
+
+ {{ item.label }}
+ {{ item.value }}
+
+
+
+
+
+
+
diff --git a/packages/ui/src/index.ts b/packages/ui/src/index.ts
index afb922da4..4f9afa695 100644
--- a/packages/ui/src/index.ts
+++ b/packages/ui/src/index.ts
@@ -3,6 +3,7 @@ export { default as Button } from './Button.vue';
export { default as Card } from './Card.vue';
export { default as Checkbox } from './Checkbox.vue';
export { default as Code } from './Code.vue';
+export { default as Combobox } from './Combobox.vue';
export { default as Dialog } from './Dialog.vue';
export { default as Input } from './Input.vue';
export { default as NumberField } from './NumberField.vue';
From a5e729ada5438ca34e0f0b993e15fd8619d9e64b Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 12:23:42 +0800
Subject: [PATCH 032/170] refactor(web): flatten alias edit dialog and trim
AliasRow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
PR feedback on the alias editor:
- The alias name was a disabled field with a "delete and recreate to
rename" hint. Operators wanted in-place rename. The input is now
editable and the save handler PATCHes the row at its *original* PK
with `body.alias = <new name>`; the backend route maps that to a rename.
- Alias name and Target model id share a two-column first row (mirrors
the CustomConfigPanel `grid grid-cols-1 gap-3 sm:grid-cols-2`
pattern) so the header reads at a glance.
- On-conflict is no longer a label-only Select. Each option carries a
one-line explanation rendered through Select's `description` slot —
same pattern as Auth Style in CustomConfigPanel. Replaces the cryptic
`real-only — alias hidden when target id collides` style copy with
"Real model wins" / "Alias replaces real" plus the operational
consequence underneath.
- The Reasoning section dropped its inner glass card and its
None/Effort/Budget/Adaptive facet radio. Every input is now visible
simultaneously; the wire schema already permits all four facets to
coexist (apply layer has adaptive-first precedence). Forcing
mutual-exclusivity at the UI level meant operators had to nuke an
existing knob before setting another, which fought the actual
workflow.
- Every enum-ish field switched from HTML `<datalist>` to the new
`<Combobox>` so the popover finally matches the dark theme.
AliasRow loses its `on_conflict` badge: the row no longer prints
`real-only` / `alias-only` / `both-…` as a coloured chip — operator
feedback was that the inline label was noise and the same setting is
clearly visible inside the edit dialog. Upstream-id pills now render
in the row when the alias whitelists upstreams, replacing the
previously implicit "this alias is scoped" signal.
---
.../components/alias-edit/AliasEditDialog.vue | 235 +++++++++---------
.../alias-edit/AliasEditDialog_test.ts | 42 +++-
apps/web/src/components/settings/AliasRow.vue | 26 +-
.../src/components/settings/AliasRow_test.ts | 19 ++
4 files changed, 188 insertions(+), 134 deletions(-)
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index ce254521c..7b238d007 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -1,10 +1,10 @@
@@ -226,36 +242,36 @@ const reasoningModeOptions: { value: ReasoningMode; label: string }[] = [
{{ saveError }}
-
-
-
Alias name
-
-
Alias names are the primary key and cannot be changed; delete and recreate to rename.
+
+
+ Alias name
+
-
-
Display name (optional)
-
+
+ Target model id
+
-
-
Target model id
-
-
- {{ opt.label }}
-
+
+ Display name (optional)
+
-
-
Upstreams (leave empty to allow any upstream that serves the target)
+
+ Upstreams (leave empty to allow any upstream that serves the target)
-
-
-
On conflict
-
+
+
+
On conflict
+
+
+ {{ option.explanation }}
+
+
@@ -264,34 +280,26 @@ const reasoningModeOptions: { value: ReasoningMode; label: string }[] = [
-
+
Reasoning
-
-
-
- {{ opt.label }}
-
-
-
-
Effort
-
-
-
-
-
Target supports: {{ effortSuggestions.join(', ') }}
+
+
Effort
+
+
Target supports: {{ effortSuggestions.join(', ') }}
-
-
Budget tokens
-
-
+
+
Budget tokens
+
+
Target range:
min {{ budgetMin }}
,
@@ -299,39 +307,36 @@ const reasoningModeOptions: { value: ReasoningMode; label: string }[] = [
-
- Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.
+
+ Summary
+
-
-
Reasoning summary (optional)
-
-
-
-
+
+
+
+ Adaptive reasoning
+
+
+ Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.
+
-
-
-
-
- Verbosity
-
-
-
-
+
+
+
+
+ Verbosity
+
-
-
Service tier
-
-
-
-
+
+ Service tier
+
-
-
Anthropic beta headers (comma- or Enter-separated tokens)
+
+ Anthropic beta headers (comma- or Enter-separated tokens)
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
index 976a24a18..bd6870b2d 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
+++ b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
@@ -112,7 +112,7 @@ test('AliasEditDialog (create mode) posts a payload matching the form state', as
});
});
-test('AliasEditDialog (edit mode) pre-fills the form and PATCHes the diff', async () => {
+test('AliasEditDialog (edit mode) pre-fills the form and PATCHes the merged shape', async () => {
const { default: AliasEditDialog } = await import('./AliasEditDialog.vue');
const open = ref(true);
const record: ModelAlias = {
@@ -132,10 +132,10 @@ test('AliasEditDialog (edit mode) pre-fills the form and PATCHes the diff', asyn
template: '
',
}));
- // Alias name input should be disabled in edit mode (PK is immutable).
+ // Alias name input is editable in edit mode — the PK can now be renamed.
const aliasInput = wrapper.find('input[placeholder="codex-auto-review"]');
expect(aliasInput.exists()).toBe(true);
- expect((aliasInput.element as HTMLInputElement).disabled).toBe(true);
+ expect((aliasInput.element as HTMLInputElement).disabled).toBe(false);
expect((aliasInput.element as HTMLInputElement).value).toBe('opus-xhigh');
// Display name pre-filled.
@@ -157,6 +157,7 @@ test('AliasEditDialog (edit mode) pre-fills the form and PATCHes the diff', asyn
const args = patchAliasMock.mock.calls[0]![0];
expect(args.param.alias).toBe('opus-xhigh');
expect(args.json).toMatchObject({
+ alias: 'opus-xhigh',
targetModelId: 'gpt-5.4',
upstreamIds: ['up_anth'],
rules: { reasoning: { effort: 'xhigh' } },
@@ -165,3 +166,38 @@ test('AliasEditDialog (edit mode) pre-fills the form and PATCHes the diff', asyn
displayName: 'Opus XHigh',
});
});
+
+test('AliasEditDialog (edit mode) PATCHes the original alias when the operator renames it', async () => {
+ const { default: AliasEditDialog } = await import('./AliasEditDialog.vue');
+ const open = ref(true);
+ const record: ModelAlias = {
+ alias: 'opus-xhigh',
+ target_model_id: 'claude-opus-4-6',
+ upstream_ids: [],
+ rules: {},
+ visible_in_models_list: true,
+ on_conflict: 'real-only',
+ display_name: null,
+ created_at: 1_700_000_000,
+ };
+
+ const wrapper = mount(defineComponent({
+ components: { AliasEditDialog },
+ setup() { return { open, record }; },
+ template: '
',
+ }));
+
+ const aliasInput = wrapper.find('input[placeholder="codex-auto-review"]');
+ await aliasInput.setValue('opus-renamed');
+
+ const saveBtn = wrapper.findAll('button').find(b => b.text() === 'Save');
+ await saveBtn!.trigger('click');
+ await new Promise(r => setTimeout(r, 0));
+
+ expect(patchAliasMock).toHaveBeenCalledTimes(1);
+ const args = patchAliasMock.mock.calls[0]![0];
+ // The PATCH path stays at the row's *original* PK; the rename is requested
+ // via `body.alias`, which the route handler maps to the rename codepath.
+ expect(args.param.alias).toBe('opus-xhigh');
+ expect(args.json).toMatchObject({ alias: 'opus-renamed' });
+});
diff --git a/apps/web/src/components/settings/AliasRow.vue b/apps/web/src/components/settings/AliasRow.vue
index 1f8a684d1..b00f36348 100644
--- a/apps/web/src/components/settings/AliasRow.vue
+++ b/apps/web/src/components/settings/AliasRow.vue
@@ -20,30 +20,24 @@ defineEmits<{
const labelText = computed(() => props.alias.display_name ?? props.alias.alias);
const badges = computed(() => formatAliasRuleBadges(props.alias.rules));
-
-const onConflictBadgeClass = computed(() => {
- switch (props.alias.on_conflict) {
- case 'alias-only': return 'border-accent-violet/30 bg-accent-violet/10 text-accent-violet';
- case 'real-only': return 'border-white/10 bg-white/5 text-gray-400';
- case 'both-real-first':
- case 'both-alias-first': return 'border-accent-cyan/30 bg-accent-cyan/10 text-accent-cyan';
- }
-});
-
{{ alias.on_conflict }}
-
- {{ labelText }}
- {{ alias.alias }}
+ {{ alias.alias }}
+ {{ labelText }}
→ {{ alias.target_model_id }}
+
+ {{ id }}
+
+
{
expect(wrapper.text()).toContain('claude-opus-4-6');
});
+ test('does not render the on_conflict label as a badge', () => {
+ // The row used to surface `real-only` / `alias-only` as a coloured badge.
+ // Operator feedback was that the inline label was noisy and the same
+ // information lives inside the edit dialog. Asserting absence here pins
+ // the regression — the words must not slip back into the row template.
+ const wrapper = mount(AliasRow, { props: { alias: baseAlias } });
+ expect(wrapper.text()).not.toContain('real-only');
+ expect(wrapper.text()).not.toContain('alias-only');
+ });
+
+ test('renders upstream-id pills when the alias whitelists upstreams', () => {
+ const wrapper = mount(AliasRow, {
+ props: { alias: { ...baseAlias, upstream_ids: ['up_anth', 'up_oai'] } },
+ });
+ const text = wrapper.text();
+ expect(text).toContain('up_anth');
+ expect(text).toContain('up_oai');
+ });
+
test('falls back to alias name when display_name is null', () => {
const wrapper = mount(AliasRow, { props: { alias: { ...baseAlias, display_name: null } } });
// alias id appears twice (label fallback + the small font-mono id), but the
From ff479e471ae7926d299a362e5b6d92a0c11c0c57 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 12:57:44 +0800
Subject: [PATCH 033/170] feat(ui): combobox auto-opens on input focus
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Operators expected the popover to appear the moment focus lands in the
text input — tab into Effort, suggestions show up — instead of the
chevron-click-only pattern. reka-ui already supports this through
`open-on-focus` on `ComboboxRoot`; flip it on at the wrapper level so
every consumer (alias edit dialog, future settings forms) benefits at
once and the existing chevron-click toggle path stays intact.
---
packages/ui/src/Combobox.vue | 1 +
1 file changed, 1 insertion(+)
diff --git a/packages/ui/src/Combobox.vue b/packages/ui/src/Combobox.vue
index 6526888a4..970f16ce9 100644
--- a/packages/ui/src/Combobox.vue
+++ b/packages/ui/src/Combobox.vue
@@ -89,6 +89,7 @@ const commitTyped = async () => {
v-model:open="open"
:disabled="disabled"
:display-value="(v: string) => v"
+ open-on-focus
>
From 5a257529c633981756748133e79e8607a9b36d1b Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 12:57:59 +0800
Subject: [PATCH 034/170] feat(protocols): move composeAliasDisplayName to
protocols/common
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The web dashboard wants to compute the same synthesized alias display
name the gateway uses (for the live placeholder under the Display name
input — show the operator what the fallback label would be). The
function previously lived in
`packages/gateway/src/control-plane/model-aliases/display.ts`, which
the SPA cannot import: per the architecture rules `apps/web` only
depends on `@floway-dev/ui`, `@floway-dev/proxy` (via its narrow subpath
exports), and type-imports from `@floway-dev/gateway/app-type`.
Move the function next to `formatAliasRuleBadges` in
`protocols/src/common/models.ts` — same audience, same rule-shape
input. The type changes from gateway-local `ModelAliasRules` to the
`PublicModelAliasedFrom['rules']` shape already declared in that file;
the two were structurally identical. Gateway call site swaps its
import to `@floway-dev/protocols/common`. The unit suite moves
alongside the function.
---
.../control-plane/model-aliases/display.ts | 34 -------------------
.../src/data-plane/models/alias-listing.ts | 3 +-
packages/protocols/src/common/models.ts | 33 ++++++++++++++++++
.../src/common/models_alias-display_test.ts} | 2 +-
4 files changed, 35 insertions(+), 37 deletions(-)
delete mode 100644 packages/gateway/src/control-plane/model-aliases/display.ts
rename packages/{gateway/src/control-plane/model-aliases/display_test.ts => protocols/src/common/models_alias-display_test.ts} (98%)
diff --git a/packages/gateway/src/control-plane/model-aliases/display.ts b/packages/gateway/src/control-plane/model-aliases/display.ts
deleted file mode 100644
index f831273d8..000000000
--- a/packages/gateway/src/control-plane/model-aliases/display.ts
+++ /dev/null
@@ -1,34 +0,0 @@
-import type { ModelAliasRules } from './types.ts';
-
-// Compose the alias-local display name — what the operator named the alias
-// (when set) or a synthesized target + rules summary. Independent of which
-// upstream is surfacing the alias; the prefixed listing form prepends the
-// upstream display name at the call site, mirroring the real-model path in
-// `registry.ts`.
-//
-// The synthesized form's parenthesized rules suffix uses the compact
-// `value label` wording so it fits alongside the target name in narrow
-// listings — the dashboard's per-badge view uses `formatAliasRuleBadges`
-// for the self-describing `label: value` form. `anthropicBeta` tokens are
-// sorted so two operators carrying the same set in different orders see
-// the same label.
-export const composeAliasDisplayName = (input: {
- aliasDisplayName?: string;
- targetDisplayName: string;
- rules: ModelAliasRules;
-}): string => {
- if (input.aliasDisplayName !== undefined) return input.aliasDisplayName;
- const parts: string[] = [];
- const { rules } = input;
- if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`);
- if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`);
- if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning');
- if (rules.reasoning?.summary !== undefined) parts.push(`${rules.reasoning.summary} summary`);
- if (rules.verbosity !== undefined) parts.push(`${rules.verbosity} verbosity`);
- if (rules.serviceTier !== undefined) parts.push(`${rules.serviceTier} tier`);
- if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
- parts.push([...rules.anthropicBeta].sort().join('/'));
- }
- const suffix = parts.length > 0 ? ` (${parts.join(', ')})` : '';
- return `${input.targetDisplayName}${suffix}`;
-};
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index e9762ed35..ea3fca570 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -1,7 +1,6 @@
-import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts';
import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
import { unionEndpoints } from '../providers/registry.ts';
-import { kindForEndpoints, type PublicModel } from '@floway-dev/protocols/common';
+import { composeAliasDisplayName, kindForEndpoints, type PublicModel } from '@floway-dev/protocols/common';
import type { ModelProviderInstance, ProviderModelRecord, ResolvedModel, UpstreamModel } from '@floway-dev/provider';
// One emission slot for an alias: a (provider, addressable form) pair where
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index 0fb869db9..e0d92c49a 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -186,6 +186,39 @@ export const formatAliasRuleBadges = (rules: PublicModelAliasedFrom['rules']): A
return out;
};
+// Compose the alias-local display name — what the operator named the alias
+// (when set) or a synthesized target + rules summary. Independent of which
+// upstream is surfacing the alias; the prefixed listing form prepends the
+// upstream display name at the call site, mirroring the real-model path in
+// the gateway's provider registry.
+//
+// The synthesized form's parenthesized rules suffix uses the compact
+// `value label` wording so it fits alongside the target name in narrow
+// listings — the dashboard's per-badge view uses `formatAliasRuleBadges`
+// for the self-describing `label: value` form. `anthropicBeta` tokens are
+// sorted so two operators carrying the same set in different orders see
+// the same label.
+export const composeAliasDisplayName = (input: {
+ aliasDisplayName?: string;
+ targetDisplayName: string;
+ rules: PublicModelAliasedFrom['rules'];
+}): string => {
+ if (input.aliasDisplayName !== undefined) return input.aliasDisplayName;
+ const parts: string[] = [];
+ const { rules } = input;
+ if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`);
+ if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`);
+ if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning');
+ if (rules.reasoning?.summary !== undefined) parts.push(`${rules.reasoning.summary} summary`);
+ if (rules.verbosity !== undefined) parts.push(`${rules.verbosity} verbosity`);
+ if (rules.serviceTier !== undefined) parts.push(`${rules.serviceTier} tier`);
+ if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
+ parts.push([...rules.anthropicBeta].sort().join('/'));
+ }
+ const suffix = parts.length > 0 ? ` (${parts.join(', ')})` : '';
+ return `${input.targetDisplayName}${suffix}`;
+};
+
export interface PublicModelsResponse {
// OpenAI container
object: 'list';
diff --git a/packages/gateway/src/control-plane/model-aliases/display_test.ts b/packages/protocols/src/common/models_alias-display_test.ts
similarity index 98%
rename from packages/gateway/src/control-plane/model-aliases/display_test.ts
rename to packages/protocols/src/common/models_alias-display_test.ts
index 40dbd2fec..21473a3a4 100644
--- a/packages/gateway/src/control-plane/model-aliases/display_test.ts
+++ b/packages/protocols/src/common/models_alias-display_test.ts
@@ -1,6 +1,6 @@
import { describe, expect, test } from 'vitest';
-import { composeAliasDisplayName } from './display.ts';
+import { composeAliasDisplayName } from './models.ts';
describe('composeAliasDisplayName', () => {
test('uses alias displayName when set, suppressing the rules summary', () => {
From 39ba1873cae7e672480d8190298031f92629f5c4 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 12:58:11 +0800
Subject: [PATCH 035/170] refactor(web): alias edit dialog second-round polish
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Operator-feedback pass on the dialog:
- Visible-in-/v1/models moved from a checkbox sharing the on-conflict
row to a Switch in the footer, left of Cancel — matches the global
pattern (CustomConfigPanel fetch toggle) and frees the row above
for a single-purpose layout.
- On conflict gets its own full-width row plus a one-line helper
underneath that mirrors the selected option's explanation. Operators
shouldn't have to re-open the dropdown to remember which mode is
active; the same `explanation` field that powers the popover
description now also sits in the trigger's helper line.
- Reasoning section flattened: no `REASONING` heading, no inner card
wrapper, the four facets sit in two two-column rows
(Effort / Budget, Adaptive / Summary). Adaptive switches from
Checkbox to Switch for visual consistency with the footer toggle.
- Display name placeholder now demonstrates the synthesized fallback
live. When the target id is empty the dialog shows the teaching
example `GPT-5.5 (xhigh effort, fast speed)` to match the new
`gpt-5.5-xhigh-fast` / `gpt-5.5` placeholders on the alias and
target inputs — the trio communicates the operator pattern at a
glance. Once a target is picked we feed the live form state through
`composeAliasDisplayName` (now in @floway-dev/protocols/common) so
the placeholder tracks every rule edit.
Test query selectors that previously matched the static placeholders
shift to the new placeholders; the Display name lookup changes from
placeholder match to value match since the placeholder is now dynamic.
---
.../components/alias-edit/AliasEditDialog.vue | 123 ++++++++++++------
.../alias-edit/AliasEditDialog_test.ts | 18 +--
2 files changed, 96 insertions(+), 45 deletions(-)
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index 7b238d007..926ec889d 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -12,7 +12,8 @@ import { callApi, useApi } from '../../api/client.ts';
import type { ModelAlias, ModelAliasOnConflict } from '../../api/types.ts';
import { useModelsStore } from '../../composables/useModels.ts';
import { useUpstreamsStore } from '../../composables/useUpstreams.ts';
-import { Button, Checkbox, Combobox, Dialog, Input, Select, TagCombobox } from '@floway-dev/ui';
+import { composeAliasDisplayName } from '@floway-dev/protocols/common';
+import { Button, Combobox, Dialog, Input, Select, Switch, TagCombobox } from '@floway-dev/ui';
// Mutable mirror of @floway-dev/protocols ModelAliasRules — the wire shape
// is `readonly` at the contract boundary, but the form mutates it in place
@@ -90,8 +91,10 @@ const upstreamItems = computed(() => (upstreamsStore.upstreams.value ?? []).map(
detail: u.id,
})));
+const targetMatch = computed(() => modelsStore.models.value?.find(m => m.id === targetModelId.value));
+
const targetChat = computed(() => {
- const match = modelsStore.models.value?.find(m => m.id === targetModelId.value);
+ const match = targetMatch.value;
return match && 'chat' in match ? (match as { chat?: { reasoning?: { effort?: { supported: string[] }; budget_tokens?: { min?: number; max?: number }; adaptive?: boolean } } }).chat : undefined;
});
@@ -104,10 +107,10 @@ const SUMMARY_HINTS = ['auto', 'concise', 'detailed', 'omitted'];
const VERBOSITY_HINTS = ['low', 'medium', 'high'];
const SERVICE_TIER_HINTS = ['auto', 'default', 'flex', 'scale', 'priority', 'standard_only'];
-// Each on-conflict option carries a one-line explanation surfaced in the
-// Select popover so an operator picks by what happens at request time, not
-// by guessing what `real-only` / `alias-only` mean. Mirrors the Auth Style
-// pattern in CustomConfigPanel.
+// Each on-conflict option carries a one-line explanation surfaced both in
+// the Select popover and in a helper line below the trigger so an operator
+// picks by what happens at request time, not by guessing what `real-only`
+// / `alias-only` mean. Mirrors the Auth Style pattern in CustomConfigPanel.
interface OnConflictOption {
value: ModelAliasOnConflict;
label: string;
@@ -137,6 +140,52 @@ const onConflictOptions: OnConflictOption[] = [
},
];
+const selectedOnConflict = computed(() => onConflictOptions.find(o => o.value === onConflict.value));
+
+// --- display name placeholder ---
+//
+// Shows the operator what the synthesized fallback would look like when
+// the Display name field is left blank. Before a target is picked we hold
+// a teaching example so the three placeholders (alias / target / display
+// name) read as a coherent trio; once a target is set we compute the real
+// synthesized label off the current form state so the placeholder tracks
+// every rule edit live.
+const FALLBACK_PLACEHOLDER_EXAMPLE = 'GPT-5.5 (xhigh effort, fast speed)';
+
+// Mirror of `buildRules` without the validation errors — used purely for
+// the live placeholder so a half-typed budget value (e.g. mid-typing) does
+// not bubble validation text into a UI hint. Invalid intermediate states
+// fall back to the empty rules object.
+const buildRulesForPreview = (): MutableRules => {
+ const rules: MutableRules = {};
+ const reasoning: NonNullable<MutableRules['reasoning']> = {};
+ const effort = reasoningEffort.value.trim();
+ if (effort !== '') reasoning.effort = effort;
+ const budgetRaw = reasoningBudgetTokens.value.trim();
+ if (budgetRaw !== '' && /^\d+$/.test(budgetRaw)) reasoning.budgetTokens = Number(budgetRaw);
+ if (reasoningAdaptive.value) reasoning.adaptive = true;
+ const summary = reasoningSummary.value.trim();
+ if (summary !== '') reasoning.summary = summary;
+ if (Object.keys(reasoning).length > 0) rules.reasoning = reasoning;
+ const verb = verbosity.value.trim();
+ if (verb !== '') rules.verbosity = verb;
+ const tier = serviceTier.value.trim();
+ if (tier !== '') rules.serviceTier = tier;
+ const betas = anthropicBeta.value.map(s => s.trim()).filter(s => s !== '');
+ if (betas.length > 0) rules.anthropicBeta = betas;
+ return rules;
+};
+
+const displayNamePlaceholder = computed(() => {
+ const trimmedTarget = targetModelId.value.trim();
+ if (trimmedTarget === '') return FALLBACK_PLACEHOLDER_EXAMPLE;
+ const targetDisplay = targetMatch.value?.display_name ?? trimmedTarget;
+ return composeAliasDisplayName({
+ targetDisplayName: targetDisplay,
+ rules: buildRulesForPreview(),
+ });
+});
+
// --- save ---
const saving = ref(false);
@@ -245,18 +294,18 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
Display name (optional)
-
+
@@ -264,25 +313,17 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
-
-
-
On conflict
-
-
- {{ option.explanation }}
-
-
-
-
-
-
- Visible in /v1/models
-
+
+
On conflict
+
+
+ {{ option.explanation }}
+
+
+
{{ selectedOnConflict.explanation }}
-
-
Reasoning
-
+
Effort
@@ -306,21 +347,23 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
max {{ budgetMax }}
+
+
- Summary
-
-
-
-
-
-
+
+
Adaptive reasoning
-
+
Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.
+
+
+ Summary
+
+
@@ -340,9 +383,15 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
-
-
Save
-
Cancel
+
+
+
+ Visible in /v1/models
+
+
+ Cancel
+ Save
+
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
index bd6870b2d..99574c248 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
+++ b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
@@ -85,11 +85,11 @@ test('AliasEditDialog (create mode) posts a payload matching the form state', as
// Fill the form: alias name + target id are the only required fields for
// the create-mode happy path. Everything else uses its default.
- const aliasInput = wrapper.find('input[placeholder="codex-auto-review"]');
+ const aliasInput = wrapper.find('input[placeholder="gpt-5.5-xhigh-fast"]');
expect(aliasInput.exists()).toBe(true);
await aliasInput.setValue('opus-fast');
- const targetInput = wrapper.find('input[placeholder="gpt-5.4"]');
+ const targetInput = wrapper.find('input[placeholder="gpt-5.5"]');
expect(targetInput.exists()).toBe(true);
await targetInput.setValue('claude-opus-4-6');
@@ -133,17 +133,19 @@ test('AliasEditDialog (edit mode) pre-fills the form and PATCHes the merged shap
}));
// Alias name input is editable in edit mode — the PK can now be renamed.
- const aliasInput = wrapper.find('input[placeholder="codex-auto-review"]');
+ const aliasInput = wrapper.find('input[placeholder="gpt-5.5-xhigh-fast"]');
expect(aliasInput.exists()).toBe(true);
expect((aliasInput.element as HTMLInputElement).disabled).toBe(false);
expect((aliasInput.element as HTMLInputElement).value).toBe('opus-xhigh');
- // Display name pre-filled.
- const displayInput = wrapper.find('input[placeholder="Codex Auto Review"]');
- expect((displayInput.element as HTMLInputElement).value).toBe('Opus XHigh');
+ // Display name pre-filled — its placeholder is dynamic now (mirrors the
+ // synthesized fallback) so we locate it by its current value instead.
+ const allInputs = wrapper.findAll('input');
+ const displayInput = allInputs.find(i => (i.element as HTMLInputElement).value === 'Opus XHigh');
+ expect(displayInput).toBeDefined();
// Target id pre-filled.
- const targetInput = wrapper.find('input[placeholder="gpt-5.4"]');
+ const targetInput = wrapper.find('input[placeholder="gpt-5.5"]');
expect((targetInput.element as HTMLInputElement).value).toBe('claude-opus-4-6');
// Change one field and submit; PATCH carries the merged shape (every editable
@@ -187,7 +189,7 @@ test('AliasEditDialog (edit mode) PATCHes the original alias when the operator r
template: '
',
}));
- const aliasInput = wrapper.find('input[placeholder="codex-auto-review"]');
+ const aliasInput = wrapper.find('input[placeholder="gpt-5.5-xhigh-fast"]');
await aliasInput.setValue('opus-renamed');
const saveBtn = wrapper.findAll('button').find(b => b.text() === 'Save');
From 1aa0a55d5aa36737bebbd60eb0a5980f9e27cdd6 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 13:16:20 +0800
Subject: [PATCH 036/170] feat(web): add 'fast' to Service tier combobox
suggestions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
After main's cross-protocol service_tier ↔ speed:'fast' bridge, 'fast' is
a meaningful value on both OpenAI- and Anthropic-bound requests — include
it in the dropdown alongside priority/flex/standard_only so the operator
does not have to type it freeform.
---
apps/web/src/components/alias-edit/AliasEditDialog.vue | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index 926ec889d..8d0c5844d 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -105,7 +105,7 @@ const adaptiveSupported = computed(() => targetChat.value?.reasoning?.adaptive =
const SUMMARY_HINTS = ['auto', 'concise', 'detailed', 'omitted'];
const VERBOSITY_HINTS = ['low', 'medium', 'high'];
-const SERVICE_TIER_HINTS = ['auto', 'default', 'flex', 'scale', 'priority', 'standard_only'];
+const SERVICE_TIER_HINTS = ['auto', 'default', 'flex', 'scale', 'priority', 'fast', 'standard_only'];
// Each on-conflict option carries a one-line explanation surfaced both in
// the Select popover and in a helper line below the trigger so an operator
From db15d10836a7923252377878cff942b1172fcc33 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 13:17:39 +0800
Subject: [PATCH 037/170] refactor(web): Adaptive reasoning gets a section
label; switch reads "Enable"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Match the labelled-field rhythm of the surrounding inputs — the bare
switch with "Adaptive reasoning" inline read inconsistently next to the
labelled Effort / Summary / Verbosity rows. Now: caption "Adaptive
reasoning" above, Switch + "Enable" below.
---
apps/web/src/components/alias-edit/AliasEditDialog.vue | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index 8d0c5844d..335ed789e 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -351,9 +351,10 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
+
Adaptive reasoning
- Adaptive reasoning
+ Enable
Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.
From 66360bfe714038d9993ae04035bf1aaa603e6bc1 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 13:23:02 +0800
Subject: [PATCH 038/170] refactor(web): always-show combobox + drop effort
hint + footer warnings group
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Combobox now keeps every suggestion visible while ranking matches at the
top — operators see what else is on the menu instead of typing into an
empty dropdown when the partial query doesn't match.
Drop the inline "Target supports: …" effort hint and the "Adaptive
reasoning" amber callout. A new fieldWarnings collector renders all
target-misalignment notes in one amber block above the dialog footer —
one row per condition, labelled with the field. The first entry is the
adaptive-not-supported case; the same shape will house future per-rule
warnings without crowding the form area.
---
.../components/alias-edit/AliasEditDialog.vue | 27 +++++++++++++++---
packages/ui/src/Combobox.vue | 28 +++++++++++++------
2 files changed, 43 insertions(+), 12 deletions(-)
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index 335ed789e..ac441af0f 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -103,6 +103,19 @@ const budgetMin = computed(() => targetChat.value?.reasoning?.budget_tokens?.min
const budgetMax = computed(() => targetChat.value?.reasoning?.budget_tokens?.max);
const adaptiveSupported = computed(() => targetChat.value?.reasoning?.adaptive === true);
+// Collected at the bottom of the dialog so a misalignment between the
+// rule and the target model's advertised capability stays visible without
+// crowding the per-field area. Add a new entry per condition; the labels
+// double as the field name an operator should look up to fix it.
+interface FieldWarning { field: string; message: string }
+const fieldWarnings = computed(() => {
+ const out: FieldWarning[] = [];
+ if (reasoningAdaptive.value && !adaptiveSupported.value) {
+ out.push({ field: 'Adaptive reasoning', message: 'Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.' });
+ }
+ return out;
+});
+
const SUMMARY_HINTS = ['auto', 'concise', 'detailed', 'omitted'];
const VERBOSITY_HINTS = ['low', 'medium', 'high'];
const SERVICE_TIER_HINTS = ['auto', 'default', 'flex', 'scale', 'priority', 'fast', 'standard_only'];
@@ -327,7 +340,6 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
Effort
-
Target supports: {{ effortSuggestions.join(', ') }}
@@ -356,9 +368,6 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
Enable
-
- Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.
-
@@ -384,6 +393,16 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
+
+
+ {{ warning.field }}: {{ warning.message }}
+
+
+
diff --git a/packages/ui/src/Combobox.vue b/packages/ui/src/Combobox.vue
index 970f16ce9..006d9e4f3 100644
--- a/packages/ui/src/Combobox.vue
+++ b/packages/ui/src/Combobox.vue
@@ -59,12 +59,24 @@ watch(value, v => { if (v !== query.value) query.value = v; });
// same value.
watch(query, q => { value.value = q; });
-// Filter suggestions by the typed query against either value or label.
-// When the query exactly matches an item we still show it (the user can
-// re-pick the same row), but we hide a synthesized "create" row in that
-// case — it would be a no-op.
-const filteredItems = computed(() => normalizedItems.value.filter(item =>
- query.value === '' || contains(item.label ?? item.value, query.value) || contains(item.value, query.value)));
+// Always show every suggestion; rank items whose label or value contains the
+// typed query above the rest, preserving the original order within each
+// group. Empty query keeps the configured order untouched. The operator
+// always sees the full set of presets — typing narrows attention to the
+// top of the list without hiding the alternatives.
+const orderedItems = computed<Item[]>(() => {
+ if (query.value === '') return normalizedItems.value;
+ const matches: Item[] = [];
+ const rest: Item[] = [];
+ for (const item of normalizedItems.value) {
+ if (contains(item.label ?? item.value, query.value) || contains(item.value, query.value)) {
+ matches.push(item);
+ } else {
+ rest.push(item);
+ }
+ }
+ return [...matches, ...rest];
+});
const trimmedQuery = computed(() => query.value.trim());
const hasExactMatch = computed(() => normalizedItems.value.some(item => item.value === trimmedQuery.value));
@@ -123,7 +135,7 @@ const commitTyped = async () => {
class="z-50 max-h-72 w-[--reka-combobox-trigger-width] overflow-hidden rounded-[10px] border border-white/[0.06] bg-surface-800 text-white shadow-xl"
>
-
+
{{ emptyText }}
@@ -139,7 +151,7 @@ const commitTyped = async () => {
Use "{{ trimmedQuery }} "
Date: Fri, 26 Jun 2026 13:29:07 +0800
Subject: [PATCH 039/170] feat(web): expand alias warnings (effort/budget
mismatch) + bold field labels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Now surfaces three target-mismatch warnings in the dialog footer:
- Effort value not in the target's advertised supported list
- Budget tokens below the target's minimum or above its maximum
- Adaptive reasoning enabled but the target doesn't advertise adaptive
Each warning leads with the field name in bold so an operator
scanning the dialog jumps straight to the affected control. Values
still pass through verbatim (Goal 2) — warnings are informational,
not blocking.
---
.../components/alias-edit/AliasEditDialog.vue | 40 +++++++++++++++++--
1 file changed, 36 insertions(+), 4 deletions(-)
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index ac441af0f..5028ca35f 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -105,14 +105,46 @@ const adaptiveSupported = computed(() => targetChat.value?.reasoning?.adaptive =
// Collected at the bottom of the dialog so a misalignment between the
// rule and the target model's advertised capability stays visible without
-// crowding the per-field area. Add a new entry per condition; the labels
-// double as the field name an operator should look up to fix it.
+// crowding the per-field area. Each entry tells the operator which field
+// to revisit; the values still flow through to the upstream verbatim
+// (Goal 2: pass through, don't enum-gate) so a warning is informational,
+// not blocking.
interface FieldWarning { field: string; message: string }
const fieldWarnings = computed(() => {
const out: FieldWarning[] = [];
+ const chatReasoning = targetChat.value?.reasoning;
+
+ if (reasoningEffort.value.trim() !== '' && chatReasoning?.effort?.supported) {
+ const value = reasoningEffort.value.trim();
+ const supported = chatReasoning.effort.supported;
+ if (!supported.includes(value)) {
+ out.push({
+ field: 'Effort',
+ message: `"${value}" is not in the target's supported list (${supported.join(', ')}). The rule will still be sent verbatim.`,
+ });
+ }
+ }
+
+ const budgetRaw = reasoningBudgetTokens.value.trim();
+ if (budgetRaw !== '') {
+ const n = Number(budgetRaw);
+ const range = chatReasoning?.budget_tokens;
+ if (Number.isFinite(n) && range) {
+ if (range.min !== undefined && n < range.min) {
+ out.push({ field: 'Budget tokens', message: `${n} is below the target's minimum (${range.min}). The rule will still be sent verbatim.` });
+ } else if (range.max !== undefined && n > range.max) {
+ out.push({ field: 'Budget tokens', message: `${n} is above the target's maximum (${range.max}). The rule will still be sent verbatim.` });
+ }
+ }
+ }
+
if (reasoningAdaptive.value && !adaptiveSupported.value) {
- out.push({ field: 'Adaptive reasoning', message: 'Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.' });
+ out.push({
+ field: 'Adaptive reasoning',
+ message: 'Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.',
+ });
}
+
return out;
});
@@ -399,7 +431,7 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
:key="warning.field"
class="rounded-md border border-amber-500/30 bg-amber-500/10 px-3 py-2 text-xs text-amber-300"
>
- {{ warning.field }}: {{ warning.message }}
+ {{ warning.field }}: {{ warning.message }}
From d8e2b9a41273bf26c3b83a3cd51f7ea4670960b9 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 13:34:28 +0800
Subject: [PATCH 040/170] refactor(web): collapse alias warning cards into one
amber card with multiple lines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Replace one card per warning with a single amber card carrying every
warning paragraph inside — same visual identity, less repeated chrome
when more than one warning fires.
---
apps/web/src/components/alias-edit/AliasEditDialog.vue | 8 ++------
1 file changed, 2 insertions(+), 6 deletions(-)
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index 5028ca35f..db7275abe 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -425,12 +425,8 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
-
-
+
+
{{ warning.field }}: {{ warning.message }}
From e3ecae996696d932cc66f2114907e832a021e9b0 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 13:53:53 +0800
Subject: [PATCH 041/170] feat(protocols): expose aliasedFrom.displayName +
formatAliasRulesInline helper
Two related extensions so alias-aware UIs can render an alias model row
without re-deriving the operator's intent from scratch:
- Add an optional `displayName` field to `PublicModelAliasedFrom`. The
gateway's alias-listing serializer carries it onto the wire only when
the operator explicitly set one; absence means "synthesize from the
target's name + the rules summary." This lets the dashboard show the
operator-named heading on its own line, distinct from the synthesized
fallback that the top-level `display_name` already carries.
- Extract a shared parts builder behind `composeAliasDisplayName` and
expose `formatAliasRulesInline(rules)` as a sibling export. Both
surfaces consume the same per-field wording, so the parenthesized
suffix in the synthesized display name and the standalone summary line
rendered on alias rows can never drift.
Gateway tests assert the new `displayName` round-trips on /v1/models and
/api/models; the existing absence-path tests stay green because the
serializer omits the key for aliases that never had one.
---
.../src/control-plane/models/routes_test.ts | 3 +-
.../src/data-plane/models/alias-listing.ts | 1 +
.../src/data-plane/models/serve_test.ts | 55 ++++++++++++++++
packages/protocols/src/common/models.ts | 62 ++++++++++++-------
.../src/common/models_alias-display_test.ts | 31 +++++++++-
5 files changed, 127 insertions(+), 25 deletions(-)
diff --git a/packages/gateway/src/control-plane/models/routes_test.ts b/packages/gateway/src/control-plane/models/routes_test.ts
index 0be710c36..bab38e182 100644
--- a/packages/gateway/src/control-plane/models/routes_test.ts
+++ b/packages/gateway/src/control-plane/models/routes_test.ts
@@ -144,13 +144,14 @@ test('/api/models appends visible alias entries with aliasedFrom alongside real
await withMockedFetch(modelsFetchHandler, async () => {
const response = await requestApp('/api/models', { headers: { 'x-api-key': apiKey.key } });
assertEquals(response.status, 200);
- const body = (await response.json()) as { data: Array<{ id: string; display_name: string; upstreams: Array<{ kind: string; id: string; name: string }>; aliasedFrom?: { targetModelId: string; rules: Record<string, unknown> } }> };
+ const body = (await response.json()) as { data: Array<{ id: string; display_name: string; upstreams: Array<{ kind: string; id: string; name: string }>; aliasedFrom?: { targetModelId: string; rules: Record<string, unknown>; displayName?: string } }> };
const aliasEntry = body.data.find(model => model.id === 'codex-auto-review');
if (!aliasEntry) throw new Error('expected codex-auto-review alias entry on /api/models');
assertEquals(aliasEntry.display_name, 'Codex Auto Review');
assertEquals(aliasEntry.upstreams, [{ kind: 'custom', id: 'up_custom_models', name: 'Custom Provider' }]);
assertEquals(aliasEntry.aliasedFrom?.targetModelId, 'custom-model');
assertEquals(aliasEntry.aliasedFrom?.rules, { reasoning: { effort: 'low' } });
+ assertEquals(aliasEntry.aliasedFrom?.displayName, 'Codex Auto Review');
assertEquals(body.data.some(model => model.id === 'hidden-alias'), false);
});
});
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index ea3fca570..72c0fdcf7 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -99,6 +99,7 @@ const aliasEmissionToListedModel = (alias: ModelAlias, emission: AliasListingEmi
upstreamIds: alias.upstreamIds,
rules: alias.rules,
onConflict: alias.onConflict,
+ ...(alias.displayName !== undefined ? { displayName: alias.displayName } : {}),
},
};
};
diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index cc5227967..9af4d7981 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -661,6 +661,61 @@ test('/v1/models appends a visible alias with aliasedFrom after the real entries
);
});
+// `displayName` propagates verbatim when the operator set it; absence on the
+// wire (the prior test) means "synthesize from target name + rules summary".
+test('/v1/models forwards the operator-set displayName on the aliasedFrom payload', async () => {
+ const { repo, apiKey } = await setupAppTest();
+
+ (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+ {
+ alias: 'codex-auto-review',
+ targetModelId: 'gpt-5.4',
+ upstreamIds: [],
+ rules: { reasoning: { effort: 'low' } },
+ visibleInModelsList: true,
+ onConflict: 'real-only',
+ displayName: 'Codex Auto Review',
+ createdAt: 1_700_000_000,
+ },
+ ]);
+
+ await repo.upstreams.save(buildCustomUpstreamRecord({
+ id: 'up_oai',
+ name: 'Test OpenAI',
+ sortOrder: 100,
+ config: {
+ baseUrl: 'https://oai.example.com',
+ authStyle: 'bearer',
+ apiKey: 'sk-test',
+ endpoints: { chatCompletions: {} },
+ },
+ }));
+
+ await withMockedFetch(
+ request => {
+ const url = new URL(request.url);
+ if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+ if (url.pathname === '/copilot_internal/v2/token') {
+ return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+ }
+ if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') {
+ return jsonResponse(copilotModels([]));
+ }
+ if (url.pathname === '/v1/models' && url.hostname === 'oai.example.com') {
+ return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
+ }
+ throw new Error(`Unhandled fetch ${request.url}`);
+ },
+ async () => {
+ const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
+ const body = await response.json() as { data: Array<{ id: string; aliasedFrom?: { displayName?: string } }> };
+ const aliasEntry = body.data.find(m => m.id === 'codex-auto-review');
+ if (!aliasEntry) throw new Error('expected codex-auto-review alias entry');
+ assertEquals(aliasEntry.aliasedFrom?.displayName, 'Codex Auto Review');
+ },
+ );
+});
+
test('/v1/models omits aliases marked visibleInModelsList=false', async () => {
const { repo, apiKey } = await setupAppTest();
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index e0d92c49a..9a73c11d5 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -157,16 +157,20 @@ export interface PublicModelAliasedFrom {
anthropicBeta?: readonly string[];
};
onConflict: 'alias-only' | 'real-only' | 'both-real-first' | 'both-alias-first';
+ // Operator-set display name. Absent (undefined) when the operator left the
+ // field blank — alias-aware UIs then synthesize a label from the target's
+ // display name and the inline rules summary instead.
+ displayName?: string;
}
// One badge per rule field on an alias, in a `${label}` / `${label}: ${value}`
// shape the dashboard renders inline next to the model row. Returned in a
// deterministic order so the badge sequence stays stable across surfaces and
// across JSON key arrivals. Boolean toggles render label-only (no colon);
-// every other field renders as `${label}: ${value}`. The gateway's
-// `formatAliasRulesSummary` uses its own labels for the parenthesized
-// display-name suffix — the two surfaces deliberately diverge so the suffix
-// stays compact while the badge view stays self-describing.
+// every other field renders as `${label}: ${value}`. The inline-prose form
+// (`composeAliasDisplayName`'s suffix and `formatAliasRulesInline`) uses its
+// own compact wording — the two surfaces deliberately diverge so the inline
+// summary stays compact while the badge view stays self-describing.
export interface AliasRuleBadge {
label: string;
value?: string;
@@ -186,26 +190,15 @@ export const formatAliasRuleBadges = (rules: PublicModelAliasedFrom['rules']): A
return out;
};
-// Compose the alias-local display name — what the operator named the alias
-// (when set) or a synthesized target + rules summary. Independent of which
-// upstream is surfacing the alias; the prefixed listing form prepends the
-// upstream display name at the call site, mirroring the real-model path in
-// the gateway's provider registry.
-//
-// The synthesized form's parenthesized rules suffix uses the compact
-// `value label` wording so it fits alongside the target name in narrow
-// listings — the dashboard's per-badge view uses `formatAliasRuleBadges`
-// for the self-describing `label: value` form. `anthropicBeta` tokens are
-// sorted so two operators carrying the same set in different orders see
-// the same label.
-export const composeAliasDisplayName = (input: {
- aliasDisplayName?: string;
- targetDisplayName: string;
- rules: PublicModelAliasedFrom['rules'];
-}): string => {
- if (input.aliasDisplayName !== undefined) return input.aliasDisplayName;
+// Inline-prose parts for an alias's rules, in a deterministic order. Each
+// entry uses the compact `value label` wording (e.g. `low effort`,
+// `4096tk reasoning`) so it fits both alongside the target name in narrow
+// listings and on its own as a standalone summary line. The dashboard's
+// per-badge view uses `formatAliasRuleBadges` for the self-describing
+// `label: value` form. `anthropicBeta` tokens are sorted so two operators
+// carrying the same set in different orders see the same label.
+const aliasRulesInlineParts = (rules: PublicModelAliasedFrom['rules']): string[] => {
const parts: string[] = [];
- const { rules } = input;
if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`);
if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`);
if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning');
@@ -215,10 +208,33 @@ export const composeAliasDisplayName = (input: {
if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
parts.push([...rules.anthropicBeta].sort().join('/'));
}
+ return parts;
+};
+
+// Compose the alias-local display name — what the operator named the alias
+// (when set) or a synthesized target + rules summary. Independent of which
+// upstream is surfacing the alias; the prefixed listing form prepends the
+// upstream display name at the call site, mirroring the real-model path in
+// the gateway's provider registry. The parenthesized rules suffix shares
+// its parts with `formatAliasRulesInline` so the two surfaces never drift.
+export const composeAliasDisplayName = (input: {
+ aliasDisplayName?: string;
+ targetDisplayName: string;
+ rules: PublicModelAliasedFrom['rules'];
+}): string => {
+ if (input.aliasDisplayName !== undefined) return input.aliasDisplayName;
+ const parts = aliasRulesInlineParts(input.rules);
const suffix = parts.length > 0 ? ` (${parts.join(', ')})` : '';
return `${input.targetDisplayName}${suffix}`;
};
+// Joined rules summary without the parentheses — what the dashboard's alias
+// row renders on its third line. Empty string when no rule applies; callers
+// should drop the line entirely in that case rather than rendering blank.
+export const formatAliasRulesInline = (rules: PublicModelAliasedFrom['rules']): string => {
+ return aliasRulesInlineParts(rules).join(', ');
+};
+
export interface PublicModelsResponse {
// OpenAI container
object: 'list';
diff --git a/packages/protocols/src/common/models_alias-display_test.ts b/packages/protocols/src/common/models_alias-display_test.ts
index 21473a3a4..7c7d4c49c 100644
--- a/packages/protocols/src/common/models_alias-display_test.ts
+++ b/packages/protocols/src/common/models_alias-display_test.ts
@@ -1,6 +1,6 @@
import { describe, expect, test } from 'vitest';
-import { composeAliasDisplayName } from './models.ts';
+import { composeAliasDisplayName, formatAliasRulesInline } from './models.ts';
describe('composeAliasDisplayName', () => {
test('uses alias displayName when set, suppressing the rules summary', () => {
@@ -69,3 +69,32 @@ describe('composeAliasDisplayName', () => {
).toBe('GPT-5.4 (low effort, concise summary, high verbosity, flex tier)');
});
});
+
+describe('formatAliasRulesInline', () => {
+ test('returns empty string when no rule applies', () => {
+ expect(formatAliasRulesInline({})).toBe('');
+ });
+
+ test('returns each rule field with the same compact wording as the parenthesized suffix, sans parens', () => {
+ expect(formatAliasRulesInline({ reasoning: { effort: 'low' } })).toBe('low effort');
+ expect(formatAliasRulesInline({ reasoning: { budgetTokens: 4096 } })).toBe('4096tk reasoning');
+ expect(formatAliasRulesInline({ reasoning: { adaptive: true } })).toBe('adaptive reasoning');
+ expect(formatAliasRulesInline({ reasoning: { summary: 'detailed' } })).toBe('detailed summary');
+ });
+
+ test('joins multiple fields with comma in the same order composeAliasDisplayName uses', () => {
+ expect(
+ formatAliasRulesInline({
+ reasoning: { effort: 'low', summary: 'detailed' },
+ verbosity: 'high',
+ serviceTier: 'fast',
+ }),
+ ).toBe('low effort, detailed summary, high verbosity, fast tier');
+ });
+
+ test('sorts anthropicBeta tokens and joins with slashes', () => {
+ expect(
+ formatAliasRulesInline({ anthropicBeta: ['fast-mode-2026-02-01', 'extended-thinking'] }),
+ ).toBe('extended-thinking/fast-mode-2026-02-01');
+ });
+});
From 6045ab8b36d2bb679d31f94c6013044823a29bf3 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 13:54:04 +0800
Subject: [PATCH 042/170] refactor(web): alias model rows render as 3-line text
block in /dashboard/models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The badge cluster the row used to grow for every alias rule made it look
like a real catalog entry with a long list of capabilities, when the
information was actually the alias's own definition. Switching to a
left-justified text block makes the relationship explicit:
- Line 1 — the operator-set display name (omitted when not set, since
the third line + the second line already convey the synthesized
fallback name without redundancy).
- Line 2 — the id mapping `<alias id> → <target model id>`, alias id in
white and target muted so the eye lands on the public name first.
- Line 3 — the inline rules summary from `formatAliasRulesInline`,
omitted when the alias has no rules.
The upstream pills and context/prompt/output limit badges are dropped on
alias rows on purpose — every value there belongs to the target row, and
duplicating it on the alias muddied which limits are policy vs. inherited.
Real-model rows keep the current heading + id + badges layout verbatim.
The Clear button stays on its right; the chat playground below is
unchanged.
---
.../src/components/models/ModelInfoBar.vue | 85 +++++++-------
.../components/models/ModelInfoBar_test.ts | 110 ++++++++++++++++++
2 files changed, 154 insertions(+), 41 deletions(-)
create mode 100644 apps/web/src/components/models/ModelInfoBar_test.ts
diff --git a/apps/web/src/components/models/ModelInfoBar.vue b/apps/web/src/components/models/ModelInfoBar.vue
index a70fa7f93..5e4d52262 100644
--- a/apps/web/src/components/models/ModelInfoBar.vue
+++ b/apps/web/src/components/models/ModelInfoBar.vue
@@ -1,9 +1,11 @@
-
-
{{ model.display_name ?? model.id }}
- {{ model.id }}
-
-
- {{ binding.name }}
-
- context: {{ formatTokenLimit(model.limits.max_context_window_tokens) }}
-
-
- prompt: {{ formatTokenLimit(model.limits.max_prompt_tokens) }}
-
-
- output: {{ formatTokenLimit(model.limits.max_output_tokens) }}
-
- {{ badge.label }}{{ badge.value !== undefined ? `: ${badge.value}` : '' }}
-
+
+
+
{{ model.aliasedFrom.displayName }}
+
+ {{ model.id }}
+ →
+ {{ model.aliasedFrom.targetModelId }}
+
+
{{ rulesInline }}
+
+
+
+
+
{{ model.display_name ?? model.id }}
+ {{ model.id }}
+
+
+ {{ binding.name }}
+
+ context: {{ formatTokenLimit(model.limits.max_context_window_tokens) }}
+
+
+ prompt: {{ formatTokenLimit(model.limits.max_prompt_tokens) }}
+
+
+ output: {{ formatTokenLimit(model.limits.max_output_tokens) }}
+
+
+
diff --git a/apps/web/src/components/models/ModelInfoBar_test.ts b/apps/web/src/components/models/ModelInfoBar_test.ts
new file mode 100644
index 000000000..ac8d62f5a
--- /dev/null
+++ b/apps/web/src/components/models/ModelInfoBar_test.ts
@@ -0,0 +1,110 @@
+// @vitest-environment happy-dom
+
+import { mount } from '@vue/test-utils';
+import { describe, expect, test } from 'vitest';
+
+import ModelInfoBar from './ModelInfoBar.vue';
+import type { ControlPlaneModel } from '../../api/types.ts';
+
+const realModel: ControlPlaneModel = {
+ id: 'gpt-5.4',
+ display_name: 'GPT-5.4',
+ kind: 'chat',
+ limits: { max_context_window_tokens: 200_000, max_output_tokens: 16_384 },
+ upstreams: [{ id: 'up_oai', kind: 'custom', name: 'OpenAI' }],
+};
+
+const aliasModel: ControlPlaneModel = {
+ id: 'codex-auto-review',
+ display_name: 'Codex Auto Review',
+ kind: 'chat',
+ limits: { max_context_window_tokens: 200_000, max_output_tokens: 16_384 },
+ upstreams: [{ id: 'up_oai', kind: 'custom', name: 'OpenAI' }],
+ aliasedFrom: {
+ targetModelId: 'gpt-5.4',
+ upstreamIds: [],
+ rules: { reasoning: { effort: 'low' } },
+ onConflict: 'real-only',
+ displayName: 'Codex Auto Review',
+ },
+};
+
+describe('ModelInfoBar', () => {
+ describe('real-model row (no aliasedFrom)', () => {
+ test('renders the display-name heading + upstream + limit badges', () => {
+ const wrapper = mount(ModelInfoBar, { props: { model: realModel } });
+ const text = wrapper.text();
+ expect(text).toContain('GPT-5.4');
+ expect(text).toContain('OpenAI');
+ expect(text).toContain('context:');
+ expect(text).toContain('output:');
+ });
+
+ test('does not render the alias-prose summary line', () => {
+ const wrapper = mount(ModelInfoBar, { props: { model: realModel } });
+ // The phrase "low effort" is uniquely produced by the alias path; its
+ // absence on a real-model row guards against the alias branch leaking.
+ expect(wrapper.text()).not.toContain('low effort');
+ expect(wrapper.text()).not.toContain('→');
+ });
+ });
+
+ describe('alias row', () => {
+ test('renders the operator-set displayName as a heading when present', () => {
+ const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
+ const headings = wrapper.findAll('h3');
+ expect(headings).toHaveLength(1);
+ expect(headings[0].text()).toBe('Codex Auto Review');
+ });
+
+ test('omits the heading when displayName is missing', () => {
+ const without: ControlPlaneModel = {
+ ...aliasModel,
+ aliasedFrom: { ...aliasModel.aliasedFrom!, displayName: undefined },
+ };
+ const wrapper = mount(ModelInfoBar, { props: { model: without } });
+ expect(wrapper.findAll('h3')).toHaveLength(0);
+ });
+
+ test('renders the id mapping with the alias id emphasised and target muted', () => {
+ const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
+ const aliasSpan = wrapper.get('.text-white.break-all');
+ const targetSpan = wrapper.get('.text-gray-500.break-all');
+ expect(aliasSpan.text()).toBe('codex-auto-review');
+ expect(targetSpan.text()).toBe('gpt-5.4');
+ // The arrow lives between them.
+ expect(wrapper.text()).toContain('→');
+ });
+
+ test('renders the rules summary on a third line when rules apply', () => {
+ const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
+ const paragraphs = wrapper.findAll('p');
+ expect(paragraphs).toHaveLength(2);
+ expect(paragraphs[1].text()).toBe('low effort');
+ expect(paragraphs[1].classes()).toContain('text-xs');
+ expect(paragraphs[1].classes()).toContain('text-gray-500');
+ });
+
+ test('omits the rules summary line when no rule applies', () => {
+ const empty: ControlPlaneModel = {
+ ...aliasModel,
+ aliasedFrom: { ...aliasModel.aliasedFrom!, rules: {} },
+ };
+ const wrapper = mount(ModelInfoBar, { props: { model: empty } });
+ expect(wrapper.findAll('p')).toHaveLength(1);
+ });
+
+ test('drops the upstream and limit badges that the real-model path renders', () => {
+ const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
+ const text = wrapper.text();
+ expect(text).not.toContain('OpenAI');
+ expect(text).not.toContain('context:');
+ expect(text).not.toContain('output:');
+ });
+
+ test('keeps the Clear button', () => {
+ const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
+ expect(wrapper.text()).toContain('Clear');
+ });
+ });
+});
From 1989a38520e3e7dd8e5c86111061c6caf540721d Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 14:09:02 +0800
Subject: [PATCH 043/170] refactor(web): AliasRow renders as 3-line text block,
matching the /models row
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Mirror the dashboard Models row layout the operator landed on: drop the
right-side effort/upstream/visibility badges; render the row as three
left-justified lines — operator-set display name (skipped when the
alias has no displayName), `<alias id> → <target model id>` mapping
with the alias id emphasised, and `formatAliasRulesInline` summary.
The hidden-from-/v1/models hint stays as a small amber line below the
mapping when the alias is hidden.
---
apps/web/src/components/settings/AliasRow.vue | 52 ++++++-------------
.../src/components/settings/AliasRow_test.ts | 30 +++++++----
2 files changed, 36 insertions(+), 46 deletions(-)
diff --git a/apps/web/src/components/settings/AliasRow.vue b/apps/web/src/components/settings/AliasRow.vue
index b00f36348..e4899dc5c 100644
--- a/apps/web/src/components/settings/AliasRow.vue
+++ b/apps/web/src/components/settings/AliasRow.vue
@@ -2,7 +2,7 @@
import { computed } from 'vue';
import type { ModelAlias } from '../../api/types.ts';
-import { formatAliasRuleBadges } from '@floway-dev/protocols/common';
+import { formatAliasRulesInline } from '@floway-dev/protocols/common';
const props = defineProps<{
alias: ModelAlias;
@@ -13,47 +13,25 @@ defineEmits<{
delete: [];
}>();
-// Effective label: operator-set display name when present, otherwise fall
-// back to the alias id itself. The "→ target" annotation is rendered
-// alongside the label rather than substituted in so an operator who picks a
-// long display name still sees what the alias points at.
-const labelText = computed(() => props.alias.display_name ?? props.alias.alias);
-
-const badges = computed(() => formatAliasRuleBadges(props.alias.rules));
+const rulesInline = computed(() => formatAliasRulesInline(props.alias.rules));
-
-
- {{ alias.alias }}
- {{ labelText }}
- → {{ alias.target_model_id }}
-
-
-
-
{{ id }}
+
+
+
{{ alias.display_name }}
+
+ {{ alias.alias }}
+ →
+ {{ alias.target_model_id }}
+
+
{{ rulesInline }}
+
hidden from /v1/models
-
-
- {{ badge.label }}: {{ badge.value }}
-
-
-
-
hidden
-
{
expect(wrapper.text()).not.toContain('alias-only');
});
- test('renders upstream-id pills when the alias whitelists upstreams', () => {
+ test('does not render per-upstream pills (upstream restriction is shown in the edit dialog instead)', () => {
+ // The row used to surface alias.upstream_ids as small font-mono pills.
+ // Operator feedback pushed all alias detail beyond the 3-line text block
+ // (display name, id mapping, rules summary) into the edit dialog so the
+ // listing reads at a glance. Assert the upstream id strings stay out of
+ // the row text — they live in the dialog now.
const wrapper = mount(AliasRow, {
props: { alias: { ...baseAlias, upstream_ids: ['up_anth', 'up_oai'] } },
});
const text = wrapper.text();
- expect(text).toContain('up_anth');
- expect(text).toContain('up_oai');
+ expect(text).not.toContain('up_anth');
+ expect(text).not.toContain('up_oai');
});
test('falls back to alias name when display_name is null', () => {
@@ -70,7 +75,7 @@ describe('AliasRow', () => {
expect(wrapper.text()).toContain('hidden');
});
- test('renders one rule badge per active rule field', () => {
+ test('renders the rules summary inline as one comma-joined line', () => {
const wrapper = mount(AliasRow, {
props: {
alias: {
@@ -79,11 +84,18 @@ describe('AliasRow', () => {
},
},
});
- // formatAliasRuleBadges drives the order: effort, verbosity, service tier.
- const text = wrapper.text();
- expect(text).toContain('effort: high');
- expect(text).toContain('verbosity: low');
- expect(text).toContain('service tier: priority');
+ // formatAliasRulesInline produces "value label, ..." in the same order
+ // composeAliasDisplayName uses for its parenthesized suffix.
+ expect(wrapper.text()).toContain('high effort, low verbosity, priority tier');
+ });
+
+ test('omits the rules summary line entirely when no rules are set', () => {
+ const wrapper = mount(AliasRow, {
+ props: { alias: { ...baseAlias, rules: {} } },
+ });
+ expect(wrapper.text()).not.toContain('effort');
+ expect(wrapper.text()).not.toContain('verbosity');
+ expect(wrapper.text()).not.toContain('tier');
});
});
From 6f616054a095563dd013b010338f4ab173346330 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 14:12:38 +0800
Subject: [PATCH 044/170] style(web): match alias rules-summary font size to
the id-mapping line above
---
apps/web/src/components/settings/AliasRow.vue | 2 +-
1 file changed, 1 insertion(+), 1 deletion(-)
diff --git a/apps/web/src/components/settings/AliasRow.vue b/apps/web/src/components/settings/AliasRow.vue
index e4899dc5c..913455ad1 100644
--- a/apps/web/src/components/settings/AliasRow.vue
+++ b/apps/web/src/components/settings/AliasRow.vue
@@ -25,7 +25,7 @@ const rulesInline = computed(() => formatAliasRulesInline(props.alias.rules));
→
{{ alias.target_model_id }}
- {{ rulesInline }}
+ {{ rulesInline }}
Date: Fri, 26 Jun 2026 14:13:53 +0800
Subject: [PATCH 045/170] Revert "refactor(web): alias model rows render as
3-line text block in /dashboard/models"
This reverts commit 6045ab8b36d2bb679d31f94c6013044823a29bf3.
---
.../src/components/models/ModelInfoBar.vue | 85 +++++++-------
.../components/models/ModelInfoBar_test.ts | 110 ------------------
2 files changed, 41 insertions(+), 154 deletions(-)
delete mode 100644 apps/web/src/components/models/ModelInfoBar_test.ts
diff --git a/apps/web/src/components/models/ModelInfoBar.vue b/apps/web/src/components/models/ModelInfoBar.vue
index 5e4d52262..a70fa7f93 100644
--- a/apps/web/src/components/models/ModelInfoBar.vue
+++ b/apps/web/src/components/models/ModelInfoBar.vue
@@ -1,11 +1,9 @@
-
-
-
{{ model.aliasedFrom.displayName }}
-
- {{ model.id }}
- →
- {{ model.aliasedFrom.targetModelId }}
-
-
{{ rulesInline }}
-
-
-
-
-
{{ model.display_name ?? model.id }}
- {{ model.id }}
-
-
- {{ binding.name }}
-
- context: {{ formatTokenLimit(model.limits.max_context_window_tokens) }}
-
-
- prompt: {{ formatTokenLimit(model.limits.max_prompt_tokens) }}
-
-
- output: {{ formatTokenLimit(model.limits.max_output_tokens) }}
-
-
-
+
+
{{ model.display_name ?? model.id }}
+ {{ model.id }}
+
+
+ {{ binding.name }}
+
+ context: {{ formatTokenLimit(model.limits.max_context_window_tokens) }}
+
+
+ prompt: {{ formatTokenLimit(model.limits.max_prompt_tokens) }}
+
+
+ output: {{ formatTokenLimit(model.limits.max_output_tokens) }}
+
+ {{ badge.label }}{{ badge.value !== undefined ? `: ${badge.value}` : '' }}
+
diff --git a/apps/web/src/components/models/ModelInfoBar_test.ts b/apps/web/src/components/models/ModelInfoBar_test.ts
deleted file mode 100644
index ac8d62f5a..000000000
--- a/apps/web/src/components/models/ModelInfoBar_test.ts
+++ /dev/null
@@ -1,110 +0,0 @@
-// @vitest-environment happy-dom
-
-import { mount } from '@vue/test-utils';
-import { describe, expect, test } from 'vitest';
-
-import ModelInfoBar from './ModelInfoBar.vue';
-import type { ControlPlaneModel } from '../../api/types.ts';
-
-const realModel: ControlPlaneModel = {
- id: 'gpt-5.4',
- display_name: 'GPT-5.4',
- kind: 'chat',
- limits: { max_context_window_tokens: 200_000, max_output_tokens: 16_384 },
- upstreams: [{ id: 'up_oai', kind: 'custom', name: 'OpenAI' }],
-};
-
-const aliasModel: ControlPlaneModel = {
- id: 'codex-auto-review',
- display_name: 'Codex Auto Review',
- kind: 'chat',
- limits: { max_context_window_tokens: 200_000, max_output_tokens: 16_384 },
- upstreams: [{ id: 'up_oai', kind: 'custom', name: 'OpenAI' }],
- aliasedFrom: {
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: { reasoning: { effort: 'low' } },
- onConflict: 'real-only',
- displayName: 'Codex Auto Review',
- },
-};
-
-describe('ModelInfoBar', () => {
- describe('real-model row (no aliasedFrom)', () => {
- test('renders the display-name heading + upstream + limit badges', () => {
- const wrapper = mount(ModelInfoBar, { props: { model: realModel } });
- const text = wrapper.text();
- expect(text).toContain('GPT-5.4');
- expect(text).toContain('OpenAI');
- expect(text).toContain('context:');
- expect(text).toContain('output:');
- });
-
- test('does not render the alias-prose summary line', () => {
- const wrapper = mount(ModelInfoBar, { props: { model: realModel } });
- // The phrase "low effort" is uniquely produced by the alias path; its
- // absence on a real-model row guards against the alias branch leaking.
- expect(wrapper.text()).not.toContain('low effort');
- expect(wrapper.text()).not.toContain('→');
- });
- });
-
- describe('alias row', () => {
- test('renders the operator-set displayName as a heading when present', () => {
- const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
- const headings = wrapper.findAll('h3');
- expect(headings).toHaveLength(1);
- expect(headings[0].text()).toBe('Codex Auto Review');
- });
-
- test('omits the heading when displayName is missing', () => {
- const without: ControlPlaneModel = {
- ...aliasModel,
- aliasedFrom: { ...aliasModel.aliasedFrom!, displayName: undefined },
- };
- const wrapper = mount(ModelInfoBar, { props: { model: without } });
- expect(wrapper.findAll('h3')).toHaveLength(0);
- });
-
- test('renders the id mapping with the alias id emphasised and target muted', () => {
- const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
- const aliasSpan = wrapper.get('.text-white.break-all');
- const targetSpan = wrapper.get('.text-gray-500.break-all');
- expect(aliasSpan.text()).toBe('codex-auto-review');
- expect(targetSpan.text()).toBe('gpt-5.4');
- // The arrow lives between them.
- expect(wrapper.text()).toContain('→');
- });
-
- test('renders the rules summary on a third line when rules apply', () => {
- const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
- const paragraphs = wrapper.findAll('p');
- expect(paragraphs).toHaveLength(2);
- expect(paragraphs[1].text()).toBe('low effort');
- expect(paragraphs[1].classes()).toContain('text-xs');
- expect(paragraphs[1].classes()).toContain('text-gray-500');
- });
-
- test('omits the rules summary line when no rule applies', () => {
- const empty: ControlPlaneModel = {
- ...aliasModel,
- aliasedFrom: { ...aliasModel.aliasedFrom!, rules: {} },
- };
- const wrapper = mount(ModelInfoBar, { props: { model: empty } });
- expect(wrapper.findAll('p')).toHaveLength(1);
- });
-
- test('drops the upstream and limit badges that the real-model path renders', () => {
- const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
- const text = wrapper.text();
- expect(text).not.toContain('OpenAI');
- expect(text).not.toContain('context:');
- expect(text).not.toContain('output:');
- });
-
- test('keeps the Clear button', () => {
- const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
- expect(wrapper.text()).toContain('Clear');
- });
- });
-});
From 33373d2eba242ef89b26ab91459ccb0df1d2623d Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 14:14:39 +0800
Subject: [PATCH 046/170] style(web): shrink alias id-mapping and rules-summary
to text-xs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
The first line is the title (display_name) and should be the largest.
The id mapping and the rules summary below it are subordinate detail —
both render at text-xs so the title stands out and the two helper lines
read as one block beneath it.
---
apps/web/src/components/settings/AliasRow.vue | 4 ++--
1 file changed, 2 insertions(+), 2 deletions(-)
diff --git a/apps/web/src/components/settings/AliasRow.vue b/apps/web/src/components/settings/AliasRow.vue
index 913455ad1..cea5d37de 100644
--- a/apps/web/src/components/settings/AliasRow.vue
+++ b/apps/web/src/components/settings/AliasRow.vue
@@ -20,12 +20,12 @@ const rulesInline = computed(() => formatAliasRulesInline(props.alias.rules));
{{ alias.display_name }}
-
+
{{ alias.alias }}
→
{{ alias.target_model_id }}
-
{{ rulesInline }}
+
{{ rulesInline }}
Date: Fri, 26 Jun 2026 17:17:14 +0800
Subject: [PATCH 047/170] chore(aliases): scrap v1 implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Remove the v1 model-aliases feature in preparation for the v2 redesign
documented in docs/superpowers/specs/2026-06-26-model-aliases-design.md.
The v1 implementation grew an alias rule-application surface (effort,
budget, adaptive, summary, verbosity, serviceTier, anthropicBeta) that
the v2 spec rebuilds from a different angle; clearing v1 first keeps
the v2 changeset small and review-friendly.
Deleted:
- migrations 0046_model_aliases.sql and
0047_model_aliases_display_name.sql (v2 will reuse the 0046 slot for
its own schema; local dev databases are rebuilt fresh, and no on-disk
state is preserved across the v1 → v2 boundary).
- control-plane CRUD: routes, repo, types, serialize, tests.
- data-plane: model-aliases/apply, model-aliases/match,
models/alias-listing.
- dashboard: useModelAliases composable, AliasEditDialog, AliasRow,
AliasesSettingsCard, and their tests.
Surgical edits:
- protocols/common/models.ts loses PublicModelAliasedFrom and the
alias-formatting helpers; ChatModelInfo stays (introduced for #115's
per-model reasoning metadata, unrelated to aliases).
- repo: ModelAliasesRepo and its memory/SQL bindings dropped from the
Repo contract; the InMemoryRepo test seam goes with them.
- registry: enumerateModelInterpretations and resolveModelForRequest
return to their pre-alias signatures; getModelsForListing collapses
into getModels — the catalog no longer fans out alias entries.
- chat/responses/messages/gemini/chat-completions serve + attempt paths
stop loading aliases, stop applying rules, and stop staging the
x-floway-alias response header; the staging helper and its companion
responseHeaders bag on GatewayCtx are removed.
- shared/sanitize.ts drops the alias-aware trace context and the
traceAllRulesDropped helper used by the passthrough surface.
- control-plane Hono entry and Zod schemas lose the /api/aliases route
group and its bodies.
- dashboard PublicModel + ControlPlaneModel lose aliasedFrom;
ModelInfoBar stops rendering alias badges; settings.vue drops the
AliasesSettingsCard slot.
The translate-pair extension fields (thinking_budget, adaptive_thinking,
reasoning_summary, anthropic_beta) stay in place — they are protocol
extensions that flow regardless of alias source. The
applyAnthropicBetaToHeaders helper becomes unused by the gateway but
stays exported from packages/translate alongside the other Messages-via
helpers it ships with.
Verified: pnpm run lint and pnpm run typecheck both exit 0.
---
apps/web/src/api/types.ts | 6 +-
.../components/alias-edit/AliasEditDialog.vue | 446 -----------------
.../alias-edit/AliasEditDialog_test.ts | 205 --------
.../src/components/models/ModelInfoBar.vue | 17 +-
apps/web/src/components/settings/AliasRow.vue | 62 ---
.../src/components/settings/AliasRow_test.ts | 159 ------
.../settings/AliasesSettingsCard.vue | 65 ---
apps/web/src/composables/useModelAliases.ts | 29 --
apps/web/src/pages/dashboard/settings.vue | 33 +-
apps/web/vitest.config.ts | 2 +-
.../gateway/migrations/0046_model_aliases.sql | 14 -
.../0047_model_aliases_display_name.sql | 4 -
packages/gateway/package.json | 2 -
.../src/control-plane/model-aliases/repo.ts | 124 -----
.../control-plane/model-aliases/repo_test.ts | 172 -------
.../src/control-plane/model-aliases/routes.ts | 87 ----
.../model-aliases/routes_test.ts | 354 -------------
.../control-plane/model-aliases/serialize.ts | 31 --
.../src/control-plane/model-aliases/types.ts | 34 --
.../src/control-plane/models/routes.ts | 18 +-
.../src/control-plane/models/routes_test.ts | 34 +-
packages/gateway/src/control-plane/routes.ts | 8 +-
packages/gateway/src/control-plane/schemas.ts | 73 ---
.../chat/chat-completions/attempt.ts | 2 +-
.../chat/chat-completions/attempt_test.ts | 1 -
.../demote-developer-to-system_test.ts | 1 -
.../demote-interleaved-system-to-user_test.ts | 1 -
...le-reasoning-on-forced-tool-choice_test.ts | 1 -
.../include-usage-stream-options_test.ts | 1 -
.../interceptors/normalize-usage_test.ts | 1 -
.../vendor-deepseek-normalize_test.ts | 1 -
.../vendor-kimi-normalize_test.ts | 1 -
.../vendor-qwen-normalize_test.ts | 1 -
.../data-plane/chat/chat-completions/serve.ts | 11 -
.../chat/chat-completions/serve_test.ts | 1 -
.../data-plane/chat/gemini/attempt_test.ts | 1 -
.../strip-safety-settings_test.ts | 1 -
.../strip-unsupported-part-fields_test.ts | 1 -
.../strip-unsupported-tools_test.ts | 1 -
.../suppress-thought-parts_test.ts | 1 -
.../data-plane/chat/gemini/respond_test.ts | 1 -
.../src/data-plane/chat/gemini/serve.ts | 14 -
.../src/data-plane/chat/gemini/serve_test.ts | 1 -
.../src/data-plane/chat/messages/attempt.ts | 21 +-
.../data-plane/chat/messages/attempt_test.ts | 1 -
.../src/data-plane/chat/messages/http_test.ts | 57 ---
.../demote-interleaved-system-to-user_test.ts | 1 -
...le-reasoning-on-forced-tool-choice_test.ts | 1 -
.../strip-billing-attribution_test.ts | 1 -
.../interceptors/web-search-shim_test.ts | 1 -
.../data-plane/chat/messages/respond_test.ts | 1 -
.../src/data-plane/chat/messages/serve.ts | 19 -
.../data-plane/chat/messages/serve_test.ts | 1 -
.../src/data-plane/chat/responses/attempt.ts | 4 +-
.../data-plane/chat/responses/attempt_test.ts | 1 -
.../canonicalize-encrypted-content_test.ts | 1 -
.../demote-developer-to-system_test.ts | 1 -
.../demote-interleaved-system-to-user_test.ts | 1 -
...le-reasoning-on-forced-tool-choice_test.ts | 1 -
.../interceptors/retry-cyber-policy_test.ts | 1 -
.../interceptors/server-tool-shim_test.ts | 2 -
.../image-generation-integration_test.ts | 1 -
.../server-tools/image-generation.ts | 9 +-
.../server-tools/image-generation_test.ts | 1 -
.../vendor-deepseek-normalize_test.ts | 1 -
.../vendor-qwen-normalize_test.ts | 1 -
.../data-plane/chat/responses/serve-prep.ts | 3 -
.../src/data-plane/chat/responses/serve.ts | 13 -
.../data-plane/chat/responses/serve_test.ts | 1 -
.../src/data-plane/chat/shared/candidates.ts | 37 +-
.../data-plane/chat/shared/candidates_test.ts | 12 -
.../src/data-plane/chat/shared/gateway-ctx.ts | 38 +-
.../data-plane/chat/shared/respond_test.ts | 1 -
.../src/data-plane/chat/shared/routing.ts | 10 +-
.../src/data-plane/chat/shared/sanitize.ts | 36 +-
.../data-plane/chat/shared/sanitize_test.ts | 18 +-
.../chat/shared/upstream-telemetry_test.ts | 1 -
.../src/data-plane/completions/serve_test.ts | 47 --
.../src/data-plane/embeddings/serve_test.ts | 56 ---
.../src/data-plane/images/serve_test.ts | 60 ---
.../src/data-plane/model-aliases/apply.ts | 104 ----
.../data-plane/model-aliases/apply_test.ts | 203 --------
.../src/data-plane/model-aliases/match.ts | 15 -
.../data-plane/model-aliases/match_test.ts | 55 --
.../src/data-plane/models/alias-listing.ts | 139 ------
.../gateway/src/data-plane/models/gemini.ts | 21 +-
.../src/data-plane/models/gemini_test.ts | 46 --
.../gateway/src/data-plane/models/load.ts | 14 +-
.../gateway/src/data-plane/models/serve.ts | 4 +-
.../src/data-plane/models/serve_test.ts | 468 ------------------
.../src/data-plane/providers/registry.ts | 173 +------
.../src/data-plane/providers/registry_test.ts | 210 +-------
.../data-plane/shared/passthrough-serve.ts | 21 +-
packages/gateway/src/repo/memory.ts | 53 --
packages/gateway/src/repo/sql.ts | 33 --
packages/gateway/src/repo/types.ts | 24 -
.../gateway/src/test-helpers/gateway-ctx.ts | 10 +-
packages/protocols/src/common/models.ts | 98 ----
.../src/common/models_alias-display_test.ts | 100 ----
99 files changed, 77 insertions(+), 4199 deletions(-)
delete mode 100644 apps/web/src/components/alias-edit/AliasEditDialog.vue
delete mode 100644 apps/web/src/components/alias-edit/AliasEditDialog_test.ts
delete mode 100644 apps/web/src/components/settings/AliasRow.vue
delete mode 100644 apps/web/src/components/settings/AliasRow_test.ts
delete mode 100644 apps/web/src/components/settings/AliasesSettingsCard.vue
delete mode 100644 apps/web/src/composables/useModelAliases.ts
delete mode 100644 packages/gateway/migrations/0046_model_aliases.sql
delete mode 100644 packages/gateway/migrations/0047_model_aliases_display_name.sql
delete mode 100644 packages/gateway/src/control-plane/model-aliases/repo.ts
delete mode 100644 packages/gateway/src/control-plane/model-aliases/repo_test.ts
delete mode 100644 packages/gateway/src/control-plane/model-aliases/routes.ts
delete mode 100644 packages/gateway/src/control-plane/model-aliases/routes_test.ts
delete mode 100644 packages/gateway/src/control-plane/model-aliases/serialize.ts
delete mode 100644 packages/gateway/src/control-plane/model-aliases/types.ts
delete mode 100644 packages/gateway/src/data-plane/model-aliases/apply.ts
delete mode 100644 packages/gateway/src/data-plane/model-aliases/apply_test.ts
delete mode 100644 packages/gateway/src/data-plane/model-aliases/match.ts
delete mode 100644 packages/gateway/src/data-plane/model-aliases/match_test.ts
delete mode 100644 packages/gateway/src/data-plane/models/alias-listing.ts
delete mode 100644 packages/protocols/src/common/models_alias-display_test.ts
diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts
index 66216b812..088891e1b 100644
--- a/apps/web/src/api/types.ts
+++ b/apps/web/src/api/types.ts
@@ -6,11 +6,10 @@ import type {
ModelEndpoints,
ModelKind,
ModelPricing,
- PublicModelAliasedFrom,
} from '@floway-dev/protocols/common';
import type { AddressableForm, ModelPrefixConfig } from '@floway-dev/provider/model-prefix';
-export type { BillingDimension, ModelEndpointKey, ModelEndpoints, ModelKind, ModelPricing, PublicModelAliasedFrom };
+export type { BillingDimension, ModelEndpointKey, ModelEndpoints, ModelKind, ModelPricing };
export type { AddressableForm, ModelPrefixConfig };
export type UpstreamProviderKind = 'custom' | 'azure' | 'copilot' | 'codex' | 'claude-code' | 'ollama';
@@ -305,8 +304,6 @@ export interface FlagDef {
// than redeclaring the shape) makes any future field rename a compile error
// here instead of a runtime mismatch the next time someone refreshes the page.
export type { SerializedProxyRecord as ProxyRecord, SerializedBackoffRow as BackoffRow } from '@floway-dev/gateway/control-plane/proxies/serialize';
-export type { SerializedModelAlias as ModelAlias } from '@floway-dev/gateway/control-plane/model-aliases/serialize';
-export type { ModelAliasRules, OnConflict as ModelAliasOnConflict } from '@floway-dev/gateway/control-plane/model-aliases/types';
// 409 body returned by DELETE /api/proxies/:id when the row is referenced
// by an upstream's fallback list.
@@ -343,7 +340,6 @@ export interface PublicModel {
endpoints?: Record;
cost?: ModelPricing;
kind?: ModelKind;
- aliasedFrom?: PublicModelAliasedFrom;
}
export interface ControlPlaneModel extends PublicModel {
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
deleted file mode 100644
index db7275abe..000000000
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ /dev/null
@@ -1,446 +0,0 @@
-
-
-
-
-
-
- {{ saveError }}
-
-
-
-
- Alias name
-
-
-
-
- Target model id
-
-
-
-
-
- Display name (optional)
-
-
-
-
- Upstreams (leave empty to allow any upstream that serves the target)
-
-
-
-
-
On conflict
-
-
- {{ option.explanation }}
-
-
-
{{ selectedOnConflict.explanation }}
-
-
-
-
- Effort
-
-
-
-
-
Budget tokens
-
-
- Target range:
- min {{ budgetMin }}
- ,
- max {{ budgetMax }}
-
-
-
-
-
-
-
Adaptive reasoning
-
-
- Enable
-
-
-
-
- Summary
-
-
-
-
-
-
- Verbosity
-
-
-
-
- Service tier
-
-
-
-
-
- Anthropic beta headers (comma- or Enter-separated tokens)
-
-
-
-
-
- {{ warning.field }}: {{ warning.message }}
-
-
-
-
-
-
- Visible in /v1/models
-
-
- Cancel
- Save
-
-
-
-
-
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
deleted file mode 100644
index 99574c248..000000000
--- a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
+++ /dev/null
@@ -1,205 +0,0 @@
-// @vitest-environment happy-dom
-
-import { mount } from '@vue/test-utils';
-import { afterEach, beforeEach, expect, test, vi } from 'vitest';
-import { defineComponent, h, ref } from 'vue';
-
-import type { ModelAlias } from '../../api/types.ts';
-
-// Module-level mocks for the api client + every store the dialog imports.
-// The dialog stays as-is; we substitute the dependencies so the component
-// renders and submits without any real HTTP. callApi is exposed as a spy so
-// tests can read what was posted.
-const createAliasMock = vi.fn(async (_args: { json: unknown }) => new Response(JSON.stringify({}), { status: 201, headers: { 'content-type': 'application/json' } }));
-const patchAliasMock = vi.fn(async (_args: { param: { alias: string }; json: unknown }) => new Response(JSON.stringify({}), { status: 200, headers: { 'content-type': 'application/json' } }));
-
-vi.mock('../../api/client.ts', async () => {
- const { callApi: realCallApi } = await vi.importActual('../../api/client.ts');
- return {
- useApi: () => ({
- api: {
- aliases: Object.assign(
- { $post: (args: { json: unknown }) => createAliasMock(args) },
- { ':alias': { $patch: (args: { param: { alias: string }; json: unknown }) => patchAliasMock(args) } },
- ),
- },
- }),
- callApi: realCallApi,
- };
-});
-
-vi.mock('../../composables/useModels.ts', () => ({
- useModelsStore: () => ({
- models: {
- value: [
- { id: 'gpt-5.4', display_name: 'GPT-5.4', object: 'model', type: 'model', limits: {}, kind: 'chat', chat: { reasoning: { effort: { supported: ['low', 'medium', 'high'], default: 'medium' }, budget_tokens: { min: 1024, max: 8192 }, adaptive: true } } },
- { id: 'claude-opus-4-6', display_name: 'Claude Opus 4.6', object: 'model', type: 'model', limits: {}, kind: 'chat' },
- ],
- },
- loading: { value: false },
- error: { value: null },
- load: vi.fn(async () => undefined),
- }),
-}));
-
-vi.mock('../../composables/useUpstreams.ts', () => ({
- useUpstreamsStore: () => ({
- upstreams: {
- value: [
- { id: 'up_oai', name: 'OpenAI' },
- { id: 'up_anth', name: 'Anthropic' },
- ],
- },
- loading: { value: false },
- load: vi.fn(async () => undefined),
- }),
-}));
-
-// reka-ui's Dialog mounts via Teleport into document.body and renders a
-// portal — we stub it down to a passthrough so happy-dom mounts the slot
-// content inline where assertions can reach it.
-vi.mock('@floway-dev/ui', async () => {
- const real = await vi.importActual('@floway-dev/ui');
- const Passthrough = defineComponent({ name: 'Passthrough', setup(_props, { slots }) { return () => h('div', slots.default?.()); } });
- return { ...real, Dialog: Passthrough };
-});
-
-beforeEach(() => {
- createAliasMock.mockClear();
- patchAliasMock.mockClear();
-});
-
-afterEach(() => {
- vi.clearAllMocks();
-});
-
-test('AliasEditDialog (create mode) posts a payload matching the form state', async () => {
- const { default: AliasEditDialog } = await import('./AliasEditDialog.vue');
- const open = ref(true);
-
- const wrapper = mount(defineComponent({
- components: { AliasEditDialog },
- setup() { return { open }; },
- template: ' ',
- }));
-
- // Fill the form: alias name + target id are the only required fields for
- // the create-mode happy path. Everything else uses its default.
- const aliasInput = wrapper.find('input[placeholder="gpt-5.5-xhigh-fast"]');
- expect(aliasInput.exists()).toBe(true);
- await aliasInput.setValue('opus-fast');
-
- const targetInput = wrapper.find('input[placeholder="gpt-5.5"]');
- expect(targetInput.exists()).toBe(true);
- await targetInput.setValue('claude-opus-4-6');
-
- // Click Save.
- const saveBtn = wrapper.findAll('button').find(b => b.text() === 'Save');
- expect(saveBtn).toBeDefined();
- await saveBtn!.trigger('click');
- // Flush microtasks so the async save completes.
- await new Promise(r => setTimeout(r, 0));
-
- expect(createAliasMock).toHaveBeenCalledTimes(1);
- const args = createAliasMock.mock.calls[0]![0];
- expect(args.json).toMatchObject({
- alias: 'opus-fast',
- targetModelId: 'claude-opus-4-6',
- upstreamIds: [],
- rules: {},
- visibleInModelsList: true,
- onConflict: 'real-only',
- });
-});
-
-test('AliasEditDialog (edit mode) pre-fills the form and PATCHes the merged shape', async () => {
- const { default: AliasEditDialog } = await import('./AliasEditDialog.vue');
- const open = ref(true);
- const record: ModelAlias = {
- alias: 'opus-xhigh',
- target_model_id: 'claude-opus-4-6',
- upstream_ids: ['up_anth'],
- rules: { reasoning: { effort: 'xhigh' } },
- visible_in_models_list: true,
- on_conflict: 'real-only',
- display_name: 'Opus XHigh',
- created_at: 1_700_000_000,
- };
-
- const wrapper = mount(defineComponent({
- components: { AliasEditDialog },
- setup() { return { open, record }; },
- template: ' ',
- }));
-
- // Alias name input is editable in edit mode — the PK can now be renamed.
- const aliasInput = wrapper.find('input[placeholder="gpt-5.5-xhigh-fast"]');
- expect(aliasInput.exists()).toBe(true);
- expect((aliasInput.element as HTMLInputElement).disabled).toBe(false);
- expect((aliasInput.element as HTMLInputElement).value).toBe('opus-xhigh');
-
- // Display name pre-filled — its placeholder is dynamic now (mirrors the
- // synthesized fallback) so we locate it by its current value instead.
- const allInputs = wrapper.findAll('input');
- const displayInput = allInputs.find(i => (i.element as HTMLInputElement).value === 'Opus XHigh');
- expect(displayInput).toBeDefined();
-
- // Target id pre-filled.
- const targetInput = wrapper.find('input[placeholder="gpt-5.5"]');
- expect((targetInput.element as HTMLInputElement).value).toBe('claude-opus-4-6');
-
- // Change one field and submit; PATCH carries the merged shape (every editable
- // field, not just the diff — the route layer merges against the stored row).
- await targetInput.setValue('gpt-5.4');
- const saveBtn = wrapper.findAll('button').find(b => b.text() === 'Save');
- await saveBtn!.trigger('click');
- await new Promise(r => setTimeout(r, 0));
-
- expect(patchAliasMock).toHaveBeenCalledTimes(1);
- const args = patchAliasMock.mock.calls[0]![0];
- expect(args.param.alias).toBe('opus-xhigh');
- expect(args.json).toMatchObject({
- alias: 'opus-xhigh',
- targetModelId: 'gpt-5.4',
- upstreamIds: ['up_anth'],
- rules: { reasoning: { effort: 'xhigh' } },
- visibleInModelsList: true,
- onConflict: 'real-only',
- displayName: 'Opus XHigh',
- });
-});
-
-test('AliasEditDialog (edit mode) PATCHes the original alias when the operator renames it', async () => {
- const { default: AliasEditDialog } = await import('./AliasEditDialog.vue');
- const open = ref(true);
- const record: ModelAlias = {
- alias: 'opus-xhigh',
- target_model_id: 'claude-opus-4-6',
- upstream_ids: [],
- rules: {},
- visible_in_models_list: true,
- on_conflict: 'real-only',
- display_name: null,
- created_at: 1_700_000_000,
- };
-
- const wrapper = mount(defineComponent({
- components: { AliasEditDialog },
- setup() { return { open, record }; },
- template: ' ',
- }));
-
- const aliasInput = wrapper.find('input[placeholder="gpt-5.5-xhigh-fast"]');
- await aliasInput.setValue('opus-renamed');
-
- const saveBtn = wrapper.findAll('button').find(b => b.text() === 'Save');
- await saveBtn!.trigger('click');
- await new Promise(r => setTimeout(r, 0));
-
- expect(patchAliasMock).toHaveBeenCalledTimes(1);
- const args = patchAliasMock.mock.calls[0]![0];
- // The PATCH path stays at the row's *original* PK; the rename is requested
- // via `body.alias`, which the route handler maps to the rename codepath.
- expect(args.param.alias).toBe('opus-xhigh');
- expect(args.json).toMatchObject({ alias: 'opus-renamed' });
-});
diff --git a/apps/web/src/components/models/ModelInfoBar.vue b/apps/web/src/components/models/ModelInfoBar.vue
index a70fa7f93..f8bf98b6e 100644
--- a/apps/web/src/components/models/ModelInfoBar.vue
+++ b/apps/web/src/components/models/ModelInfoBar.vue
@@ -1,7 +1,6 @@
@@ -53,11 +43,6 @@ const aliasBadges = (aliasedFrom: PublicModelAliasedFrom): AliasRuleBadge[] => [
output: {{ formatTokenLimit(model.limits.max_output_tokens) }}
- {{ badge.label }}{{ badge.value !== undefined ? `: ${badge.value}` : '' }}
diff --git a/apps/web/src/components/settings/AliasRow.vue b/apps/web/src/components/settings/AliasRow.vue
deleted file mode 100644
index cea5d37de..000000000
--- a/apps/web/src/components/settings/AliasRow.vue
+++ /dev/null
@@ -1,62 +0,0 @@
-
-
-
-
-
-
{{ alias.display_name }}
-
- {{ alias.alias }}
- →
- {{ alias.target_model_id }}
-
-
{{ rulesInline }}
-
hidden from /v1/models
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
-
diff --git a/apps/web/src/components/settings/AliasRow_test.ts b/apps/web/src/components/settings/AliasRow_test.ts
deleted file mode 100644
index 988065fc6..000000000
--- a/apps/web/src/components/settings/AliasRow_test.ts
+++ /dev/null
@@ -1,159 +0,0 @@
-// @vitest-environment happy-dom
-
-import { mount } from '@vue/test-utils';
-import { beforeEach, describe, expect, test, vi } from 'vitest';
-import { defineComponent } from 'vue';
-
-import AliasRow from './AliasRow.vue';
-import type { ModelAlias } from '../../api/types.ts';
-
-const baseAlias: ModelAlias = {
- alias: 'opus-xhigh',
- target_model_id: 'claude-opus-4-6',
- upstream_ids: [],
- rules: { reasoning: { effort: 'xhigh' } },
- visible_in_models_list: true,
- on_conflict: 'real-only',
- display_name: 'Opus XHigh',
- created_at: 1_700_000_000,
-};
-
-describe('AliasRow', () => {
- beforeEach(() => {
- vi.restoreAllMocks();
- });
-
- test('renders the display name, alias id, and target', () => {
- const wrapper = mount(AliasRow, { props: { alias: baseAlias } });
- expect(wrapper.text()).toContain('Opus XHigh');
- expect(wrapper.text()).toContain('opus-xhigh');
- expect(wrapper.text()).toContain('claude-opus-4-6');
- });
-
- test('does not render the on_conflict label as a badge', () => {
- // The row used to surface `real-only` / `alias-only` as a coloured badge.
- // Operator feedback was that the inline label was noisy and the same
- // information lives inside the edit dialog. Asserting absence here pins
- // the regression — the words must not slip back into the row template.
- const wrapper = mount(AliasRow, { props: { alias: baseAlias } });
- expect(wrapper.text()).not.toContain('real-only');
- expect(wrapper.text()).not.toContain('alias-only');
- });
-
- test('does not render per-upstream pills (upstream restriction is shown in the edit dialog instead)', () => {
- // The row used to surface alias.upstream_ids as small font-mono pills.
- // Operator feedback pushed all alias detail beyond the 3-line text block
- // (display name, id mapping, rules summary) into the edit dialog so the
- // listing reads at a glance. Assert the upstream id strings stay out of
- // the row text — they live in the dialog now.
- const wrapper = mount(AliasRow, {
- props: { alias: { ...baseAlias, upstream_ids: ['up_anth', 'up_oai'] } },
- });
- const text = wrapper.text();
- expect(text).not.toContain('up_anth');
- expect(text).not.toContain('up_oai');
- });
-
- test('falls back to alias name when display_name is null', () => {
- const wrapper = mount(AliasRow, { props: { alias: { ...baseAlias, display_name: null } } });
- // alias id appears twice (label fallback + the small font-mono id), but the
- // important assertion is that the label slot is non-empty.
- expect(wrapper.text()).toContain('opus-xhigh');
- expect(wrapper.text()).not.toContain('Opus XHigh');
- });
-
- test('emits edit and delete on the matching button clicks', async () => {
- const wrapper = mount(AliasRow, { props: { alias: baseAlias } });
- await wrapper.find('[aria-label="Edit alias"]').trigger('click');
- await wrapper.find('[aria-label="Delete alias"]').trigger('click');
- expect(wrapper.emitted('edit')).toHaveLength(1);
- expect(wrapper.emitted('delete')).toHaveLength(1);
- });
-
- test('shows a "hidden" badge when visible_in_models_list is false', () => {
- const wrapper = mount(AliasRow, { props: { alias: { ...baseAlias, visible_in_models_list: false } } });
- expect(wrapper.text()).toContain('hidden');
- });
-
- test('renders the rules summary inline as one comma-joined line', () => {
- const wrapper = mount(AliasRow, {
- props: {
- alias: {
- ...baseAlias,
- rules: { reasoning: { effort: 'high' }, verbosity: 'low', serviceTier: 'priority' },
- },
- },
- });
- // formatAliasRulesInline produces "value label, ..." in the same order
- // composeAliasDisplayName uses for its parenthesized suffix.
- expect(wrapper.text()).toContain('high effort, low verbosity, priority tier');
- });
-
- test('omits the rules summary line entirely when no rules are set', () => {
- const wrapper = mount(AliasRow, {
- props: { alias: { ...baseAlias, rules: {} } },
- });
- expect(wrapper.text()).not.toContain('effort');
- expect(wrapper.text()).not.toContain('verbosity');
- expect(wrapper.text()).not.toContain('tier');
- });
-});
-
-// Bare-component smoke test for the card. We mock the composable so the
-// card renders deterministically without an HTTP round-trip; the stub
-// substitutes the same shape useModelAliases exposes.
-describe('AliasesSettingsCard', () => {
- test('renders empty state when the store has no aliases', async () => {
- vi.resetModules();
- vi.doMock('../../composables/useModelAliases.ts', () => ({
- useModelAliases: () => ({
- aliases: { value: [] },
- loading: { value: false },
- error: { value: null },
- load: vi.fn(),
- }),
- }));
- vi.doMock('../../api/client.ts', () => ({
- useApi: () => ({ api: { aliases: { ':alias': { $delete: vi.fn() } } } }),
- callApi: vi.fn(),
- }));
- const { default: AliasesSettingsCard } = await import('./AliasesSettingsCard.vue');
- const wrapper = mount(AliasesSettingsCard);
- expect(wrapper.text()).toContain('No aliases configured');
- });
-
- test('renders one AliasRow per alias the store holds', async () => {
- vi.resetModules();
- const rows: ModelAlias[] = [
- { ...baseAlias, alias: 'a-one' },
- { ...baseAlias, alias: 'b-two', display_name: null },
- ];
- vi.doMock('../../composables/useModelAliases.ts', () => ({
- useModelAliases: () => ({
- aliases: { value: rows },
- loading: { value: false },
- error: { value: null },
- load: vi.fn(),
- }),
- }));
- vi.doMock('../../api/client.ts', () => ({
- useApi: () => ({ api: { aliases: { ':alias': { $delete: vi.fn() } } } }),
- callApi: vi.fn(),
- }));
- const { default: AliasesSettingsCard } = await import('./AliasesSettingsCard.vue');
- const wrapper = mount(AliasesSettingsCard);
- // Each row exposes its delete button by aria-label, so the count is a
- // reliable proxy for "one AliasRow rendered per alias".
- expect(wrapper.findAll('[aria-label="Delete alias"]').length).toBe(rows.length);
- expect(wrapper.text()).toContain('a-one');
- expect(wrapper.text()).toContain('b-two');
- });
-});
-
-// Sanity: a stub wrapping the component above guards against template parse
-// regressions (an unknown directive or missing import would explode at mount
-// time even when no real backend is reachable).
-test('the test harness can mount a trivial component', () => {
- const wrapper = mount(defineComponent({ template: 'ok ' }));
- expect(wrapper.text()).toBe('ok');
-});
diff --git a/apps/web/src/components/settings/AliasesSettingsCard.vue b/apps/web/src/components/settings/AliasesSettingsCard.vue
deleted file mode 100644
index da71b374f..000000000
--- a/apps/web/src/components/settings/AliasesSettingsCard.vue
+++ /dev/null
@@ -1,65 +0,0 @@
-
-
-
-
-
-
-
Aliases
-
- Synthesized model ids that pin a target model plus a request-time rule overlay.
- Surfaced in /v1/models per the conflict policy.
-
-
-
Add Alias
-
-
-
- Failed to load aliases: {{ aliasesStore.error.value }}
-
-
-
- No aliases configured. Add one to expose a model id with locked reasoning, service tier, or other rule overrides.
-
-
-
-
-
-
-
diff --git a/apps/web/src/composables/useModelAliases.ts b/apps/web/src/composables/useModelAliases.ts
deleted file mode 100644
index 880c771f9..000000000
--- a/apps/web/src/composables/useModelAliases.ts
+++ /dev/null
@@ -1,29 +0,0 @@
-import { ref, shallowRef } from 'vue';
-
-import { callApi, useApi } from '../api/client.ts';
-import type { ModelAlias } from '../api/types.ts';
-
-// Module-scoped cache so concurrent callers share one fetch — mirrors the
-// proxies store pattern: settings tabs that mount in parallel reuse a single
-// in-flight request instead of fan-out per-component.
-const aliases = shallowRef(null);
-const loading = ref(false);
-const error = ref(null);
-
-export const useModelAliases = () => {
- const api = useApi();
-
- const load = async () => {
- loading.value = true;
- error.value = null;
- const { data, error: err } = await callApi(() => api.api.aliases.$get());
- loading.value = false;
- if (err) {
- error.value = err.message;
- return;
- }
- aliases.value = data;
- };
-
- return { aliases, loading, error, load };
-};
diff --git a/apps/web/src/pages/dashboard/settings.vue b/apps/web/src/pages/dashboard/settings.vue
index 636754fee..b8e23dc3d 100644
--- a/apps/web/src/pages/dashboard/settings.vue
+++ b/apps/web/src/pages/dashboard/settings.vue
@@ -4,17 +4,14 @@ import { ref, watch } from 'vue';
import { useRouter } from 'vue-router';
import { callApi, useApi } from '../../api/client.ts';
-import type { ModelAlias, ProxyRecord, SearchConfig, UpstreamProviderKind, UpstreamRecord } from '../../api/types.ts';
-import AliasEditDialog from '../../components/alias-edit/AliasEditDialog.vue';
+import type { ProxyRecord, SearchConfig, UpstreamProviderKind, UpstreamRecord } from '../../api/types.ts';
import ProxyEditDialog from '../../components/proxy-edit/ProxyEditDialog.vue';
-import AliasesSettingsCard from '../../components/settings/AliasesSettingsCard.vue';
import ApiEndpointsSection from '../../components/settings/ApiEndpointsSection.vue';
import ExportSection from '../../components/settings/ExportSection.vue';
import ImportSection from '../../components/settings/ImportSection.vue';
import ProxiesSettingsCard from '../../components/settings/ProxiesSettingsCard.vue';
import SearchConfigSection from '../../components/settings/SearchConfigSection.vue';
import UpstreamsSettingsCard from '../../components/settings/UpstreamsSettingsCard.vue';
-import { useModelAliases } from '../../composables/useModelAliases.ts';
import { useModelsStore } from '../../composables/useModels.ts';
import { useProxiesStore } from '../../composables/useProxies.ts';
import { useRuntimeInfo } from '../../composables/useRuntimeInfo.ts';
@@ -34,7 +31,6 @@ export const useSettingsPageData = defineBasicLoader(async () => {
useUpstreamsStore().load(),
useModelsStore().load(),
useProxiesStore().load(),
- useModelAliases().load(),
useRuntimeInfo().load(),
]);
return {
@@ -53,8 +49,6 @@ const { upstreams, loading: storeLoading, load } = useUpstreamsStore();
const modelsStore = useModelsStore();
const proxiesStore = useProxiesStore();
const { load: loadProxies } = proxiesStore;
-const aliasesStore = useModelAliases();
-const { load: loadAliases } = aliasesStore;
const settingsData = useSettingsPageData();
// Local working copy the child reorders via v-model:ordered; reloadAll
@@ -65,24 +59,17 @@ watch(upstreams, list => {
}, { immediate: true });
const reloadAll = async () => {
- await Promise.all([load(), modelsStore.load(), loadProxies(), loadAliases()]);
+ await Promise.all([load(), modelsStore.load(), loadProxies()]);
};
-// Proxy + alias editors are hosted as modals — v-if drives the unmount on close
-// so the next open boots from a fresh script setup (no manual reset).
+// Proxy editor is hosted as a modal — v-if drives the unmount on close so the
+// next open boots from a fresh script setup (no manual reset).
const proxyDialogOpen = ref(false);
const proxyDialogRecord = ref(null);
const openProxyDialog = (record: ProxyRecord | null): void => {
proxyDialogRecord.value = record;
proxyDialogOpen.value = true;
};
-
-const aliasDialogOpen = ref(false);
-const aliasDialogRecord = ref(null);
-const openAliasDialog = (record: ModelAlias | null): void => {
- aliasDialogRecord.value = record;
- aliasDialogOpen.value = true;
-};
@@ -102,11 +89,6 @@ const openAliasDialog = (record: ModelAlias | null): void => {
@edit="(record: ProxyRecord) => openProxyDialog(record)"
@changed="reloadAll"
/>
- openAliasDialog(null)"
- @edit="(record: ModelAlias) => openAliasDialog(record)"
- @changed="reloadAll"
- />
{
:record="proxyDialogRecord"
@saved="reloadAll"
/>
-
-
diff --git a/apps/web/vitest.config.ts b/apps/web/vitest.config.ts
index 20fd5dbbc..4f478ad04 100644
--- a/apps/web/vitest.config.ts
+++ b/apps/web/vitest.config.ts
@@ -4,7 +4,7 @@ import { defineConfig } from 'vitest/config';
export default defineConfig({
// The Vue plugin is required for any test that mounts an SFC; logic-only
// tests don't need it, but adding it here is cheap and lets component
- // tests (AliasRow, AliasEditDialog, ...) live next to the rest.
+ // tests live next to the rest.
plugins: [Vue()],
test: {
// happy-dom provides DOM + EventSource for the dump-subscription
diff --git a/packages/gateway/migrations/0046_model_aliases.sql b/packages/gateway/migrations/0046_model_aliases.sql
deleted file mode 100644
index d76687bbd..000000000
--- a/packages/gateway/migrations/0046_model_aliases.sql
+++ /dev/null
@@ -1,14 +0,0 @@
-CREATE TABLE model_aliases (
- alias TEXT PRIMARY KEY,
- target_model_id TEXT NOT NULL,
- upstream_ids_json TEXT NOT NULL DEFAULT '[]',
- rules_json TEXT NOT NULL DEFAULT '{}',
- visible_in_models_list INTEGER NOT NULL DEFAULT 1,
- on_conflict TEXT NOT NULL DEFAULT 'real-only'
- CHECK (on_conflict IN ('alias-only', 'real-only', 'both-real-first', 'both-alias-first')),
- created_at INTEGER NOT NULL DEFAULT (unixepoch()),
- updated_at INTEGER NOT NULL DEFAULT (unixepoch())
-);
-
-INSERT OR IGNORE INTO model_aliases (alias, target_model_id, rules_json, on_conflict)
-VALUES ('codex-auto-review', 'gpt-5.4', '{"reasoning":{"effort":"low"}}', 'real-only');
diff --git a/packages/gateway/migrations/0047_model_aliases_display_name.sql b/packages/gateway/migrations/0047_model_aliases_display_name.sql
deleted file mode 100644
index 5fd9b2591..000000000
--- a/packages/gateway/migrations/0047_model_aliases_display_name.sql
+++ /dev/null
@@ -1,4 +0,0 @@
-ALTER TABLE model_aliases ADD COLUMN display_name TEXT;
-
-UPDATE model_aliases SET display_name = 'Codex Auto Review'
- WHERE alias = 'codex-auto-review' AND display_name IS NULL;
diff --git a/packages/gateway/package.json b/packages/gateway/package.json
index 99ce27a73..0e763374d 100644
--- a/packages/gateway/package.json
+++ b/packages/gateway/package.json
@@ -11,8 +11,6 @@
"types": "./src/runtime/channel-broker-contract.ts"
},
"./control-plane/proxies/serialize": { "types": "./src/control-plane/proxies/serialize.ts" },
- "./control-plane/model-aliases/serialize": { "types": "./src/control-plane/model-aliases/serialize.ts" },
- "./control-plane/model-aliases/types": { "types": "./src/control-plane/model-aliases/types.ts" },
"./control-plane/pricing/types": { "types": "./src/control-plane/pricing/types.ts" },
"./data-plane/tools/web-search/types": {
"import": "./src/data-plane/tools/web-search/types.ts",
diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts
deleted file mode 100644
index f49c1e639..000000000
--- a/packages/gateway/src/control-plane/model-aliases/repo.ts
+++ /dev/null
@@ -1,124 +0,0 @@
-import type { ModelAlias, OnConflict } from './types.ts';
-import type { SqlDatabase } from '@floway-dev/platform';
-
-interface ModelAliasRow {
- alias: string;
- target_model_id: string;
- upstream_ids_json: string;
- rules_json: string;
- visible_in_models_list: number;
- on_conflict: OnConflict;
- display_name: string | null;
- created_at: number;
-}
-
-const ALIAS_COLUMNS = 'alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at';
-
-// The model_aliases table is operator-managed and small (dozens of rows at
-// most), so the data plane reads the full table per request — no cache layer.
-// `ORDER BY alias` makes the read deterministic so `/v1/models` and friends
-// emit alias entries in a stable, operator-predictable order across runtimes.
-export const loadAllAliases = async (db: SqlDatabase): Promise
=> {
- const { results } = await db
- .prepare(`SELECT ${ALIAS_COLUMNS} FROM model_aliases ORDER BY alias`)
- .all();
- return results.map(toModelAlias);
-};
-
-export const getAliasByName = async (db: SqlDatabase, alias: string): Promise => {
- const row = await db
- .prepare(`SELECT ${ALIAS_COLUMNS} FROM model_aliases WHERE alias = ?`)
- .bind(alias)
- .first();
- return row ? toModelAlias(row) : null;
-};
-
-// Detects PK collision with a SELECT round-trip rather than catching the
-// INSERT throw — driver error shape differs between node:sqlite and D1.
-export const insertAlias = async (db: SqlDatabase, alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }> => {
- const existing = await db
- .prepare('SELECT 1 FROM model_aliases WHERE alias = ?')
- .bind(alias.alias)
- .first<{ 1: number }>();
- if (existing) return { ok: false, reason: 'duplicate' };
- await db
- .prepare(`INSERT INTO model_aliases (${ALIAS_COLUMNS}) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`)
- .bind(...bindValues(alias))
- .run();
- return { ok: true };
-};
-
-// UPSERT — on conflict the row is overwritten in place, but `created_at`
-// is preserved (the row's first INSERT wins, matching how `proxies.save`
-// keeps the original creation timestamp on a re-save).
-export const saveAlias = async (db: SqlDatabase, alias: ModelAlias): Promise => {
- await db
- .prepare(
- `INSERT INTO model_aliases (${ALIAS_COLUMNS}) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
- ON CONFLICT (alias) DO UPDATE SET
- target_model_id = excluded.target_model_id,
- upstream_ids_json = excluded.upstream_ids_json,
- rules_json = excluded.rules_json,
- visible_in_models_list = excluded.visible_in_models_list,
- on_conflict = excluded.on_conflict,
- display_name = excluded.display_name,
- updated_at = unixepoch()`,
- )
- .bind(...bindValues(alias))
- .run();
-};
-
-export const deleteAlias = async (db: SqlDatabase, alias: string): Promise<{ deleted: boolean }> => {
- const result = await db.prepare('DELETE FROM model_aliases WHERE alias = ?').bind(alias).run();
- return { deleted: (result.meta.changes ?? 0) > 0 };
-};
-
-// Updates the PK column in place. A pre-flight SELECT detects the destination
-// collision so the caller gets a structured `duplicate` reason instead of a
-// driver-specific SQLITE_CONSTRAINT thrown error (shape differs between
-// node:sqlite and D1). `meta.changes === 0` after the UPDATE means the source
-// row was gone — propagated as `notFound` for the 404 mapping.
-export const renameAlias = async (db: SqlDatabase, oldAlias: string, newAlias: string): Promise<{ ok: true } | { ok: false; reason: 'duplicate' | 'notFound' }> => {
- if (oldAlias === newAlias) return { ok: true };
- const conflict = await db
- .prepare('SELECT 1 FROM model_aliases WHERE alias = ?')
- .bind(newAlias)
- .first<{ 1: number }>();
- if (conflict) return { ok: false, reason: 'duplicate' };
- const result = await db
- .prepare('UPDATE model_aliases SET alias = ?, updated_at = unixepoch() WHERE alias = ?')
- .bind(newAlias, oldAlias)
- .run();
- if ((result.meta.changes ?? 0) === 0) return { ok: false, reason: 'notFound' };
- return { ok: true };
-};
-
-const bindValues = (alias: ModelAlias): unknown[] => [
- alias.alias,
- alias.targetModelId,
- JSON.stringify(alias.upstreamIds),
- JSON.stringify(alias.rules),
- alias.visibleInModelsList ? 1 : 0,
- alias.onConflict,
- alias.displayName ?? null,
- alias.createdAt,
-];
-
-const toModelAlias = (row: ModelAliasRow): ModelAlias => ({
- alias: row.alias,
- targetModelId: row.target_model_id,
- upstreamIds: parseJsonField(row.alias, 'upstream_ids_json', row.upstream_ids_json),
- rules: parseJsonField(row.alias, 'rules_json', row.rules_json),
- visibleInModelsList: row.visible_in_models_list === 1,
- onConflict: row.on_conflict,
- ...(row.display_name !== null ? { displayName: row.display_name } : {}),
- createdAt: row.created_at,
-});
-
-const parseJsonField = (alias: string, field: string, raw: string): T => {
- try {
- return JSON.parse(raw) as T;
- } catch (cause) {
- throw new Error(`Malformed model_aliases ${field} for ${alias}`, { cause });
- }
-};
diff --git a/packages/gateway/src/control-plane/model-aliases/repo_test.ts b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
deleted file mode 100644
index 32ba3aea0..000000000
--- a/packages/gateway/src/control-plane/model-aliases/repo_test.ts
+++ /dev/null
@@ -1,172 +0,0 @@
-import { test } from 'vitest';
-
-import { loadAllAliases, renameAlias } from './repo.ts';
-import { createSqliteTestDb } from '../../repo/test-sqlite.ts';
-import { assertEquals, assertRejects } from '@floway-dev/test-utils';
-
-test('loadAllAliases reads the seed row from a freshly migrated database', async () => {
- const db = await createSqliteTestDb();
-
- const aliases = await loadAllAliases(db);
- assertEquals(aliases.length, 1);
- const [seed] = aliases;
- // `createdAt` rides off the migration's `DEFAULT (unixepoch())`, so the
- // exact value is wall-clock dependent. Assert structurally that it landed
- // as a number and strip it before comparing the rest of the row.
- assertEquals(typeof seed.createdAt, 'number');
- const { createdAt: _createdAt, ...withoutTimestamp } = seed;
- assertEquals(withoutTimestamp, {
- alias: 'codex-auto-review',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: { reasoning: { effort: 'low' } },
- visibleInModelsList: true,
- onConflict: 'real-only',
- displayName: 'Codex Auto Review',
- });
-});
-
-test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_models_list to a boolean', async () => {
- const db = await createSqliteTestDb();
- await db.exec('DELETE FROM model_aliases');
- await db
- .prepare(
- 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)',
- )
- .bind(
- 'opus-xhigh',
- 'claude-opus-4-6',
- '["up_priority","up_secondary"]',
- '{"reasoning":{"effort":"xhigh"},"anthropicBeta":["fine-grained-tool-streaming"]}',
- 0,
- 'alias-only',
- 1_700_000_000,
- )
- .run();
- await db
- .prepare(
- 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)',
- )
- .bind('gpt-5-fast', 'gpt-5.4', '[]', '{"serviceTier":"priority"}', 1, 'both-alias-first', 1_700_000_001)
- .run();
-
- const aliases = await loadAllAliases(db);
- const byAlias = new Map(aliases.map(entry => [entry.alias, entry]));
-
- assertEquals(byAlias.get('opus-xhigh'), {
- alias: 'opus-xhigh',
- targetModelId: 'claude-opus-4-6',
- upstreamIds: ['up_priority', 'up_secondary'],
- rules: { reasoning: { effort: 'xhigh' }, anthropicBeta: ['fine-grained-tool-streaming'] },
- visibleInModelsList: false,
- onConflict: 'alias-only',
- createdAt: 1_700_000_000,
- });
- assertEquals(byAlias.get('gpt-5-fast'), {
- alias: 'gpt-5-fast',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: { serviceTier: 'priority' },
- visibleInModelsList: true,
- onConflict: 'both-alias-first',
- createdAt: 1_700_000_001,
- });
-});
-
-test('loadAllAliases reads display_name and omits the field when SQL stored NULL', async () => {
- const db = await createSqliteTestDb();
- await db.exec('DELETE FROM model_aliases');
- await db
- .prepare(
- 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
- )
- .bind('with-label', 'gpt-5.4', '[]', '{}', 1, 'real-only', 'Pretty Label', 1_700_000_000)
- .run();
- await db
- .prepare(
- 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
- )
- .bind('no-label', 'gpt-5.4', '[]', '{}', 1, 'real-only', null, 1_700_000_001)
- .run();
-
- const byAlias = new Map((await loadAllAliases(db)).map(entry => [entry.alias, entry]));
- assertEquals(byAlias.get('with-label')?.displayName, 'Pretty Label');
- // SQL NULL becomes undefined on the typed row so callers can branch on `=== undefined`.
- assertEquals('displayName' in (byAlias.get('no-label') ?? {}), false);
-});
-
-test('loadAllAliases surfaces malformed rules_json as a descriptive error', async () => {
- const db = await createSqliteTestDb();
- await db.exec('DELETE FROM model_aliases');
- await db
- .prepare(
- 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)',
- )
- .bind('bad-rules', 'gpt-5.4', '[]', '{not json', 1, 'real-only')
- .run();
-
- await assertRejects(() => loadAllAliases(db), Error, 'Malformed model_aliases rules_json for bad-rules');
-});
-
-test('loadAllAliases surfaces malformed upstream_ids_json as a descriptive error', async () => {
- const db = await createSqliteTestDb();
- await db.exec('DELETE FROM model_aliases');
- await db
- .prepare(
- 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)',
- )
- .bind('bad-upstreams', 'gpt-5.4', '[bad', '{}', 1, 'real-only')
- .run();
-
- await assertRejects(() => loadAllAliases(db), Error, 'Malformed model_aliases upstream_ids_json for bad-upstreams');
-});
-
-test('renameAlias updates the PRIMARY KEY in place', async () => {
- const db = await createSqliteTestDb();
- await db.exec('DELETE FROM model_aliases');
- await db
- .prepare(
- 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
- )
- .bind('source', 'gpt-5.4', '[]', '{}', 1, 'real-only', 'Source Label', 1_700_000_000)
- .run();
-
- const result = await renameAlias(db, 'source', 'renamed');
- assertEquals(result, { ok: true });
-
- const remaining = await loadAllAliases(db);
- assertEquals(remaining.map(a => a.alias), ['renamed']);
- // Preserved row payload — only the PK changed; createdAt and displayName intact.
- assertEquals(remaining[0]!.displayName, 'Source Label');
- assertEquals(remaining[0]!.createdAt, 1_700_000_000);
-});
-
-test('renameAlias returns notFound when the source row is missing', async () => {
- const db = await createSqliteTestDb();
- await db.exec('DELETE FROM model_aliases');
- const result = await renameAlias(db, 'ghost', 'new-name');
- assertEquals(result, { ok: false, reason: 'notFound' });
-});
-
-test('renameAlias returns duplicate when the destination row already exists', async () => {
- const db = await createSqliteTestDb();
- await db.exec('DELETE FROM model_aliases');
- await db
- .prepare(
- 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)',
- )
- .bind('source', 'gpt-5.4', '[]', '{}', 1, 'real-only', 1_700_000_000)
- .run();
- await db
- .prepare(
- 'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)',
- )
- .bind('taken', 'gpt-5.4', '[]', '{}', 1, 'real-only', 1_700_000_001)
- .run();
-
- const result = await renameAlias(db, 'source', 'taken');
- assertEquals(result, { ok: false, reason: 'duplicate' });
- // Both rows still present.
- const remaining = (await loadAllAliases(db)).map(a => a.alias).sort();
- assertEquals(remaining, ['source', 'taken']);
-});
diff --git a/packages/gateway/src/control-plane/model-aliases/routes.ts b/packages/gateway/src/control-plane/model-aliases/routes.ts
deleted file mode 100644
index 611d8913e..000000000
--- a/packages/gateway/src/control-plane/model-aliases/routes.ts
+++ /dev/null
@@ -1,87 +0,0 @@
-import type { Context } from 'hono';
-
-import { aliasToJson } from './serialize.ts';
-import type { ModelAlias } from './types.ts';
-import { type CtxWithJson } from '../../middleware/zod-validator.ts';
-import { getRepo } from '../../repo/index.ts';
-import type { createAliasBody, updateAliasBody } from '../schemas.ts';
-
-export const listAliases = async (c: Context) => {
- const aliases = await getRepo().modelAliases.loadAll();
- return c.json(aliases.map(aliasToJson));
-};
-
-export const createAlias = async (c: CtxWithJson) => {
- const body = c.req.valid('json');
- const record: ModelAlias = {
- alias: body.alias,
- targetModelId: body.targetModelId,
- upstreamIds: body.upstreamIds,
- rules: body.rules,
- visibleInModelsList: body.visibleInModelsList,
- // `real-only` is the safe default: an alias whose target id collides with
- // a real model id stays hidden until the operator opts the alias into one
- // of the surfacing modes. Matches the migration's column default.
- onConflict: body.onConflict ?? 'real-only',
- ...(body.displayName !== undefined ? { displayName: body.displayName } : {}),
- createdAt: Math.floor(Date.now() / 1000),
- };
-
- const result = await getRepo().modelAliases.create(record);
- if (!result.ok) {
- return c.json({ error: { type: 'conflict', message: `Alias "${body.alias}" already exists` } }, 409);
- }
-
- return c.json(aliasToJson(record), 201);
-};
-
-export const updateAlias = async (c: CtxWithJson) => {
- const aliasName = c.req.param('alias')!;
- const body = c.req.valid('json');
-
- const repo = getRepo();
- const existing = await repo.modelAliases.getByAlias(aliasName);
- if (!existing) return c.json({ error: 'Alias not found' }, 404);
-
- // Rename runs first so the merged save below targets the row at its new
- // PK. A no-op (alias unchanged or omitted) returns ok without touching
- // the row.
- const nextAlias = body.alias ?? existing.alias;
- if (nextAlias !== existing.alias) {
- const renamed = await repo.modelAliases.rename(existing.alias, nextAlias);
- if (!renamed.ok) {
- return c.json({ error: { type: 'conflict', message: `Alias "${nextAlias}" already exists` } }, 409);
- }
- }
-
- // Field-by-field merge so an absent field preserves the existing value.
- // `displayName` accepts an explicit null to clear the operator-set label
- // back to the synthesized fallback; we use Object.hasOwn to keep the
- // absent / null distinction that `??` would collapse.
- const merged: ModelAlias = {
- alias: nextAlias,
- targetModelId: body.targetModelId ?? existing.targetModelId,
- upstreamIds: body.upstreamIds ?? existing.upstreamIds,
- rules: body.rules ?? existing.rules,
- visibleInModelsList: body.visibleInModelsList ?? existing.visibleInModelsList,
- onConflict: body.onConflict ?? existing.onConflict,
- createdAt: existing.createdAt,
- ...nextDisplayName(existing, body.displayName),
- };
-
- await repo.modelAliases.save(merged);
- return c.json(aliasToJson(merged));
-};
-
-const nextDisplayName = (existing: ModelAlias, patch: string | null | undefined): { displayName?: string } => {
- if (patch === undefined) return existing.displayName !== undefined ? { displayName: existing.displayName } : {};
- if (patch === null) return {};
- return { displayName: patch };
-};
-
-export const deleteAlias = async (c: Context) => {
- const aliasName = c.req.param('alias')!;
- const { deleted } = await getRepo().modelAliases.delete(aliasName);
- if (!deleted) return c.json({ error: 'Alias not found' }, 404);
- return c.body(null, 204);
-};
diff --git a/packages/gateway/src/control-plane/model-aliases/routes_test.ts b/packages/gateway/src/control-plane/model-aliases/routes_test.ts
deleted file mode 100644
index bf1cd766c..000000000
--- a/packages/gateway/src/control-plane/model-aliases/routes_test.ts
+++ /dev/null
@@ -1,354 +0,0 @@
-import { test } from 'vitest';
-
-import type { SerializedModelAlias } from './serialize.ts';
-import { requestApp, setupAppTest } from '../../test-helpers.ts';
-import { assertEquals } from '@floway-dev/test-utils';
-
-const authedGet = (adminSession: string): RequestInit => ({
- method: 'GET',
- headers: { 'x-floway-session': adminSession },
-});
-
-const authedJson = (adminSession: string, method: 'POST' | 'PATCH' | 'DELETE', body?: unknown): RequestInit => ({
- method,
- headers: {
- 'content-type': 'application/json',
- 'x-floway-session': adminSession,
- },
- ...(body === undefined ? {} : { body: JSON.stringify(body) }),
-});
-
-const baseCreate = (overrides: Record = {}) => ({
- alias: 'opus-xhigh',
- targetModelId: 'claude-opus-4-6',
- upstreamIds: [],
- rules: { reasoning: { effort: 'xhigh' } },
- visibleInModelsList: true,
- onConflict: 'real-only',
- ...overrides,
-});
-
-test('GET /api/aliases returns rows sorted by alias', async () => {
- const { repo, adminSession } = await setupAppTest();
- await repo.modelAliases.save({
- alias: 'zzz-late',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: {},
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 1_700_000_001,
- });
- await repo.modelAliases.save({
- alias: 'aaa-early',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: {},
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 1_700_000_000,
- });
-
- const resp = await requestApp('/api/aliases', authedGet(adminSession));
- assertEquals(resp.status, 200);
- const list = (await resp.json()) as SerializedModelAlias[];
- assertEquals(list.map(a => a.alias), ['aaa-early', 'zzz-late']);
-});
-
-test('POST /api/aliases creates a row and echoes the serialized shape', async () => {
- const { repo, adminSession } = await setupAppTest();
-
- const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({
- displayName: 'Opus Extra-High',
- upstreamIds: ['up_a', 'up_b'],
- rules: { reasoning: { effort: 'xhigh' }, anthropicBeta: ['fine-grained-tool-streaming'] },
- })));
- assertEquals(resp.status, 201);
- const created = (await resp.json()) as SerializedModelAlias;
- assertEquals(created.alias, 'opus-xhigh');
- assertEquals(created.target_model_id, 'claude-opus-4-6');
- assertEquals(created.upstream_ids, ['up_a', 'up_b']);
- assertEquals(created.rules, { reasoning: { effort: 'xhigh' }, anthropicBeta: ['fine-grained-tool-streaming'] });
- assertEquals(created.visible_in_models_list, true);
- assertEquals(created.on_conflict, 'real-only');
- assertEquals(created.display_name, 'Opus Extra-High');
- assertEquals(typeof created.created_at, 'number');
-
- const stored = await repo.modelAliases.getByAlias('opus-xhigh');
- assertEquals(stored?.targetModelId, 'claude-opus-4-6');
- assertEquals(stored?.displayName, 'Opus Extra-High');
-});
-
-test('POST /api/aliases defaults onConflict to real-only when omitted', async () => {
- const { adminSession } = await setupAppTest();
-
- const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', {
- alias: 'no-onconflict',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: {},
- visibleInModelsList: true,
- }));
- assertEquals(resp.status, 201);
- const created = (await resp.json()) as SerializedModelAlias;
- assertEquals(created.on_conflict, 'real-only');
-});
-
-test('POST /api/aliases returns 409 on duplicate alias', async () => {
- const { adminSession } = await setupAppTest();
-
- const first = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate()));
- assertEquals(first.status, 201);
-
- const dup = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate()));
- assertEquals(dup.status, 409);
- const body = (await dup.json()) as { error: { type: string; message: string } };
- assertEquals(body.error.type, 'conflict');
-});
-
-test('POST /api/aliases rejects an empty alias name with 400', async () => {
- const { adminSession } = await setupAppTest();
- const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({ alias: '' })));
- assertEquals(resp.status, 400);
-});
-
-test('POST /api/aliases rejects an alias containing whitespace with 400', async () => {
- const { adminSession } = await setupAppTest();
- const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({ alias: 'has space' })));
- assertEquals(resp.status, 400);
-});
-
-test('POST /api/aliases rejects an unknown rule key with 400', async () => {
- const { adminSession } = await setupAppTest();
- const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({
- rules: { reasoning: { effort: 'high' }, mysteryKnob: true } as unknown as Record,
- })));
- assertEquals(resp.status, 400);
-});
-
-test('POST /api/aliases rejects an invalid onConflict value with 400', async () => {
- const { adminSession } = await setupAppTest();
- const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({ onConflict: 'mystery-mode' })));
- assertEquals(resp.status, 400);
-});
-
-test('POST /api/aliases requires admin auth (non-admin api key returns 403)', async () => {
- const { adminSession, apiKey } = await setupAppTest();
-
- // Sanity: the admin call succeeds so the failure below pins the auth gate,
- // not a request-shape mistake shared by both calls.
- const adminResp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate()));
- assertEquals(adminResp.status, 201);
-
- const userResp = await requestApp('/api/aliases', {
- method: 'POST',
- headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
- body: JSON.stringify(baseCreate({ alias: 'other' })),
- });
- assertEquals(userResp.status, 403);
-});
-
-test('PATCH /api/aliases/:alias merges a partial body and preserves untouched fields', async () => {
- const { repo, adminSession } = await setupAppTest();
- await repo.modelAliases.save({
- alias: 'opus-xhigh',
- targetModelId: 'claude-opus-4-6',
- upstreamIds: ['up_a'],
- rules: { reasoning: { effort: 'xhigh' } },
- visibleInModelsList: true,
- onConflict: 'real-only',
- displayName: 'Existing Label',
- createdAt: 1_700_000_000,
- });
-
- const resp = await requestApp('/api/aliases/opus-xhigh', authedJson(adminSession, 'PATCH', {
- rules: { reasoning: { effort: 'medium' }, serviceTier: 'priority' },
- }));
- assertEquals(resp.status, 200);
- const updated = (await resp.json()) as SerializedModelAlias;
- // Patched fields took effect.
- assertEquals(updated.rules, { reasoning: { effort: 'medium' }, serviceTier: 'priority' });
- // Untouched fields preserved verbatim.
- assertEquals(updated.target_model_id, 'claude-opus-4-6');
- assertEquals(updated.upstream_ids, ['up_a']);
- assertEquals(updated.visible_in_models_list, true);
- assertEquals(updated.display_name, 'Existing Label');
- assertEquals(updated.created_at, 1_700_000_000);
-});
-
-test('PATCH /api/aliases/:alias accepts displayName=null to clear the label', async () => {
- const { repo, adminSession } = await setupAppTest();
- await repo.modelAliases.save({
- alias: 'opus-xhigh',
- targetModelId: 'claude-opus-4-6',
- upstreamIds: [],
- rules: {},
- visibleInModelsList: true,
- onConflict: 'real-only',
- displayName: 'Existing Label',
- createdAt: 1_700_000_000,
- });
-
- const resp = await requestApp('/api/aliases/opus-xhigh', authedJson(adminSession, 'PATCH', { displayName: null }));
- assertEquals(resp.status, 200);
- const updated = (await resp.json()) as SerializedModelAlias;
- assertEquals(updated.display_name, null);
-
- const stored = await repo.modelAliases.getByAlias('opus-xhigh');
- assertEquals(stored?.displayName, undefined);
-});
-
-test('PATCH /api/aliases/:alias returns 404 when the alias does not exist', async () => {
- const { adminSession } = await setupAppTest();
- const resp = await requestApp('/api/aliases/nope', authedJson(adminSession, 'PATCH', { visibleInModelsList: false }));
- assertEquals(resp.status, 404);
-});
-
-test('PATCH /api/aliases/:alias renames the row when body.alias differs from the path', async () => {
- const { repo, adminSession } = await setupAppTest();
- await repo.modelAliases.save({
- alias: 'old-name',
- targetModelId: 'gpt-5.4',
- upstreamIds: ['up_a'],
- rules: { reasoning: { effort: 'high' } },
- visibleInModelsList: true,
- onConflict: 'real-only',
- displayName: 'Old Label',
- createdAt: 1_700_000_000,
- });
-
- const resp = await requestApp('/api/aliases/old-name', authedJson(adminSession, 'PATCH', {
- alias: 'new-name',
- rules: { reasoning: { effort: 'medium' } },
- }));
- assertEquals(resp.status, 200);
- const updated = (await resp.json()) as SerializedModelAlias;
- // Response carries the new alias and the patched rules; preserved fields stay intact.
- assertEquals(updated.alias, 'new-name');
- assertEquals(updated.target_model_id, 'gpt-5.4');
- assertEquals(updated.upstream_ids, ['up_a']);
- assertEquals(updated.rules, { reasoning: { effort: 'medium' } });
- assertEquals(updated.display_name, 'Old Label');
- assertEquals(updated.created_at, 1_700_000_000);
-
- // Repo state: old row gone, new row present.
- assertEquals(await repo.modelAliases.getByAlias('old-name'), null);
- const stored = await repo.modelAliases.getByAlias('new-name');
- assertEquals(stored?.alias, 'new-name');
- assertEquals(stored?.rules, { reasoning: { effort: 'medium' } });
-});
-
-test('PATCH /api/aliases/:alias returns 409 when body.alias collides with an existing row', async () => {
- const { repo, adminSession } = await setupAppTest();
- await repo.modelAliases.save({
- alias: 'source',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: {},
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 1_700_000_000,
- });
- await repo.modelAliases.save({
- alias: 'taken',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: {},
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 1_700_000_001,
- });
-
- const resp = await requestApp('/api/aliases/source', authedJson(adminSession, 'PATCH', { alias: 'taken' }));
- assertEquals(resp.status, 409);
- const body = (await resp.json()) as { error: { type: string; message: string } };
- assertEquals(body.error.type, 'conflict');
-
- // Both rows untouched.
- assertEquals((await repo.modelAliases.getByAlias('source'))?.alias, 'source');
- assertEquals((await repo.modelAliases.getByAlias('taken'))?.alias, 'taken');
-});
-
-test('PATCH /api/aliases/:alias treats body.alias === path as a no-op rename', async () => {
- const { repo, adminSession } = await setupAppTest();
- await repo.modelAliases.save({
- alias: 'same-name',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: {},
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 1_700_000_000,
- });
-
- const resp = await requestApp('/api/aliases/same-name', authedJson(adminSession, 'PATCH', {
- alias: 'same-name',
- targetModelId: 'claude-opus-4-6',
- }));
- assertEquals(resp.status, 200);
- const updated = (await resp.json()) as SerializedModelAlias;
- assertEquals(updated.alias, 'same-name');
- assertEquals(updated.target_model_id, 'claude-opus-4-6');
-});
-
-test('PATCH /api/aliases/:alias requires admin auth', async () => {
- const { repo, adminSession: _adminSession, apiKey } = await setupAppTest();
- await repo.modelAliases.save({
- alias: 'opus-xhigh',
- targetModelId: 'claude-opus-4-6',
- upstreamIds: [],
- rules: {},
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 1_700_000_000,
- });
-
- const userResp = await requestApp('/api/aliases/opus-xhigh', {
- method: 'PATCH',
- headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
- body: JSON.stringify({ visibleInModelsList: false }),
- });
- assertEquals(userResp.status, 403);
-});
-
-test('DELETE /api/aliases/:alias returns 204 on success and removes the row', async () => {
- const { repo, adminSession } = await setupAppTest();
- await repo.modelAliases.save({
- alias: 'doomed',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: {},
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 1_700_000_000,
- });
-
- const resp = await requestApp('/api/aliases/doomed', authedJson(adminSession, 'DELETE'));
- assertEquals(resp.status, 204);
- assertEquals(await repo.modelAliases.getByAlias('doomed'), null);
-});
-
-test('DELETE /api/aliases/:alias returns 404 when the alias does not exist', async () => {
- const { adminSession } = await setupAppTest();
- const resp = await requestApp('/api/aliases/nope', authedJson(adminSession, 'DELETE'));
- assertEquals(resp.status, 404);
-});
-
-test('DELETE /api/aliases/:alias requires admin auth', async () => {
- const { repo, apiKey } = await setupAppTest();
- await repo.modelAliases.save({
- alias: 'doomed',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: {},
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 1_700_000_000,
- });
-
- const resp = await requestApp('/api/aliases/doomed', {
- method: 'DELETE',
- headers: { 'x-api-key': apiKey.key },
- });
- assertEquals(resp.status, 403);
-});
diff --git a/packages/gateway/src/control-plane/model-aliases/serialize.ts b/packages/gateway/src/control-plane/model-aliases/serialize.ts
deleted file mode 100644
index 5889997cc..000000000
--- a/packages/gateway/src/control-plane/model-aliases/serialize.ts
+++ /dev/null
@@ -1,31 +0,0 @@
-// Wire-format projection for the operator-managed model_aliases rows. The
-// dashboard reads the same shape it sends back for create/update; the few
-// snake_cased fields (`visible_in_models_list`, `on_conflict`, `created_at`,
-// `display_name`) follow the rest of the control-plane HTTP surface.
-
-import type { ModelAlias, ModelAliasRules, OnConflict } from './types.ts';
-
-export interface SerializedModelAlias {
- alias: string;
- target_model_id: string;
- upstream_ids: string[];
- rules: ModelAliasRules;
- visible_in_models_list: boolean;
- on_conflict: OnConflict;
- display_name: string | null;
- created_at: number;
-}
-
-export const aliasToJson = (alias: ModelAlias): SerializedModelAlias => ({
- alias: alias.alias,
- target_model_id: alias.targetModelId,
- // Defensive copy: the readonly arrays inside ModelAlias are shared with
- // callers, and JSON serialization would otherwise expose the same backing
- // array used by `loadAll`.
- upstream_ids: [...alias.upstreamIds],
- rules: alias.rules,
- visible_in_models_list: alias.visibleInModelsList,
- on_conflict: alias.onConflict,
- display_name: alias.displayName ?? null,
- created_at: alias.createdAt,
-});
diff --git a/packages/gateway/src/control-plane/model-aliases/types.ts b/packages/gateway/src/control-plane/model-aliases/types.ts
deleted file mode 100644
index e7d9f64bd..000000000
--- a/packages/gateway/src/control-plane/model-aliases/types.ts
+++ /dev/null
@@ -1,34 +0,0 @@
-// Closed set of request-time mode knobs an operator can lock on a matched
-// alias. Each value is freeform — the gateway does not enum-gate operator
-// input so values pass through to upstream verbatim.
-export type ModelAliasRules = {
- readonly reasoning?: {
- readonly effort?: string;
- readonly budgetTokens?: number;
- readonly adaptive?: boolean;
- readonly summary?: string;
- };
- readonly verbosity?: string;
- readonly serviceTier?: string;
- readonly anthropicBeta?: readonly string[];
-};
-
-export type OnConflict = 'alias-only' | 'real-only' | 'both-real-first' | 'both-alias-first';
-
-export type ModelAlias = {
- readonly alias: string;
- readonly targetModelId: string;
- readonly upstreamIds: readonly string[];
- readonly rules: ModelAliasRules;
- readonly visibleInModelsList: boolean;
- readonly onConflict: OnConflict;
- // Operator-set human-readable label shown after the upstream display name in
- // `/v1/models` entries (e.g. "Azure: Codex Auto Review"). When unset, the
- // listing falls back to the alias target's display name plus a rules-summary
- // suffix; see `data-plane/model-aliases/display.ts`.
- readonly displayName?: string;
- // Unix epoch seconds stamped at row insertion. Surfaced on the
- // `/v1/models` synthesized alias entry so callers see when an alias was
- // declared, matching the `created` semantics of the real entries.
- readonly createdAt: number;
-};
diff --git a/packages/gateway/src/control-plane/models/routes.ts b/packages/gateway/src/control-plane/models/routes.ts
index 9db27e7f4..3a6620496 100644
--- a/packages/gateway/src/control-plane/models/routes.ts
+++ b/packages/gateway/src/control-plane/models/routes.ts
@@ -1,25 +1,21 @@
import type { Context } from 'hono';
-import type { ListedModel } from '../../data-plane/models/alias-listing.ts';
import { toPublicModel } from '../../data-plane/models/load.ts';
import { MODEL_LISTING_FAILURE_MESSAGE } from '../../data-plane/models/shared.ts';
-import { getModelsForListing } from '../../data-plane/providers/registry.ts';
+import { getModels } from '../../data-plane/providers/registry.ts';
import { createPerRequestFetcher } from '../../dial/per-request.ts';
import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
-import { getRepo } from '../../repo/index.ts';
import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
import { getCurrentColo } from '../../runtime/runtime-info.ts';
import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
import { ProviderModelsUnavailableError } from '@floway-dev/provider';
-import type { UpstreamProviderKind } from '@floway-dev/provider';
+import type { ResolvedModel, UpstreamProviderKind } from '@floway-dev/provider';
// Same DTO as the public /models endpoint, plus one dashboard-only field:
// `upstreams` lists every provider binding for this model as { kind, id, name }
// triples. A single model id can be served by mixed provider kinds (e.g. one
// azure deployment + one custom upstream both expose `gpt-5.5`), so a flat
-// `provider`/`upstream_ids` split would misrepresent that. Alias entries
-// carry a single binding (the upstream that resolves their target) and the
-// `aliasedFrom` provenance flows through `toPublicModel`.
+// `provider`/`upstream_ids` split would misrepresent that.
interface ControlPlaneModel extends PublicModel {
upstreams: { kind: UpstreamProviderKind; id: string; name: string }[];
}
@@ -28,7 +24,7 @@ interface ControlPlaneModelsResponse extends Omit
data: ControlPlaneModel[];
}
-const toControlPlaneModel = (model: ListedModel): ControlPlaneModel => ({
+const toControlPlaneModel = (model: ResolvedModel): ControlPlaneModel => ({
...toPublicModel(model),
upstreams: model.providers.map(binding => ({ kind: binding.providerKind, id: binding.upstream, name: binding.upstreamName })),
});
@@ -39,15 +35,11 @@ export const controlPlaneModels = async (c: Context) => {
// like the data-plane /models endpoint. On a session request there is no
// API key, so this resolves to the user's per-user upstream cap: a user who
// has had an upstream removed must not see its models in the Models tab.
- // Aliases come from the same repo singleton the data plane uses, so the
- // dashboard sees exactly the alias rows the runtime would honour.
const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
- const aliases = await getRepo().modelAliases.loadAll();
- const { models } = await getModelsForListing(
+ const models = await getModels(
effectiveUpstreamIdsFromContext(c),
fetcherForUpstream,
backgroundSchedulerFromContext(c),
- aliases,
);
const data = models.map(toControlPlaneModel);
const response: ControlPlaneModelsResponse = {
diff --git a/packages/gateway/src/control-plane/models/routes_test.ts b/packages/gateway/src/control-plane/models/routes_test.ts
index bab38e182..bc49181de 100644
--- a/packages/gateway/src/control-plane/models/routes_test.ts
+++ b/packages/gateway/src/control-plane/models/routes_test.ts
@@ -1,6 +1,5 @@
import { test } from 'vitest';
-import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
import { buildCustomUpstreamRecord, copilotModels, requestApp, setupAppTest } from '../../test-helpers.ts';
import type { UpstreamRecord } from '@floway-dev/provider';
import { assertEquals, jsonResponse, withMockedFetch } from '@floway-dev/test-utils';
@@ -119,39 +118,10 @@ test('/api/models appends visible alias entries with aliasedFrom alongside real
const { apiKey, repo } = await setupAppTest();
await repo.upstreams.save(buildCustomUpstreamRecord({ id: 'up_custom_models', sortOrder: 100 }));
- (repo.modelAliases as MemoryModelAliasesRepo).setAll([
- {
- alias: 'codex-auto-review',
- displayName: 'Codex Auto Review',
- targetModelId: 'custom-model',
- upstreamIds: [],
- rules: { reasoning: { effort: 'low' } },
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 1_700_000_000,
- },
- {
- alias: 'hidden-alias',
- targetModelId: 'custom-model',
- upstreamIds: [],
- rules: {},
- visibleInModelsList: false,
- onConflict: 'real-only',
- createdAt: 1_700_000_001,
- },
- ]);
-
await withMockedFetch(modelsFetchHandler, async () => {
const response = await requestApp('/api/models', { headers: { 'x-api-key': apiKey.key } });
assertEquals(response.status, 200);
- const body = (await response.json()) as { data: Array<{ id: string; display_name: string; upstreams: Array<{ kind: string; id: string; name: string }>; aliasedFrom?: { targetModelId: string; rules: Record; displayName?: string } }> };
- const aliasEntry = body.data.find(model => model.id === 'codex-auto-review');
- if (!aliasEntry) throw new Error('expected codex-auto-review alias entry on /api/models');
- assertEquals(aliasEntry.display_name, 'Codex Auto Review');
- assertEquals(aliasEntry.upstreams, [{ kind: 'custom', id: 'up_custom_models', name: 'Custom Provider' }]);
- assertEquals(aliasEntry.aliasedFrom?.targetModelId, 'custom-model');
- assertEquals(aliasEntry.aliasedFrom?.rules, { reasoning: { effort: 'low' } });
- assertEquals(aliasEntry.aliasedFrom?.displayName, 'Codex Auto Review');
- assertEquals(body.data.some(model => model.id === 'hidden-alias'), false);
+ const body = (await response.json()) as { data: Array<{ id: string; display_name: string; upstreams: Array<{ kind: string; id: string; name: string }> }> };
+ assertEquals(body.data.some(model => model.id === 'custom-model'), true);
});
});
diff --git a/packages/gateway/src/control-plane/routes.ts b/packages/gateway/src/control-plane/routes.ts
index 35ceb71de..94b5f06ff 100644
--- a/packages/gateway/src/control-plane/routes.ts
+++ b/packages/gateway/src/control-plane/routes.ts
@@ -5,11 +5,10 @@ import { authLogin, authLogout, authMe } from './auth/routes.ts';
import { copilotQuota } from './copilot-quota/routes.ts';
import { exportData, importData } from './data-transfer/routes.ts';
import { dumpRoutes } from './dump.ts';
-import { createAlias, deleteAlias, listAliases, updateAlias } from './model-aliases/routes.ts';
import { controlPlaneModels } from './models/routes.ts';
import { performanceOverview, performanceTelemetry } from './performance/routes.ts';
import { createProxy, deleteProxy, listAllBackoffs, listProxies, listProxyBackoffs, resetProxyBackoffs, testProxy, updateProxy } from './proxies/routes.ts';
-import { authLoginBody, changeOwnPasswordBody, claudeCodeAuthorizeUrlBody, claudeCodeImportBody, claudeCodeProbeQuotaBody, claudeCodeRefreshNowBody, claudeCodeReimportBody, claudeCodeSetupTokenImportBody, claudeCodeSetupTokenReimportBody, codexAuthorizeUrlBody, codexImportBody, codexRefreshNowBody, codexReimportBody, copilotAuthPollBody, createAliasBody, createKeyBody, createProxyBody, createUpstreamBody, createUserBody, exportQuery, fetchModelsBody, importBody, performanceQuery, resetBackoffBody, searchConfigSchema, searchUsageQuery, testProxyBody, tokenUsageQuery, updateAliasBody, updateKeyBody, updateProxyBody, updateUpstreamBody, updateUserBody } from './schemas.ts';
+import { authLoginBody, changeOwnPasswordBody, claudeCodeAuthorizeUrlBody, claudeCodeImportBody, claudeCodeProbeQuotaBody, claudeCodeRefreshNowBody, claudeCodeReimportBody, claudeCodeSetupTokenImportBody, claudeCodeSetupTokenReimportBody, codexAuthorizeUrlBody, codexImportBody, codexRefreshNowBody, codexReimportBody, copilotAuthPollBody, createKeyBody, createProxyBody, createUpstreamBody, createUserBody, exportQuery, fetchModelsBody, importBody, performanceQuery, resetBackoffBody, searchConfigSchema, searchUsageQuery, testProxyBody, tokenUsageQuery, updateKeyBody, updateProxyBody, updateUpstreamBody, updateUserBody } from './schemas.ts';
import { getSearchConfigRoute, putSearchConfigRoute, testSearchConfigRoute } from './search-config/routes.ts';
import { searchUsage } from './search-usage/routes.ts';
import { tokenUsage } from './token-usage/routes.ts';
@@ -101,11 +100,6 @@ export const controlPlaneRoutes = new Hono<{ Variables: AuthVars }>()
.get('/search-config', getSearchConfigRoute)
.put('/search-config', zValidator('json', searchConfigSchema), putSearchConfigRoute)
.post('/search-config/test', zValidator('json', searchConfigSchema), testSearchConfigRoute)
- // Model aliases.
- .get('/aliases', listAliases)
- .post('/aliases', zValidator('json', createAliasBody), createAlias)
- .patch('/aliases/:alias', zValidator('json', updateAliasBody), updateAlias)
- .delete('/aliases/:alias', deleteAlias)
.get('/export', zValidator('query', exportQuery), exportData)
.post('/import', zValidator('json', importBody), importData));
diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
index f81c5df42..f718539ee 100644
--- a/packages/gateway/src/control-plane/schemas.ts
+++ b/packages/gateway/src/control-plane/schemas.ts
@@ -635,76 +635,3 @@ export const performanceQuery = z.object({
bucket: z.enum(['hour', '4h', '8h', 'day', 'all']).optional(),
timezone_offset_minutes: z.string().optional(),
});
-
-// --- model aliases ---
-//
-// Operator-managed alias rows wired through `/api/aliases`. The schemas
-// describe the request bodies the dashboard sends; deeper invariants
-// (the alias's target model exists in the catalog, the upstream ids
-// resolve, etc.) are intentionally NOT enforced here — an alias is allowed
-// to point at a target that is currently absent so an operator can pre-
-// stage the row before the upstream is registered, mirroring how the
-// catalog tolerates pending references.
-
-// Mirror the public model-id grammar: letters, digits, `_ . : - / `. Matches
-// the surface ids the dashboard already accepts in the models picker and the
-// `/v1/models` listing, so an alias name is interchangeable with a real id at
-// the request boundary.
-export const MODEL_ALIAS_PATTERN = /^[A-Za-z0-9_.:\-/]+$/;
-
-const aliasNameSchema = z.string().min(1).regex(MODEL_ALIAS_PATTERN, 'alias must be 1+ chars of [A-Za-z0-9_.:-/]');
-
-// Rule field values pass through to the upstream verbatim — the gateway
-// deliberately does not enum-gate operator input here. The Goal-2 contract
-// is that a freshly added enum upstream-side ships through without a
-// gateway code change, so we validate shape (non-empty string, in-range
-// number) but never set membership.
-const aliasReasoningSchema = z.object({
- effort: z.string().min(1).optional(),
- budgetTokens: z.number().int().nonnegative().optional(),
- adaptive: z.boolean().optional(),
- summary: z.string().min(1).optional(),
-}).strict().refine(
- r => r.effort !== undefined || r.budgetTokens !== undefined || r.adaptive !== undefined || r.summary !== undefined,
- { message: 'reasoning must declare at least one of effort, budgetTokens, adaptive, summary' },
-);
-
-const aliasRulesSchema = z.object({
- reasoning: aliasReasoningSchema.optional(),
- verbosity: z.string().min(1).optional(),
- serviceTier: z.string().min(1).optional(),
- // Each beta header token is a non-empty string. Empty arrays are accepted
- // (the dashboard sends `[]` when the operator clears every tag) and are
- // semantically equivalent to omitting the field.
- anthropicBeta: z.array(z.string().min(1)).optional(),
-}).strict();
-
-const onConflictSchema = z.enum(['alias-only', 'real-only', 'both-real-first', 'both-alias-first']);
-const upstreamIdsSchema = z.array(z.string().min(1));
-
-export const createAliasBody = z.object({
- alias: aliasNameSchema,
- targetModelId: z.string().min(1),
- upstreamIds: upstreamIdsSchema,
- rules: aliasRulesSchema,
- visibleInModelsList: z.boolean(),
- // Defaults to `'real-only'` server-side when omitted so the dashboard's
- // "create" form does not have to ship a default — the route layer fills it.
- onConflict: onConflictSchema.optional(),
- displayName: z.string().min(1).optional(),
-});
-
-// PATCH accepts a partial shape. `alias` is the row's primary key — when
-// present and different from the path param, the handler renames the row
-// (409 on collision). `displayName` is nullable so the operator can clear
-// an existing label back to the synthesized fallback; absent vs. null is
-// meaningful and propagated through to the handler via Object.hasOwn.
-export const updateAliasBody = z.object({
- alias: aliasNameSchema.optional(),
- targetModelId: z.string().min(1).optional(),
- upstreamIds: upstreamIdsSchema.optional(),
- rules: aliasRulesSchema.optional(),
- visibleInModelsList: z.boolean().optional(),
- onConflict: onConflictSchema.optional(),
- displayName: z.string().min(1).nullable().optional(),
-});
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts b/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts
index 71c3a8288..8d2cda45c 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts
@@ -103,7 +103,7 @@ const callChatCompletionsAsExecuteResult = async (
headers: Headers,
): Promise>> => {
const { model: _model, ...body } = payload;
- sanitizeForChatCompletionsUpstream(body as Record, createSanitizeTraceCtx(candidate.aliasName));
+ sanitizeForChatCompletionsUpstream(body as Record, createSanitizeTraceCtx());
const recorder = createUpstreamLatencyRecorder();
const providerResult = await candidate.binding.provider.callChatCompletions(
candidate.binding.upstreamModel,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
index 29f9d1b1b..004c95cfb 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
@@ -25,7 +25,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
index b84aea40f..3f2664912 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
@@ -17,7 +17,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
index 4ecb21e72..6f1ef50d4 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
@@ -17,7 +17,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 0ff191572..331856b9b 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -17,7 +17,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
index f8d0c33c5..2af2ecba0 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
@@ -17,7 +17,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
index b9dcb1b9c..d4996ef8c 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
@@ -18,7 +18,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
index 7f96ee5f2..176bad4b2 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
@@ -22,7 +22,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
index 2547a30b4..e2c3e61d8 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
@@ -18,7 +18,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
index e374ea3b4..8a8a7d6d8 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
@@ -17,7 +17,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
index 28e10cfd5..73e8c1afd 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
@@ -1,12 +1,9 @@
import { chatCompletionsAttempt } from './attempt.ts';
import { renderChatCompletionsFailure } from './errors.ts';
import { planChatCompletionsRouting } from './routing.ts';
-import { getRepo } from '../../../repo/index.ts';
-import { applyAliasRulesToChatCompletions } from '../../model-aliases/apply.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
import { enumerateProviderCandidates } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
-import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts';
import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
import type { ProtocolFrame } from '@floway-dev/protocols/common';
import type { ExecuteResult } from '@floway-dev/provider';
@@ -21,11 +18,9 @@ export interface ChatCompletionsServeGenerateArgs {
export const chatCompletionsServe = {
generate: async (args: ChatCompletionsServeGenerateArgs): Promise>> => {
const { payload, ctx, store, headers } = args;
- const aliases = await getRepo().modelAliases.loadAll();
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: ctx.upstreamIds,
model: payload.model,
- aliases,
pickTarget: endpoints =>
endpoints.chatCompletions ? 'chat-completions'
: endpoints.messages ? 'messages'
@@ -49,12 +44,6 @@ export const chatCompletionsServe = {
: { kind: 'model-missing', model: payload.model, failedUpstreams },
);
}
- // Apply operator-locked alias rules to the inbound IR before the
- // attempt runs its interceptor chain. The matching `x-floway-alias`
- // header is staged via Hono's `c.header` so it survives `streamSSE`'s
- // internal `c.newResponse`.
- if (candidate.aliasRules) applyAliasRulesToChatCompletions(payload, candidate.aliasRules);
- if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
return await chatCompletionsAttempt.generate({ payload, ctx, store, candidate, headers });
},
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
index 37441a5ba..c3f90f81d 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
@@ -51,7 +51,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
index bc7c7949b..877295494 100644
--- a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
@@ -26,7 +26,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
index 98d15dd39..90c86fb1a 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
@@ -17,7 +17,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
index e74d45a4f..84a282748 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
@@ -17,7 +17,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
index 689ee6d0e..a066ce6c3 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
@@ -17,7 +17,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
index 33e49a791..0cbf60b61 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
@@ -17,7 +17,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
index 61e0132a7..76edee680 100644
--- a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
@@ -28,7 +28,6 @@ const ctx = (): GatewayCtx => ({
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts
index 7d1cf345e..f5daa1d86 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts
@@ -1,12 +1,9 @@
import { geminiAttempt } from './attempt.ts';
import { renderGeminiFailure } from './errors.ts';
import { planGeminiRouting } from './routing.ts';
-import { getRepo } from '../../../repo/index.ts';
-import { applyAliasRulesToGemini } from '../../model-aliases/apply.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
import { enumerateProviderCandidates } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
-import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts';
import type { ProtocolFrame } from '@floway-dev/protocols/common';
import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
import type { ExecuteResult, PlainResult } from '@floway-dev/provider';
@@ -33,11 +30,9 @@ export interface GeminiServeCountTokensArgs {
export const geminiServe = {
generate: async (args: GeminiServeGenerateArgs): Promise>> => {
const { payload, ctx, store, model, headers } = args;
- const aliases = await getRepo().modelAliases.loadAll();
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: ctx.upstreamIds,
model,
- aliases,
// Gemini has no native upstream target in the provider API; prefer
// Chat Completions, then Messages, then Responses.
pickTarget: endpoints => endpoints.chatCompletions ? 'chat-completions' : endpoints.messages ? 'messages' : endpoints.responses ? 'responses' : null,
@@ -60,21 +55,14 @@ export const geminiServe = {
'generate',
);
}
- // Operator-locked alias rules apply to the Gemini IR before the attempt
- // runs; the matching `x-floway-alias` header is staged via Hono's
- // `c.header` so it survives `streamSSE`'s internal `c.newResponse`.
- if (candidate.aliasRules) applyAliasRulesToGemini(payload, candidate.aliasRules);
- if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
return await geminiAttempt.generate({ payload, ctx, store, candidate, headers });
},
countTokens: async (args: GeminiServeCountTokensArgs): Promise> | PlainResult> => {
const { payload, ctx, store, model, headers } = args;
- const aliases = await getRepo().modelAliases.loadAll();
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: ctx.upstreamIds,
model,
- aliases,
// Gemini countTokens has no native upstream support; only providers
// exposing the Messages endpoint qualify because we translate Gemini
// → Messages and call Messages count_tokens upstream.
@@ -97,8 +85,6 @@ export const geminiServe = {
'countTokens',
);
}
- if (candidate.aliasRules) applyAliasRulesToGemini(payload, candidate.aliasRules);
- if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
return await geminiAttempt.countTokens({ payload, ctx, store, candidate, headers });
},
};
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
index 3ef8114e8..1e834a8be 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
@@ -50,7 +50,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/messages/attempt.ts b/packages/gateway/src/data-plane/chat/messages/attempt.ts
index 3dcde67c7..edf635328 100644
--- a/packages/gateway/src/data-plane/chat/messages/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/messages/attempt.ts
@@ -18,7 +18,6 @@ import type { ProtocolFrame } from '@floway-dev/protocols/common';
import type { MessagesMessage, MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
import { type ExecuteResult, type PlainResult } from '@floway-dev/provider';
import { translateMessagesViaChatCompletions, translateMessagesViaResponses } from '@floway-dev/translate';
-import { applyAnthropicBetaToHeaders } from '@floway-dev/translate/via-messages/anthropic-extensions';
import { messagesViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
export interface MessagesAttemptGenerateArgs {
@@ -50,21 +49,13 @@ export const messagesAttempt = {
return await runInterceptors(invocation, ctx, messagesInterceptors, async () => {
if (candidate.targetApi === 'messages') {
const { model: _model, ...body } = invocation.payload;
- // The candidate's `anthropic_beta` alias rule merges onto the
- // anthropic-beta header (the wire path; the body slot is rejected
- // by the http entry). Body extensions are stripped just before the
- // upstream call, after every interceptor has had its say.
- const outgoingHeaders = new Headers(invocation.headers);
- if (candidate.aliasRules?.anthropicBeta?.length) {
- applyAnthropicBetaToHeaders(outgoingHeaders, candidate.aliasRules.anthropicBeta);
- }
- sanitizeForMessagesUpstream(body as Record, createSanitizeTraceCtx(candidate.aliasName));
+ sanitizeForMessagesUpstream(body as Record, createSanitizeTraceCtx());
const recorder = createUpstreamLatencyRecorder();
const providerResult = await candidate.binding.provider.callMessages(
candidate.binding.upstreamModel,
body,
ctx.abortSignal,
- buildUpstreamCallOptions(candidate, ctx, recorder.record, outgoingHeaders),
+ buildUpstreamCallOptions(candidate, ctx, recorder.record, invocation.headers),
);
return await providerStreamResultToExecuteResult(providerResult, candidate, ctx, recorder);
}
@@ -109,16 +100,12 @@ export const messagesAttempt = {
const recorder = createUpstreamLatencyRecorder();
const response = await runInterceptors(invocation, ctx, messagesCountTokensInterceptors, async () => {
const { model: _model, ...body } = invocation.payload;
- const outgoingHeaders = new Headers(invocation.headers);
- if (candidate.aliasRules?.anthropicBeta?.length) {
- applyAnthropicBetaToHeaders(outgoingHeaders, candidate.aliasRules.anthropicBeta);
- }
- sanitizeForMessagesUpstream(body as Record, createSanitizeTraceCtx(candidate.aliasName));
+ sanitizeForMessagesUpstream(body as Record, createSanitizeTraceCtx());
const { response } = await candidate.binding.provider.callMessagesCountTokens(
candidate.binding.upstreamModel,
body,
ctx.abortSignal,
- buildUpstreamCallOptions(candidate, ctx, recorder.record, outgoingHeaders),
+ buildUpstreamCallOptions(candidate, ctx, recorder.record, invocation.headers),
);
return response;
});
diff --git a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
index 41a96f0de..36fe9e284 100644
--- a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
@@ -25,7 +25,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/messages/http_test.ts b/packages/gateway/src/data-plane/chat/messages/http_test.ts
index cf32509d2..27f403465 100644
--- a/packages/gateway/src/data-plane/chat/messages/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/http_test.ts
@@ -272,60 +272,3 @@ test('POST /v1/messages forwards upstream response headers end-to-end (non-strea
assertEquals(response.headers.get('anthropic-ratelimit-unified-status'), 'allowed');
assertEquals(response.headers.get('cf-ray'), 'cf_ray_e2e');
});
-
-test('POST /v1/messages stamps x-floway-alias when the candidate is alias-matched', async () => {
- installRepo();
- const callMessages = vi.fn(async (): Promise> => ({
- ok: true, events: makeProtocolFrames(makeMessagesEvents()), modelKey: 'k', headers: new Headers(),
- }));
- const candidate = makeCandidate({ callMessages });
- queueCandidates([{ ...candidate, aliasRules: { reasoning: { effort: 'low' } }, aliasName: 'codex-auto-review' }]);
-
- const response = await makeApp().request('/v1/messages', {
- method: 'POST',
- headers: { 'content-type': 'application/json' },
- body: JSON.stringify({ model: 'codex-auto-review', max_tokens: 32, messages: [{ role: 'user', content: 'hello' }] }),
- });
-
- assertEquals(response.status, 200);
- assertEquals(response.headers.get('x-floway-alias'), 'codex-auto-review');
-});
-
-test('POST /v1/messages does not set x-floway-alias when no alias matched', async () => {
- installRepo();
- const callMessages = vi.fn(async (): Promise> => ({
- ok: true, events: makeProtocolFrames(makeMessagesEvents()), modelKey: 'k', headers: new Headers(),
- }));
- queueCandidates([makeCandidate({ callMessages })]);
-
- const response = await makeApp().request('/v1/messages', {
- method: 'POST',
- headers: { 'content-type': 'application/json' },
- body: JSON.stringify({ model: 'test-model', max_tokens: 32, messages: [{ role: 'user', content: 'hello' }] }),
- });
-
- assertEquals(response.status, 200);
- assertEquals(response.headers.get('x-floway-alias'), null);
-});
-
-test('POST /v1/messages applies alias reasoning.effort onto output_config before upstream call', async () => {
- installRepo();
- const observedBodies: { output_config?: { effort?: string } }[] = [];
- const callMessages = vi.fn(async (_model: unknown, body: unknown): Promise> => {
- observedBodies.push(body as { output_config?: { effort?: string } });
- return { ok: true, events: makeProtocolFrames(makeMessagesEvents()), modelKey: 'k', headers: new Headers() };
- });
- const candidate = makeCandidate({ callMessages });
- queueCandidates([{ ...candidate, aliasRules: { reasoning: { effort: 'high' } }, aliasName: 'alias-x' }]);
-
- const response = await makeApp().request('/v1/messages', {
- method: 'POST',
- headers: { 'content-type': 'application/json' },
- body: JSON.stringify({ model: 'alias-x', max_tokens: 32, messages: [{ role: 'user', content: 'hello' }] }),
- });
-
- assertEquals(response.status, 200);
- const observed = observedBodies[0];
- if (observed === undefined) throw new Error('expected callMessages to receive a body');
- assertEquals(observed.output_config?.effort, 'high');
-});
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
index bf29636a7..590a05c7b 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
@@ -18,7 +18,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 9db2fe856..2917537a7 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -18,7 +18,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
index c08720c71..f1e8e49c0 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
@@ -18,7 +18,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
index 8ddfa03e7..f1cf56677 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
@@ -60,7 +60,6 @@ const gatewayCtx = (apiKeyId: string = 'test-key'): GatewayCtx => ({
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/messages/respond_test.ts b/packages/gateway/src/data-plane/chat/messages/respond_test.ts
index 0caef38b6..82191d8af 100644
--- a/packages/gateway/src/data-plane/chat/messages/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/respond_test.ts
@@ -536,7 +536,6 @@ const makeRespondCtx = (): GatewayCtx => ({
runtimeLocation: 'TEST',
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
currentColo: 'TEST',
dump: null,
diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts
index 8b0b337f1..ae9bb5d6c 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve.ts
@@ -1,12 +1,9 @@
import { messagesAttempt } from './attempt.ts';
import { renderMessagesFailure } from './errors.ts';
import { planMessagesRouting } from './routing.ts';
-import { getRepo } from '../../../repo/index.ts';
-import { applyAliasRulesToMessages } from '../../model-aliases/apply.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
import { enumerateProviderCandidates } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
-import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts';
import type { ProtocolFrame } from '@floway-dev/protocols/common';
import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
import type { ExecuteResult, PlainResult } from '@floway-dev/provider';
@@ -28,11 +25,9 @@ export interface MessagesServeCountTokensArgs {
export const messagesServe = {
generate: async (args: MessagesServeGenerateArgs): Promise>> => {
const { payload, ctx, store, headers } = args;
- const aliases = await getRepo().modelAliases.loadAll();
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: ctx.upstreamIds,
model: payload.model,
- aliases,
pickTarget: endpoints =>
endpoints.messages ? 'messages'
: endpoints.responses ? 'responses'
@@ -57,23 +52,14 @@ export const messagesServe = {
'generate',
);
}
- // Operator-locked alias rules go onto the inbound IR before the attempt
- // begins so the per-protocol interceptor chain (and any downstream
- // translate pass) sees the already-injected fields. The matching
- // `x-floway-alias` header is staged via Hono's `c.header` so it
- // survives `streamSSE`'s internal `c.newResponse`.
- if (candidate.aliasRules) applyAliasRulesToMessages(payload, candidate.aliasRules);
- if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
return await messagesAttempt.generate({ payload, ctx, store, candidate, headers });
},
countTokens: async (args: MessagesServeCountTokensArgs): Promise> | PlainResult> => {
const { payload, ctx, store, headers } = args;
- const aliases = await getRepo().modelAliases.loadAll();
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: ctx.upstreamIds,
model: payload.model,
- aliases,
pickTarget: endpoints => endpoints.messages ? 'messages' : null,
scheduler: ctx.backgroundScheduler,
currentColo: ctx.currentColo,
@@ -93,11 +79,6 @@ export const messagesServe = {
'countTokens',
);
}
- // count_tokens carries the same alias semantics as generate — operator
- // rules apply uniformly regardless of endpoint, and the response header
- // rides out the same way.
- if (candidate.aliasRules) applyAliasRulesToMessages(payload, candidate.aliasRules);
- if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
return await messagesAttempt.countTokens({ payload, ctx, store, candidate, headers });
},
};
diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
index bb1dfcfb0..de09f5646 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
@@ -48,7 +48,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/responses/attempt.ts b/packages/gateway/src/data-plane/chat/responses/attempt.ts
index c0fb6a902..c70233e93 100644
--- a/packages/gateway/src/data-plane/chat/responses/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/responses/attempt.ts
@@ -192,7 +192,7 @@ const dispatchResponses = async (
switch (candidate.targetApi) {
case 'responses': {
const { model: _model, ...body } = payload;
- sanitizeForResponsesUpstream(body as Record, createSanitizeTraceCtx(candidate.aliasName));
+ sanitizeForResponsesUpstream(body as Record, createSanitizeTraceCtx());
const recorder = createUpstreamLatencyRecorder();
const providerResult = await candidate.binding.provider.callResponses(
candidate.binding.upstreamModel,
@@ -242,7 +242,7 @@ const callResponsesCompactAsExecuteResult = async (
headers: Headers,
): Promise>> => {
const { model: _model, stream: _stream, store: _store, ...body } = payload;
- sanitizeForResponsesUpstream(body as Record, createSanitizeTraceCtx(candidate.aliasName));
+ sanitizeForResponsesUpstream(body as Record, createSanitizeTraceCtx());
const recorder = createUpstreamLatencyRecorder();
const providerResult = await candidate.binding.provider.callResponsesCompact(
candidate.binding.upstreamModel,
diff --git a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
index ceba118d7..22ec01ee1 100644
--- a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
@@ -27,7 +27,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
index 444ce069d..914711316 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
@@ -19,7 +19,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
index a2951ef03..fc422467e 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
@@ -19,7 +19,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
index f4f26c112..4210d34ba 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
@@ -19,7 +19,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index ea7b872ab..9618d2f02 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -19,7 +19,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
index 5b9bb4c91..16c220950 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
@@ -47,7 +47,6 @@ const stubCtx = (overrides: { abortSignal?: AbortSignal } = {}): GatewayCtx => (
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
...overrides,
});
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
index ff2e0f910..d48ea7d2b 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
@@ -350,7 +350,6 @@ const makeGatewayCtx = (apiKeyId: string = 'k1'): GatewayCtx => ({
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
});
@@ -4500,7 +4499,6 @@ test('downstream AbortSignal threads through to provider search / fetchPage and
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
abortSignal: controller.signal,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
index 8f1abec4c..e10d24fa7 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
@@ -146,7 +146,6 @@ const gatewayCtx = (): GatewayCtx => ({
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts
index 77904bb63..a66fe9995 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts
@@ -1,5 +1,4 @@
import { createPerRequestFetcher } from '../../../../../dial/per-request.ts';
-import { getRepo } from '../../../../../repo/index.ts';
import { sleep } from '../../../../../shared/sleep.ts';
import { resolveModelForRequest } from '../../../../providers/registry.ts';
import { appendFailedUpstreams } from '../../../../shared/failed-upstreams.ts';
@@ -536,13 +535,7 @@ const resolveImageBinding = async (
const endpointPath = isEdit ? '/images/edits' : '/images/generations';
let resolution;
try {
- // The image-generation server-tool runs inside a Responses request; the
- // outer request's matched alias (if any) has already stamped the
- // response header. Threading aliases here keeps the second
- // resolveModelForRequest (for the image tool's own model id) consistent
- // with how the outer LLM call resolved its candidate.
- const aliases = await getRepo().modelAliases.loadAll();
- resolution = await resolveModelForRequest(state.config.model, state.upstreamIds, fetcherForUpstream, state.backgroundScheduler, aliases);
+ resolution = await resolveModelForRequest(state.config.model, state.upstreamIds, fetcherForUpstream, state.backgroundScheduler);
} catch (e) {
return { ok: false, error: serverError(e) };
}
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
index 16f0415e1..284004632 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
@@ -58,7 +58,6 @@ const gatewayCtx = (): GatewayCtx => ({
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
index c873f9225..08534e87b 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
@@ -19,7 +19,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
index 6417306ff..9a51ac553 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
@@ -19,7 +19,6 @@ const stubCtx: GatewayCtx = {
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
};
diff --git a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
index ec4a48afa..8b2638334 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
@@ -1,7 +1,6 @@
import { renderResponsesFailure } from './errors.ts';
import type { StatefulResponsesStore } from './items/store.ts';
import { planResponsesRouting } from './routing.ts';
-import { getRepo } from '../../../repo/index.ts';
import { enumerateProviderCandidates, type ChatCandidate } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
import type { ModelEndpoints, ProtocolFrame } from '@floway-dev/protocols/common';
@@ -89,11 +88,9 @@ export const prepareResponsesServePlan = async (args: {
}): Promise => {
const { payload, ctx, store, pickTarget } = args;
const prepared = await expandPreviousResponseId(payload, store);
- const aliases = await getRepo().modelAliases.loadAll();
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: ctx.upstreamIds,
model: prepared.model,
- aliases,
pickTarget,
scheduler: ctx.backgroundScheduler,
currentColo: ctx.currentColo,
diff --git a/packages/gateway/src/data-plane/chat/responses/serve.ts b/packages/gateway/src/data-plane/chat/responses/serve.ts
index 81035f20d..bed8b6df3 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve.ts
@@ -2,9 +2,7 @@ import { responsesAttempt } from './attempt.ts';
import type { ResponsesAttemptResult } from './interceptors/types.ts';
import type { ResponsesSnapshotMode, StatefulResponsesStore } from './items/store.ts';
import { prepareResponsesServePlan } from './serve-prep.ts';
-import { applyAliasRulesToResponses } from '../../model-aliases/apply.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
-import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts';
import type { ProtocolFrame } from '@floway-dev/protocols/common';
import type { ResponsesPayload, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
import type { ExecuteResult } from '@floway-dev/provider';
@@ -48,11 +46,6 @@ export const responsesServe = {
: null,
});
if (plan.kind === 'failure') return plan.result;
- // Operator-locked alias rules apply to the prepared inbound IR before
- // the attempt runs; the `x-floway-alias` header is staged via Hono's
- // `c.header` so it survives `streamSSE`'s internal `c.newResponse`.
- if (plan.candidate.aliasRules) applyAliasRulesToResponses(plan.prepared, plan.candidate.aliasRules);
- if (plan.candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', plan.candidate.aliasName);
const effectiveSnapshotMode: ResponsesSnapshotMode = snapshotMode !== 'none' && containsCompactionTrigger(plan.prepared.input)
? 'replace'
: snapshotMode;
@@ -69,12 +62,6 @@ export const responsesServe = {
pickTarget: endpoints => endpoints.responses ? 'responses' : null,
});
if (plan.kind === 'failure') return plan.result;
- // Alias rules also apply on the compact path. The upstream compact
- // endpoint silently drops fields like `reasoning` it does not honor;
- // applying uniformly keeps the operator's intent expressed at the
- // inbound boundary regardless of which endpoint runs.
- if (plan.candidate.aliasRules) applyAliasRulesToResponses(plan.prepared, plan.candidate.aliasRules);
- if (plan.candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', plan.candidate.aliasName);
return await responsesAttempt.compact({ payload: plan.prepared, ctx, store, candidate: plan.candidate, headers });
},
};
diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
index b42f5bdbf..f60be9a2a 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
@@ -59,7 +59,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
dump: null,
backgroundScheduler: () => {},
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
});
diff --git a/packages/gateway/src/data-plane/chat/shared/candidates.ts b/packages/gateway/src/data-plane/chat/shared/candidates.ts
index 7a58f9e08..591f44f55 100644
--- a/packages/gateway/src/data-plane/chat/shared/candidates.ts
+++ b/packages/gateway/src/data-plane/chat/shared/candidates.ts
@@ -1,4 +1,3 @@
-import type { ModelAlias, ModelAliasRules } from '../../../control-plane/model-aliases/types.ts';
import { createPerRequestFetcher } from '../../../dial/per-request.ts';
import { collectInterpretationOutcomes, enumerateModelInterpretations, listModelProviders } from '../../providers/registry.ts';
import type { BackgroundScheduler } from '@floway-dev/platform';
@@ -7,17 +6,7 @@ import type { ChatTargetApi, ProviderCandidate } from '@floway-dev/provider';
export type { ProviderCandidate };
-// Wrapper around `ProviderCandidate` that carries the matched alias's
-// operator-locked request-time rules and the alias name. The wrapper lives
-// here (in the gateway) rather than on `ProviderCandidate` itself to keep
-// the `@floway-dev/provider` package unaware of the gateway's alias
-// concept. Downstream attempt logic narrows the candidate when it needs
-// to apply rules or stamp the `x-floway-alias` response header; passthrough
-// consumers continue to treat the candidate as a plain `ProviderCandidate`.
-export type ChatCandidate = ProviderCandidate & {
- readonly aliasRules?: ModelAliasRules;
- readonly aliasName?: string;
-};
+export type ChatCandidate = ProviderCandidate;
// Returns the candidates that satisfy both the model resolution and the
// target-endpoint pick, plus a `sawModel` flag that distinguishes the
@@ -26,16 +15,11 @@ export type ChatCandidate = ProviderCandidate & {
// whose catalog fetch rejected this round so the caller's failure
// renderer can surface them parenthetically.
export const enumerateProviderCandidates = async ({
- upstreamIds, model, aliases, pickTarget, scheduler, currentColo,
+ upstreamIds, model, pickTarget, scheduler, currentColo,
}: {
// null = unrestricted; empty list = no providers visible.
upstreamIds: readonly string[] | null;
model: string;
- // Operator-managed alias table loaded by the caller (typically via
- // `getRepo().modelAliases.loadAll()`). The fan-out matches each
- // (provider, lookupId) interpretation against this list; an empty list
- // is a valid input and produces only literal interpretations.
- aliases: readonly ModelAlias[];
pickTarget: (endpoints: ModelEndpoints) => ChatTargetApi | null;
// Threaded into `resolveModelForProvider` so the per-upstream catalog
// lookup hits the SWR-cached `fetchUpstreamModelsCached` instead of
@@ -57,28 +41,17 @@ export const enumerateProviderCandidates = async ({
// `resolveModelForRequest`; first-viable-wins ordering follows configured
// sort_order across upstreams, with the unprefixed interpretation pushed
// before the prefixed one within a single upstream.
- //
- // Alias matching runs inside `enumerateModelInterpretations`: each
- // (provider, lookupId) pair is checked against the alias table and the
- // matched alias's `onConflict` decides what to push. The alias-rewrite
- // metadata rides out alongside each resolved candidate so the attempt
- // layer can apply the locked rules.
- const interpretations = enumerateModelInterpretations(model, providers, aliases);
+ const interpretations = enumerateModelInterpretations(model, providers);
const { resolutions, failedUpstreams } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
const candidates: ChatCandidate[] = [];
let sawModel = false;
- for (const { interpretation, provider, resolved } of resolutions) {
+ for (const { provider, resolved } of resolutions) {
sawModel = true;
const targetApi = pickTarget(resolved.binding.upstreamModel.endpoints);
if (!targetApi) continue;
- const base: ProviderCandidate = { provider, binding: resolved.binding, targetApi, fetcher: fetcherForUpstream(provider.upstream) };
- candidates.push(
- interpretation.aliasRules !== undefined
- ? { ...base, aliasRules: interpretation.aliasRules, aliasName: interpretation.aliasName }
- : base,
- );
+ candidates.push({ provider, binding: resolved.binding, targetApi, fetcher: fetcherForUpstream(provider.upstream) });
}
return { candidates, sawModel, failedUpstreams };
diff --git a/packages/gateway/src/data-plane/chat/shared/candidates_test.ts b/packages/gateway/src/data-plane/chat/shared/candidates_test.ts
index 381d395b9..627b631d5 100644
--- a/packages/gateway/src/data-plane/chat/shared/candidates_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/candidates_test.ts
@@ -54,7 +54,6 @@ describe('enumerateProviderCandidates', () => {
const { candidates, sawModel } = await enumerateProviderCandidates({
upstreamIds: null,
- aliases: [],
model: 'test-model',
pickTarget: pickMessages,
scheduler: testScheduler,
@@ -75,7 +74,6 @@ describe('enumerateProviderCandidates', () => {
const { candidates, sawModel } = await enumerateProviderCandidates({
upstreamIds: null,
- aliases: [],
model: 'test-model',
pickTarget: pickMessages,
scheduler: testScheduler,
@@ -96,7 +94,6 @@ describe('enumerateProviderCandidates', () => {
const { candidates, sawModel } = await enumerateProviderCandidates({
upstreamIds: null,
- aliases: [],
model: 'test-model',
pickTarget: pickMessages,
scheduler: testScheduler,
@@ -116,7 +113,6 @@ describe('enumerateProviderCandidates', () => {
const { candidates } = await enumerateProviderCandidates({
upstreamIds: null,
- aliases: [],
model: 'test-model',
pickTarget: pickMessages,
scheduler: testScheduler,
@@ -137,7 +133,6 @@ describe('enumerateProviderCandidates', () => {
const { candidates } = await enumerateProviderCandidates({
upstreamIds: ['up_c', 'up_a'],
- aliases: [],
model: 'test-model',
pickTarget: pickMessages,
scheduler: testScheduler,
@@ -160,7 +155,6 @@ describe('enumerateProviderCandidates', () => {
const { candidates } = await enumerateProviderCandidates({
upstreamIds: null,
- aliases: [],
model: 'test-model',
pickTarget: pickMessages,
scheduler: testScheduler,
@@ -178,7 +172,6 @@ describe('enumerateProviderCandidates', () => {
const { candidates: msgCandidates } = await enumerateProviderCandidates({
upstreamIds: null,
- aliases: [],
model: 'test-model',
pickTarget: pickMessagesOrResponses,
scheduler: testScheduler,
@@ -189,7 +182,6 @@ describe('enumerateProviderCandidates', () => {
const { candidates: resCandidates } = await enumerateProviderCandidates({
upstreamIds: null,
- aliases: [],
model: 'test-model',
pickTarget: pickResponses,
scheduler: testScheduler,
@@ -206,7 +198,6 @@ describe('enumerateProviderCandidates', () => {
const { candidates: anyCandidates } = await enumerateProviderCandidates({
upstreamIds: null,
- aliases: [],
model: 'test-model',
pickTarget: pickAny,
scheduler: testScheduler,
@@ -217,7 +208,6 @@ describe('enumerateProviderCandidates', () => {
const { candidates: msgCandidates, sawModel } = await enumerateProviderCandidates({
upstreamIds: null,
- aliases: [],
model: 'test-model',
pickTarget: pickMessages,
scheduler: testScheduler,
@@ -255,7 +245,6 @@ describe('enumerateProviderCandidates', () => {
async () => {
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: null,
- aliases: [],
model: 'test-model',
pickTarget: pickMessages,
scheduler: testScheduler,
@@ -299,7 +288,6 @@ describe('enumerateProviderCandidates', () => {
async () => {
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: null,
- aliases: [],
model: 'test-model',
pickTarget: pickMessages,
scheduler: testScheduler,
diff --git a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
index ad0ef2169..9cdbab0c8 100644
--- a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
+++ b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
@@ -6,13 +6,7 @@ import { getCurrentColo } from '../../../runtime/runtime-info.ts';
import type { BackgroundScheduler } from '@floway-dev/platform';
export interface GatewayCtx {
- // The inbound Hono context. Carried so the serve layer can stage
- // response headers via `c.header(name, value)` — the Hono-documented
- // knob that survives `streamSSE`'s internal `c.newResponse` for the
- // streaming surfaces. For non-streaming surfaces that build the
- // outgoing Response via the Web `Response.json` constructor (which
- // bypasses Hono's context), the same value also lands on
- // `responseHeaders` so `finalizeGatewayResponse` can stamp it.
+ // The inbound Hono context.
readonly c: AuthedContext;
readonly apiKeyId: string;
readonly upstreamIds: readonly string[] | null;
@@ -31,15 +25,8 @@ export interface GatewayCtx {
readonly currentColo: string;
// Null when the api key has no retention configured, in which case
// `finalizeGatewayResponse` short-circuits the dump tee and returns the
- // response untouched (entries from `responseHeaders` are still applied).
+ // response untouched.
readonly dump: DumpAccumulator | null;
- // Per-request response-header staging for the non-streaming and error
- // paths that build their Response via the Web `Response.json` constructor
- // rather than through Hono's `c.json`/`streamSSE`. The serve layer writes
- // gateway-stamped headers (e.g. `x-floway-alias`) here in lockstep with
- // its `ctx.c.header(...)` call; `finalizeGatewayResponse` then merges
- // them onto the outgoing Response.
- readonly responseHeaders: Headers;
}
export interface CreateGatewayCtxOptions {
@@ -86,27 +73,12 @@ export const createGatewayCtxFromHono = (c: AuthedContext, opts: CreateGatewayCt
runtimeLocation: colo,
currentColo: colo,
dump,
- responseHeaders: new Headers(),
};
};
-// Stage one gateway response header so it lands on the outgoing Response
-// regardless of which builder produced it. Calls Hono's `c.header` (the
-// only knob that survives `streamSSE`'s internal `c.newResponse`) AND
-// stages on the per-ctx `responseHeaders` bag that `finalizeGatewayResponse`
-// merges onto Web-`Response.json`-built non-streaming responses.
-export const stageGatewayResponseHeader = (ctx: GatewayCtx, name: string, value: string): void => {
- ctx.c.header(name, value);
- ctx.responseHeaders.set(name, value);
-};
-
-// Apply ctx-stamped response headers onto the outgoing Response and then run
-// the dump-accumulator's finalize tee. Every inbound HTTP wrapper returns its
-// response through this seam so gateway-stamped headers ride out uniformly
-// across happy-path, error, and passthrough paths — including the
-// non-streaming surfaces that build their Response via Web `Response.json`
-// rather than Hono's `c.json`.
+// Run the dump-accumulator's finalize tee on the outgoing Response. Every
+// inbound HTTP wrapper returns its response through this seam so the dump
+// pipeline applies uniformly across happy-path, error, and passthrough paths.
export const finalizeGatewayResponse = (ctx: GatewayCtx, response: Response): Response => {
- for (const [name, value] of ctx.responseHeaders) response.headers.set(name, value);
return ctx.dump?.finalize(response) ?? response;
};
diff --git a/packages/gateway/src/data-plane/chat/shared/respond_test.ts b/packages/gateway/src/data-plane/chat/shared/respond_test.ts
index 738bb6399..9a5c506a6 100644
--- a/packages/gateway/src/data-plane/chat/shared/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/respond_test.ts
@@ -47,7 +47,6 @@ const setup = (): Harness => {
backgroundScheduler: promise => { background.push(promise); },
requestStartedAt,
c: stubAuthedContext(),
- responseHeaders: new Headers(),
}),
};
};
diff --git a/packages/gateway/src/data-plane/chat/shared/routing.ts b/packages/gateway/src/data-plane/chat/shared/routing.ts
index 392c50cf5..ddca37cb0 100644
--- a/packages/gateway/src/data-plane/chat/shared/routing.ts
+++ b/packages/gateway/src/data-plane/chat/shared/routing.ts
@@ -1,12 +1,10 @@
import type { ChatCandidate, ProviderCandidate } from './candidates.ts';
import type { ChatServeFailure } from './errors.ts';
-// Generic over the candidate type so call sites that hand in `ChatCandidate`
-// receive a decision whose surviving candidates retain the alias metadata.
-// The candidate filtering and ordering inside routing is shape-agnostic —
-// it touches `binding.upstream` and `binding.supportsResponsesItemReference`
-// only — so the generic narrows naturally from `ChatCandidate` back out
-// without re-deriving the alias fields.
+// Generic over the candidate type so call sites can narrow back to their
+// concrete shape. The candidate filtering and ordering inside routing is
+// shape-agnostic — it touches `binding.upstream` and
+// `binding.supportsResponsesItemReference` only.
export type RoutingDecision =
| { readonly kind: 'success'; readonly candidates: readonly T[] }
| { readonly kind: 'failure'; readonly failure: ChatServeFailure };
diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize.ts b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
index 57a6959dc..64f404944 100644
--- a/packages/gateway/src/data-plane/chat/shared/sanitize.ts
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
@@ -1,19 +1,12 @@
-import type { ModelAliasRules } from '../../../control-plane/model-aliases/types.ts';
import { FLOWAY_EXTENSION_FIELDS } from '@floway-dev/protocols/extensions';
export interface SanitizeTraceCtx {
- readonly aliasName?: string;
- readonly emit: (line: { alias?: string; field: string; targetProtocol: string }) => void;
+ readonly emit: (line: { field: string; targetProtocol: string }) => void;
}
// Default per-request trace that flows through the gateway's console logger.
-// `aliasName` rides through to the trace line so an operator inspecting logs
-// can correlate the drop with the matched alias; when no alias matched the
-// field still appears (residue from a client-sent extension), just without
-// alias attribution.
-export const createSanitizeTraceCtx = (aliasName: string | undefined): SanitizeTraceCtx => ({
- ...(aliasName !== undefined ? { aliasName } : {}),
- emit: line => console.warn('floway.alias.drop', JSON.stringify(line)),
+export const createSanitizeTraceCtx = (): SanitizeTraceCtx => ({
+ emit: line => console.warn('floway.extension.drop', JSON.stringify(line)),
});
const stripKeys = (
@@ -26,7 +19,7 @@ const stripKeys = (
for (const key of keys) {
if (key in body) {
delete body[key];
- trace?.emit({ alias: trace.aliasName, field: `${fieldPrefix}${key}`, targetProtocol });
+ trace?.emit({ field: `${fieldPrefix}${key}`, targetProtocol });
}
}
};
@@ -50,24 +43,3 @@ export const sanitizeForGeminiUpstream = (body: Record, trace?:
stripKeys(generationConfig as Record, FLOWAY_EXTENSION_FIELDS.gemini.generationConfig, 'gemini', trace, 'generationConfig.');
}
};
-
-// Walks the alias rules object and emits one trace line per non-empty rule
-// field. Used by inbound surfaces that have no protocol-extension slots for
-// the rules in the first place (embeddings, images, /v1/completions) — the
-// rules are structurally dropped before the upstream call, and this helper
-// gives the operator the same `floway.alias.drop` signal the chat
-// sanitizers produce when they strip extension residue.
-export const traceAllRulesDropped = (
- rules: ModelAliasRules,
- targetProtocol: string,
- trace: SanitizeTraceCtx,
-): void => {
- if (rules.reasoning) {
- for (const key of Object.keys(rules.reasoning)) {
- trace.emit({ alias: trace.aliasName, field: `reasoning.${key}`, targetProtocol });
- }
- }
- if (rules.verbosity !== undefined) trace.emit({ alias: trace.aliasName, field: 'verbosity', targetProtocol });
- if (rules.serviceTier !== undefined) trace.emit({ alias: trace.aliasName, field: 'serviceTier', targetProtocol });
- if (rules.anthropicBeta?.length) trace.emit({ alias: trace.aliasName, field: 'anthropicBeta', targetProtocol });
-};
diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
index da2e72d0d..0dd52330d 100644
--- a/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
@@ -11,20 +11,22 @@ import { assertEquals } from '@floway-dev/test-utils';
type TraceLine = { alias?: string; field: string; targetProtocol: string };
-const makeTrace = (aliasName?: string): { ctx: SanitizeTraceCtx; lines: TraceLine[] } => {
+const makeTrace = (): { ctx: SanitizeTraceCtx; lines: TraceLine[] } => {
const lines: TraceLine[] = [];
return {
- ctx: { aliasName, emit: line => lines.push(line) },
+ ctx: { emit: line => lines.push(line) },
lines,
};
};
test('sanitizeForMessagesUpstream strips verbosity and emits one trace line', () => {
const body: Record = { verbosity: 'low', model: 'x' };
- const { ctx, lines } = makeTrace('codex-auto-review');
+ const { ctx, lines } = makeTrace();
sanitizeForMessagesUpstream(body, ctx);
assertEquals(body, { model: 'x' });
- assertEquals(lines, [{ alias: 'codex-auto-review', field: 'verbosity', targetProtocol: 'messages' }]);
+ // Pin the whole emitted trace (count and full line shape) in one
+ // assertion, mirroring the exact-equality check this test had before.
+ assertEquals(lines, [{ field: 'verbosity', targetProtocol: 'messages' }]);
});
test('sanitizeForChatCompletionsUpstream strips Floway extensions and leaves native fields', () => {
@@ -34,11 +36,11 @@ test('sanitizeForChatCompletionsUpstream strips Floway extensions and leaves nat
reasoning_effort: 'high',
model: 'x',
};
- const { ctx, lines } = makeTrace('alias-1');
+ const { ctx, lines } = makeTrace();
sanitizeForChatCompletionsUpstream(body, ctx);
assertEquals(body, { reasoning_effort: 'high', model: 'x' });
assertEquals(lines.length, 2);
- assertEquals(lines.every(l => l.alias === 'alias-1' && l.targetProtocol === 'chat-completions'), true);
+ assertEquals(lines.every(l => l.targetProtocol === 'chat-completions'), true);
const droppedFields = lines.map(l => l.field).sort();
assertEquals(droppedFields, ['anthropic_beta', 'thinking_budget']);
});
@@ -54,13 +56,13 @@ test('sanitizeForGeminiUpstream walks top-level and generationConfig', () => {
generationConfig: { verbosity: 'low', thinkingConfig: { thinkingBudget: 100 } },
anthropicBeta: ['ctx-1m'],
};
- const { ctx, lines } = makeTrace('alias-g');
+ const { ctx, lines } = makeTrace();
sanitizeForGeminiUpstream(body, ctx);
assertEquals(body, { generationConfig: { thinkingConfig: { thinkingBudget: 100 } } });
assertEquals(lines.length, 2);
const droppedFields = lines.map(l => l.field).sort();
assertEquals(droppedFields, ['anthropicBeta', 'generationConfig.verbosity']);
- assertEquals(lines.every(l => l.alias === 'alias-g' && l.targetProtocol === 'gemini'), true);
+ assertEquals(lines.every(l => l.targetProtocol === 'gemini'), true);
});
test('sanitizer is idempotent — a second run emits no additional traces', () => {
diff --git a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
index d3e9ad8b0..bdef8b1f1 100644
--- a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
@@ -26,7 +26,6 @@ const baseCtx = (overrides: Partial = {}): GatewayCtx => {
upstreamIds: null,
wantsStream: true,
c: stubAuthedContext(),
- responseHeaders: new Headers(),
requestStartedAt: 0,
runtimeLocation: 'TEST',
currentColo: 'TEST',
diff --git a/packages/gateway/src/data-plane/completions/serve_test.ts b/packages/gateway/src/data-plane/completions/serve_test.ts
index 1dd90c6ed..1cd2263bb 100644
--- a/packages/gateway/src/data-plane/completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/completions/serve_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
import { initDumpBroker, initDumpStore } from '../../dump/registry.ts';
import { installDumpStubs } from '../../dump/test-fixtures.ts';
-import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
import { buildCustomUpstreamRecord, flushAsyncWork, requestApp, setupAppTest } from '../../test-helpers.ts';
import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
import { assertEquals, assertExists, jsonResponse, withMockedFetch } from '@floway-dev/test-utils';
@@ -398,49 +397,3 @@ test('/v1/completions streaming records usage row, request_total+upstream_succes
assertEquals(frames[3]?.type, 'done');
}
});
-
-// Alias header coverage for /v1/completions: the matched alias name rides
-// out on `x-floway-alias`. Non-streaming path uses passthrough's `json`
-// branch; the streaming path stamps the same header via Hono's `c.header`
-// before `streamSSE` builds the response.
-test('/v1/completions stamps x-floway-alias when the request hits an aliased model', async () => {
- const { apiKey, repo } = await setupAppTest();
- await registerCompletionsUpstream(repo);
- (repo.modelAliases as MemoryModelAliasesRepo).setAll([
- {
- alias: 'completions-alias',
- targetModelId: 'davinci-002',
- upstreamIds: [],
- rules: {},
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 0,
- },
- ]);
-
- await withMockedFetch(
- request => {
- const url = new URL(request.url);
- if (url.hostname === 'passthrough.example.com' && url.pathname === '/v1/completions') {
- return jsonResponse({
- id: 'cmpl_resp',
- object: 'text_completion',
- created: 1,
- model: 'davinci-002',
- choices: [{ index: 0, text: ' world', finish_reason: 'stop' }],
- usage: { prompt_tokens: 5, completion_tokens: 1, total_tokens: 6 },
- });
- }
- throw new Error(`Unhandled fetch ${request.url}`);
- },
- async () => {
- const response = await requestApp('/v1/completions', {
- method: 'POST',
- headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
- body: JSON.stringify({ model: 'completions-alias', prompt: 'hello' }),
- });
- assertEquals(response.status, 200);
- assertEquals(response.headers.get('x-floway-alias'), 'completions-alias');
- },
- );
-});
diff --git a/packages/gateway/src/data-plane/embeddings/serve_test.ts b/packages/gateway/src/data-plane/embeddings/serve_test.ts
index c6c44f61b..bf86dc9a7 100644
--- a/packages/gateway/src/data-plane/embeddings/serve_test.ts
+++ b/packages/gateway/src/data-plane/embeddings/serve_test.ts
@@ -1,6 +1,5 @@
import { test } from 'vitest';
-import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
import { buildCustomUpstreamRecord, copilotModels, flushAsyncWork, requestApp, setupAppTest } from '../../test-helpers.ts';
import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
import { jsonResponse, withMockedFetch, assertEquals, assertExists } from '@floway-dev/test-utils';
@@ -496,58 +495,3 @@ test('/v1/embeddings rejects malformed body at the provider-independent boundary
},
);
});
-
-// Critical alias header coverage for the passthrough surface: the matched
-// alias name must ride out on `x-floway-alias` so downstream observers can
-// tell a real-model hit from an alias-routed one. Goes through Hono's
-// `c.header` in `passthroughServe`, mirroring the chat path.
-test('/v1/embeddings stamps x-floway-alias when the request hits an aliased model', async () => {
- const { apiKey, repo } = await setupAppTest();
- (repo.modelAliases as MemoryModelAliasesRepo).setAll([
- {
- alias: 'embed-alias',
- targetModelId: 'text-embedding-real',
- upstreamIds: [],
- rules: {},
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 0,
- },
- ]);
-
- await withMockedFetch(
- request => {
- const url = new URL(request.url);
- if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
- if (url.pathname === '/copilot_internal/v2/token') {
- return jsonResponse({
- token: 'copilot-access-token',
- expires_at: 4102444800,
- refresh_in: 3600,
- endpoints: { api: 'https://api.individual.githubcopilot.com' },
- });
- }
- if (url.pathname === '/models') {
- return jsonResponse(copilotModels([{ id: 'text-embedding-real', supported_endpoints: ['/embeddings'] }]));
- }
- if (url.pathname === '/embeddings') {
- return jsonResponse({
- object: 'list',
- model: 'text-embedding-real',
- data: [{ object: 'embedding', index: 0, embedding: [0.1] }],
- usage: { prompt_tokens: 1, total_tokens: 1 },
- });
- }
- throw new Error(`Unhandled fetch ${request.url}`);
- },
- async () => {
- const response = await requestApp('/v1/embeddings', {
- method: 'POST',
- headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
- body: JSON.stringify({ model: 'embed-alias', input: 'hello' }),
- });
- assertEquals(response.status, 200);
- assertEquals(response.headers.get('x-floway-alias'), 'embed-alias');
- },
- );
-});
diff --git a/packages/gateway/src/data-plane/images/serve_test.ts b/packages/gateway/src/data-plane/images/serve_test.ts
index f241ad89d..85b5f1adf 100644
--- a/packages/gateway/src/data-plane/images/serve_test.ts
+++ b/packages/gateway/src/data-plane/images/serve_test.ts
@@ -1,6 +1,5 @@
import { test } from 'vitest';
-import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
import { buildCustomUpstreamRecord, copilotModels, flushAsyncWork, requestApp, setupAppTest } from '../../test-helpers.ts';
import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
import { jsonResponse, withMockedFetch, assertEquals, assertExists } from '@floway-dev/test-utils';
@@ -234,62 +233,3 @@ test('/v1/images/edits forwards a multipart request through an Azure model and r
const usageRows = await repo.usage.listAll();
assertEquals(usageRows.some(row => row.model === 'gpt-image-2' && row.tokens.input === 7 && row.tokens.output === 11), true);
});
-
-// Alias header coverage for /v1/images/generations: an alias whose target is
-// an image-generation model must surface its name on `x-floway-alias` for
-// downstream observability.
-test('/v1/images/generations stamps x-floway-alias when the request hits an aliased model', async () => {
- const { apiKey, repo } = await setupAppTest();
- clearInProcessCopilotTokenCache();
- (repo.modelAliases as MemoryModelAliasesRepo).setAll([
- {
- alias: 'image-alias',
- targetModelId: 'gpt-image-2',
- upstreamIds: [],
- rules: {},
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 0,
- },
- ]);
- await repo.upstreams.save(buildCustomUpstreamRecord({
- id: 'up_images',
- name: 'Custom Image Provider',
- sortOrder: 100,
- config: {
- baseUrl: 'https://images.example.com',
- authStyle: 'bearer',
- apiKey: 'sk-images',
- endpoints: {},
- },
- }));
-
- await withMockedFetch(
- request => {
- const url = new URL(request.url);
- if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
- if (url.pathname === '/copilot_internal/v2/token') {
- return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
- }
- if (url.hostname === 'api.individual.githubcopilot.com' && url.pathname === '/models') {
- return jsonResponse(copilotModels([{ id: 'copilot-chat', supported_endpoints: ['/chat/completions'] }]));
- }
- if (url.hostname === 'images.example.com' && url.pathname === '/v1/models') {
- return jsonResponse({ data: [{ id: 'gpt-image-2' }] });
- }
- if (url.hostname === 'images.example.com' && url.pathname === '/v1/images/generations') {
- return jsonResponse({ data: [{ b64_json: 'aGVsbG8=' }] });
- }
- throw new Error(`Unhandled fetch ${request.url}`);
- },
- async () => {
- const response = await requestApp('/v1/images/generations', {
- method: 'POST',
- headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
- body: JSON.stringify({ model: 'image-alias', prompt: 'hi' }),
- });
- assertEquals(response.status, 200);
- assertEquals(response.headers.get('x-floway-alias'), 'image-alias');
- },
- );
-});
diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts
deleted file mode 100644
index 0791c5f7b..000000000
--- a/packages/gateway/src/data-plane/model-aliases/apply.ts
+++ /dev/null
@@ -1,104 +0,0 @@
-import type { ModelAliasRules } from '../../control-plane/model-aliases/types.ts';
-import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
-import type { GeminiPayload } from '@floway-dev/protocols/gemini';
-import type { MessagesPayload, MessagesThinkingDisplay } from '@floway-dev/protocols/messages';
-import type { ResponsesPayload } from '@floway-dev/protocols/responses';
-import { mapSummaryToAnthropicDisplay } from '@floway-dev/translate/via-messages/anthropic-extensions';
-
-// Each function writes the alias rules into the inbound IR's slot best suited
-// to the host protocol: native when the protocol can express the concept,
-// extension otherwise. Writes overwrite any user-supplied value — aliases are
-// operator-locked.
-
-export const applyAliasRulesToChatCompletions = (payload: ChatCompletionsPayload, rules: ModelAliasRules): void => {
- // reasoning.effort is native; budget/adaptive/summary ride on extension slots
- // because Chat Completions has no native expression for those.
- if (rules.reasoning?.effort !== undefined) payload.reasoning_effort = rules.reasoning.effort;
- if (rules.reasoning?.budgetTokens !== undefined) payload.thinking_budget = rules.reasoning.budgetTokens;
- if (rules.reasoning?.adaptive === true) payload.adaptive_thinking = true;
- if (rules.reasoning?.summary !== undefined) payload.reasoning_summary = rules.reasoning.summary;
- if (rules.verbosity !== undefined) payload.verbosity = rules.verbosity;
- if (rules.serviceTier !== undefined) payload.service_tier = rules.serviceTier;
- if (rules.anthropicBeta?.length) payload.anthropic_beta = [...rules.anthropicBeta];
-};
-
-export const applyAliasRulesToResponses = (payload: ResponsesPayload, rules: ModelAliasRules): void => {
- // reasoning.{effort, summary} and text.verbosity / service_tier are native;
- // budget/adaptive ride on extension slots; anthropic_beta only matters when
- // this Responses inbound lands on a Messages upstream.
- if (rules.reasoning?.effort !== undefined) payload.reasoning = { ...payload.reasoning, effort: rules.reasoning.effort };
- if (rules.reasoning?.summary !== undefined) payload.reasoning = { ...payload.reasoning, summary: rules.reasoning.summary };
- if (rules.reasoning?.budgetTokens !== undefined) payload.thinking_budget = rules.reasoning.budgetTokens;
- if (rules.reasoning?.adaptive === true) payload.adaptive_thinking = true;
- if (rules.verbosity !== undefined) payload.text = { ...payload.text, verbosity: rules.verbosity };
- if (rules.serviceTier !== undefined) payload.service_tier = rules.serviceTier;
- if (rules.anthropicBeta?.length) payload.anthropic_beta = [...rules.anthropicBeta];
-};
-
-export const applyAliasRulesToMessages = (payload: MessagesPayload, rules: ModelAliasRules): void => {
- // Anthropic has natives for effort, thinking, and service_tier; only
- // verbosity is a Floway extension on this inbound. anthropic_beta is the
- // wire header — the attempt layer reads `candidate.aliasRules.anthropicBeta`
- // and merges via mergeAnthropicBetaTokens, so we do not stamp the body here.
- if (rules.reasoning?.effort !== undefined) {
- payload.output_config = { ...payload.output_config, effort: rules.reasoning.effort };
- }
- // The dashboard's tagged radio enforces mutual exclusivity between
- // adaptive and budgetTokens; if both arrive through the raw API the apply
- // step picks adaptive (matches the translate-layer adaptive-first policy).
- if (rules.reasoning?.adaptive === true) {
- payload.thinking = { type: 'adaptive' };
- } else if (rules.reasoning?.budgetTokens !== undefined) {
- payload.thinking = { type: 'enabled', budget_tokens: rules.reasoning.budgetTokens };
- }
- if (rules.reasoning?.summary !== undefined) {
- const display = mapSummaryToAnthropicDisplay(rules.reasoning.summary);
- // summary='auto' maps to undefined and is an explicit no-op on the
- // Messages path — the operator chose "let upstream default decide", so
- // we neither synthesize a thinking block nor overwrite a user-supplied
- // thinking.display. Every other summary value enforces operator-locked
- // overwrite.
- if (display !== undefined) {
- // When no prior thinking branch ran (no effort/budget/adaptive in this
- // rule), synthesize `thinking: {type:'enabled', display}` so the
- // operator's summary intent survives — Anthropic discards `display`
- // without `type`. Matches `buildMessagesThinkingFromExtensions`.
- const base = payload.thinking ?? { type: 'enabled' as const };
- payload.thinking = { ...base, display: display as MessagesThinkingDisplay };
- }
- }
- if (rules.verbosity !== undefined) payload.verbosity = rules.verbosity;
- if (rules.serviceTier !== undefined) payload.service_tier = rules.serviceTier;
-};
-
-export const applyAliasRulesToGemini = (payload: GeminiPayload, rules: ModelAliasRules): void => {
- // All four reasoning knobs ride on the native thinkingConfig; verbosity and
- // serviceTier ride on extension slots under generationConfig. anthropicBeta
- // doesn't surface on Gemini-inbound bodies — the gemini-via-messages
- // translator doesn't read it, and the Messages attempt reads it off the
- // candidate's aliasRules directly when stamping the outbound header.
- const hasThinking = rules.reasoning?.effort !== undefined
- || rules.reasoning?.budgetTokens !== undefined
- || rules.reasoning?.adaptive === true
- || rules.reasoning?.summary !== undefined;
- const hasGenerationConfig = hasThinking || rules.verbosity !== undefined || rules.serviceTier !== undefined;
-
- if (hasGenerationConfig) {
- const generationConfig = { ...payload.generationConfig };
- const thinkingConfig = { ...generationConfig.thinkingConfig };
- if (rules.reasoning?.effort !== undefined) thinkingConfig.thinkingLevel = rules.reasoning.effort;
- if (rules.reasoning?.budgetTokens !== undefined) thinkingConfig.thinkingBudget = rules.reasoning.budgetTokens;
- if (rules.reasoning?.adaptive === true) thinkingConfig.thinkingBudget = -1;
- if (rules.reasoning?.summary !== undefined) {
- // Gemini exposes a single boolean for summary; map summary='omitted' to
- // false and every other value (auto / concise / detailed / freeform) to
- // true. Operators that want to fall back to Gemini's account default
- // simply omit `reasoning.summary` from the rule.
- thinkingConfig.includeThoughts = rules.reasoning.summary !== 'omitted';
- }
- if (hasThinking) generationConfig.thinkingConfig = thinkingConfig;
- if (rules.verbosity !== undefined) generationConfig.verbosity = rules.verbosity;
- if (rules.serviceTier !== undefined) generationConfig.serviceTier = rules.serviceTier;
- payload.generationConfig = generationConfig;
- }
-};
diff --git a/packages/gateway/src/data-plane/model-aliases/apply_test.ts b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
deleted file mode 100644
index c62bac7f7..000000000
--- a/packages/gateway/src/data-plane/model-aliases/apply_test.ts
+++ /dev/null
@@ -1,203 +0,0 @@
-import { describe, expect, test } from 'vitest';
-
-import {
- applyAliasRulesToChatCompletions,
- applyAliasRulesToGemini,
- applyAliasRulesToMessages,
- applyAliasRulesToResponses,
-} from './apply.ts';
-import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
-import type { GeminiPayload } from '@floway-dev/protocols/gemini';
-import type { MessagesPayload } from '@floway-dev/protocols/messages';
-import type { ResponsesPayload } from '@floway-dev/protocols/responses';
-
-// Empty-shaped payload helpers; the apply functions only touch the alias-rule
-// slots so the rest can stay structurally minimal.
-const cc = (overrides: Partial = {}): ChatCompletionsPayload => ({ model: 'x', messages: [], ...overrides });
-const resp = (overrides: Partial = {}): ResponsesPayload => ({ model: 'x', input: 'hi', ...overrides });
-const msg = (overrides: Partial = {}): MessagesPayload => ({ model: 'x', messages: [], max_tokens: 1, ...overrides });
-const gem = (overrides: Partial = {}): GeminiPayload => ({ ...overrides });
-
-describe('applyAliasRulesToChatCompletions', () => {
- test('writes effort to native reasoning_effort and overrides user value', () => {
- const payload = cc({ reasoning_effort: 'low' });
- applyAliasRulesToChatCompletions(payload, { reasoning: { effort: 'high' } });
- expect(payload.reasoning_effort).toBe('high');
- });
-
- test('writes budgetTokens to extension thinking_budget', () => {
- const payload = cc();
- applyAliasRulesToChatCompletions(payload, { reasoning: { budgetTokens: 4096 } });
- expect(payload.thinking_budget).toBe(4096);
- });
-
- test('writes adaptive to extension adaptive_thinking', () => {
- const payload = cc();
- applyAliasRulesToChatCompletions(payload, { reasoning: { adaptive: true } });
- expect(payload.adaptive_thinking).toBe(true);
- });
-
- test('writes summary to extension reasoning_summary', () => {
- const payload = cc();
- applyAliasRulesToChatCompletions(payload, { reasoning: { summary: 'detailed' } });
- expect(payload.reasoning_summary).toBe('detailed');
- });
-
- test('writes verbosity, serviceTier, anthropicBeta to their slots', () => {
- const payload = cc();
- applyAliasRulesToChatCompletions(payload, {
- verbosity: 'low', serviceTier: 'flex', anthropicBeta: ['ctx-1m'],
- });
- expect(payload.verbosity).toBe('low');
- expect(payload.service_tier).toBe('flex');
- expect(payload.anthropic_beta).toEqual(['ctx-1m']);
- });
-
- test('leaves payload untouched when rules carry no fields', () => {
- const payload = cc({ reasoning_effort: 'medium', verbosity: 'high' });
- applyAliasRulesToChatCompletions(payload, {});
- expect(payload.reasoning_effort).toBe('medium');
- expect(payload.verbosity).toBe('high');
- });
-});
-
-describe('applyAliasRulesToResponses', () => {
- test('writes effort to native reasoning.effort and overrides user value', () => {
- const payload = resp({ reasoning: { effort: 'low' } });
- applyAliasRulesToResponses(payload, { reasoning: { effort: 'high' } });
- expect(payload.reasoning?.effort).toBe('high');
- });
-
- test('writes summary to native reasoning.summary', () => {
- const payload = resp();
- applyAliasRulesToResponses(payload, { reasoning: { summary: 'detailed' } });
- expect(payload.reasoning?.summary).toBe('detailed');
- });
-
- test('writes budgetTokens to extension thinking_budget', () => {
- const payload = resp();
- applyAliasRulesToResponses(payload, { reasoning: { budgetTokens: 4096 } });
- expect(payload.thinking_budget).toBe(4096);
- });
-
- test('writes adaptive to extension adaptive_thinking', () => {
- const payload = resp();
- applyAliasRulesToResponses(payload, { reasoning: { adaptive: true } });
- expect(payload.adaptive_thinking).toBe(true);
- });
-
- test('writes verbosity to native text.verbosity, preserving format', () => {
- const payload = resp({ text: { format: { type: 'json_object' } } });
- applyAliasRulesToResponses(payload, { verbosity: 'low' });
- expect(payload.text?.verbosity).toBe('low');
- expect(payload.text?.format).toEqual({ type: 'json_object' });
- });
-
- test('writes serviceTier to native service_tier', () => {
- const payload = resp();
- applyAliasRulesToResponses(payload, { serviceTier: 'flex' });
- expect(payload.service_tier).toBe('flex');
- });
-
- test('writes anthropicBeta to extension slot', () => {
- const payload = resp();
- applyAliasRulesToResponses(payload, { anthropicBeta: ['ctx-1m'] });
- expect(payload.anthropic_beta).toEqual(['ctx-1m']);
- });
-});
-
-describe('applyAliasRulesToMessages', () => {
- test('writes effort to native output_config.effort', () => {
- const payload = msg();
- applyAliasRulesToMessages(payload, { reasoning: { effort: 'high' } });
- expect(payload.output_config?.effort).toBe('high');
- });
-
- test('writes budgetTokens to thinking.enabled', () => {
- const payload = msg();
- applyAliasRulesToMessages(payload, { reasoning: { budgetTokens: 4096 } });
- expect(payload.thinking).toEqual({ type: 'enabled', budget_tokens: 4096 });
- });
-
- test('writes adaptive to thinking.type=adaptive', () => {
- const payload = msg();
- applyAliasRulesToMessages(payload, { reasoning: { adaptive: true } });
- expect(payload.thinking).toEqual({ type: 'adaptive' });
- });
-
- test('writes summary to thinking.display (mapped from OpenAI vocabulary)', () => {
- const payload = msg({ thinking: { type: 'enabled', budget_tokens: 1024 } });
- applyAliasRulesToMessages(payload, { reasoning: { summary: 'detailed' } });
- expect(payload.thinking).toEqual({ type: 'enabled', budget_tokens: 1024, display: 'summarized' });
- });
-
- test('writes serviceTier to native service_tier', () => {
- const payload = msg();
- applyAliasRulesToMessages(payload, { serviceTier: 'priority' });
- expect(payload.service_tier).toBe('priority');
- });
-
- test('writes verbosity to the extension slot', () => {
- const payload = msg();
- applyAliasRulesToMessages(payload, { verbosity: 'low' });
- expect(payload.verbosity).toBe('low');
- });
-
- test('adaptive overrides budgetTokens when both arrive on the same call', () => {
- // The write-side validator forbids both, but if both still arrive the
- // adaptive choice has to win to match the translate-layer policy.
- const payload = msg();
- applyAliasRulesToMessages(payload, { reasoning: { budgetTokens: 1024, adaptive: true } });
- expect(payload.thinking).toEqual({ type: 'adaptive' });
- });
-});
-
-describe('applyAliasRulesToGemini', () => {
- test('writes effort to generationConfig.thinkingConfig.thinkingLevel', () => {
- const payload = gem();
- applyAliasRulesToGemini(payload, { reasoning: { effort: 'high' } });
- expect(payload.generationConfig?.thinkingConfig?.thinkingLevel).toBe('high');
- });
-
- test('writes budgetTokens to generationConfig.thinkingConfig.thinkingBudget', () => {
- const payload = gem();
- applyAliasRulesToGemini(payload, { reasoning: { budgetTokens: 4096 } });
- expect(payload.generationConfig?.thinkingConfig?.thinkingBudget).toBe(4096);
- });
-
- test('writes adaptive to generationConfig.thinkingConfig.thinkingBudget = -1', () => {
- const payload = gem();
- applyAliasRulesToGemini(payload, { reasoning: { adaptive: true } });
- expect(payload.generationConfig?.thinkingConfig?.thinkingBudget).toBe(-1);
- });
-
- test('writes summary to generationConfig.thinkingConfig.includeThoughts when not omitted', () => {
- const payload = gem();
- applyAliasRulesToGemini(payload, { reasoning: { summary: 'detailed' } });
- expect(payload.generationConfig?.thinkingConfig?.includeThoughts).toBe(true);
- });
-
- test('writes summary=omitted to generationConfig.thinkingConfig.includeThoughts=false', () => {
- const payload = gem();
- applyAliasRulesToGemini(payload, { reasoning: { summary: 'omitted' } });
- expect(payload.generationConfig?.thinkingConfig?.includeThoughts).toBe(false);
- });
-
- test('writes verbosity to generationConfig.verbosity extension', () => {
- const payload = gem();
- applyAliasRulesToGemini(payload, { verbosity: 'low' });
- expect(payload.generationConfig?.verbosity).toBe('low');
- });
-
- test('writes serviceTier to generationConfig.serviceTier extension', () => {
- const payload = gem();
- applyAliasRulesToGemini(payload, { serviceTier: 'flex' });
- expect(payload.generationConfig?.serviceTier).toBe('flex');
- });
-
- test('preserves existing thinkingConfig entries when adding a new one', () => {
- const payload = gem({ generationConfig: { thinkingConfig: { thinkingBudget: 1024 } } });
- applyAliasRulesToGemini(payload, { reasoning: { summary: 'detailed' } });
- expect(payload.generationConfig?.thinkingConfig).toEqual({ thinkingBudget: 1024, includeThoughts: true });
- });
-});
diff --git a/packages/gateway/src/data-plane/model-aliases/match.ts b/packages/gateway/src/data-plane/model-aliases/match.ts
deleted file mode 100644
index edb31d071..000000000
--- a/packages/gateway/src/data-plane/model-aliases/match.ts
+++ /dev/null
@@ -1,15 +0,0 @@
-import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
-
-// Lookup an alias for the (post-prefix-strip) lookupId against the upstream's
-// id. An empty `upstreamIds` filter on the alias means "match any upstream";
-// a non-empty filter must include the upstream's id.
-export const matchAlias = (
- lookupId: string,
- upstreamId: string,
- aliases: readonly ModelAlias[],
-): ModelAlias | undefined => {
- const hit = aliases.find(a => a.alias === lookupId);
- if (!hit) return undefined;
- if (hit.upstreamIds.length > 0 && !hit.upstreamIds.includes(upstreamId)) return undefined;
- return hit;
-};
diff --git a/packages/gateway/src/data-plane/model-aliases/match_test.ts b/packages/gateway/src/data-plane/model-aliases/match_test.ts
deleted file mode 100644
index b3fbf5596..000000000
--- a/packages/gateway/src/data-plane/model-aliases/match_test.ts
+++ /dev/null
@@ -1,55 +0,0 @@
-import { describe, expect, test } from 'vitest';
-
-import { matchAlias } from './match.ts';
-import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
-
-const make = (overrides: Partial): ModelAlias => ({
- alias: 'a',
- targetModelId: 't',
- upstreamIds: [],
- rules: {},
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 0,
- ...overrides,
-});
-
-describe('matchAlias', () => {
- test('matches by exact lookupId when alias has no upstream filter', () => {
- const aliases = [make({ alias: 'codex-auto-review', targetModelId: 'gpt-5.4' })];
- expect(matchAlias('codex-auto-review', 'up-1', aliases)?.alias).toBe('codex-auto-review');
- });
-
- test('does not match when lookupId differs', () => {
- const aliases = [make({ alias: 'codex-auto-review' })];
- expect(matchAlias('something-else', 'up-1', aliases)).toBeUndefined();
- });
-
- test('respects upstreamIds allowlist (member matches)', () => {
- const aliases = [make({ alias: 'a', upstreamIds: ['up-1', 'up-2'] })];
- expect(matchAlias('a', 'up-1', aliases)).toBeDefined();
- expect(matchAlias('a', 'up-2', aliases)).toBeDefined();
- });
-
- test('respects upstreamIds allowlist (non-member misses)', () => {
- const aliases = [make({ alias: 'a', upstreamIds: ['up-1'] })];
- expect(matchAlias('a', 'up-3', aliases)).toBeUndefined();
- });
-
- test('empty upstreamIds means match-any', () => {
- const aliases = [make({ alias: 'a', upstreamIds: [] })];
- expect(matchAlias('a', 'anywhere', aliases)).toBeDefined();
- });
-
- test('returns the first matching alias entry verbatim', () => {
- const aliases = [
- make({ alias: 'a', targetModelId: 'first', rules: { reasoning: { effort: 'low' } } }),
- make({ alias: 'a', targetModelId: 'second' }),
- ];
- expect(matchAlias('a', 'up-x', aliases)).toEqual(aliases[0]);
- });
-
- test('returns undefined for an empty alias list', () => {
- expect(matchAlias('a', 'up-x', [])).toBeUndefined();
- });
-});
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
deleted file mode 100644
index 72c0fdcf7..000000000
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ /dev/null
@@ -1,139 +0,0 @@
-import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
-import { unionEndpoints } from '../providers/registry.ts';
-import { composeAliasDisplayName, kindForEndpoints, type PublicModel } from '@floway-dev/protocols/common';
-import type { ModelProviderInstance, ProviderModelRecord, ResolvedModel, UpstreamModel } from '@floway-dev/provider';
-
-// One emission slot for an alias: a (provider, addressable form) pair where
-// the provider's raw catalog carries the alias target id, plus the matched
-// UpstreamModel so the synthesized listing entry can borrow the target's
-// limits, owner, and cost without re-querying.
-interface AliasListingEmission {
- provider: ModelProviderInstance;
- form: 'unprefixed' | 'prefixed';
- target: UpstreamModel;
-}
-
-// A `ResolvedModel` that may carry an `aliasedFrom` provenance — what
-// `getModelsForListing` returns when alias entries have been interleaved into
-// the catalog. Each listing endpoint's mapper (`toPublicModel`,
-// `toControlPlaneModel`, `toGeminiModel`) reads the same shape, so the alias
-// fan-out happens exactly once instead of being re-implemented per surface.
-export type ListedModel = ResolvedModel & {
- readonly aliasedFrom?: NonNullable;
-};
-
-// Per-upstream alias enumeration. An alias with empty `upstreamIds` matches
-// every reachable provider; a non-empty list narrows the candidate set. Per
-// provider, the alias emits one entry per `listed` form when its target sits
-// in the upstream's raw catalog. Upstreams that do not carry the target — or
-// whose operator disabled the target — drop the alias entirely for that row.
-const aliasListingEmissions = (
- alias: ModelAlias,
- providers: readonly ModelProviderInstance[],
- rawCatalogs: ReadonlyMap,
-): AliasListingEmission[] => {
- const out: AliasListingEmission[] = [];
- const upstreamFilter = alias.upstreamIds.length > 0 ? new Set(alias.upstreamIds) : null;
- for (const provider of providers) {
- if (upstreamFilter !== null && !upstreamFilter.has(provider.upstream)) continue;
- const catalog = rawCatalogs.get(provider.upstream);
- if (catalog === undefined) continue;
- const disabled = new Set(provider.disabledPublicModelIds);
- const target = catalog.find(m => m.id === alias.targetModelId && !disabled.has(m.id));
- if (target === undefined) continue;
- const cfg = provider.modelPrefix;
- if (cfg === null) {
- out.push({ provider, form: 'unprefixed', target });
- } else {
- for (const form of cfg.listed) {
- out.push({ provider, form, target });
- }
- }
- }
- return out;
-};
-
-// Turn an alias emission into a `ListedModel` that walks the same listing
-// pipeline as real catalog entries. The synthesized `providers` array carries
-// a single binding pointing at the alias's target on this upstream, so the
-// dashboard's per-binding view renders correctly without alias-specific
-// branching. `aliasedFrom` rides out as the public protocol extension.
-//
-// Display name: the alias-local part (operator displayName, or
-// `${target.display_name} (rules summary)`) lives by itself for the
-// `unprefixed` listing form; the `prefixed` form mirrors the real-model path
-// in `registry.ts` and prepends `${provider.name}: ` so the upstream is
-// visible at a glance.
-//
-// Public id: bare alias name for the unprefixed form; provider prefix + alias
-// name for the prefixed form. Mirrors how real models are surfaced in the
-// same listing pass.
-const aliasEmissionToListedModel = (alias: ModelAlias, emission: AliasListingEmission): ListedModel => {
- const { provider, target, form } = emission;
- const aliasLocalName = composeAliasDisplayName({
- aliasDisplayName: alias.displayName,
- targetDisplayName: target.display_name ?? target.id,
- rules: alias.rules,
- });
- const cfg = provider.modelPrefix;
- const publicId = form === 'prefixed' && cfg !== null ? `${cfg.prefix}${alias.alias}` : alias.alias;
- const record: ProviderModelRecord = {
- upstream: provider.upstream,
- upstreamName: provider.name,
- providerKind: provider.providerKind,
- provider: provider.provider,
- upstreamModel: target,
- enabledFlags: target.enabledFlags,
- supportsResponsesItemReference: provider.supportsResponsesItemReference,
- };
- const { providerData: _providerData, endpoints, id: _targetId, display_name: _targetDisplay, created: _targetCreated, ...rest } = target;
- return {
- ...rest,
- id: publicId,
- display_name: form === 'prefixed' ? `${provider.name}: ${aliasLocalName}` : aliasLocalName,
- created: alias.createdAt,
- endpoints: { ...endpoints },
- providers: [record],
- aliasedFrom: {
- targetModelId: alias.targetModelId,
- upstreamIds: alias.upstreamIds,
- rules: alias.rules,
- onConflict: alias.onConflict,
- ...(alias.displayName !== undefined ? { displayName: alias.displayName } : {}),
- },
- };
-};
-
-// Single-pass alias fan-out used by every listing surface. Visibility filter
-// honoured here. Emissions whose synthesized public id collides — two
-// no-prefix upstreams both serving the alias target, or two prefix-aliased
-// upstreams sharing a prefix — merge into one row with the bindings
-// appended, mirroring how `mergeIntoCatalog` collapses duplicate real-model
-// ids; the dashboard then renders a single alias row whose `upstreams` lists
-// every backing binding instead of N identical rows.
-export const synthesizeListedAliases = (
- aliases: readonly ModelAlias[],
- providers: readonly ModelProviderInstance[],
- rawCatalogs: ReadonlyMap,
-): ListedModel[] => {
- const byId = new Map();
- for (const alias of aliases) {
- if (!alias.visibleInModelsList) continue;
- for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) {
- const next = aliasEmissionToListedModel(alias, emission);
- const existing = byId.get(next.id);
- if (existing === undefined) {
- byId.set(next.id, next);
- continue;
- }
- const endpoints = unionEndpoints(existing.endpoints, next.endpoints);
- byId.set(next.id, {
- ...existing,
- endpoints,
- kind: kindForEndpoints(endpoints),
- providers: [...existing.providers, ...next.providers],
- });
- }
- }
- return [...byId.values()];
-};
diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts
index f01579f9a..ab9242bd1 100644
--- a/packages/gateway/src/data-plane/models/gemini.ts
+++ b/packages/gateway/src/data-plane/models/gemini.ts
@@ -1,14 +1,12 @@
import type { Context } from 'hono';
import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts';
-import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
import { createPerRequestFetcher } from '../../dial/per-request.ts';
import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
-import { getRepo } from '../../repo/index.ts';
import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
import { getCurrentColo } from '../../runtime/runtime-info.ts';
import { geminiStatusForHttpStatus } from '../chat/gemini/errors.ts';
-import { getModelsForListing } from '../providers/registry.ts';
+import { getModels } from '../providers/registry.ts';
import type { BackgroundScheduler } from '@floway-dev/platform';
import type { ModelPricing } from '@floway-dev/protocols/common';
import { ProviderModelsUnavailableError } from '@floway-dev/provider';
@@ -32,10 +30,6 @@ interface GeminiModel {
cost?: ModelPricing;
}
-// Gemini's Model resource is closed (no `aliasedFrom` extension), so an alias
-// arrives here through `getModelsForListing` looking like any other chat
-// model — `id`, `display_name`, `limits`, `cost` already finalized by
-// `synthesizeListedAliases` — and the mapper has no alias-specific branch.
const toGeminiModel = (model: InternalModel): GeminiModel => {
const limits = model.limits;
const inputTokenLimit = limits.max_prompt_tokens ?? limits.max_context_window_tokens;
@@ -72,20 +66,16 @@ const loadGeminiModels = async (
upstreamFilter: readonly string[] | null,
fetcherForUpstream: (upstreamId: string) => Fetcher,
scheduler: BackgroundScheduler,
- aliases: readonly ModelAlias[],
): Promise => {
- const { models } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler, aliases);
- // Only chat models are representable in the Gemini /models shape — alias
- // entries whose target is non-chat fall out of this filter just like real
- // non-chat catalog entries do.
+ const models = await getModels(upstreamFilter, fetcherForUpstream, scheduler);
+ // Only chat models are representable in the Gemini /models shape.
return models.filter(model => model.kind === 'chat').map(toGeminiModel);
};
export const serveGeminiModels = async (c: Context): Promise => {
try {
const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
- const aliases = await getRepo().modelAliases.loadAll();
- return Response.json({ models: await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), aliases) });
+ return Response.json({ models: await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c)) });
} catch (error) {
return geminiModelLoadError(error);
}
@@ -98,8 +88,7 @@ export const serveGeminiModelInfo = async (c: Context): Promise => {
const modelId = rawModelId.replace(/^models\//, '');
try {
const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
- const aliases = await getRepo().modelAliases.loadAll();
- const model = (await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), aliases)).find(candidate => candidate.baseModelId === modelId || candidate.name === `models/${modelId}`);
+ const model = (await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c))).find(candidate => candidate.baseModelId === modelId || candidate.name === `models/${modelId}`);
if (!model) return geminiError(404, `Model not found: ${modelId}`);
return Response.json(model);
} catch (error) {
diff --git a/packages/gateway/src/data-plane/models/gemini_test.ts b/packages/gateway/src/data-plane/models/gemini_test.ts
index 584054328..7f4cedaec 100644
--- a/packages/gateway/src/data-plane/models/gemini_test.ts
+++ b/packages/gateway/src/data-plane/models/gemini_test.ts
@@ -1,6 +1,5 @@
import { test } from 'vitest';
-import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
import { buildCustomUpstreamRecord, copilotModels, requestApp, setupAppTest } from '../../test-helpers.ts';
import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
import { jsonResponse, withMockedFetch, assertEquals } from '@floway-dev/test-utils';
@@ -409,48 +408,3 @@ test('/v1beta/models hides malformed upstream response bodies', async () => {
},
);
});
-
-// Gemini's `Model` resource is closed (no `aliasedFrom` extension), so the
-// `/v1beta/models` surface advertises an alias entry as a synthetic Gemini
-// model carrying the alias id and the target's display fields. This test
-// guards the synthetic shape — name, displayName, supportedGenerationMethods
-// — so a future refactor of `loadGeminiModels` cannot silently drop the
-// alias entries.
-test('/v1beta/models appends visible aliases as synthetic Gemini model entries', async () => {
- const { repo, apiKey } = await setupAppTest();
-
- (repo.modelAliases as MemoryModelAliasesRepo).setAll([
- {
- alias: 'codex-auto-review',
- targetModelId: 'gpt-gemini-list',
- upstreamIds: [],
- rules: { reasoning: { effort: 'low' } },
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 1_700_000_000,
- },
- ]);
-
- await withMockedFetch(
- request => {
- const url = new URL(request.url);
- if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
- if (url.pathname === '/copilot_internal/v2/token') {
- return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
- }
- if (url.pathname === '/models') {
- return jsonResponse(copilotModels([{ id: 'gpt-gemini-list', display_name: 'GPT Gemini List' }]));
- }
- throw new Error(`Unhandled fetch ${request.url}`);
- },
- async () => {
- const response = await requestApp('/v1beta/models', { headers: { 'x-api-key': apiKey.key } });
- assertEquals(response.status, 200);
- const body = await response.json() as { models: Array<{ name: string; displayName: string; supportedGenerationMethods: string[] }> };
- const aliasEntry = body.models.find(m => m.name === 'models/codex-auto-review');
- if (!aliasEntry) throw new Error('expected codex-auto-review alias entry');
- assertEquals(aliasEntry.displayName, 'GPT Gemini List (low effort)');
- assertEquals(aliasEntry.supportedGenerationMethods, ['generateContent', 'streamGenerateContent', 'countTokens']);
- },
- );
-});
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index 39c6649a0..027ba38db 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -1,15 +1,9 @@
-import type { ListedModel } from './alias-listing.ts';
-import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
-import { getModelsForListing } from '../providers/registry.ts';
+import { getModels } from '../providers/registry.ts';
import type { BackgroundScheduler } from '@floway-dev/platform';
import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
import type { Fetcher, InternalModel } from '@floway-dev/provider';
-// Maps a single listed catalog entry (real or alias) to the wire DTO. Alias
-// entries arrive with `aliasedFrom` pre-populated by
-// `synthesizeListedAliases`; this mapper just rides it through so every
-// listing surface sees the same provenance field.
-export const toPublicModel = (model: InternalModel & { aliasedFrom?: ListedModel['aliasedFrom'] }): PublicModel => {
+export const toPublicModel = (model: InternalModel): PublicModel => {
const info: PublicModel = {
id: model.id,
object: 'model',
@@ -24,7 +18,6 @@ export const toPublicModel = (model: InternalModel & { aliasedFrom?: ListedModel
info.created_at = new Date(model.created * 1000).toISOString();
}
if (model.cost) info.cost = model.cost;
- if (model.aliasedFrom) info.aliasedFrom = model.aliasedFrom;
if (model.chat) info.chat = model.chat;
return info;
};
@@ -33,9 +26,8 @@ export const loadModels = async (
upstreamFilter: readonly string[] | null,
fetcherForUpstream: (upstreamId: string) => Fetcher,
scheduler: BackgroundScheduler,
- aliases: readonly ModelAlias[],
): Promise => {
- const { models } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler, aliases);
+ const models = await getModels(upstreamFilter, fetcherForUpstream, scheduler);
const data = models.map(toPublicModel);
return {
object: 'list',
diff --git a/packages/gateway/src/data-plane/models/serve.ts b/packages/gateway/src/data-plane/models/serve.ts
index 60736266b..9b8b510f9 100644
--- a/packages/gateway/src/data-plane/models/serve.ts
+++ b/packages/gateway/src/data-plane/models/serve.ts
@@ -7,7 +7,6 @@ import { loadModels } from './load.ts';
import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts';
import { createPerRequestFetcher } from '../../dial/per-request.ts';
import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
-import { getRepo } from '../../repo/index.ts';
import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
import { getCurrentColo } from '../../runtime/runtime-info.ts';
import { ProviderModelsUnavailableError } from '@floway-dev/provider';
@@ -15,8 +14,7 @@ import { ProviderModelsUnavailableError } from '@floway-dev/provider';
export const models = async (c: Context) => {
try {
const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
- const aliases = await getRepo().modelAliases.loadAll();
- return Response.json(await loadModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), aliases));
+ return Response.json(await loadModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c)));
} catch (e) {
// Upstream HTTP/parse failures squash to a generic message so we do not
// leak upstream identity. Other registry-thrown errors (e.g. the "no
diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index 9af4d7981..1408f10a6 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -1,6 +1,5 @@
import { test } from 'vitest';
-import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
import { buildCopilotUpstreamRecord, buildCustomUpstreamRecord, copilotModels, requestApp, setupAppTest } from '../../test-helpers.ts';
import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
import { jsonResponse, withMockedFetch, assertEquals } from '@floway-dev/test-utils';
@@ -587,470 +586,3 @@ test('/v1/models returns the last real error when every account model load fails
},
);
});
-
-// /v1/models alias-listing coverage. Each test exercises one slice of the
-// spec's visibility contract: visible alias appears with `aliasedFrom`,
-// hidden alias does not appear, alias-with-disabled-target is still listed,
-// the `aliasedFrom` shape matches the spec byte-for-byte.
-test('/v1/models appends a visible alias with aliasedFrom after the real entries', async () => {
- const { repo, apiKey } = await setupAppTest();
-
- (repo.modelAliases as MemoryModelAliasesRepo).setAll([
- {
- alias: 'codex-auto-review',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: { reasoning: { effort: 'low' } },
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 1_700_000_000,
- },
- ]);
-
- await repo.upstreams.save(buildCustomUpstreamRecord({
- id: 'up_oai',
- name: 'Test OpenAI',
- sortOrder: 100,
- config: {
- baseUrl: 'https://oai.example.com',
- authStyle: 'bearer',
- apiKey: 'sk-test',
- endpoints: { chatCompletions: {} },
- },
- }));
-
- await withMockedFetch(
- request => {
- const url = new URL(request.url);
- if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
- if (url.pathname === '/copilot_internal/v2/token') {
- return jsonResponse({
- token: 'copilot-access-token',
- expires_at: 4102444800,
- refresh_in: 3600,
- endpoints: { api: 'https://api.individual.githubcopilot.com' },
- });
- }
- if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') {
- return jsonResponse(copilotModels([]));
- }
- if (url.pathname === '/v1/models' && url.hostname === 'oai.example.com') {
- return jsonResponse({
- object: 'list',
- data: [{ id: 'gpt-5.4', owned_by: 'openai' }],
- });
- }
- throw new Error(`Unhandled fetch ${request.url}`);
- },
- async () => {
- const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
- assertEquals(response.status, 200);
- const body = await response.json() as { data: Array<{ id: string; owned_by?: string; aliasedFrom?: unknown }> };
- const ids = body.data.map(m => m.id);
- assertEquals(ids[ids.length - 1], 'codex-auto-review');
- const aliasEntry = body.data.find(m => m.id === 'codex-auto-review');
- if (!aliasEntry) throw new Error('expected codex-auto-review alias entry');
- assertEquals(aliasEntry.aliasedFrom, {
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: { reasoning: { effort: 'low' } },
- onConflict: 'real-only',
- });
- assertEquals(aliasEntry.owned_by, 'openai');
- },
- );
-});
-
-// `displayName` propagates verbatim when the operator set it; absence on the
-// wire (the prior test) means "synthesize from target name + rules summary".
-test('/v1/models forwards the operator-set displayName on the aliasedFrom payload', async () => {
- const { repo, apiKey } = await setupAppTest();
-
- (repo.modelAliases as MemoryModelAliasesRepo).setAll([
- {
- alias: 'codex-auto-review',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: { reasoning: { effort: 'low' } },
- visibleInModelsList: true,
- onConflict: 'real-only',
- displayName: 'Codex Auto Review',
- createdAt: 1_700_000_000,
- },
- ]);
-
- await repo.upstreams.save(buildCustomUpstreamRecord({
- id: 'up_oai',
- name: 'Test OpenAI',
- sortOrder: 100,
- config: {
- baseUrl: 'https://oai.example.com',
- authStyle: 'bearer',
- apiKey: 'sk-test',
- endpoints: { chatCompletions: {} },
- },
- }));
-
- await withMockedFetch(
- request => {
- const url = new URL(request.url);
- if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
- if (url.pathname === '/copilot_internal/v2/token') {
- return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
- }
- if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') {
- return jsonResponse(copilotModels([]));
- }
- if (url.pathname === '/v1/models' && url.hostname === 'oai.example.com') {
- return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
- }
- throw new Error(`Unhandled fetch ${request.url}`);
- },
- async () => {
- const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
- const body = await response.json() as { data: Array<{ id: string; aliasedFrom?: { displayName?: string } }> };
- const aliasEntry = body.data.find(m => m.id === 'codex-auto-review');
- if (!aliasEntry) throw new Error('expected codex-auto-review alias entry');
- assertEquals(aliasEntry.aliasedFrom?.displayName, 'Codex Auto Review');
- },
- );
-});
-
-test('/v1/models omits aliases marked visibleInModelsList=false', async () => {
- const { repo, apiKey } = await setupAppTest();
-
- (repo.modelAliases as MemoryModelAliasesRepo).setAll([
- {
- alias: 'hidden-alias',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: {},
- visibleInModelsList: false,
- onConflict: 'real-only',
- createdAt: 0,
- },
- ]);
-
- await repo.upstreams.save(buildCustomUpstreamRecord({
- id: 'up_oai',
- name: 'Test OpenAI',
- sortOrder: 100,
- config: {
- baseUrl: 'https://oai.example.com',
- authStyle: 'bearer',
- apiKey: 'sk-test',
- endpoints: { chatCompletions: {} },
- },
- }));
-
- await withMockedFetch(
- request => {
- const url = new URL(request.url);
- if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
- if (url.pathname === '/copilot_internal/v2/token') {
- return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
- }
- if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') {
- return jsonResponse(copilotModels([]));
- }
- if (url.pathname === '/v1/models' && url.hostname === 'oai.example.com') {
- return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
- }
- throw new Error(`Unhandled fetch ${request.url}`);
- },
- async () => {
- const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
- const body = await response.json() as { data: Array<{ id: string }> };
- assertEquals(body.data.map(m => m.id).includes('hidden-alias'), false);
- },
- );
-});
-
-test('/v1/models omits an alias whose target is not in any reachable upstream catalog', async () => {
- const { repo, apiKey } = await setupAppTest();
-
- (repo.modelAliases as MemoryModelAliasesRepo).setAll([
- {
- alias: 'orphan-alias',
- targetModelId: 'never-resolves',
- upstreamIds: ['up_oai'],
- rules: {},
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 0,
- },
- ]);
-
- await repo.upstreams.save(buildCustomUpstreamRecord({
- id: 'up_oai',
- name: 'Test OpenAI',
- sortOrder: 100,
- config: {
- baseUrl: 'https://oai.example.com',
- authStyle: 'bearer',
- apiKey: 'sk-test',
- endpoints: { chatCompletions: {} },
- },
- }));
-
- await withMockedFetch(
- request => {
- const url = new URL(request.url);
- if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
- if (url.pathname === '/copilot_internal/v2/token') {
- return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
- }
- if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') {
- return jsonResponse(copilotModels([]));
- }
- if (url.pathname === '/v1/models' && url.hostname === 'oai.example.com') {
- return jsonResponse({ object: 'list', data: [] });
- }
- throw new Error(`Unhandled fetch ${request.url}`);
- },
- async () => {
- const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
- const body = await response.json() as { data: Array<{ id: string }> };
- // Per-upstream alias enumeration: an alias whose target cannot be served
- // by any reachable upstream produces zero entries — there is no surface
- // form to attach the alias to. A request for `orphan-alias` still
- // returns the canonical user-facing model-missing error.
- assertEquals(body.data.map(m => m.id).includes('orphan-alias'), false);
- },
- );
-});
-
-test('/v1/models emits the alias on each reachable upstream + listed form; prefixed entries carry the upstream label, unprefixed entries do not', async () => {
- const { repo, apiKey } = await setupAppTest();
-
- (repo.modelAliases as MemoryModelAliasesRepo).setAll([
- {
- alias: 'codex-auto-review',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: { reasoning: { effort: 'low' } },
- visibleInModelsList: true,
- onConflict: 'real-only',
- displayName: 'Codex Auto Review',
- createdAt: 1_700_000_000,
- },
- ]);
-
- await repo.upstreams.save(buildCustomUpstreamRecord({
- id: 'up_azure',
- name: 'Azure',
- sortOrder: 100,
- config: {
- baseUrl: 'https://azure.example.com',
- authStyle: 'bearer',
- apiKey: 'sk-azure',
- endpoints: { chatCompletions: {} },
- },
- modelPrefix: { prefix: 'azure/', addressable: ['unprefixed', 'prefixed'], listed: ['unprefixed', 'prefixed'] },
- }));
-
- await withMockedFetch(
- request => {
- const url = new URL(request.url);
- if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
- if (url.pathname === '/copilot_internal/v2/token') {
- return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
- }
- if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([]));
- if (url.pathname === '/v1/models' && url.hostname === 'azure.example.com') {
- return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4', display_name: 'GPT-5.4' }] });
- }
- throw new Error(`Unhandled fetch ${request.url}`);
- },
- async () => {
- const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
- const body = await response.json() as { data: Array<{ id: string; display_name: string; aliasedFrom?: unknown }> };
- // Both addressable forms appear because the upstream listed both.
- const bare = body.data.find(m => m.id === 'codex-auto-review');
- const prefixed = body.data.find(m => m.id === 'azure/codex-auto-review');
- if (!bare || !prefixed) throw new Error('expected both bare and prefixed alias entries');
- assertEquals(bare.display_name, 'Codex Auto Review');
- assertEquals(prefixed.display_name, 'Azure: Codex Auto Review');
- },
- );
-});
-
-test('/v1/models falls back to target display_name + rules summary when the alias has no displayName', async () => {
- const { repo, apiKey } = await setupAppTest();
-
- (repo.modelAliases as MemoryModelAliasesRepo).setAll([
- {
- alias: 'codex-auto-review',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: { reasoning: { effort: 'low' } },
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 1_700_000_000,
- },
- ]);
-
- await repo.upstreams.save(buildCustomUpstreamRecord({
- id: 'up_azure',
- name: 'Azure',
- sortOrder: 100,
- config: {
- baseUrl: 'https://azure.example.com',
- authStyle: 'bearer',
- apiKey: 'sk-azure',
- endpoints: { chatCompletions: {} },
- },
- }));
-
- await withMockedFetch(
- request => {
- const url = new URL(request.url);
- if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
- if (url.pathname === '/copilot_internal/v2/token') {
- return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
- }
- if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([]));
- if (url.pathname === '/v1/models' && url.hostname === 'azure.example.com') {
- return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4', display_name: 'GPT-5.4' }] });
- }
- throw new Error(`Unhandled fetch ${request.url}`);
- },
- async () => {
- const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
- const body = await response.json() as { data: Array<{ id: string; display_name: string }> };
- const entry = body.data.find(m => m.id === 'codex-auto-review');
- if (!entry) throw new Error('expected codex-auto-review alias entry');
- assertEquals(entry.display_name, 'GPT-5.4 (low effort)');
- },
- );
-});
-
-test('/v1/models honours alias upstreamIds — only emits on the named upstream', async () => {
- const { repo, apiKey } = await setupAppTest();
-
- (repo.modelAliases as MemoryModelAliasesRepo).setAll([
- {
- alias: 'codex-auto-review',
- targetModelId: 'gpt-5.4',
- upstreamIds: ['up_azure'],
- rules: {},
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 1_700_000_000,
- },
- ]);
-
- await repo.upstreams.save(buildCustomUpstreamRecord({
- id: 'up_azure',
- name: 'Azure',
- sortOrder: 100,
- config: {
- baseUrl: 'https://azure.example.com',
- authStyle: 'bearer',
- apiKey: 'sk-azure',
- endpoints: { chatCompletions: {} },
- },
- }));
- await repo.upstreams.save(buildCustomUpstreamRecord({
- id: 'up_other',
- name: 'Other',
- sortOrder: 200,
- config: {
- baseUrl: 'https://other.example.com',
- authStyle: 'bearer',
- apiKey: 'sk-other',
- endpoints: { chatCompletions: {} },
- },
- }));
-
- await withMockedFetch(
- request => {
- const url = new URL(request.url);
- if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
- if (url.pathname === '/copilot_internal/v2/token') {
- return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
- }
- if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([]));
- // Both upstreams expose gpt-5.4 — but the alias is restricted to up_azure.
- if (url.pathname === '/v1/models' && url.hostname === 'azure.example.com') {
- return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
- }
- if (url.pathname === '/v1/models' && url.hostname === 'other.example.com') {
- return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
- }
- throw new Error(`Unhandled fetch ${request.url}`);
- },
- async () => {
- const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
- const body = await response.json() as { data: Array<{ id: string; display_name: string }> };
- const aliasRows = body.data.filter(m => m.id === 'codex-auto-review');
- assertEquals(aliasRows.length, 1);
- assertEquals(aliasRows[0].display_name, 'gpt-5.4');
- },
- );
-});
-
-test('/v1/models merges alias emissions whose synthesized public id collides — one row, multiple backing upstreams', async () => {
- const { repo, apiKey } = await setupAppTest();
-
- (repo.modelAliases as MemoryModelAliasesRepo).setAll([
- {
- alias: 'codex-auto-review',
- displayName: 'Codex Auto Review',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: { reasoning: { effort: 'low' } },
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 1_700_000_000,
- },
- ]);
-
- // Two no-prefix upstreams both serve gpt-5.4 — without dedupe, the alias
- // would emit two `codex-auto-review` rows. With dedupe, the dashboard sees
- // one row whose `upstreams` field lists both bindings, exactly like real
- // models that exist on multiple upstreams.
- await repo.upstreams.save(buildCustomUpstreamRecord({
- id: 'up_alpha',
- name: 'Alpha',
- sortOrder: 100,
- config: {
- baseUrl: 'https://alpha.example.com',
- authStyle: 'bearer',
- apiKey: 'sk-alpha',
- endpoints: { chatCompletions: {} },
- },
- }));
- await repo.upstreams.save(buildCustomUpstreamRecord({
- id: 'up_beta',
- name: 'Beta',
- sortOrder: 200,
- config: {
- baseUrl: 'https://beta.example.com',
- authStyle: 'bearer',
- apiKey: 'sk-beta',
- endpoints: { chatCompletions: {} },
- },
- }));
-
- await withMockedFetch(
- request => {
- const url = new URL(request.url);
- if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
- if (url.pathname === '/copilot_internal/v2/token') {
- return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
- }
- if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([]));
- if (url.pathname === '/v1/models' && (url.hostname === 'alpha.example.com' || url.hostname === 'beta.example.com')) {
- return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
- }
- throw new Error(`Unhandled fetch ${request.url}`);
- },
- async () => {
- const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
- const body = await response.json() as { data: Array<{ id: string }> };
- const rows = body.data.filter(m => m.id === 'codex-auto-review');
- assertEquals(rows.length, 1);
- },
- );
-});
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index 1264eed0d..49be73da7 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -1,8 +1,5 @@
import { fetchUpstreamModelsCached } from './models-cache.ts';
-import type { ModelAlias, ModelAliasRules } from '../../control-plane/model-aliases/types.ts';
import { getRepo } from '../../repo/index.ts';
-import { matchAlias } from '../model-aliases/match.ts';
-import { synthesizeListedAliases, type ListedModel } from '../models/alias-listing.ts';
import type { BackgroundScheduler } from '@floway-dev/platform';
import { type ModelEndpointKey, type ModelEndpoints, kindForEndpoints } from '@floway-dev/protocols/common';
import type { InternalModel, ModelProviderInstance, ProviderModelRecord, ResolvedModel, Fetcher, UpstreamModel, UpstreamProviderKind, UpstreamRecord } from '@floway-dev/provider';
@@ -21,10 +18,6 @@ interface ProviderModelsResult {
// order as the input `providers` list so the model-missing renderer can
// surface a stable, dashboard-aligned list.
failedUpstreams: string[];
- // Raw per-upstream catalogs collected during the fan-out. Aliases consume
- // this to enumerate per-upstream entries by addressable form without paying
- // a second round-trip.
- rawCatalogs: Map;
}
const NO_UPSTREAM_CONFIGURED_MESSAGE = 'No upstream provider configured — connect GitHub Copilot or add a Custom/Azure upstream in the dashboard';
@@ -148,7 +141,6 @@ const collectProviderModels = async (
scheduler: BackgroundScheduler,
): Promise => {
const byId = new Map();
- const rawCatalogs = new Map();
let sawSuccess = false;
let lastError: unknown = null;
const failedUpstreams: string[] = [];
@@ -180,7 +172,6 @@ const collectProviderModels = async (
}
sawSuccess = true;
const { instance, models: providedModels } = result.value;
- rawCatalogs.set(instance.upstream, providedModels);
// Operator-disabled public model ids vanish entirely for this upstream:
// dropped before they reach the catalog map, so they appear in no /models
// listing and resolve to nothing for routing. The disable is per-upstream,
@@ -215,7 +206,7 @@ const collectProviderModels = async (
}
}
- return { models: [...byId.values()], sawSuccess, lastError, failedUpstreams, rawCatalogs };
+ return { models: [...byId.values()], sawSuccess, lastError, failedUpstreams };
};
// Public-facing model-id ordering, applied in getModels() to every list that
@@ -271,41 +262,6 @@ export const getModels = async (
return [];
};
-// Returns the merged public model list AND the per-upstream raw catalogs and
-// provider instances. Listing surfaces (`/v1/models`, `/api/models`, Gemini
-// `/models`) use the same call so alias entries — synthesized once via
-// `synthesizeListedAliases` against the same `(providers, rawCatalogs)` pair —
-// are interleaved into the catalog before it returns. Per-surface mappers
-// then walk one uniform `ListedModel[]` instead of re-implementing alias
-// fan-out three times.
-export interface PublicModelsListing {
- models: ListedModel[];
- providers: readonly ModelProviderInstance[];
- rawCatalogs: ReadonlyMap;
-}
-
-export const getModelsForListing = async (
- upstreamFilter: readonly string[] | null,
- fetcherForUpstream: (upstreamId: string) => Fetcher,
- scheduler: BackgroundScheduler,
- aliases: readonly ModelAlias[],
-): Promise => {
- const providers = await listModelProviders(upstreamFilter);
- if (providers.length === 0) {
- throw new Error(NO_UPSTREAM_CONFIGURED_MESSAGE);
- }
-
- const { models, sawSuccess, lastError, rawCatalogs } = await collectProviderModels(providers, fetcherForUpstream, scheduler);
-
- if (sawSuccess) {
- const real = models.sort((a, b) => compareModelIds(a.id, b.id));
- const aliasEntries = synthesizeListedAliases(aliases, providers, rawCatalogs);
- return { models: [...real, ...aliasEntries], providers, rawCatalogs };
- }
- if (lastError) throw lastError;
- return { models: [], providers, rawCatalogs };
-};
-
export const getInternalModels = async (
upstreamFilter: readonly string[] | null,
fetcherForUpstream: (upstreamId: string) => Fetcher,
@@ -327,37 +283,14 @@ export interface ProviderModelResolution {
id: string;
model: UpstreamModel;
binding: ProviderModelRecord;
- // Set when this resolution came from an alias-rewrite interpretation. The
- // gateway-side passthrough callers (embeddings/images/completions) stamp
- // this onto the `x-floway-alias` response header so alias-served calls are
- // observable without enabling any extra mode.
- aliasName?: string;
- // Operator-locked rules carried alongside `aliasName`. Set in lockstep so
- // passthrough callers can trace the dropped rule fields without re-finding
- // the matched alias by name.
- aliasRules?: ModelAliasRules;
}
export interface ModelInterpretation {
provider: ModelProviderInstance;
// The bare id to query the upstream's catalog with. Equals the inbound
// model id for the unprefixed surface; equals `inbound.slice(prefix.length)`
- // for the prefixed surface. For an alias-rewrite interpretation it equals
- // the matched alias's `targetModelId`.
+ // for the prefixed surface.
lookupId: string;
- // Operator-locked request-time rules carried alongside an alias-rewrite
- // interpretation. Set only when this interpretation is the alias-rewrite
- // half of a matched alias; the real-name interpretation in the same
- // `conflictGroup` (and every non-aliased interpretation) leaves this
- // undefined.
- aliasRules?: ModelAliasRules;
- // The alias name as authored by the operator. Set in lockstep with
- // `aliasRules` and carried out for the `x-floway-alias` response header.
- aliasName?: string;
- // Identity-keyed group shared by the two interpretations a single
- // `onConflict: 'real-only'` alias emits. The post-resolution prune uses
- // this to drop the alias-rewrite member when both halves resolved.
- conflictGroup?: { readonly originalLookupId: string };
}
// Expands one inbound model id into every (provider, catalog-lookup-id) pair
@@ -365,90 +298,29 @@ export interface ModelInterpretation {
// when the inbound id literally equals one of the public-id surfaces the
// upstream advertises (bare and/or prefixed, per `modelPrefix.addressable`).
// The unprefixed interpretation is always pushed first when both apply.
-//
-// Each (provider, lookupId) candidate is then matched against the global
-// alias table — semantic P, post-prefix-strip — and the matched alias's
-// `onConflict` decides whether to push the real-name interpretation, the
-// alias-rewrite interpretation, or both (in either order). When neither
-// the alias nor the alias's target id is exposed by the upstream catalog,
-// the fan-out still emits both interpretations and resolution simply
-// drops the half that misses.
export const enumerateModelInterpretations = (
modelId: string,
providers: readonly ModelProviderInstance[],
- aliases: readonly ModelAlias[],
): ModelInterpretation[] => {
const out: ModelInterpretation[] = [];
for (const provider of providers) {
const cfg = provider.modelPrefix;
if (cfg === null || cfg.addressable.includes('unprefixed')) {
- pushInterpretation(out, provider, modelId, aliases);
+ out.push({ provider, lookupId: modelId });
}
if (cfg !== null && cfg.addressable.includes('prefixed') && modelId.startsWith(cfg.prefix)) {
- pushInterpretation(out, provider, modelId.slice(cfg.prefix.length), aliases);
+ out.push({ provider, lookupId: modelId.slice(cfg.prefix.length) });
}
}
return out;
};
-const pushInterpretation = (
- out: ModelInterpretation[],
- provider: ModelProviderInstance,
- lookupId: string,
- aliases: readonly ModelAlias[],
-): void => {
- const alias = matchAlias(lookupId, provider.upstream, aliases);
- if (!alias) {
- out.push({ provider, lookupId });
- return;
- }
- const aliasInterp: ModelInterpretation = {
- provider,
- lookupId: alias.targetModelId,
- aliasRules: alias.rules,
- aliasName: alias.alias,
- };
- const realInterp: ModelInterpretation = { provider, lookupId };
- switch (alias.onConflict) {
- case 'alias-only':
- out.push(aliasInterp);
- return;
- case 'real-only': {
- // Both halves enter the resolution pass; the post-resolution prune
- // drops the alias-rewrite member when the real-name resolved too.
- // Identity-keyed group so the prune step can rejoin them without
- // re-deriving an alias key.
- const group = { originalLookupId: lookupId };
- out.push({ ...realInterp, conflictGroup: group });
- out.push({ ...aliasInterp, conflictGroup: group });
- return;
- }
- case 'both-real-first':
- out.push(realInterp);
- out.push(aliasInterp);
- return;
- case 'both-alias-first':
- out.push(aliasInterp);
- out.push(realInterp);
- return;
- default: {
- const exhaustive: never = alias.onConflict;
- throw new Error(`pushInterpretation: unhandled onConflict '${exhaustive as string}'`);
- }
- }
-};
-
// Fan out per-interpretation against the SWR cache and collect the resolved
// matches plus a deduped list of upstreams whose catalog fetch rejected.
// Shared by `resolveModelForRequest` and `enumerateProviderCandidates`; the
// per-caller divergence (passthrough vs LLM-candidate shape) happens after
// this returns. Cancellation (`AbortError`) propagates so the per-request
// abort signal cannot be masked by a slow upstream's rejection.
-//
-// Each successful resolution carries its source `interpretation` back to
-// the caller so the alias-rewrite metadata (`aliasRules`, `aliasName`)
-// rides through to the candidate, and so the `real-only` post-resolution
-// prune can rejoin the two halves of a conflict group.
export const collectInterpretationOutcomes = async (
interpretations: readonly ModelInterpretation[],
fetcherForUpstream: (upstreamId: string) => Fetcher,
@@ -483,31 +355,7 @@ export const collectInterpretationOutcomes = async (
resolutions.push({ interpretation, provider: interpretation.provider, resolved });
}
- // `onConflict: 'real-only'`: when both halves of a conflict group
- // resolved, drop the alias-rewrite half so the real-name match is the
- // only one downstream sees. When only the alias-rewrite half resolved
- // (the upstream has no model named after the alias itself), keep it —
- // the operator's intent is to fall back to the alias when no real model
- // collides.
- const droppedInterpretations = new Set();
- const byGroup = new Map<{ readonly originalLookupId: string }, ModelInterpretation[]>();
- for (const { interpretation } of resolutions) {
- const group = interpretation.conflictGroup;
- if (!group) continue;
- const list = byGroup.get(group) ?? [];
- list.push(interpretation);
- byGroup.set(group, list);
- }
- for (const members of byGroup.values()) {
- if (members.length < 2) continue;
- const aliasRewriteMember = members.find(i => i.aliasRules !== undefined);
- if (aliasRewriteMember) droppedInterpretations.add(aliasRewriteMember);
- }
-
- return {
- resolutions: resolutions.filter(r => !droppedInterpretations.has(r.interpretation)),
- failedUpstreams,
- };
+ return { resolutions, failedUpstreams };
};
export const resolveModelForRequest = async (
@@ -515,22 +363,15 @@ export const resolveModelForRequest = async (
upstreamFilter: readonly string[] | null,
fetcherForUpstream: (upstreamId: string) => Fetcher,
scheduler: BackgroundScheduler,
- aliases: readonly ModelAlias[] = [],
): Promise => {
const providers = await listModelProviders(upstreamFilter);
if (providers.length === 0) {
throw new Error(NO_UPSTREAM_CONFIGURED_MESSAGE);
}
- const interpretations = enumerateModelInterpretations(modelId, providers, aliases);
+ const interpretations = enumerateModelInterpretations(modelId, providers);
const { resolutions, failedUpstreams } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
- // Project each resolution's alias-rewrite interpretation onto the
- // returned ProviderModelResolution so passthrough callers can stamp the
- // `x-floway-alias` header without re-deriving the match.
- const matches: ProviderModelResolution[] = resolutions.map(r =>
- r.interpretation.aliasName !== undefined
- ? { ...r.resolved, aliasName: r.interpretation.aliasName, aliasRules: r.interpretation.aliasRules }
- : r.resolved);
+ const matches: ProviderModelResolution[] = resolutions.map(r => r.resolved);
return { matches, failedUpstreams };
};
diff --git a/packages/gateway/src/data-plane/providers/registry_test.ts b/packages/gateway/src/data-plane/providers/registry_test.ts
index 9823aeb98..c330abfee 100644
--- a/packages/gateway/src/data-plane/providers/registry_test.ts
+++ b/packages/gateway/src/data-plane/providers/registry_test.ts
@@ -2,7 +2,6 @@ import { describe, expect, test } from 'vitest';
import { clearInFlightForTesting } from './models-cache.ts';
import { compareModelIds, enumerateModelInterpretations, getInternalModels, listModelProviders, resolveModelForProvider, resolveModelForRequest } from './registry.ts';
-import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
import { buildCopilotUpstreamRecord, buildCustomUpstreamRecord, copilotModels, setupAppTest } from '../../test-helpers.ts';
import { directFetcher, type ModelProviderInstance } from '@floway-dev/provider';
import { createCopilotProvider } from '@floway-dev/provider-copilot';
@@ -610,20 +609,20 @@ describe('enumerateModelInterpretations', () => {
// A: no prefix, bare always accepted. B: prefixed-only addressable — bare
// is not accepted. C: dual-addressable, bare accepted; the prefixed form
// does not apply because `gpt-4o` does not start with `cx/`.
- assertEquals(shape(enumerateModelInterpretations('gpt-4o', [A, B, C], [])), [
+ assertEquals(shape(enumerateModelInterpretations('gpt-4o', [A, B, C])), [
{ upstream: 'A', lookupId: 'gpt-4o' },
{ upstream: 'C', lookupId: 'gpt-4o' },
]);
});
test('prefix-only-addressable upstream strips the prefix when it matches', () => {
- assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [B], [])), [
+ assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [B])), [
{ upstream: 'B', lookupId: 'gpt-4o' },
]);
});
test('prefix-only-addressable upstream is silent when the prefix does not match', () => {
- assertEquals(enumerateModelInterpretations('gpt-4o', [B], []), []);
+ assertEquals(enumerateModelInterpretations('gpt-4o', [B]), []);
});
test('dual-addressable upstream produces two interpretations when the prefix matches', () => {
@@ -634,7 +633,7 @@ describe('enumerateModelInterpretations', () => {
upstream: 'D', name: 'd',
modelPrefix: { prefix: 'or/', addressable: ['unprefixed', 'prefixed'], listed: ['prefixed'] },
});
- assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [D], [])), [
+ assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [D])), [
{ upstream: 'D', lookupId: 'or/gpt-4o' },
{ upstream: 'D', lookupId: 'gpt-4o' },
]);
@@ -654,7 +653,7 @@ describe('enumerateModelInterpretations', () => {
modelPrefix: { prefix: 'aa/bb/', addressable: ['prefixed'], listed: ['prefixed'] },
});
const Z = fakeProvider({ upstream: 'Z', name: 'z', modelPrefix: null });
- assertEquals(shape(enumerateModelInterpretations('aa/bb/gpt-5', [X, Y, Z], [])), [
+ assertEquals(shape(enumerateModelInterpretations('aa/bb/gpt-5', [X, Y, Z])), [
{ upstream: 'X', lookupId: 'bb/gpt-5' },
{ upstream: 'Y', lookupId: 'gpt-5' },
{ upstream: 'Z', lookupId: 'aa/bb/gpt-5' },
@@ -907,202 +906,3 @@ describe('catalog listing under modelPrefix', () => {
);
});
});
-
-// Synthetic-catalog alias matching against a single provider. Verifies that
-// each `onConflict` mode emits the right interpretation shape from
-// `enumerateModelInterpretations`. The downstream `collectInterpretationOutcomes`
-// pass is exercised in the e2e suite below.
-describe('enumerateModelInterpretations with alias matching', () => {
- const provider = fakeProvider({ upstream: 'U', name: 'u', modelPrefix: null });
-
- const makeAlias = (over: Partial): ModelAlias => ({
- alias: 'codex-auto-review',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: { reasoning: { effort: 'low' } },
- visibleInModelsList: true,
- onConflict: 'real-only',
- createdAt: 0,
- ...over,
- });
-
- test('alias-only emits exactly the alias-rewrite interpretation, with rules', () => {
- const aliases = [makeAlias({ onConflict: 'alias-only' })];
- const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
- assertEquals(out.length, 1);
- assertEquals(out[0].lookupId, 'gpt-5.4');
- assertEquals(out[0].aliasRules, { reasoning: { effort: 'low' } });
- assertEquals(out[0].aliasName, 'codex-auto-review');
- assertEquals(out[0].conflictGroup, undefined);
- });
-
- test('real-only emits both halves, tagged with a shared conflictGroup', () => {
- const aliases = [makeAlias({ onConflict: 'real-only' })];
- const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
- assertEquals(out.length, 2);
- // Real first, alias second — the prune step removes the alias when
- // real resolved, so real-first keeps the natural iteration order.
- assertEquals(out[0].lookupId, 'codex-auto-review');
- assertEquals(out[0].aliasRules, undefined);
- assertEquals(out[1].lookupId, 'gpt-5.4');
- assertEquals(out[1].aliasRules, { reasoning: { effort: 'low' } });
- expect(out[0].conflictGroup).toBeDefined();
- expect(out[0].conflictGroup).toBe(out[1].conflictGroup);
- });
-
- test('both-real-first emits real then alias, neither group-tagged', () => {
- const aliases = [makeAlias({ onConflict: 'both-real-first' })];
- const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
- assertEquals(out.length, 2);
- assertEquals(out[0].lookupId, 'codex-auto-review');
- assertEquals(out[0].aliasRules, undefined);
- assertEquals(out[1].lookupId, 'gpt-5.4');
- assertEquals(out[1].aliasRules, { reasoning: { effort: 'low' } });
- assertEquals(out[0].conflictGroup, undefined);
- assertEquals(out[1].conflictGroup, undefined);
- });
-
- test('both-alias-first emits alias then real, neither group-tagged', () => {
- const aliases = [makeAlias({ onConflict: 'both-alias-first' })];
- const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
- assertEquals(out.length, 2);
- assertEquals(out[0].lookupId, 'gpt-5.4');
- assertEquals(out[0].aliasRules, { reasoning: { effort: 'low' } });
- assertEquals(out[1].lookupId, 'codex-auto-review');
- assertEquals(out[1].aliasRules, undefined);
- });
-
- test('upstreamIds filter skips the alias on providers outside the allowlist', () => {
- const aliases = [makeAlias({ onConflict: 'alias-only', upstreamIds: ['OTHER'] })];
- const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
- // The alias only applies to OTHER, so this provider sees a literal
- // (no-rewrite) interpretation.
- assertEquals(out.length, 1);
- assertEquals(out[0].lookupId, 'codex-auto-review');
- assertEquals(out[0].aliasRules, undefined);
- });
-
- test('prefix-strip happens before alias matching (semantic P)', () => {
- // Configure the provider with a prefix; the inbound `cx/codex-auto-review`
- // strips to `codex-auto-review` and matches the alias. The alias-rewrite
- // interpretation carries the target id `gpt-5.4`.
- const prefixedProvider = fakeProvider({
- upstream: 'P', name: 'p',
- modelPrefix: { prefix: 'cx/', addressable: ['prefixed'], listed: ['prefixed'] },
- });
- const aliases = [makeAlias({ onConflict: 'alias-only' })];
- const out = enumerateModelInterpretations('cx/codex-auto-review', [prefixedProvider], aliases);
- assertEquals(out.length, 1);
- assertEquals(out[0].lookupId, 'gpt-5.4');
- assertEquals(out[0].aliasName, 'codex-auto-review');
- });
-});
-
-// E2E coverage of the post-resolution prune. Uses a real Azure-backed
-// catalog (resolved without HTTP) so the conflict pruning behavior is
-// observed end-to-end via `resolveModelForRequest`.
-describe('resolveModelForRequest applies alias onConflict pruning', () => {
- // Helper that stages a single Azure upstream exposing both the real
- // alias-named model and the alias's target model.
- const stageBothNamesUpstream = async (): Promise => {
- const { repo } = await setupAppTest();
- await repo.upstreams.deleteAll();
- await repo.upstreams.save({
- id: 'up_a',
- provider: 'azure',
- name: 'A',
- enabled: true,
- sortOrder: 1,
- createdAt: '2026-05-21T00:00:00.000Z',
- updatedAt: '2026-05-21T00:00:00.000Z',
- config: {
- endpoint: 'https://a.openai.azure.com',
- apiKey: 'az-key',
- models: [
- { upstreamModelId: 'codex-auto-review', endpoints: { chatCompletions: {} } },
- { upstreamModelId: 'gpt-5.4', endpoints: { chatCompletions: {} } },
- ],
- },
- flagOverrides: {},
- disabledPublicModelIds: [],
- proxyFallbackList: [],
- modelPrefix: null,
- state: null,
- });
- };
-
- // Helper that stages a single Azure upstream exposing ONLY the alias's
- // target model (no real `codex-auto-review` collision).
- const stageTargetOnlyUpstream = async (): Promise => {
- const { repo } = await setupAppTest();
- await repo.upstreams.deleteAll();
- await repo.upstreams.save({
- id: 'up_a',
- provider: 'azure',
- name: 'A',
- enabled: true,
- sortOrder: 1,
- createdAt: '2026-05-21T00:00:00.000Z',
- updatedAt: '2026-05-21T00:00:00.000Z',
- config: {
- endpoint: 'https://a.openai.azure.com',
- apiKey: 'az-key',
- models: [
- { upstreamModelId: 'gpt-5.4', endpoints: { chatCompletions: {} } },
- ],
- },
- flagOverrides: {},
- disabledPublicModelIds: [],
- proxyFallbackList: [],
- modelPrefix: null,
- state: null,
- });
- };
-
- const aliasOf = (onConflict: ModelAlias['onConflict']): ModelAlias => ({
- alias: 'codex-auto-review',
- targetModelId: 'gpt-5.4',
- upstreamIds: [],
- rules: { reasoning: { effort: 'low' } },
- visibleInModelsList: true,
- onConflict,
- createdAt: 0,
- });
-
- test('alias-only resolves to a single match against the alias target id', async () => {
- await stageBothNamesUpstream();
- const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('alias-only')]);
- assertEquals(resolved.matches.length, 1);
- assertEquals(resolved.matches[0].id, 'gpt-5.4');
- });
-
- test('real-only drops the alias-rewrite resolution when the real-name resolves too', async () => {
- await stageBothNamesUpstream();
- const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('real-only')]);
- assertEquals(resolved.matches.length, 1);
- assertEquals(resolved.matches[0].id, 'codex-auto-review');
- });
-
- test('real-only keeps the alias-rewrite resolution when the real-name catalog lookup misses', async () => {
- await stageTargetOnlyUpstream();
- const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('real-only')]);
- assertEquals(resolved.matches.length, 1);
- assertEquals(resolved.matches[0].id, 'gpt-5.4');
- });
-
- test('both-real-first resolves to two matches, real first', async () => {
- await stageBothNamesUpstream();
- const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('both-real-first')]);
- assertEquals(resolved.matches.length, 2);
- assertEquals(resolved.matches[0].id, 'codex-auto-review');
- assertEquals(resolved.matches[1].id, 'gpt-5.4');
- });
-
- test('both-alias-first resolves to two matches, alias first', async () => {
- await stageBothNamesUpstream();
- const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('both-alias-first')]);
- assertEquals(resolved.matches.length, 2);
- assertEquals(resolved.matches[0].id, 'gpt-5.4');
- assertEquals(resolved.matches[1].id, 'codex-auto-review');
- });
-});
diff --git a/packages/gateway/src/data-plane/shared/passthrough-serve.ts b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
index 755a3a230..1add1a115 100644
--- a/packages/gateway/src/data-plane/shared/passthrough-serve.ts
+++ b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
@@ -21,11 +21,8 @@ import { createUpstreamLatencyRecorder, recordPerformanceError, recordPerformanc
import { recordTokenUsage } from './telemetry/usage.ts';
import { createPerRequestFetcher } from '../../dial/per-request.ts';
import type { AuthedContext } from '../../middleware/auth.ts';
-import { getRepo } from '../../repo/index.ts';
import type { TokenUsage } from '../../repo/types.ts';
import type { GatewayCtx } from '../chat/shared/gateway-ctx.ts';
-import { stageGatewayResponseHeader } from '../chat/shared/gateway-ctx.ts';
-import { createSanitizeTraceCtx, traceAllRulesDropped } from '../chat/shared/sanitize.ts';
import { type StreamCompletion, writeSSEFrames } from '../chat/shared/stream/sse.ts';
import { resolveModelForRequest } from '../providers/registry.ts';
import type { BackgroundScheduler } from '@floway-dev/platform';
@@ -127,22 +124,12 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
try {
const fetcherForUpstream = await createPerRequestFetcher(ctx.currentColo);
- // Aliases pass through so a `(model, lookupId)` interpretation can rewrite
- // to the alias's target id even for non-LLM-shaped endpoints. The alias
- // rules themselves never apply here — the inbound payload (embeddings,
- // images, /v1/completions) has no protocol-extension slots for the rule
- // knobs. We still surface the matched alias name on the
- // `x-floway-alias` response header (staged via Hono's `c.header` so it
- // survives `streamSSE`'s internal `c.newResponse` on the streaming
- // `/v1/completions` path) and trace one log line per dropped rule so an
- // operator can confirm the rewrite ran.
- const aliases = await getRepo().modelAliases.loadAll();
// Each match is one (upstream, upstream-catalog id) pair that interprets
// the inbound public id. Iteration order follows configured sort_order
// across upstreams, with the unprefixed interpretation pushed before the
// prefixed one within a single upstream. The first match whose binding
// satisfies the endpoint capability wins.
- const { matches, failedUpstreams } = await resolveModelForRequest(model, ctx.upstreamIds, fetcherForUpstream, ctx.backgroundScheduler, aliases);
+ const { matches, failedUpstreams } = await resolveModelForRequest(model, ctx.upstreamIds, fetcherForUpstream, ctx.backgroundScheduler);
if (matches.length === 0) {
ctx.dump?.error('gateway');
return passthroughApiError(c, appendFailedUpstreams(`Model ${model} is not available on any configured upstream.`, failedUpstreams), 404);
@@ -150,12 +137,6 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
for (const match of matches) {
if (!bindingServesEndpoint(match.binding)) continue;
- if (match.aliasName !== undefined) {
- stageGatewayResponseHeader(ctx, 'x-floway-alias', match.aliasName);
- if (match.aliasRules) {
- traceAllRulesDropped(match.aliasRules, sourceApi, createSanitizeTraceCtx(match.aliasName));
- }
- }
const recorder = createUpstreamLatencyRecorder();
const { response, modelKey } = await call(match.binding, {
diff --git a/packages/gateway/src/repo/memory.ts b/packages/gateway/src/repo/memory.ts
index d7492938f..5a85dba39 100644
--- a/packages/gateway/src/repo/memory.ts
+++ b/packages/gateway/src/repo/memory.ts
@@ -13,7 +13,6 @@ import type {
ApiKeyRepo,
BackoffRow,
CachedModelsRow,
- ModelAliasesRepo,
ModelsCacheRepo,
PerformanceDimensions,
PerformanceErrorSample,
@@ -40,7 +39,6 @@ import type {
UsersRepo,
} from './types.ts';
import { serializeStoredState } from './upstream-json.ts';
-import type { ModelAlias } from '../control-plane/model-aliases/types.ts';
import { latencyBucketForMs } from '../shared/performance-histogram.ts';
import { generateSessionToken } from '../shared/session-tokens.ts';
import { assertWebSearchProviderName } from '../shared/web-search-providers.ts';
@@ -898,7 +896,6 @@ export class InMemoryRepo implements Repo {
proxyBackoffs: ProxyBackoffRepo;
responsesItems: ResponsesItemsRepo;
responsesSnapshots: ResponsesSnapshotsRepo;
- modelAliases: ModelAliasesRepo;
constructor() {
this.users = new MemoryUsersRepo();
@@ -914,55 +911,5 @@ export class InMemoryRepo implements Repo {
this.proxyBackoffs = new MemoryProxyBackoffRepo();
this.responsesItems = new MemoryResponsesItemsRepo();
this.responsesSnapshots = new MemoryResponsesSnapshotsRepo();
- this.modelAliases = new MemoryModelAliasesRepo();
- }
-}
-
-// Test-only in-memory backing for the alias table. Mirrors SqlModelAliasesRepo:
-// `loadAll` returns rows sorted by alias, `create` rejects PK collisions,
-// `save` upserts in place. `setAll` is the test seam: tests that pre-populate
-// the table for read-only data-plane assertions reach for it directly.
-export class MemoryModelAliasesRepo implements ModelAliasesRepo {
- private rows = new Map();
-
- loadAll(): Promise {
- return Promise.resolve([...this.rows.values()].sort((a, b) => a.alias.localeCompare(b.alias)));
- }
-
- getByAlias(alias: string): Promise {
- return Promise.resolve(this.rows.get(alias) ?? null);
- }
-
- create(alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }> {
- if (this.rows.has(alias.alias)) return Promise.resolve({ ok: false, reason: 'duplicate' });
- this.rows.set(alias.alias, alias);
- return Promise.resolve({ ok: true });
- }
-
- save(alias: ModelAlias): Promise {
- // Preserve the original row's createdAt on an upsert so re-saves do not
- // overwrite the local deployment's first-seen timestamp.
- const existing = this.rows.get(alias.alias);
- const preserved = existing ? { ...alias, createdAt: existing.createdAt } : alias;
- this.rows.set(preserved.alias, preserved);
- return Promise.resolve();
- }
-
- rename(oldAlias: string, newAlias: string): Promise<{ ok: true } | { ok: false; reason: 'duplicate' | 'notFound' }> {
- if (oldAlias === newAlias) return Promise.resolve({ ok: true });
- if (this.rows.has(newAlias)) return Promise.resolve({ ok: false, reason: 'duplicate' });
- const existing = this.rows.get(oldAlias);
- if (!existing) return Promise.resolve({ ok: false, reason: 'notFound' });
- this.rows.delete(oldAlias);
- this.rows.set(newAlias, { ...existing, alias: newAlias });
- return Promise.resolve({ ok: true });
- }
-
- delete(alias: string): Promise<{ deleted: boolean }> {
- return Promise.resolve({ deleted: this.rows.delete(alias) });
- }
-
- setAll(rows: readonly ModelAlias[]): void {
- this.rows = new Map(rows.map(row => [row.alias, row]));
}
}
diff --git a/packages/gateway/src/repo/sql.ts b/packages/gateway/src/repo/sql.ts
index 75f814178..b716d07e4 100644
--- a/packages/gateway/src/repo/sql.ts
+++ b/packages/gateway/src/repo/sql.ts
@@ -7,7 +7,6 @@ import type {
ApiKeyRepo,
BackoffRow,
CachedModelsRow,
- ModelAliasesRepo,
ModelsCacheRepo,
PerformanceDimensions,
PerformanceErrorSample,
@@ -35,8 +34,6 @@ import type {
UsersRepo,
} from './types.ts';
import { serializeStoredConfig, serializeStoredState } from './upstream-json.ts';
-import { deleteAlias, getAliasByName, insertAlias, loadAllAliases, renameAlias, saveAlias } from '../control-plane/model-aliases/repo.ts';
-import type { ModelAlias } from '../control-plane/model-aliases/types.ts';
import { latencyBucketForMs } from '../shared/performance-histogram.ts';
import { generateSessionToken } from '../shared/session-tokens.ts';
import { assertWebSearchProviderName } from '../shared/web-search-providers.ts';
@@ -1602,7 +1599,6 @@ export class SqlRepo implements Repo {
proxyBackoffs: ProxyBackoffRepo;
responsesItems: ResponsesItemsRepo;
responsesSnapshots: ResponsesSnapshotsRepo;
- modelAliases: ModelAliasesRepo;
constructor(db: SqlDatabase) {
this.users = new SqlUsersRepo(db);
@@ -1618,34 +1614,5 @@ export class SqlRepo implements Repo {
this.proxyBackoffs = new SqlProxyBackoffRepo(db);
this.responsesItems = new SqlResponsesItemsRepo(db);
this.responsesSnapshots = new SqlResponsesSnapshotsRepo(db);
- this.modelAliases = new SqlModelAliasesRepo(db);
- }
-}
-
-class SqlModelAliasesRepo implements ModelAliasesRepo {
- constructor(private db: SqlDatabase) {}
-
- loadAll(): Promise {
- return loadAllAliases(this.db);
- }
-
- getByAlias(alias: string): Promise {
- return getAliasByName(this.db, alias);
- }
-
- create(alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }> {
- return insertAlias(this.db, alias);
- }
-
- save(alias: ModelAlias): Promise {
- return saveAlias(this.db, alias);
- }
-
- rename(oldAlias: string, newAlias: string): Promise<{ ok: true } | { ok: false; reason: 'duplicate' | 'notFound' }> {
- return renameAlias(this.db, oldAlias, newAlias);
- }
-
- delete(alias: string): Promise<{ deleted: boolean }> {
- return deleteAlias(this.db, alias);
}
}
diff --git a/packages/gateway/src/repo/types.ts b/packages/gateway/src/repo/types.ts
index 7d10f90ca..0341d41ef 100644
--- a/packages/gateway/src/repo/types.ts
+++ b/packages/gateway/src/repo/types.ts
@@ -1,4 +1,3 @@
-import type { ModelAlias } from '../control-plane/model-aliases/types.ts';
import type { HistogramBucket } from '../shared/performance-histogram.ts';
import type { WebSearchProviderName } from '../shared/web-search-providers.ts';
import type { BillingDimension, ModelPricing } from '@floway-dev/protocols/common';
@@ -333,27 +332,4 @@ export interface Repo {
proxyBackoffs: ProxyBackoffRepo;
responsesItems: ResponsesItemsRepo;
responsesSnapshots: ResponsesSnapshotsRepo;
- modelAliases: ModelAliasesRepo;
-}
-
-// Operator-managed alias table; small (dozens of rows at most) and read
-// per request, so the repo deliberately exposes only a full-table fetch
-// plus the targeted mutations the control-plane CRUD needs.
-export interface ModelAliasesRepo {
- loadAll(): Promise;
- getByAlias(alias: string): Promise;
- // INSERT-only — fails with `duplicate` on PK conflict so the route layer
- // surfaces 409 to the dashboard instead of silently overwriting an
- // existing row.
- create(alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }>;
- // UPSERT — used by the PATCH update path; preserves created_at on re-save
- // and bumps updated_at.
- save(alias: ModelAlias): Promise;
- // Updates the PK in place. Returns `notFound` when the source row is
- // missing, `duplicate` when the destination name already exists; the
- // route layer maps those to 404 / 409. SQLite (and D1) permit UPDATEing
- // a PRIMARY KEY column.
- rename(oldAlias: string, newAlias: string): Promise<{ ok: true } | { ok: false; reason: 'duplicate' | 'notFound' }>;
- // Returns whether a row was actually removed; routes treat false as 404.
- delete(alias: string): Promise<{ deleted: boolean }>;
}
diff --git a/packages/gateway/src/test-helpers/gateway-ctx.ts b/packages/gateway/src/test-helpers/gateway-ctx.ts
index 047981ed1..43358b7fb 100644
--- a/packages/gateway/src/test-helpers/gateway-ctx.ts
+++ b/packages/gateway/src/test-helpers/gateway-ctx.ts
@@ -1,11 +1,10 @@
import type { GatewayCtx } from '../data-plane/chat/shared/gateway-ctx.ts';
import type { AuthedContext } from '../middleware/auth.ts';
-// Minimal stub for the Hono `c` carried on `GatewayCtx`. Only `c.header`
-// is touched by the serve layer (to stamp `x-floway-alias`); unit tests
-// that don't exercise the alias branch never call it. Integration tests
-// that need real Hono behavior build the ctx via `createGatewayCtxFromHono`
-// against a real `makeApp()` request rather than going through this stub.
+// Minimal stub for the Hono `c` carried on `GatewayCtx`. Unit tests rarely
+// touch any methods on it; integration tests that need real Hono behavior
+// build the ctx via `createGatewayCtxFromHono` against a real `makeApp()`
+// request rather than going through this stub.
export const stubAuthedContext = (): AuthedContext =>
({ header: () => {} } as unknown as AuthedContext);
@@ -25,6 +24,5 @@ export const mockGatewayCtx = (overrides: Partial = {}): GatewayCtx
dump: null,
backgroundScheduler: promise => { void promise; },
requestStartedAt: 0,
- responseHeaders: new Headers(),
...overrides,
});
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index 9a73c11d5..54243d56d 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -134,107 +134,9 @@ export interface PublicModel {
};
kind: ModelKind;
cost?: ModelPricing;
- // Floway protocol extension. Present on synthesized alias entries the
- // gateway appends to the listing. Clients that do not know about the
- // field ignore it; alias-aware clients (dashboard, CLI shims) render the
- // alias's target id and rules from this payload directly.
- aliasedFrom?: PublicModelAliasedFrom;
chat?: ChatModelInfo;
}
-export interface PublicModelAliasedFrom {
- targetModelId: string;
- upstreamIds: readonly string[];
- rules: {
- reasoning?: {
- effort?: string;
- budgetTokens?: number;
- adaptive?: boolean;
- summary?: string;
- };
- verbosity?: string;
- serviceTier?: string;
- anthropicBeta?: readonly string[];
- };
- onConflict: 'alias-only' | 'real-only' | 'both-real-first' | 'both-alias-first';
- // Operator-set display name. Absent (undefined) when the operator left the
- // field blank — alias-aware UIs then synthesize a label from the target's
- // display name and the inline rules summary instead.
- displayName?: string;
-}
-
-// One badge per rule field on an alias, in a `${label}` / `${label}: ${value}`
-// shape the dashboard renders inline next to the model row. Returned in a
-// deterministic order so the badge sequence stays stable across surfaces and
-// across JSON key arrivals. Boolean toggles render label-only (no colon);
-// every other field renders as `${label}: ${value}`. The inline-prose form
-// (`composeAliasDisplayName`'s suffix and `formatAliasRulesInline`) uses its
-// own compact wording — the two surfaces deliberately diverge so the inline
-// summary stays compact while the badge view stays self-describing.
-export interface AliasRuleBadge {
- label: string;
- value?: string;
-}
-
-export const formatAliasRuleBadges = (rules: PublicModelAliasedFrom['rules']): AliasRuleBadge[] => {
- const out: AliasRuleBadge[] = [];
- if (rules.reasoning?.effort !== undefined) out.push({ label: 'effort', value: rules.reasoning.effort });
- if (rules.reasoning?.budgetTokens !== undefined) out.push({ label: 'reasoning budget', value: `${rules.reasoning.budgetTokens}tk` });
- if (rules.reasoning?.adaptive === true) out.push({ label: 'adaptive reasoning' });
- if (rules.reasoning?.summary !== undefined) out.push({ label: 'reasoning summary', value: rules.reasoning.summary });
- if (rules.verbosity !== undefined) out.push({ label: 'verbosity', value: rules.verbosity });
- if (rules.serviceTier !== undefined) out.push({ label: 'service tier', value: rules.serviceTier });
- if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
- out.push({ label: 'anthropic beta', value: [...rules.anthropicBeta].sort().join('/') });
- }
- return out;
-};
-
-// Inline-prose parts for an alias's rules, in a deterministic order. Each
-// entry uses the compact `value label` wording (e.g. `low effort`,
-// `4096tk reasoning`) so it fits both alongside the target name in narrow
-// listings and on its own as a standalone summary line. The dashboard's
-// per-badge view uses `formatAliasRuleBadges` for the self-describing
-// `label: value` form. `anthropicBeta` tokens are sorted so two operators
-// carrying the same set in different orders see the same label.
-const aliasRulesInlineParts = (rules: PublicModelAliasedFrom['rules']): string[] => {
- const parts: string[] = [];
- if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`);
- if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`);
- if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning');
- if (rules.reasoning?.summary !== undefined) parts.push(`${rules.reasoning.summary} summary`);
- if (rules.verbosity !== undefined) parts.push(`${rules.verbosity} verbosity`);
- if (rules.serviceTier !== undefined) parts.push(`${rules.serviceTier} tier`);
- if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
- parts.push([...rules.anthropicBeta].sort().join('/'));
- }
- return parts;
-};
-
-// Compose the alias-local display name — what the operator named the alias
-// (when set) or a synthesized target + rules summary. Independent of which
-// upstream is surfacing the alias; the prefixed listing form prepends the
-// upstream display name at the call site, mirroring the real-model path in
-// the gateway's provider registry. The parenthesized rules suffix shares
-// its parts with `formatAliasRulesInline` so the two surfaces never drift.
-export const composeAliasDisplayName = (input: {
- aliasDisplayName?: string;
- targetDisplayName: string;
- rules: PublicModelAliasedFrom['rules'];
-}): string => {
- if (input.aliasDisplayName !== undefined) return input.aliasDisplayName;
- const parts = aliasRulesInlineParts(input.rules);
- const suffix = parts.length > 0 ? ` (${parts.join(', ')})` : '';
- return `${input.targetDisplayName}${suffix}`;
-};
-
-// Joined rules summary without the parentheses — what the dashboard's alias
-// row renders on its third line. Empty string when no rule applies; callers
-// should drop the line entirely in that case rather than rendering blank.
-export const formatAliasRulesInline = (rules: PublicModelAliasedFrom['rules']): string => {
- return aliasRulesInlineParts(rules).join(', ');
-};
-
export interface PublicModelsResponse {
// OpenAI container
object: 'list';
diff --git a/packages/protocols/src/common/models_alias-display_test.ts b/packages/protocols/src/common/models_alias-display_test.ts
deleted file mode 100644
index 7c7d4c49c..000000000
--- a/packages/protocols/src/common/models_alias-display_test.ts
+++ /dev/null
@@ -1,100 +0,0 @@
-import { describe, expect, test } from 'vitest';
-
-import { composeAliasDisplayName, formatAliasRulesInline } from './models.ts';
-
-describe('composeAliasDisplayName', () => {
- test('uses alias displayName when set, suppressing the rules summary', () => {
- expect(
- composeAliasDisplayName({
- aliasDisplayName: 'Codex Auto Review',
- targetDisplayName: 'GPT-5.4',
- rules: { reasoning: { effort: 'low' } },
- }),
- ).toBe('Codex Auto Review');
- });
-
- test('omits the rules suffix when rules are empty', () => {
- expect(
- composeAliasDisplayName({
- targetDisplayName: 'GPT-5.4',
- rules: {},
- }),
- ).toBe('GPT-5.4');
- });
-
- test('formats each rule field with its canonical suffix when alias displayName is missing', () => {
- const target = 'GPT-5.4';
- expect(composeAliasDisplayName({ targetDisplayName: target, rules: { reasoning: { effort: 'high' } } })).toBe('GPT-5.4 (high effort)');
- expect(composeAliasDisplayName({ targetDisplayName: target, rules: { reasoning: { budgetTokens: 4096 } } })).toBe('GPT-5.4 (4096tk reasoning)');
- expect(composeAliasDisplayName({ targetDisplayName: target, rules: { reasoning: { adaptive: true } } })).toBe('GPT-5.4 (adaptive reasoning)');
- expect(composeAliasDisplayName({ targetDisplayName: target, rules: { reasoning: { summary: 'detailed' } } })).toBe('GPT-5.4 (detailed summary)');
- expect(composeAliasDisplayName({ targetDisplayName: target, rules: { verbosity: 'low' } })).toBe('GPT-5.4 (low verbosity)');
- expect(composeAliasDisplayName({ targetDisplayName: target, rules: { serviceTier: 'priority' } })).toBe('GPT-5.4 (priority tier)');
- });
-
- test('sorts anthropicBeta tokens and joins with slashes', () => {
- expect(
- composeAliasDisplayName({
- targetDisplayName: 'Claude',
- rules: { anthropicBeta: ['extended-thinking', 'fast-mode-2026-02-01'] },
- }),
- ).toBe('Claude (extended-thinking/fast-mode-2026-02-01)');
- expect(
- composeAliasDisplayName({
- targetDisplayName: 'Claude',
- rules: { anthropicBeta: ['fast-mode-2026-02-01', 'extended-thinking'] },
- }),
- ).toBe('Claude (extended-thinking/fast-mode-2026-02-01)');
- });
-
- test('drops anthropicBeta when the token list is empty', () => {
- expect(
- composeAliasDisplayName({
- targetDisplayName: 'Claude',
- rules: { anthropicBeta: [] },
- }),
- ).toBe('Claude');
- });
-
- test('joins multiple fields with comma in deterministic order', () => {
- expect(
- composeAliasDisplayName({
- targetDisplayName: 'GPT-5.4',
- rules: {
- reasoning: { effort: 'low', summary: 'concise' },
- verbosity: 'high',
- serviceTier: 'flex',
- },
- }),
- ).toBe('GPT-5.4 (low effort, concise summary, high verbosity, flex tier)');
- });
-});
-
-describe('formatAliasRulesInline', () => {
- test('returns empty string when no rule applies', () => {
- expect(formatAliasRulesInline({})).toBe('');
- });
-
- test('returns each rule field with the same compact wording as the parenthesized suffix, sans parens', () => {
- expect(formatAliasRulesInline({ reasoning: { effort: 'low' } })).toBe('low effort');
- expect(formatAliasRulesInline({ reasoning: { budgetTokens: 4096 } })).toBe('4096tk reasoning');
- expect(formatAliasRulesInline({ reasoning: { adaptive: true } })).toBe('adaptive reasoning');
- expect(formatAliasRulesInline({ reasoning: { summary: 'detailed' } })).toBe('detailed summary');
- });
-
- test('joins multiple fields with comma in the same order composeAliasDisplayName uses', () => {
- expect(
- formatAliasRulesInline({
- reasoning: { effort: 'low', summary: 'detailed' },
- verbosity: 'high',
- serviceTier: 'fast',
- }),
- ).toBe('low effort, detailed summary, high verbosity, fast tier');
- });
-
- test('sorts anthropicBeta tokens and joins with slashes', () => {
- expect(
- formatAliasRulesInline({ anthropicBeta: ['fast-mode-2026-02-01', 'extended-thinking'] }),
- ).toBe('extended-thinking/fast-mode-2026-02-01');
- });
-});
From c2fee82c4445adb6d874e0b7deded14128f159b5 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 17:19:22 +0800
Subject: [PATCH 048/170] feat(aliases): v2 migration + seed
Single table with JSON `targets` column. Each alias picks a kind
(chat/embedding/image), a selection strategy (first-available/random),
optional display name, visible-in-models flag, and a list of target
entries with per-target rules.
Seed `codex-auto-review` to "prefer the real model, else gpt-5.4 with
low reasoning effort". The real-id-first target makes the alias an
intentional shadow of the real model: when an upstream exposes the
real `codex-auto-review`, it wins; otherwise the configured fallback
kicks in.
---
.../gateway/migrations/0046_model_aliases.sql | 36 +++++++++++++++++++
1 file changed, 36 insertions(+)
create mode 100644 packages/gateway/migrations/0046_model_aliases.sql
diff --git a/packages/gateway/migrations/0046_model_aliases.sql b/packages/gateway/migrations/0046_model_aliases.sql
new file mode 100644
index 000000000..ea20c4d81
--- /dev/null
+++ b/packages/gateway/migrations/0046_model_aliases.sql
@@ -0,0 +1,36 @@
+CREATE TABLE model_aliases (
+ name TEXT PRIMARY KEY,
+ kind TEXT NOT NULL CHECK (kind IN ('chat', 'embedding', 'image')),
+ selection TEXT NOT NULL CHECK (selection IN ('random', 'first-available')),
+ display_name TEXT,
+ visible_in_models_list INTEGER NOT NULL DEFAULT 1 CHECK (visible_in_models_list IN (0, 1)),
+ targets TEXT NOT NULL,
+ sort_order INTEGER NOT NULL DEFAULT 0,
+ created_at TEXT NOT NULL,
+ updated_at TEXT NOT NULL
+);
+
+CREATE INDEX idx_model_aliases_sort ON model_aliases (sort_order, created_at);
+
+INSERT INTO model_aliases (
+ name,
+ kind,
+ selection,
+ display_name,
+ visible_in_models_list,
+ targets,
+ sort_order,
+ created_at,
+ updated_at
+)
+VALUES (
+ 'codex-auto-review',
+ 'chat',
+ 'first-available',
+ 'Codex Auto Review',
+ 1,
+ json('[{"target_model_id":"codex-auto-review","rules":{}},{"target_model_id":"gpt-5.4","rules":{"reasoning":{"effort":"low"}}}]'),
+ 0,
+ strftime('%Y-%m-%dT%H:%M:%fZ', 'now'),
+ strftime('%Y-%m-%dT%H:%M:%fZ', 'now')
+);
From 7c8034bf3fdfabb415b6e00d75233750536e7e1e Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 17:36:44 +0800
Subject: [PATCH 049/170] feat(aliases): backend types + repo + routes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Wire up the v2 model-alias backend on top of the new schema:
- packages/protocols/src/common/aliases.ts — shared snake_case wire DTO
(ModelAlias, AliasTarget, ChatAliasRules, AliasKind, AliasSelection,
ReasoningEffort, ReasoningSummary, Verbosity, ServiceTier). Re-exported
through @floway-dev/protocols/common so the gateway and the dashboard
consume one source of truth.
- ModelAliasRecord + ModelAliasesRepo in repo/types.ts. Repo contract:
list / getByName / insert (throws on PK collision) / update(oldName,
record) with atomic rename semantics / delete / deleteAll.
- SqlModelAliasesRepo + MemoryModelAliasesRepo implementing the contract.
SQL persists targets as a JSON column; rename runs INSERT-then-DELETE
through the batch primitive so D1 / sql.js apply it atomically.
- control-plane/model-aliases/{serialize,routes}.ts — camelCase <->
snake_case projection plus four CRUD handlers (GET / POST / PUT :name /
DELETE :name). POST collisions return 409, rename collisions return
409, missing rows return 404, deletes are idempotent (204 whether or
not a row existed).
- Zod schemas createAliasBody + updateAliasBody with a superRefine pass
that gates per-target rules on the alias-level kind: chat-kind parses
through chatAliasRulesSchema, other kinds require empty rules.
- Routes registered inside the admin-only group alongside upstreams /
proxies.
- Tests: cross-backend repo scenarios (memory + sql.js), RPC-client
route scenarios, and snake/camel round-trip.
Scope-limited to the backend surface; the data-plane resolver,
/v1/models alias listing, and dashboard pieces stay on separate
follow-up tasks.
---
.../control-plane/model-aliases/repo_test.ts | 153 +++++++++++++
.../src/control-plane/model-aliases/routes.ts | 73 ++++++
.../model-aliases/routes_test.ts | 213 ++++++++++++++++++
.../control-plane/model-aliases/serialize.ts | 51 +++++
.../model-aliases/serialize_test.ts | 62 +++++
packages/gateway/src/control-plane/routes.ts | 9 +-
packages/gateway/src/control-plane/schemas.ts | 83 +++++++
packages/gateway/src/repo/memory.ts | 54 +++++
packages/gateway/src/repo/sql.ts | 151 ++++++++++++-
packages/gateway/src/repo/types.ts | 36 ++-
packages/protocols/src/common/aliases.ts | 79 +++++++
packages/protocols/src/common/index.ts | 1 +
12 files changed, 962 insertions(+), 3 deletions(-)
create mode 100644 packages/gateway/src/control-plane/model-aliases/repo_test.ts
create mode 100644 packages/gateway/src/control-plane/model-aliases/routes.ts
create mode 100644 packages/gateway/src/control-plane/model-aliases/routes_test.ts
create mode 100644 packages/gateway/src/control-plane/model-aliases/serialize.ts
create mode 100644 packages/gateway/src/control-plane/model-aliases/serialize_test.ts
create mode 100644 packages/protocols/src/common/aliases.ts
diff --git a/packages/gateway/src/control-plane/model-aliases/repo_test.ts b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
new file mode 100644
index 000000000..8a56e5f96
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
@@ -0,0 +1,153 @@
+// Cross-backend tests for the model aliases repo. Memory drives the unit
+// scenarios by default; the SQL backend (sql.js applying every migration)
+// catches schema drift, JSON-column round-trips, and rename atomicity.
+
+import { test } from 'vitest';
+
+import { InMemoryRepo } from '../../repo/memory.ts';
+import { SqlRepo } from '../../repo/sql.ts';
+import { createSqliteTestDb } from '../../repo/test-sqlite.ts';
+import type { ModelAliasRecord, Repo } from '../../repo/types.ts';
+import { assertEquals, assertExists, assertRejects } from '@floway-dev/test-utils';
+
+const REPO_BACKENDS: Array<[string, () => Promise<Repo>]> = [
+ ['memory', async () => new InMemoryRepo()],
+ ['sql', async () => new SqlRepo(await createSqliteTestDb())],
+];
+
+const aliasFixture = (overrides: Partial<ModelAliasRecord> = {}): ModelAliasRecord => ({
+ name: 'gpt-fast',
+ kind: 'chat',
+ selection: 'first-available',
+ displayName: null,
+ visibleInModelsList: true,
+ targets: [
+ { target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } },
+ ],
+ sortOrder: 0,
+ createdAt: '2026-06-26T00:00:00.000Z',
+ updatedAt: '2026-06-26T00:00:00.000Z',
+ ...overrides,
+});
+
+for (const [backend, makeRepo] of REPO_BACKENDS) {
+ // The 0046 migration seeds `codex-auto-review`; every test starts from a
+ // known-empty state so assertions on row counts and ordering stay stable.
+ const freshRepo = async (): Promise<Repo> => {
+ const repo = await makeRepo();
+ await repo.modelAliases.deleteAll();
+ return repo;
+ };
+
+ test(`[${backend}] insert then list returns the row`, async () => {
+ const repo = await freshRepo();
+ await repo.modelAliases.insert(aliasFixture());
+ const list = await repo.modelAliases.list();
+ assertEquals(list.length, 1);
+ assertEquals(list[0].name, 'gpt-fast');
+ assertEquals(list[0].targets[0].target_model_id, 'gpt-5.4');
+ });
+
+ test(`[${backend}] insert collision throws`, async () => {
+ const repo = await freshRepo();
+ await repo.modelAliases.insert(aliasFixture());
+ await assertRejects(() => repo.modelAliases.insert(aliasFixture()));
+ });
+
+ test(`[${backend}] getByName returns null when no row matches`, async () => {
+ const repo = await freshRepo();
+ assertEquals(await repo.modelAliases.getByName('nope'), null);
+ });
+
+ test(`[${backend}] update with same name preserves createdAt and refreshes updatedAt`, async () => {
+ const repo = await freshRepo();
+ await repo.modelAliases.insert(aliasFixture({ createdAt: '2026-01-01T00:00:00.000Z', updatedAt: '2026-01-01T00:00:00.000Z' }));
+ await repo.modelAliases.update('gpt-fast', aliasFixture({
+ createdAt: '2026-01-01T00:00:00.000Z',
+ updatedAt: '2026-06-26T12:00:00.000Z',
+ displayName: 'GPT Fast',
+ }));
+ const after = await repo.modelAliases.getByName('gpt-fast');
+ assertExists(after);
+ assertEquals(after.createdAt, '2026-01-01T00:00:00.000Z');
+ assertEquals(after.updatedAt, '2026-06-26T12:00:00.000Z');
+ assertEquals(after.displayName, 'GPT Fast');
+ });
+
+ test(`[${backend}] update with different name (rename) moves the row`, async () => {
+ const repo = await freshRepo();
+ await repo.modelAliases.insert(aliasFixture({ createdAt: '2026-01-01T00:00:00.000Z' }));
+ await repo.modelAliases.update('gpt-fast', aliasFixture({
+ name: 'gpt-fastest',
+ createdAt: '2026-01-01T00:00:00.000Z',
+ updatedAt: '2026-06-26T12:00:00.000Z',
+ }));
+ assertEquals(await repo.modelAliases.getByName('gpt-fast'), null);
+ const renamed = await repo.modelAliases.getByName('gpt-fastest');
+ assertExists(renamed);
+ assertEquals(renamed.createdAt, '2026-01-01T00:00:00.000Z');
+ });
+
+ test(`[${backend}] rename to an existing name throws and leaves both rows intact`, async () => {
+ const repo = await freshRepo();
+ await repo.modelAliases.insert(aliasFixture({ name: 'gpt-fast' }));
+ await repo.modelAliases.insert(aliasFixture({ name: 'gpt-slow' }));
+ await assertRejects(() => repo.modelAliases.update('gpt-fast', aliasFixture({ name: 'gpt-slow' })));
+ assertExists(await repo.modelAliases.getByName('gpt-fast'));
+ assertExists(await repo.modelAliases.getByName('gpt-slow'));
+ });
+
+ test(`[${backend}] update on a missing name throws`, async () => {
+ const repo = await freshRepo();
+ await assertRejects(() => repo.modelAliases.update('nope', aliasFixture({ name: 'nope' })));
+ });
+
+ test(`[${backend}] delete returns true when present, false when absent`, async () => {
+ const repo = await freshRepo();
+ await repo.modelAliases.insert(aliasFixture());
+ assertEquals(await repo.modelAliases.delete('gpt-fast'), true);
+ assertEquals(await repo.modelAliases.delete('gpt-fast'), false);
+ });
+
+ test(`[${backend}] list orders by (sortOrder, createdAt)`, async () => {
+ const repo = await freshRepo();
+ await repo.modelAliases.insert(aliasFixture({ name: 'a', sortOrder: 1, createdAt: '2026-01-01T00:00:00.000Z' }));
+ await repo.modelAliases.insert(aliasFixture({ name: 'b', sortOrder: 0, createdAt: '2026-02-01T00:00:00.000Z' }));
+ await repo.modelAliases.insert(aliasFixture({ name: 'c', sortOrder: 0, createdAt: '2026-01-15T00:00:00.000Z' }));
+ const list = await repo.modelAliases.list();
+ assertEquals(list.map(r => r.name), ['c', 'b', 'a']);
+ });
+
+ test(`[${backend}] targets JSON round-trips multi-target chat rules`, async () => {
+ const repo = await freshRepo();
+ await repo.modelAliases.insert(aliasFixture({
+ name: 'multi',
+ targets: [
+ { target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'high', adaptive: true } } },
+ { target_model_id: 'gpt-4.1', rules: { verbosity: 'low', serviceTier: 'priority' } },
+ { target_model_id: 'gpt-3.5', rules: {} },
+ ],
+ }));
+ const row = await repo.modelAliases.getByName('multi');
+ assertExists(row);
+ assertEquals(row.targets.length, 3);
+ assertEquals(row.targets[0].rules, { reasoning: { effort: 'high', adaptive: true } });
+ assertEquals(row.targets[1].rules, { verbosity: 'low', serviceTier: 'priority' });
+ assertEquals(row.targets[2].rules, {});
+ });
+
+ test(`[${backend}] visibleInModelsList=false round-trips`, async () => {
+ const repo = await freshRepo();
+ await repo.modelAliases.insert(aliasFixture({ visibleInModelsList: false }));
+ const row = await repo.modelAliases.getByName('gpt-fast');
+ assertEquals(row?.visibleInModelsList, false);
+ });
+
+ test(`[${backend}] deleteAll wipes every row`, async () => {
+ const repo = await freshRepo();
+ await repo.modelAliases.insert(aliasFixture({ name: 'a' }));
+ await repo.modelAliases.insert(aliasFixture({ name: 'b' }));
+ await repo.modelAliases.deleteAll();
+ assertEquals((await repo.modelAliases.list()).length, 0);
+ });
+}
diff --git a/packages/gateway/src/control-plane/model-aliases/routes.ts b/packages/gateway/src/control-plane/model-aliases/routes.ts
new file mode 100644
index 000000000..49956533c
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/routes.ts
@@ -0,0 +1,73 @@
+// Admin-only CRUD for model aliases. Wire shape (snake_case) is documented in
+// `@floway-dev/protocols/common`; this layer translates between the wire DTO
+// and the camelCase `ModelAliasRecord` the repo stores.
+
+import type { Context } from 'hono';
+
+import { recordToWire, wireToRecord } from './serialize.ts';
+import { type CtxWithJson } from '../../middleware/zod-validator.ts';
+import { getRepo } from '../../repo/index.ts';
+import type { ModelAliasRecord } from '../../repo/types.ts';
+import type { createAliasBody, updateAliasBody } from '../schemas.ts';
+
+// Place a new alias at the end of the sort order by default. Empty list → 0
+// so the very first alias starts the sequence at the same origin as upstreams.
+const nextSortOrder = (existing: readonly ModelAliasRecord[]): number =>
+ existing.reduce((acc, record) => Math.max(acc, record.sortOrder), -1) + 1;
+
+export const listAliases = async (c: Context) => {
+ const records = await getRepo().modelAliases.list();
+ return c.json(records.map(recordToWire));
+};
+
+export const createAlias = async (c: CtxWithJson<typeof createAliasBody>) => {
+ const body = c.req.valid('json');
+ const repo = getRepo();
+
+ const collision = await repo.modelAliases.getByName(body.name);
+ if (collision) {
+ return c.json({ error: `Alias ${body.name} already exists` }, 409);
+ }
+
+ const existing = await repo.modelAliases.list();
+ const now = new Date().toISOString();
+ const record = wireToRecord(body, {
+ sortOrder: body.sort_order ?? nextSortOrder(existing),
+ createdAt: now,
+ updatedAt: now,
+ });
+ await repo.modelAliases.insert(record);
+ return c.json(recordToWire(record), 201);
+};
+
+export const updateAlias = async (c: CtxWithJson<typeof updateAliasBody>) => {
+ const oldName = c.req.param('name') ?? '';
+ const body = c.req.valid('json');
+ const repo = getRepo();
+
+ const existing = await repo.modelAliases.getByName(oldName);
+ if (!existing) return c.json({ error: 'Alias not found' }, 404);
+
+ if (body.name !== oldName) {
+ const collision = await repo.modelAliases.getByName(body.name);
+ if (collision) return c.json({ error: `Alias ${body.name} already exists` }, 409);
+ }
+
+ const next = wireToRecord(body, {
+ // Preserve the original sortOrder unless the client explicitly overrides
+ // it; createdAt belongs to the row's first-seen instant and never moves.
+ sortOrder: body.sort_order ?? existing.sortOrder,
+ createdAt: existing.createdAt,
+ updatedAt: new Date().toISOString(),
+ });
+ await repo.modelAliases.update(oldName, next);
+ return c.json(recordToWire(next));
+};
+
+export const deleteAlias = async (c: Context) => {
+ const name = c.req.param('name') ?? '';
+ // Idempotent: the spec calls for a successful response whether or not a row
+ // existed. 204 keeps the verb-shape parity with DELETE /api/proxies/:id.
+ await getRepo().modelAliases.delete(name);
+ return c.body(null, 204);
+};
diff --git a/packages/gateway/src/control-plane/model-aliases/routes_test.ts b/packages/gateway/src/control-plane/model-aliases/routes_test.ts
new file mode 100644
index 000000000..328cb88ed
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/routes_test.ts
@@ -0,0 +1,213 @@
+import { test } from 'vitest';
+
+import { requestApp, setupAppTest } from '../../test-helpers.ts';
+import type { ModelAlias } from '@floway-dev/protocols/common';
+import { assertEquals, assertExists } from '@floway-dev/test-utils';
+
+const authed = (adminSession: string, body?: unknown, method?: string): RequestInit => ({
+ method: method ?? (body === undefined ? 'GET' : 'POST'),
+ headers: {
+ 'content-type': 'application/json',
+ 'x-floway-session': adminSession,
+ },
+ ...(body === undefined ? {} : { body: JSON.stringify(body) }),
+});
+
+const putAuthed = (adminSession: string, body: unknown): RequestInit => authed(adminSession, body, 'PUT');
+const deleteAuthed = (adminSession: string): RequestInit => ({
+ method: 'DELETE',
+ headers: { 'x-floway-session': adminSession },
+});
+
+const baseBody = (overrides: Record<string, unknown> = {}) => ({
+ name: 'gpt-fast',
+ kind: 'chat',
+ selection: 'first-available',
+ display_name: null,
+ visible_in_models_list: true,
+ targets: [
+ { target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } },
+ ],
+ ...overrides,
+});
+
+test('GET /api/aliases lists every row in sort order', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.deleteAll();
+ await repo.modelAliases.insert({
+ name: 'b', kind: 'chat', selection: 'random', displayName: null, visibleInModelsList: true,
+ targets: [{ target_model_id: 'm1', rules: {} }],
+ sortOrder: 1, createdAt: '2026-01-01T00:00:00.000Z', updatedAt: '2026-01-01T00:00:00.000Z',
+ });
+ await repo.modelAliases.insert({
+ name: 'a', kind: 'chat', selection: 'random', displayName: null, visibleInModelsList: true,
+ targets: [{ target_model_id: 'm2', rules: {} }],
+ sortOrder: 0, createdAt: '2026-01-02T00:00:00.000Z', updatedAt: '2026-01-02T00:00:00.000Z',
+ });
+
+ const resp = await requestApp('/api/aliases', authed(adminSession));
+ assertEquals(resp.status, 200);
+ const list = (await resp.json()) as ModelAlias[];
+ assertEquals(list.map(r => r.name), ['a', 'b']);
+});
+
+test('POST /api/aliases creates an alias and returns the snake_case wire shape', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.deleteAll();
+
+ const resp = await requestApp('/api/aliases', authed(adminSession, baseBody()));
+ assertEquals(resp.status, 201);
+ const created = (await resp.json()) as ModelAlias;
+ assertEquals(created.name, 'gpt-fast');
+ assertEquals(created.visible_in_models_list, true);
+ assertEquals(created.targets[0].target_model_id, 'gpt-5.4');
+
+ const stored = await repo.modelAliases.getByName('gpt-fast');
+ assertExists(stored);
+ assertEquals(stored.visibleInModelsList, true);
+});
+
+test('POST /api/aliases rejects a name collision with 409', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.deleteAll();
+ await requestApp('/api/aliases', authed(adminSession, baseBody()));
+
+ const resp = await requestApp('/api/aliases', authed(adminSession, baseBody()));
+ assertEquals(resp.status, 409);
+ const body = (await resp.json()) as { error?: string };
+ assertEquals(body.error?.includes('already exists'), true);
+});
+
+test('PUT /api/aliases/:name updates rules and refreshes updated_at', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.deleteAll();
+ await requestApp('/api/aliases', authed(adminSession, baseBody()));
+ const before = await repo.modelAliases.getByName('gpt-fast');
+ assertExists(before);
+ await new Promise(resolve => setTimeout(resolve, 5));
+
+ const resp = await requestApp(
+ '/api/aliases/gpt-fast',
+ putAuthed(adminSession, baseBody({ display_name: 'GPT Fast', targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'high' } } }] })),
+ );
+ assertEquals(resp.status, 200);
+ const updated = (await resp.json()) as ModelAlias;
+ assertEquals(updated.display_name, 'GPT Fast');
+ assertEquals(updated.targets[0].rules, { reasoning: { effort: 'high' } });
+ // createdAt is preserved; updatedAt is fresh.
+ assertEquals(updated.created_at, before.createdAt);
+ if (updated.updated_at === before.updatedAt) throw new Error('updated_at did not refresh');
+});
+
+test('PUT /api/aliases/:name with a different body.name renames the row', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.deleteAll();
+ await requestApp('/api/aliases', authed(adminSession, baseBody()));
+
+ const resp = await requestApp(
+ '/api/aliases/gpt-fast',
+ putAuthed(adminSession, baseBody({ name: 'gpt-fastest' })),
+ );
+ assertEquals(resp.status, 200);
+ assertEquals(await repo.modelAliases.getByName('gpt-fast'), null);
+ assertExists(await repo.modelAliases.getByName('gpt-fastest'));
+});
+
+test('PUT /api/aliases/:name rename collision returns 409', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.deleteAll();
+ await requestApp('/api/aliases', authed(adminSession, baseBody({ name: 'gpt-fast' })));
+ await requestApp('/api/aliases', authed(adminSession, baseBody({ name: 'gpt-slow' })));
+
+ const resp = await requestApp(
+ '/api/aliases/gpt-fast',
+ putAuthed(adminSession, baseBody({ name: 'gpt-slow' })),
+ );
+ assertEquals(resp.status, 409);
+});
+
+test('PUT /api/aliases/:name on a missing alias returns 404', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.deleteAll();
+
+ const resp = await requestApp('/api/aliases/nope', putAuthed(adminSession, baseBody({ name: 'nope' })));
+ assertEquals(resp.status, 404);
+});
+
+test('DELETE /api/aliases/:name returns 204 when present', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.deleteAll();
+ await requestApp('/api/aliases', authed(adminSession, baseBody()));
+
+ const resp = await requestApp('/api/aliases/gpt-fast', deleteAuthed(adminSession));
+ assertEquals(resp.status, 204);
+ assertEquals(await repo.modelAliases.getByName('gpt-fast'), null);
+});
+
+test('DELETE /api/aliases/:name is idempotent — 204 even when the row is absent', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.deleteAll();
+
+ const resp = await requestApp('/api/aliases/missing', deleteAuthed(adminSession));
+ assertEquals(resp.status, 204);
+});
+
+test('POST /api/aliases rejects an empty targets array with 400', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.deleteAll();
+
+ const resp = await requestApp('/api/aliases', authed(adminSession, baseBody({ targets: [] })));
+ assertEquals(resp.status, 400);
+});
+
+test('POST /api/aliases rejects an empty target_model_id with 400', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.deleteAll();
+
+ const resp = await requestApp(
+ '/api/aliases',
+ authed(adminSession, baseBody({ targets: [{ target_model_id: '', rules: {} }] })),
+ );
+ assertEquals(resp.status, 400);
+});
+
+test('POST /api/aliases rejects non-empty rules on kind=embedding with 400', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.deleteAll();
+
+ const resp = await requestApp(
+ '/api/aliases',
+ authed(adminSession, baseBody({
+ kind: 'embedding',
+ targets: [{ target_model_id: 'text-embedding-3', rules: { verbosity: 'low' } }],
+ })),
+ );
+ assertEquals(resp.status, 400);
+});
+
+test('POST /api/aliases accepts kind=embedding with empty rules', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.deleteAll();
+
+ const resp = await requestApp(
+ '/api/aliases',
+ authed(adminSession, baseBody({
+ kind: 'embedding',
+ targets: [{ target_model_id: 'text-embedding-3', rules: {} }],
+ })),
+ );
+ assertEquals(resp.status, 201);
+});
+
+test('POST /api/aliases rejects unknown reasoning fields on a chat target with 400', async () => {
+ const { repo, adminSession } = await setupAppTest();
+ await repo.modelAliases.deleteAll();
+
+ const resp = await requestApp(
+ '/api/aliases',
+ authed(adminSession, baseBody({
+ targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { bogus: 1 } } }],
+ })),
+ );
+ assertEquals(resp.status, 400);
+});
diff --git a/packages/gateway/src/control-plane/model-aliases/serialize.ts b/packages/gateway/src/control-plane/model-aliases/serialize.ts
new file mode 100644
index 000000000..a3ca87ec3
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/serialize.ts
@@ -0,0 +1,51 @@
+// Snake_case wire <-> camelCase record conversion for model aliases. The wire
+// shape (`ModelAlias`) lives in `@floway-dev/protocols/common` so the
+// dashboard and the control plane share one source of truth; this file is
+// the only place those two shapes meet.
+
+import type { ModelAliasRecord } from '../../repo/types.ts';
+import type { AliasKind, AliasSelection, AliasTarget, ModelAlias } from '@floway-dev/protocols/common';
+
+export const recordToWire = (record: ModelAliasRecord): ModelAlias => ({
+ name: record.name,
+ kind: record.kind,
+ selection: record.selection,
+ display_name: record.displayName,
+ visible_in_models_list: record.visibleInModelsList,
+ targets: record.targets,
+ sort_order: record.sortOrder,
+ created_at: record.createdAt,
+ updated_at: record.updatedAt,
+});
+
+// Wire payload accepted by the create / update body schemas. Every field
+// except `sort_order` is required at this layer; the route owns how the
+// sort order and timestamps are produced before calling wireToRecord.
+export interface ModelAliasWireInput {
+ name: string;
+ kind: AliasKind;
+ selection: AliasSelection;
+ display_name: string | null;
+ visible_in_models_list: boolean;
+ targets: AliasTarget[];
+ sort_order?: number;
+}
+
+// Build a record from a validated wire payload. The caller supplies the
+// fields the wire shape doesn't carry — `sortOrder` (computed via
+// nextSortOrder, or copied from the existing row on update), `createdAt`
+// (now for create, preserved on update), and `updatedAt` (always now).
+export const wireToRecord = (
+ wire: ModelAliasWireInput,
+ meta: { sortOrder: number; createdAt: string; updatedAt: string },
+): ModelAliasRecord => ({
+ name: wire.name,
+ kind: wire.kind,
+ selection: wire.selection,
+ displayName: wire.display_name,
+ visibleInModelsList: wire.visible_in_models_list,
+ targets: wire.targets,
+ sortOrder: meta.sortOrder,
+ createdAt: meta.createdAt,
+ updatedAt: meta.updatedAt,
+});
diff --git a/packages/gateway/src/control-plane/model-aliases/serialize_test.ts b/packages/gateway/src/control-plane/model-aliases/serialize_test.ts
new file mode 100644
index 000000000..72be080ca
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/serialize_test.ts
@@ -0,0 +1,62 @@
+import { test } from 'vitest';
+
+import { recordToWire, wireToRecord } from './serialize.ts';
+import type { ModelAliasRecord } from '../../repo/types.ts';
+import { assertEquals } from '@floway-dev/test-utils';
+
+const record: ModelAliasRecord = {
+ name: 'codex-auto-review',
+ kind: 'chat',
+ selection: 'first-available',
+ displayName: 'Codex Auto Review',
+ visibleInModelsList: true,
+ targets: [
+ { target_model_id: 'codex-auto-review', rules: {} },
+ { target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } },
+ ],
+ sortOrder: 3,
+ createdAt: '2026-06-26T00:00:00.000Z',
+ updatedAt: '2026-06-26T12:00:00.000Z',
+};
+
+test('recordToWire flips camelCase fields to snake_case', () => {
+ const wire = recordToWire(record);
+ assertEquals(wire.name, 'codex-auto-review');
+ assertEquals(wire.kind, 'chat');
+ assertEquals(wire.selection, 'first-available');
+ assertEquals(wire.display_name, 'Codex Auto Review');
+ assertEquals(wire.visible_in_models_list, true);
+ assertEquals(wire.sort_order, 3);
+ assertEquals(wire.created_at, '2026-06-26T00:00:00.000Z');
+ assertEquals(wire.updated_at, '2026-06-26T12:00:00.000Z');
+ assertEquals(wire.targets, record.targets);
+});
+
+test('wireToRecord roundtrips back to the original record', () => {
+ const wire = recordToWire(record);
+ const roundTripped = wireToRecord(wire, {
+ sortOrder: wire.sort_order,
+ createdAt: wire.created_at,
+ updatedAt: wire.updated_at,
+ });
+ assertEquals(roundTripped, record);
+});
+
+test('wireToRecord uses meta sortOrder when the wire payload omits it', () => {
+ const { sort_order: _drop, ...partial } = recordToWire(record);
+ const built = wireToRecord(partial, {
+ sortOrder: 7,
+ createdAt: '2026-01-01T00:00:00.000Z',
+ updatedAt: '2026-06-26T12:00:00.000Z',
+ });
+ assertEquals(built.sortOrder, 7);
+ assertEquals(built.createdAt, '2026-01-01T00:00:00.000Z');
+});
+
+test('wireToRecord preserves a null display_name', () => {
+ const built = wireToRecord(
+ { ...recordToWire(record), display_name: null },
+ { sortOrder: 0, createdAt: 'x', updatedAt: 'y' },
+ );
+ assertEquals(built.displayName, null);
+});
diff --git a/packages/gateway/src/control-plane/routes.ts b/packages/gateway/src/control-plane/routes.ts
index 94b5f06ff..c5aa25202 100644
--- a/packages/gateway/src/control-plane/routes.ts
+++ b/packages/gateway/src/control-plane/routes.ts
@@ -5,10 +5,11 @@ import { authLogin, authLogout, authMe } from './auth/routes.ts';
import { copilotQuota } from './copilot-quota/routes.ts';
import { exportData, importData } from './data-transfer/routes.ts';
import { dumpRoutes } from './dump.ts';
+import { createAlias, deleteAlias, listAliases, updateAlias } from './model-aliases/routes.ts';
import { controlPlaneModels } from './models/routes.ts';
import { performanceOverview, performanceTelemetry } from './performance/routes.ts';
import { createProxy, deleteProxy, listAllBackoffs, listProxies, listProxyBackoffs, resetProxyBackoffs, testProxy, updateProxy } from './proxies/routes.ts';
-import { authLoginBody, changeOwnPasswordBody, claudeCodeAuthorizeUrlBody, claudeCodeImportBody, claudeCodeProbeQuotaBody, claudeCodeRefreshNowBody, claudeCodeReimportBody, claudeCodeSetupTokenImportBody, claudeCodeSetupTokenReimportBody, codexAuthorizeUrlBody, codexImportBody, codexRefreshNowBody, codexReimportBody, copilotAuthPollBody, createKeyBody, createProxyBody, createUpstreamBody, createUserBody, exportQuery, fetchModelsBody, importBody, performanceQuery, resetBackoffBody, searchConfigSchema, searchUsageQuery, testProxyBody, tokenUsageQuery, updateKeyBody, updateProxyBody, updateUpstreamBody, updateUserBody } from './schemas.ts';
+import { authLoginBody, changeOwnPasswordBody, claudeCodeAuthorizeUrlBody, claudeCodeImportBody, claudeCodeProbeQuotaBody, claudeCodeRefreshNowBody, claudeCodeReimportBody, claudeCodeSetupTokenImportBody, claudeCodeSetupTokenReimportBody, codexAuthorizeUrlBody, codexImportBody, codexRefreshNowBody, codexReimportBody, copilotAuthPollBody, createAliasBody, createKeyBody, createProxyBody, createUpstreamBody, createUserBody, exportQuery, fetchModelsBody, importBody, performanceQuery, resetBackoffBody, searchConfigSchema, searchUsageQuery, testProxyBody, tokenUsageQuery, updateAliasBody, updateKeyBody, updateProxyBody, updateUpstreamBody, updateUserBody } from './schemas.ts';
import { getSearchConfigRoute, putSearchConfigRoute, testSearchConfigRoute } from './search-config/routes.ts';
import { searchUsage } from './search-usage/routes.ts';
import { tokenUsage } from './token-usage/routes.ts';
@@ -97,6 +98,12 @@ export const controlPlaneRoutes = new Hono<{ Variables: AuthVars }>()
.get('/proxies/:id/backoffs', listProxyBackoffs)
.patch('/proxies/:id', zValidator('json', updateProxyBody), updateProxy)
.delete('/proxies/:id', deleteProxy)
+ // Model aliases. Admin-only — alias config is gateway-wide tenant state,
+ // and the data-plane resolver runs above prefix routing for every request.
+ .get('/aliases', listAliases)
+ .post('/aliases', zValidator('json', createAliasBody), createAlias)
+ .put('/aliases/:name', zValidator('json', updateAliasBody), updateAlias)
+ .delete('/aliases/:name', deleteAlias)
.get('/search-config', getSearchConfigRoute)
.put('/search-config', zValidator('json', searchConfigSchema), putSearchConfigRoute)
.post('/search-config/test', zValidator('json', searchConfigSchema), testSearchConfigRoute)
diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
index f718539ee..0fce131b4 100644
--- a/packages/gateway/src/control-plane/schemas.ts
+++ b/packages/gateway/src/control-plane/schemas.ts
@@ -593,6 +593,89 @@ export const searchConfigSchema = z.object({
jina: z.object({ apiKey: z.string() }),
});
+// --- model aliases ---
+
+// Per-target chat rules. Field names mirror the IR slot each value overlays —
+// `reasoning.effort` / `verbosity` / `serviceTier` flow verbatim onto the
+// outbound request, so the schema does not narrow them against the target's
+// advertised capability metadata (the spec calls for verbatim forwarding so
+// the operator can drive a feature the catalog doesn't yet advertise).
+// Shape of `rules.reasoning` for a chat-kind target. Every field is optional;
+// `.strict()` turns any key not listed here into a validation error.
+const chatAliasReasoningSchema = z.object({
+ effort: z.enum(['none', 'low', 'medium', 'high', 'xhigh']).optional(),
+ // Integer >= 0; no upper bound is enforced here.
+ budget_tokens: z.number().int().nonnegative().optional(),
+ adaptive: z.boolean().optional(),
+ summary: z.enum(['auto', 'concise', 'detailed', 'none']).optional(),
+ mandatory: z.boolean().optional(),
+}).strict();
+
+// Full per-target rule overlay for chat-kind aliases. All three slots are
+// optional, so `{}` is valid; `.strict()` rejects unknown top-level keys.
+const chatAliasRulesSchema = z.object({
+ reasoning: chatAliasReasoningSchema.optional(),
+ verbosity: z.enum(['low', 'medium', 'high']).optional(),
+ serviceTier: z.enum(['default', 'flex', 'priority', 'scale', 'fast']).optional(),
+}).strict();
+
+// Rules are validated against the alias-level kind in a superRefine pass on
+// the body schema below — chat-kind aliases accept ChatAliasRules; other kinds
+// require an empty object. Each target_model_id is opaque (no `/` semantics
+// inside the alias layer), so the only structural check is non-emptiness.
+// One entry of `targets`. `rules` is deliberately accepted as an open
+// string-keyed record at this level; its contents are checked against the
+// alias kind by the body-level refinement.
+const aliasTargetSchema = z.object({
+ target_model_id: z.string().min(1),
+ rules: z.record(z.string(), z.unknown()),
+});
+
+// Field set shared by the create and update request bodies.
+const aliasBaseShape = {
+ name: z.string().min(1),
+ kind: z.enum(['chat', 'embedding', 'image']),
+ selection: z.enum(['random', 'first-available']),
+ // Must be present: either a non-empty string or an explicit null.
+ display_name: z.string().min(1).nullable(),
+ visible_in_models_list: z.boolean(),
+ // An alias needs at least one target.
+ targets: z.array(aliasTargetSchema).min(1),
+ // Optional on the wire; any integer is accepted when supplied.
+ sort_order: z.number().int().optional(),
+};
+
+// Unrefined object schema; the kind-vs-rules cross-check is layered on top of
+// it via superRefine.
+const aliasBodyCore = z.object(aliasBaseShape);
+
+// superRefine cross-validates each target's `rules` against the alias-level
+// kind. For chat: parse through chatAliasRulesSchema and surface the inner
+// issue verbatim. For embedding / image: today there are no per-target rules,
+// so the slot must be `{}` — populating it later just needs a fresh schema.
+const aliasBodyRulesRefinement = (
+  // The parsed (already shape-valid) alias body; typed from the core schema so
+  // `kind` and `targets` stay in sync with aliasBaseShape.
+  value: z.infer<typeof aliasBodyCore>,
+  ctx: z.core.$RefinementCtx,
+): void => {
+  value.targets.forEach((target, index) => {
+    if (value.kind === 'chat') {
+      const parsed = chatAliasRulesSchema.safeParse(target.rules);
+      if (!parsed.success) {
+        // Re-root every inner issue under targets[index].rules so the client
+        // can point at the exact offending field.
+        for (const issue of parsed.error.issues) {
+          ctx.issues.push({
+            code: 'custom',
+            message: issue.message,
+            path: ['targets', index, 'rules', ...issue.path],
+            input: target.rules,
+          });
+        }
+      }
+      return;
+    }
+    // Non-chat kinds have no rule overlay: anything other than `{}` is an error.
+    if (Object.keys(target.rules).length !== 0) {
+      ctx.issues.push({
+        code: 'custom',
+        message: `rules must be empty for kind=${value.kind}`,
+        path: ['targets', index, 'rules'],
+        input: target.rules,
+      });
+    }
+  });
+};
+
+// Create and update share the same body shape — the difference is operational:
+// create rejects PK collisions, update reads the path `:name` as the old name
+// and treats a different `body.name` as a rename. Splitting them keeps the
+// type names self-documenting at the RPC-client surface.
+// Both exports are the same core object schema with the same rules refinement
+// applied; they are separate bindings only so each route names its own body.
+export const createAliasBody = aliasBodyCore.superRefine(aliasBodyRulesRefinement);
+export const updateAliasBody = aliasBodyCore.superRefine(aliasBodyRulesRefinement);
+
// --- data transfer ---
export const importBody = z.object({
diff --git a/packages/gateway/src/repo/memory.ts b/packages/gateway/src/repo/memory.ts
index 5a85dba39..49e30c5ef 100644
--- a/packages/gateway/src/repo/memory.ts
+++ b/packages/gateway/src/repo/memory.ts
@@ -13,6 +13,8 @@ import type {
ApiKeyRepo,
BackoffRow,
CachedModelsRow,
+ ModelAliasesRepo,
+ ModelAliasRecord,
ModelsCacheRepo,
PerformanceDimensions,
PerformanceErrorSample,
@@ -882,6 +884,56 @@ class MemoryProxyBackoffRepo implements ProxyBackoffRepo {
const cloneBackoffRow = (row: BackoffRow): BackoffRow => ({ ...row });
+// Returns an independent copy of an alias record: scalar fields are copied by
+// the spread, and `targets` is deep-cloned separately because the spread alone
+// would share the array and its nested rule objects.
+const cloneModelAliasRecord = (record: ModelAliasRecord): ModelAliasRecord => ({
+ ...record,
+ // Deep-clone the JSON payload so a caller's mutation of the returned record
+ // never leaks back into the store. Targets and their inner rule objects are
+ // plain JSON, so structuredClone is the cheapest faithful copy.
+ targets: structuredClone(record.targets),
+});
+
+// In-memory ModelAliasesRepo keyed by alias name. Records are cloned on the
+// way in and on the way out, so callers never share object identity with the
+// store. Failures are reported as rejected promises (never synchronous
+// throws) so the behaviour matches the async SQL-backed implementation.
+class MemoryModelAliasesRepo implements ModelAliasesRepo {
+  private store = new Map<string, ModelAliasRecord>();
+
+  // All aliases, ordered by sortOrder and then createdAt.
+  list(): Promise<ModelAliasRecord[]> {
+    return Promise.resolve(
+      [...this.store.values()]
+        .map(cloneModelAliasRecord)
+        .sort((a, b) => a.sortOrder - b.sortOrder || a.createdAt.localeCompare(b.createdAt)),
+    );
+  }
+
+  // The alias stored under `name`, or null when there is none.
+  getByName(name: string): Promise<ModelAliasRecord | null> {
+    const found = this.store.get(name);
+    return Promise.resolve(found ? cloneModelAliasRecord(found) : null);
+  }
+
+  // Adds a new alias; rejects when the name is already taken.
+  insert(record: ModelAliasRecord): Promise<void> {
+    if (this.store.has(record.name)) {
+      return Promise.reject(new Error(`alias ${record.name} already exists`));
+    }
+    this.store.set(record.name, cloneModelAliasRecord(record));
+    return Promise.resolve();
+  }
+
+  // Replaces the alias stored under `oldName`. A different `record.name` is a
+  // rename. Rejects when `oldName` is missing or the new name collides with
+  // another alias; in both cases the store is left untouched.
+  update(oldName: string, record: ModelAliasRecord): Promise<void> {
+    if (!this.store.has(oldName)) {
+      return Promise.reject(new Error(`alias ${oldName} not found`));
+    }
+    if (oldName !== record.name && this.store.has(record.name)) {
+      return Promise.reject(new Error(`alias ${record.name} already exists`));
+    }
+    this.store.delete(oldName);
+    this.store.set(record.name, cloneModelAliasRecord(record));
+    return Promise.resolve();
+  }
+
+  // Removes one alias; resolves to whether anything was removed.
+  delete(name: string): Promise<boolean> {
+    return Promise.resolve(this.store.delete(name));
+  }
+
+  // Removes every alias.
+  deleteAll(): Promise<void> {
+    this.store.clear();
+    return Promise.resolve();
+  }
+}
+
export class InMemoryRepo implements Repo {
apiKeys: ApiKeyRepo;
users: UsersRepo;
@@ -894,6 +946,7 @@ export class InMemoryRepo implements Repo {
upstreams: UpstreamRepo;
proxies: ProxyRepo;
proxyBackoffs: ProxyBackoffRepo;
+ modelAliases: ModelAliasesRepo;
responsesItems: ResponsesItemsRepo;
responsesSnapshots: ResponsesSnapshotsRepo;
@@ -909,6 +962,7 @@ export class InMemoryRepo implements Repo {
this.upstreams = new MemoryUpstreamRepo();
this.proxies = new MemoryProxyRepo(this.upstreams);
this.proxyBackoffs = new MemoryProxyBackoffRepo();
+ this.modelAliases = new MemoryModelAliasesRepo();
this.responsesItems = new MemoryResponsesItemsRepo();
this.responsesSnapshots = new MemoryResponsesSnapshotsRepo();
}
diff --git a/packages/gateway/src/repo/sql.ts b/packages/gateway/src/repo/sql.ts
index b716d07e4..16645772e 100644
--- a/packages/gateway/src/repo/sql.ts
+++ b/packages/gateway/src/repo/sql.ts
@@ -7,6 +7,8 @@ import type {
ApiKeyRepo,
BackoffRow,
CachedModelsRow,
+ ModelAliasesRepo,
+ ModelAliasRecord,
ModelsCacheRepo,
PerformanceDimensions,
PerformanceErrorSample,
@@ -38,7 +40,7 @@ import { latencyBucketForMs } from '../shared/performance-histogram.ts';
import { generateSessionToken } from '../shared/session-tokens.ts';
import { assertWebSearchProviderName } from '../shared/web-search-providers.ts';
import type { SqlDatabase, SqlPreparedStatement, SqlResult } from '@floway-dev/platform';
-import { BILLING_DIMENSIONS, type BillingDimension, type ModelPricing, resolveEffectivePricing, unitPriceForDimension } from '@floway-dev/protocols/common';
+import { BILLING_DIMENSIONS, type AliasKind, type AliasSelection, type AliasTarget, type BillingDimension, type ModelPricing, resolveEffectivePricing, unitPriceForDimension } from '@floway-dev/protocols/common';
import type { ProxyFallbackEntry, ModelPrefixConfig, UpstreamModel, UpstreamProviderKind, UpstreamRecord } from '@floway-dev/provider';
import { normalizeModelPrefix } from '@floway-dev/provider';
@@ -1585,6 +1587,151 @@ const toBackoffRow = (row: BackoffRowDb): BackoffRow => ({
lastErrorAt: row.last_error_at,
});
+// Raw `model_aliases` row as selected from the database, before decoding into
+// a ModelAliasRecord.
+interface ModelAliasRow {
+ name: string;
+ kind: string;
+ selection: string;
+ display_name: string | null;
+ // Integer flag; any non-zero value decodes to true.
+ visible_in_models_list: number;
+ // JSON-encoded array of alias targets.
+ targets: string;
+ sort_order: number;
+ created_at: string;
+ updated_at: string;
+}
+
+// Column list shared by every SELECT and INSERT on model_aliases. INSERT binds
+// its values positionally, so the bind order must follow this order exactly.
+const MODEL_ALIAS_COLUMNS = 'name, kind, selection, display_name, visible_in_models_list, targets, sort_order, created_at, updated_at';
+
+// Decodes the JSON text stored in `model_aliases.targets` into alias targets.
+// `name` is the owning alias and is used only to make error messages
+// traceable. Throws when the text is not valid JSON, when the decoded value is
+// not an array, or when any entry lacks a string `target_model_id` or an
+// object `rules` — a corrupt row should fail at the storage boundary instead
+// of surfacing later as an undefined access in the resolver.
+const parseAliasTargets = (raw: string, name: string): AliasTarget[] => {
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(raw);
+  } catch (cause) {
+    throw new Error(`model_aliases.targets JSON is malformed for ${name}`, { cause });
+  }
+  if (!Array.isArray(parsed)) throw new Error(`model_aliases.targets is not an array for ${name}`);
+
+  const isPlainObject = (value: unknown): value is Record<string, unknown> =>
+    typeof value === 'object' && value !== null && !Array.isArray(value);
+
+  parsed.forEach((entry: unknown, index: number) => {
+    if (!isPlainObject(entry) || typeof entry.target_model_id !== 'string' || !isPlainObject(entry.rules)) {
+      throw new Error(`model_aliases.targets[${index}] is not a valid target for ${name}`);
+    }
+  });
+  // Structure verified above; rule contents are validated at write time.
+  return parsed as AliasTarget[];
+};
+
+// Decodes one database row into the camelCase record the rest of the gateway
+// uses. Throws (via parseAliasTargets) when the stored targets JSON is unusable.
+const toModelAliasRecord = (row: ModelAliasRow): ModelAliasRecord => ({
+ name: row.name,
+ // kind / selection are narrowed by cast only; no runtime check happens here.
+ kind: row.kind as AliasKind,
+ selection: row.selection as AliasSelection,
+ displayName: row.display_name,
+ visibleInModelsList: row.visible_in_models_list !== 0,
+ targets: parseAliasTargets(row.targets, row.name),
+ sortOrder: row.sort_order,
+ createdAt: row.created_at,
+ updatedAt: row.updated_at,
+});
+
+// SQL-backed ModelAliasesRepo over the `model_aliases` table.
+class SqlModelAliasesRepo implements ModelAliasesRepo {
+  constructor(private db: SqlDatabase) {}
+
+  // Builds the bound INSERT for `record`. Shared by insert() and the rename
+  // path of update() so the placeholder count and bind order — which must
+  // follow MODEL_ALIAS_COLUMNS — live in exactly one place.
+  private insertStatement(record: ModelAliasRecord) {
+    return this.db
+      .prepare(`INSERT INTO model_aliases (${MODEL_ALIAS_COLUMNS}) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`)
+      .bind(
+        record.name,
+        record.kind,
+        record.selection,
+        record.displayName,
+        record.visibleInModelsList ? 1 : 0,
+        JSON.stringify(record.targets),
+        record.sortOrder,
+        record.createdAt,
+        record.updatedAt,
+      );
+  }
+
+  // All aliases, ordered by sort_order and then created_at.
+  async list(): Promise<ModelAliasRecord[]> {
+    const { results } = await this.db
+      .prepare(`SELECT ${MODEL_ALIAS_COLUMNS} FROM model_aliases ORDER BY sort_order, created_at`)
+      .all<ModelAliasRow>();
+    return results.map(toModelAliasRecord);
+  }
+
+  // The alias stored under `name`, or null when no such row exists.
+  async getByName(name: string): Promise<ModelAliasRecord | null> {
+    const row = await this.db
+      .prepare(`SELECT ${MODEL_ALIAS_COLUMNS} FROM model_aliases WHERE name = ?`)
+      .bind(name)
+      .first<ModelAliasRow>();
+    return row ? toModelAliasRecord(row) : null;
+  }
+
+  // Inserts a new row. A primary-key collision surfaces as whatever error the
+  // database driver raises for the failed INSERT.
+  async insert(record: ModelAliasRecord): Promise<void> {
+    await this.insertStatement(record).run();
+  }
+
+  // Replaces the row keyed by `oldName`; a different `record.name` is a rename.
+  // Throws `alias <oldName> not found` when the source row does not exist.
+  async update(oldName: string, record: ModelAliasRecord): Promise<void> {
+    if (oldName === record.name) {
+      // Plain in-place update — the PK is unchanged, no rename to coordinate.
+      // Every non-key column is rewritten from `record`, created_at included,
+      // so the caller must pass the value it wants kept.
+      const result = await this.db
+        .prepare(
+          `UPDATE model_aliases SET
+            kind = ?,
+            selection = ?,
+            display_name = ?,
+            visible_in_models_list = ?,
+            targets = ?,
+            sort_order = ?,
+            created_at = ?,
+            updated_at = ?
+          WHERE name = ?`,
+        )
+        .bind(
+          record.kind,
+          record.selection,
+          record.displayName,
+          record.visibleInModelsList ? 1 : 0,
+          JSON.stringify(record.targets),
+          record.sortOrder,
+          record.createdAt,
+          record.updatedAt,
+          oldName,
+        )
+        .run();
+      if ((result.meta.changes ?? 0) === 0) throw new Error(`alias ${oldName} not found`);
+      return;
+    }
+
+    // Rename. Verify the source row exists first so a missing oldName fails
+    // before any write hits the table. Then INSERT(new) + DELETE(old) atomically
+    // through the batch primitive — a PK collision against `record.name`
+    // bubbles up from the INSERT, which is exactly the "rename collides" signal
+    // the route layer translates to 409.
+    // NOTE(review): the existence check is a separate read ahead of the batch,
+    // so a concurrent delete of `oldName` in between would go unnoticed.
+    const existing = await this.getByName(oldName);
+    if (!existing) throw new Error(`alias ${oldName} not found`);
+
+    await runStatements(this.db, [
+      this.insertStatement(record),
+      this.db.prepare('DELETE FROM model_aliases WHERE name = ?').bind(oldName),
+    ]);
+  }
+
+  // Deletes one alias; resolves to true when a row was actually removed.
+  async delete(name: string): Promise<boolean> {
+    const result = await this.db
+      .prepare('DELETE FROM model_aliases WHERE name = ?')
+      .bind(name)
+      .run();
+    return (result.meta.changes ?? 0) > 0;
+  }
+
+  // Deletes every alias row.
+  async deleteAll(): Promise<void> {
+    await this.db.prepare('DELETE FROM model_aliases').run();
+  }
+}
+
export class SqlRepo implements Repo {
users: UsersRepo;
sessions: SessionsRepo;
@@ -1597,6 +1744,7 @@ export class SqlRepo implements Repo {
upstreams: UpstreamRepo;
proxies: ProxyRepo;
proxyBackoffs: ProxyBackoffRepo;
+ modelAliases: ModelAliasesRepo;
responsesItems: ResponsesItemsRepo;
responsesSnapshots: ResponsesSnapshotsRepo;
@@ -1612,6 +1760,7 @@ export class SqlRepo implements Repo {
this.upstreams = new SqlUpstreamRepo(db);
this.proxies = new SqlProxyRepo(db);
this.proxyBackoffs = new SqlProxyBackoffRepo(db);
+ this.modelAliases = new SqlModelAliasesRepo(db);
this.responsesItems = new SqlResponsesItemsRepo(db);
this.responsesSnapshots = new SqlResponsesSnapshotsRepo(db);
}
diff --git a/packages/gateway/src/repo/types.ts b/packages/gateway/src/repo/types.ts
index 0341d41ef..3ca32e5c0 100644
--- a/packages/gateway/src/repo/types.ts
+++ b/packages/gateway/src/repo/types.ts
@@ -1,6 +1,6 @@
import type { HistogramBucket } from '../shared/performance-histogram.ts';
import type { WebSearchProviderName } from '../shared/web-search-providers.ts';
-import type { BillingDimension, ModelPricing } from '@floway-dev/protocols/common';
+import type { AliasKind, AliasSelection, AliasTarget, BillingDimension, ModelPricing } from '@floway-dev/protocols/common';
import type { UpstreamModel, UpstreamRecord } from '@floway-dev/provider';
export interface ApiKey {
@@ -264,6 +264,39 @@ export interface ProxyBackoffRepo {
deleteAll(): Promise;
}
+// One alias row. The wire DTO (`ModelAlias` in @floway-dev/protocols/common)
+// is the snake_case projection of this record; conversion lives in
+// control-plane/model-aliases/serialize.ts.
+export interface ModelAliasRecord {
+ // Unique key of the alias; repos look rows up and detect collisions by it.
+ name: string;
+ kind: AliasKind;
+ selection: AliasSelection;
+ // null = derive at render time from targets + rules.
+ displayName: string | null;
+ visibleInModelsList: boolean;
+ // Order is meaningful for selection=first-available; preserved (but
+ // ignored) for selection=random.
+ targets: AliasTarget[];
+ // Primary ordering key for list(); createdAt breaks ties.
+ sortOrder: number;
+ createdAt: string;
+ updatedAt: string;
+}
+
+// Storage contract for model aliases, implemented by the in-memory and the
+// SQL-backed repos.
+export interface ModelAliasesRepo {
+  // All aliases, ordered by sortOrder and then createdAt.
+  list(): Promise<ModelAliasRecord[]>;
+  // The alias named `name`, or null when it does not exist.
+  getByName(name: string): Promise<ModelAliasRecord | null>;
+  // Throws on primary-key collision so the route layer can surface a 409.
+  insert(record: ModelAliasRecord): Promise<void>;
+  // Replaces the row keyed by `oldName`. When oldName === record.name the
+  // call is a plain UPDATE; when they differ this is a rename, executed as
+  // INSERT(new) + DELETE(old) inside one transaction so dependent reads
+  // stay consistent. Throws when `oldName` does not exist, or when the
+  // rename target already collides with a different row.
+  update(oldName: string, record: ModelAliasRecord): Promise<void>;
+  // Resolves to true when a row was removed, false when none matched.
+  delete(name: string): Promise<boolean>;
+  deleteAll(): Promise<void>;
+}
+
export interface StoredResponsesItem {
id: string;
apiKeyId: string | null;
@@ -330,6 +363,7 @@ export interface Repo {
upstreams: UpstreamRepo;
proxies: ProxyRepo;
proxyBackoffs: ProxyBackoffRepo;
+ modelAliases: ModelAliasesRepo;
responsesItems: ResponsesItemsRepo;
responsesSnapshots: ResponsesSnapshotsRepo;
}
diff --git a/packages/protocols/src/common/aliases.ts b/packages/protocols/src/common/aliases.ts
new file mode 100644
index 000000000..ac212f91e
--- /dev/null
+++ b/packages/protocols/src/common/aliases.ts
@@ -0,0 +1,79 @@
+// Wire-level types for model aliases. Lives in @floway-dev/protocols because
+// both the gateway control plane and the dashboard SPA need the same DTO
+// shape — keeping it here means a single source of truth for snake_case
+// field names and the JSON-serializable rule shapes.
+//
+// An alias is a named virtual model id that resolves at request time to one
+// of N target model ids, optionally overlaying protocol-rule overrides
+// (reasoning effort, verbosity, service tier, ...) onto the request IR.
+// Resolution runs above prefix routing and never re-enters itself, which
+// makes recursive aliasing impossible by construction.
+
+// Endpoint family the alias serves. An alias belongs to exactly one kind;
+// rules are only allowed when the kind admits them (today that is `chat`).
+export type AliasKind = 'chat' | 'embedding' | 'image';
+
+// Target-picking strategy applied to the pool of currently-routable targets:
+//
+// - `first-available` — pick the first target in declaration order whose
+// target_model_id resolves to an enabled upstream binding.
+// - `random` — pick uniformly at random from the same pool.
+//
+// When the pool is empty both strategies surface the same 404 to the caller.
+export type AliasSelection = 'random' | 'first-available';
+
+// Discrete reasoning-effort presets understood across upstreams. `xhigh`
+// matches the wire value Anthropic / OpenAI use for the highest tier.
+export type ReasoningEffort = 'none' | 'low' | 'medium' | 'high' | 'xhigh';
+
+// Reasoning-summary verbosity hint emitted on the Responses / Chat surface.
+export type ReasoningSummary = 'auto' | 'concise' | 'detailed' | 'none';
+
+// Output verbosity hint (OpenAI Responses `verbosity`).
+export type Verbosity = 'low' | 'medium' | 'high';
+
+// Per-request service tier the upstream advertises (Anthropic `fast`,
+// OpenAI `priority` / `flex` / `scale`, default tier).
+export type ServiceTier = 'default' | 'flex' | 'priority' | 'scale' | 'fast';
+
+// Rule overlay applied to a chat-kind alias target. Every field is optional;
+// an absent field leaves the inbound request value untouched. Rule values
+// are forwarded verbatim to the upstream — the gateway does not narrow them
+// against the target's advertised capability metadata.
+export interface ChatAliasRules {
+ reasoning?: {
+ effort?: ReasoningEffort;
+ // The control-plane body schema accepts only a non-negative integer here.
+ budget_tokens?: number;
+ adaptive?: boolean;
+ summary?: ReasoningSummary;
+ mandatory?: boolean;
+ };
+ verbosity?: Verbosity;
+ // camelCase, unlike the snake_case fields around it; the control-plane body
+ // schema uses the same key.
+ serviceTier?: ServiceTier;
+}
+
+// Rule overlay union keyed by `AliasKind`. Embedding and image targets carry
+// an empty record today; the schema reserves the slot so per-kind rules can
+// grow later without a fresh migration. `Record<string, never>` is the
+// "object with no properties" type, i.e. `{}` on the wire.
+export type AliasRules = ChatAliasRules | Record<string, never>;
+
+// One target row inside an alias's `targets` list. Order is meaningful for
+// `first-available` selection and preserved (but ignored) for `random`.
+export interface AliasTarget {
+ target_model_id: string;
+ rules: AliasRules;
+}
+
+// Wire DTO returned by `/api/aliases`. snake_case to match the rest of the
+// control plane; `display_name === null` means "derive at render time".
+export interface ModelAlias {
+ name: string;
+ kind: AliasKind;
+ selection: AliasSelection;
+ display_name: string | null;
+ visible_in_models_list: boolean;
+ // Declaration order matters for `first-available` selection.
+ targets: AliasTarget[];
+ sort_order: number;
+ // Timestamps travel as strings.
+ created_at: string;
+ updated_at: string;
+}
diff --git a/packages/protocols/src/common/index.ts b/packages/protocols/src/common/index.ts
index 51f8ef53b..1af2533bb 100644
--- a/packages/protocols/src/common/index.ts
+++ b/packages/protocols/src/common/index.ts
@@ -1,3 +1,4 @@
+export * from './aliases.ts';
export * from './capabilities.ts';
export * from './models.ts';
export * from './openai-stream.ts';
From e6c5e0c6373ffa51e98dca4919743e0569e71519 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 18:04:42 +0800
Subject: [PATCH 050/170] feat(aliases): data-plane resolver + per-protocol
rule overlay
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
`resolveAlias` runs once per request, above prefix routing. It looks up
the inbound model name in the alias repo, narrows on the kind matching
the inbound endpoint group (chat / embedding / image), pre-filters the
target list to entries currently mapping to an enabled upstream binding,
then picks one — `pool[0]` for `first-available`, uniform-random for
`random`. A kind mismatch returns null so the literal id falls through
to prefix routing's miss surface; an all-unroutable hit throws
`AliasNoTargetAvailableError` with the canonical
`alias 'X' has N target(s); none currently map to an enabled upstream
binding` message. The shadow-the-real-model pattern (an alias whose
first target equals its own name) works automatically — alias names
never re-enter the alias layer, so the target string is fed verbatim
back into the existing prefix router.
Four per-protocol apply helpers stamp `ChatAliasRules` onto each chat
IR. `applyChatRulesToChatCompletions` sets `reasoning_effort` /
`thinking_budget` / `adaptive_thinking` / `reasoning_summary` /
`verbosity` / `service_tier`. `applyChatRulesToResponses` lands the
same controls onto `reasoning.*` / `thinking_budget` / `text.verbosity`
/ `service_tier`. `applyChatRulesToMessages` routes effort to
`output_config.effort`, budget / adaptive to `thinking.*`, and uses the
existing `speed: 'fast'` ↔ `service_tier: 'fast'` bridge so a native
Messages target sees Fast Mode through its own field.
`applyChatRulesToGemini` collapses reasoning onto
`generationConfig.thinkingConfig` (effort → `thinkingLevel`, budget →
`thinkingBudget`, adaptive → `thinkingBudget: -1`) and pushes
verbosity / serviceTier through the existing extension surface. Rule
values pass through verbatim — the gateway does not enum-gate against
catalog metadata.
All four chat serves and the embeddings / images passthrough endpoints
call the resolver before candidate enumeration; the resolved id replaces
`payload.model` (or the URL-carried `model` arg on Gemini) and the
`x-floway-alias` header is staged on a new `responseHeaders` bag on
GatewayCtx (applied by `finalizeGatewayResponse`).
`AliasNoTargetAvailableError` is lifted into a new
`alias-no-target-available` ChatServeFailure variant so each chat
protocol's existing failure renderer surfaces it as a 404 in the
protocol-native error envelope.
Tests:
- resolve_test.ts (9 tests) — kind match / mismatch, available pool
filtering, first-available vs random, shadow pattern + fallback,
embedding-kind endpoint filtering, no-target-available throw.
- apply_test.ts (16 tests) — empty rules, full overlay, overwrite, and
cross-protocol bridge per protocol.
- One alias integration test per chat protocol in the existing
serve_test.ts files — mocks the resolver to inject a resolution and
asserts the resolved id reaches candidate enumeration and the rule
overrides land on the IR.
Out of scope (separate tasks per the design): /v1/models alias listing
+ PublicModelAliasedFrom, dashboard UI. /v1/completions does not
participate in alias resolution today — the AliasKind enum
(chat/embedding/image) doesn't model it and its IR has no rule-overlay
surface; revisit when there's a real need.
---
.../chat/chat-completions/attempt_test.ts | 1 +
.../chat/chat-completions/errors.ts | 2 +
.../demote-developer-to-system_test.ts | 1 +
.../demote-interleaved-system-to-user_test.ts | 1 +
...le-reasoning-on-forced-tool-choice_test.ts | 1 +
.../include-usage-stream-options_test.ts | 1 +
.../interceptors/normalize-usage_test.ts | 1 +
.../vendor-deepseek-normalize_test.ts | 1 +
.../vendor-kimi-normalize_test.ts | 1 +
.../vendor-qwen-normalize_test.ts | 1 +
.../data-plane/chat/chat-completions/serve.ts | 8 +
.../chat/chat-completions/serve_test.ts | 58 ++++-
.../data-plane/chat/gemini/attempt_test.ts | 1 +
.../src/data-plane/chat/gemini/errors.ts | 2 +
.../strip-safety-settings_test.ts | 1 +
.../strip-unsupported-part-fields_test.ts | 1 +
.../strip-unsupported-tools_test.ts | 1 +
.../suppress-thought-parts_test.ts | 1 +
.../data-plane/chat/gemini/respond_test.ts | 1 +
.../src/data-plane/chat/gemini/serve.ts | 20 +-
.../src/data-plane/chat/gemini/serve_test.ts | 51 +++-
.../data-plane/chat/messages/attempt_test.ts | 1 +
.../src/data-plane/chat/messages/errors.ts | 2 +
.../demote-interleaved-system-to-user_test.ts | 1 +
...le-reasoning-on-forced-tool-choice_test.ts | 1 +
.../strip-billing-attribution_test.ts | 1 +
.../interceptors/web-search-shim_test.ts | 1 +
.../data-plane/chat/messages/respond_test.ts | 1 +
.../src/data-plane/chat/messages/serve.ts | 14 +
.../data-plane/chat/messages/serve_test.ts | 53 +++-
.../data-plane/chat/responses/attempt_test.ts | 1 +
.../src/data-plane/chat/responses/errors.ts | 2 +
.../canonicalize-encrypted-content_test.ts | 1 +
.../demote-developer-to-system_test.ts | 1 +
.../demote-interleaved-system-to-user_test.ts | 1 +
...le-reasoning-on-forced-tool-choice_test.ts | 1 +
.../interceptors/retry-cyber-policy_test.ts | 2 +
.../interceptors/server-tool-shim_test.ts | 2 +
.../image-generation-integration_test.ts | 1 +
.../server-tools/image-generation_test.ts | 1 +
.../vendor-deepseek-normalize_test.ts | 1 +
.../vendor-qwen-normalize_test.ts | 1 +
.../data-plane/chat/responses/serve-prep.ts | 8 +
.../data-plane/chat/responses/serve_test.ts | 55 +++-
.../src/data-plane/chat/shared/errors.ts | 9 +-
.../src/data-plane/chat/shared/gateway-ctx.ts | 12 +
.../data-plane/chat/shared/respond_test.ts | 1 +
.../chat/shared/upstream-telemetry_test.ts | 1 +
.../src/data-plane/embeddings/serve.ts | 14 +-
.../gateway/src/data-plane/images/serve.ts | 26 +-
.../src/data-plane/model-aliases/apply.ts | 122 +++++++++
.../data-plane/model-aliases/apply_test.ts | 198 ++++++++++++++
.../src/data-plane/model-aliases/resolve.ts | 132 ++++++++++
.../data-plane/model-aliases/resolve_test.ts | 241 ++++++++++++++++++
.../model-aliases/serve-integration.ts | 102 ++++++++
.../gateway/src/test-helpers/gateway-ctx.ts | 1 +
56 files changed, 1158 insertions(+), 10 deletions(-)
create mode 100644 packages/gateway/src/data-plane/model-aliases/apply.ts
create mode 100644 packages/gateway/src/data-plane/model-aliases/apply_test.ts
create mode 100644 packages/gateway/src/data-plane/model-aliases/resolve.ts
create mode 100644 packages/gateway/src/data-plane/model-aliases/resolve_test.ts
create mode 100644 packages/gateway/src/data-plane/model-aliases/serve-integration.ts
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
index 004c95cfb..bf3e3ef3c 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
@@ -23,6 +23,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/errors.ts b/packages/gateway/src/data-plane/chat/chat-completions/errors.ts
index 9d7c12a5f..ea5b3673c 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/errors.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/errors.ts
@@ -34,5 +34,7 @@ export const renderChatCompletionsFailure = (
return openAiErrorResult(404, appendFailedUpstreams(`Model ${failure.model} is not available on any configured upstream.`, failure.failedUpstreams));
case 'model-unsupported':
return openAiErrorResult(400, appendFailedUpstreams(`Model ${failure.model} does not support the /chat/completions endpoint.`, failure.failedUpstreams));
+ case 'alias-no-target-available':
+ return openAiErrorResult(404, `alias '${failure.aliasName}' has ${failure.targetCount} target(s); none currently map to an enabled upstream binding`);
}
};
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
index 3f2664912..e4edd7951 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
index 6f1ef50d4..40cc5b48c 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 331856b9b..928b18b72 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
index 2af2ecba0..fb02a7314 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
index d4996ef8c..497a49cd8 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
index 176bad4b2..963125131 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
@@ -20,6 +20,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
index e2c3e61d8..3a6ee205f 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
index 8a8a7d6d8..f1c536f5a 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
index 73e8c1afd..3f04c8b8b 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
@@ -1,6 +1,8 @@
import { chatCompletionsAttempt } from './attempt.ts';
import { renderChatCompletionsFailure } from './errors.ts';
import { planChatCompletionsRouting } from './routing.ts';
+import { AliasNoTargetAvailableError, aliasFailureFromError } from '../../model-aliases/resolve.ts';
+import { resolveAndApplyAliasForChatCompletions } from '../../model-aliases/serve-integration.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
import { enumerateProviderCandidates } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -18,6 +20,12 @@ export interface ChatCompletionsServeGenerateArgs {
export const chatCompletionsServe = {
generate: async (args: ChatCompletionsServeGenerateArgs): Promise>> => {
const { payload, ctx, store, headers } = args;
+ try {
+ await resolveAndApplyAliasForChatCompletions(payload, ctx);
+ } catch (error) {
+ if (error instanceof AliasNoTargetAvailableError) return renderChatCompletionsFailure(aliasFailureFromError(error));
+ throw error;
+ }
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: ctx.upstreamIds,
model: payload.model,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
index c3f90f81d..9117e2f80 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
@@ -16,11 +16,13 @@ import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-d
// Mock the candidates seam so each test hands the serve exactly the
// provider candidates it wants.
const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
+const lastCandidatesCall: { model?: string } = {};
vi.mock('../shared/candidates.ts', async importOriginal => {
const original = await importOriginal();
return {
...original,
- enumerateProviderCandidates: vi.fn(async () => {
+ enumerateProviderCandidates: vi.fn(async (args: { model: string }) => {
+ lastCandidatesCall.model = args.model;
const next = candidatesQueue.shift();
if (next === undefined) throw new Error('serve_test: no candidates enqueued');
return next;
@@ -28,6 +30,22 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
};
});
+// Mock the alias resolver so the integration test can inject a resolution
+// without standing up the full per-request fetcher + registry stack.
+//
+// Every `resolveAlias` call consumes the head of `aliasResolutionQueue`:
+//   - a resolution object is returned as-is,
+//   - a `null` entry, or an empty queue, makes the mock return `null`,
+//   - an `Error` entry is thrown.
+// The queue lives at module scope and is never reset here, so an entry a test
+// enqueues but does not consume is seen by the next test that resolves.
+type QueuedAliasResolution = { targetModelId: string; rules: Record<string, unknown>; aliasName: string } | null | Error;
+const aliasResolutionQueue: QueuedAliasResolution[] = [];
+vi.mock('../../model-aliases/resolve.ts', async importOriginal => {
+  // Type the original module so the spread below is checked; only
+  // `resolveAlias` is replaced, every other export keeps its real implementation.
+  const original = await importOriginal<typeof import('../../model-aliases/resolve.ts')>();
+  return {
+    ...original,
+    resolveAlias: vi.fn(async () => {
+      // `shift()` yields `undefined` only when the queue is empty; fold that
+      // into the same `null` result an explicit `null` entry produces.
+      const next = aliasResolutionQueue.shift() ?? null;
+      if (next instanceof Error) throw next;
+      return next;
+    }),
+  };
+});
+
const { chatCompletionsServe } = await import('./serve.ts');
const API_KEY_ID = 'key_chat_completions_serve_test';
@@ -49,6 +67,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
@@ -281,3 +300,40 @@ test('generate renders model-missing when no candidates are available', async ()
assertEquals(body.error.type, 'invalid_request_error');
assertEquals(body.error.message, 'Model unknown-model is not available on any configured upstream.');
});
+
+test('alias resolution swaps the inbound model id for the target and overlays rules onto the IR', async () => { // end-to-end through chatCompletionsServe.generate with the resolver and candidate enumeration mocked
+ installRepo();
+ aliasResolutionQueue.push({ // the mocked resolveAlias hands this entry back on its next call
+ targetModelId: 'gpt-5.4',
+ rules: { reasoning: { effort: 'low' }, verbosity: 'low' },
+ aliasName: 'gpt-fast',
+ });
+ const capturedBodies: ChatCompletionsPayload[] = []; // bodies passed to the stub provider, in call order
+ const callChatCompletions = vi.fn(async (_model: unknown, body: unknown): Promise> => {
+ capturedBodies.push(body as ChatCompletionsPayload);
+ return { ok: true, events: makeProtocolFrames(makeChatCompletionsEvents()), modelKey: 'gpt-5.4', headers: new Headers() };
+ });
+ queueCandidates([makeCandidate({ upstream: 'up_a', callChatCompletions })]);
+
+ const result = await chatCompletionsServe.generate({
+ payload: makePayload({ model: 'gpt-fast' }), // the client addresses the alias name, not the target id
+ ctx: makeGatewayCtx(),
+ store: createNonResponsesSourceStore(API_KEY_ID),
+ headers: new Headers(),
+ });
+
+ assertEquals(result.type, 'events');
+ if (result.type !== 'events') throw new Error('unreachable'); // narrows the result union so `.events` is accessible below
+ await collectEvents(result.events); // NOTE(review): presumably drains the stream so the provider stub has run before the assertions - confirm
+
+ // The resolved target id, not the alias name, must reach candidate
+ // enumeration so prefix routing addresses the real upstream model.
+ assertEquals(lastCandidatesCall.model, 'gpt-5.4'); // recorded by the enumerateProviderCandidates mock from its `model` argument
+ // The alias rule overrides must land on the IR before the upstream call.
+ // (The attempt strips `model` from the body — the provider re-stamps it
+ // from `candidate.binding.upstreamModel.id` — so we only verify the rule
+ // fields here.)
+ const observed = capturedBodies[0]!; // first (and only expected) provider call
+ assertEquals(observed.reasoning_effort, 'low'); // from rules.reasoning.effort
+ assertEquals(observed.verbosity, 'low'); // from rules.verbosity
+});
diff --git a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
index 877295494..0e562021e 100644
--- a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
@@ -24,6 +24,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/gemini/errors.ts b/packages/gateway/src/data-plane/chat/gemini/errors.ts
index ccfede92c..f1f231d31 100644
--- a/packages/gateway/src/data-plane/chat/gemini/errors.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/errors.ts
@@ -53,5 +53,7 @@ export const renderGeminiFailure = (
return geminiRpcErrorResult(404, appendFailedUpstreams(`Model ${failure.model} is not available on any configured upstream.`, failure.failedUpstreams));
case 'model-unsupported':
return geminiRpcErrorResult(400, appendFailedUpstreams(`Model ${failure.model} does not support ${endpoint === 'countTokens' ? 'countTokens' : 'the Gemini generateContent endpoint'}.`, failure.failedUpstreams));
+ case 'alias-no-target-available':
+ return geminiRpcErrorResult(404, `alias '${failure.aliasName}' has ${failure.targetCount} target(s); none currently map to an enabled upstream binding`);
}
};
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
index 90c86fb1a..225c690da 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
index 84a282748..f7f0beb55 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
index a066ce6c3..b3ca33289 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
index 0cbf60b61..a9978fa9f 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
index 76edee680..11629b1a9 100644
--- a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
@@ -26,6 +26,7 @@ const ctx = (): GatewayCtx => ({
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts
index f5daa1d86..091143618 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts
@@ -1,6 +1,8 @@
import { geminiAttempt } from './attempt.ts';
import { renderGeminiFailure } from './errors.ts';
import { planGeminiRouting } from './routing.ts';
+import { AliasNoTargetAvailableError, aliasFailureFromError } from '../../model-aliases/resolve.ts';
+import { resolveAndApplyAliasForGemini } from '../../model-aliases/serve-integration.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
import { enumerateProviderCandidates } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -29,7 +31,14 @@ export interface GeminiServeCountTokensArgs {
export const geminiServe = {
generate: async (args: GeminiServeGenerateArgs): Promise>> => {
- const { payload, ctx, store, model, headers } = args;
+ const { payload, ctx, store, headers } = args;
+ let model: string;
+ try {
+ model = await resolveAndApplyAliasForGemini(args.model, payload, ctx);
+ } catch (error) {
+ if (error instanceof AliasNoTargetAvailableError) return renderGeminiFailure(aliasFailureFromError(error), 'generate');
+ throw error;
+ }
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: ctx.upstreamIds,
model,
@@ -59,7 +68,14 @@ export const geminiServe = {
},
countTokens: async (args: GeminiServeCountTokensArgs): Promise> | PlainResult> => {
- const { payload, ctx, store, model, headers } = args;
+ const { payload, ctx, store, headers } = args;
+ let model: string;
+ try {
+ model = await resolveAndApplyAliasForGemini(args.model, payload, ctx);
+ } catch (error) {
+ if (error instanceof AliasNoTargetAvailableError) return renderGeminiFailure(aliasFailureFromError(error), 'countTokens');
+ throw error;
+ }
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: ctx.upstreamIds,
model,
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
index 1e834a8be..43f2bd256 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
@@ -15,11 +15,13 @@ import { directFetcher, type ProviderCallResult, type ProviderStreamResult, type
import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
+const lastCandidatesCall: { model?: string } = {};
vi.mock('../shared/candidates.ts', async importOriginal => {
const original = await importOriginal();
return {
...original,
- enumerateProviderCandidates: vi.fn(async () => {
+ enumerateProviderCandidates: vi.fn(async (args: { model: string }) => {
+ lastCandidatesCall.model = args.model;
const next = candidatesQueue.shift();
if (next === undefined) throw new Error('serve_test: no candidates enqueued');
return next;
@@ -27,6 +29,22 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
};
});
+// Mock the alias resolver so the integration test can inject a resolution
+// without standing up the full per-request fetcher + registry stack.
+//
+// Every `resolveAlias` call consumes the head of `aliasResolutionQueue`:
+//   - a resolution object is returned as-is,
+//   - a `null` entry, or an empty queue, makes the mock return `null`,
+//   - an `Error` entry is thrown.
+// The queue lives at module scope and is never reset here, so an entry a test
+// enqueues but does not consume is seen by the next test that resolves.
+type QueuedAliasResolution = { targetModelId: string; rules: Record<string, unknown>; aliasName: string } | null | Error;
+const aliasResolutionQueue: QueuedAliasResolution[] = [];
+vi.mock('../../model-aliases/resolve.ts', async importOriginal => {
+  // Type the original module so the spread below is checked; only
+  // `resolveAlias` is replaced, every other export keeps its real implementation.
+  const original = await importOriginal<typeof import('../../model-aliases/resolve.ts')>();
+  return {
+    ...original,
+    resolveAlias: vi.fn(async () => {
+      // `shift()` yields `undefined` only when the queue is empty; fold that
+      // into the same `null` result an explicit `null` entry produces.
+      const next = aliasResolutionQueue.shift() ?? null;
+      if (next instanceof Error) throw next;
+      return next;
+    }),
+  };
+});
+
const { geminiServe } = await import('./serve.ts');
const API_KEY_ID = 'key_gemini_serve_test';
@@ -48,6 +66,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
@@ -321,3 +340,33 @@ test('countTokens renders a Google RPC NOT_FOUND when no Messages-capable candid
const upstreamError = expectType(result, 'api-error');
assertEquals(upstreamError.status, 404);
});
+
+test('alias resolution swaps the inbound model id for the target and overlays rules onto the Gemini IR', async () => { // end-to-end through geminiServe.generate with the resolver and candidate enumeration mocked
+ installRepo();
+ aliasResolutionQueue.push({ // the mocked resolveAlias hands this entry back on its next call
+ targetModelId: 'gpt-5.4',
+ rules: { reasoning: { effort: 'high', budget_tokens: 1024 }, verbosity: 'low' },
+ aliasName: 'gemini-fast',
+ });
+ const callChatCompletions = vi.fn(async (): Promise> => ({
+ ok: true, events: makeProtocolFrames(makeChatCompletionsEvents()), modelKey: 'gpt-5.4', headers: new Headers(),
+ }));
+ queueCandidates([makeCandidate({ targetApi: 'chat-completions', callChatCompletions })]); // Gemini request served by a chat-completions candidate
+
+ const payload = makePayload(); // kept in a local: the assertions below read the rule overlay off this same object
+ const result = await geminiServe.generate({
+ payload,
+ ctx: makeGatewayCtx(),
+ store: createNonResponsesSourceStore(API_KEY_ID),
+ model: 'gemini-fast', // Gemini passes the model id as its own argument; here it is the alias name
+ headers: new Headers(),
+ });
+ await collectEvents(expectType(result, 'events').events); // NOTE(review): presumably drains the stream before the assertions run - confirm
+
+ // The resolved target id, not the alias name, reaches candidate enumeration.
+ assertEquals(lastCandidatesCall.model, 'gpt-5.4'); // recorded by the enumerateProviderCandidates mock from its `model` argument
+ // Alias rules land on the Gemini IR before the cross-protocol translation.
+ assertEquals(payload.generationConfig?.thinkingConfig?.thinkingLevel, 'high'); // from rules.reasoning.effort
+ assertEquals(payload.generationConfig?.thinkingConfig?.thinkingBudget, 1024); // from rules.reasoning.budget_tokens
+ assertEquals(payload.generationConfig?.verbosity, 'low'); // from rules.verbosity
+});
diff --git a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
index 36fe9e284..1495df93b 100644
--- a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
@@ -23,6 +23,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/messages/errors.ts b/packages/gateway/src/data-plane/chat/messages/errors.ts
index 6881616ec..9c4d5f976 100644
--- a/packages/gateway/src/data-plane/chat/messages/errors.ts
+++ b/packages/gateway/src/data-plane/chat/messages/errors.ts
@@ -38,5 +38,7 @@ export const renderMessagesFailure = (
return anthropicErrorResult(404, 'not_found_error', appendFailedUpstreams(`Model ${failure.model} is not available on any configured upstream.`, failure.failedUpstreams));
case 'model-unsupported':
return anthropicErrorResult(400, 'invalid_request_error', appendFailedUpstreams(`Model ${failure.model} does not support the ${endpointPath} endpoint.`, failure.failedUpstreams));
+ case 'alias-no-target-available':
+ return anthropicErrorResult(404, 'not_found_error', `alias '${failure.aliasName}' has ${failure.targetCount} target(s); none currently map to an enabled upstream binding`);
}
};
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
index 590a05c7b..da3a72d81 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 2917537a7..bd317db35 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
index f1e8e49c0..90dfa9a3e 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
index f1cf56677..396efd981 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
@@ -58,6 +58,7 @@ const gatewayCtx = (apiKeyId: string = 'test-key'): GatewayCtx => ({
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/messages/respond_test.ts b/packages/gateway/src/data-plane/chat/messages/respond_test.ts
index 82191d8af..fba59f4b2 100644
--- a/packages/gateway/src/data-plane/chat/messages/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/respond_test.ts
@@ -539,6 +539,7 @@ const makeRespondCtx = (): GatewayCtx => ({
requestStartedAt: 0,
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
});
const messagesEventsForRespond = (): readonly MessagesStreamEvent[] => [
diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts
index ae9bb5d6c..1a7b22372 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve.ts
@@ -1,6 +1,8 @@
import { messagesAttempt } from './attempt.ts';
import { renderMessagesFailure } from './errors.ts';
import { planMessagesRouting } from './routing.ts';
+import { AliasNoTargetAvailableError, aliasFailureFromError } from '../../model-aliases/resolve.ts';
+import { resolveAndApplyAliasForMessages } from '../../model-aliases/serve-integration.ts';
import type { StatefulResponsesStore } from '../responses/items/store.ts';
import { enumerateProviderCandidates } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -25,6 +27,12 @@ export interface MessagesServeCountTokensArgs {
export const messagesServe = {
generate: async (args: MessagesServeGenerateArgs): Promise>> => {
const { payload, ctx, store, headers } = args;
+ try {
+ await resolveAndApplyAliasForMessages(payload, ctx);
+ } catch (error) {
+ if (error instanceof AliasNoTargetAvailableError) return renderMessagesFailure(aliasFailureFromError(error), 'generate');
+ throw error;
+ }
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: ctx.upstreamIds,
model: payload.model,
@@ -57,6 +65,12 @@ export const messagesServe = {
countTokens: async (args: MessagesServeCountTokensArgs): Promise> | PlainResult> => {
const { payload, ctx, store, headers } = args;
+ try {
+ await resolveAndApplyAliasForMessages(payload, ctx);
+ } catch (error) {
+ if (error instanceof AliasNoTargetAvailableError) return renderMessagesFailure(aliasFailureFromError(error), 'countTokens');
+ throw error;
+ }
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: ctx.upstreamIds,
model: payload.model,
diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
index de09f5646..8b07302f9 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
@@ -13,11 +13,13 @@ import { defaultsForProvider, directFetcher, type ProviderCallResult, type Provi
import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
+const lastCandidatesCall: { model?: string } = {};
vi.mock('../shared/candidates.ts', async importOriginal => {
const original = await importOriginal();
return {
...original,
- enumerateProviderCandidates: vi.fn(async () => {
+ enumerateProviderCandidates: vi.fn(async (args: { model: string }) => {
+ lastCandidatesCall.model = args.model;
const next = candidatesQueue.shift();
if (next === undefined) throw new Error('serve_test: no candidates enqueued');
return next;
@@ -25,6 +27,22 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
};
});
+// Mock the alias resolver so the integration test can inject a resolution
+// without standing up the full per-request fetcher + registry stack.
+//
+// Every `resolveAlias` call consumes the head of `aliasResolutionQueue`:
+//   - a resolution object is returned as-is,
+//   - a `null` entry, or an empty queue, makes the mock return `null`,
+//   - an `Error` entry is thrown.
+// The queue lives at module scope and is never reset here, so an entry a test
+// enqueues but does not consume is seen by the next test that resolves.
+type QueuedAliasResolution = { targetModelId: string; rules: Record<string, unknown>; aliasName: string } | null | Error;
+const aliasResolutionQueue: QueuedAliasResolution[] = [];
+vi.mock('../../model-aliases/resolve.ts', async importOriginal => {
+  // Type the original module so the spread below is checked; only
+  // `resolveAlias` is replaced, every other export keeps its real implementation.
+  const original = await importOriginal<typeof import('../../model-aliases/resolve.ts')>();
+  return {
+    ...original,
+    resolveAlias: vi.fn(async () => {
+      // `shift()` yields `undefined` only when the queue is empty; fold that
+      // into the same `null` result an explicit `null` entry produces.
+      const next = aliasResolutionQueue.shift() ?? null;
+      if (next instanceof Error) throw next;
+      return next;
+    }),
+  };
+});
+
const { messagesServe } = await import('./serve.ts');
const API_KEY_ID = 'key_messages_serve_test';
@@ -46,6 +64,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
@@ -436,3 +455,35 @@ test('copilot binding strips x-anthropic-billing-header system block via the def
assertEquals(observed.system.length, 1);
assertEquals(observed.system[0].text, 'You are a helpful assistant.');
});
+
+test('alias resolution swaps the inbound model id for the target and overlays rules onto the Messages IR', async () => { // end-to-end through messagesServe.generate with the resolver and candidate enumeration mocked
+ installRepo();
+ aliasResolutionQueue.push({ // the mocked resolveAlias hands this entry back on its next call
+ targetModelId: 'claude-opus-4-7',
+ rules: { reasoning: { effort: 'high', budget_tokens: 2048 }, serviceTier: 'fast' },
+ aliasName: 'claude-fast',
+ });
+ const capturedBodies: MessagesPayload[] = []; // bodies passed to the stub provider, in call order
+ const callMessages = vi.fn(async (_model: unknown, body: unknown): Promise> => {
+ capturedBodies.push({ ...(body as Omit), model: 'claude-opus-4-7' }); // NOTE(review): the body apparently arrives without `model`; it is re-added here so the entry fits MessagesPayload - confirm
+ return { ok: true, events: makeProtocolFrames(makeMessagesResultEvents()), modelKey: 'claude-opus-4-7' };
+ });
+ queueCandidates([makeCandidate({ upstream: 'up_cf', callMessages })]);
+
+ const result = await messagesServe.generate({
+ payload: makePayload({ model: 'claude-fast' }), // the client addresses the alias name, not the target id
+ ctx: makeGatewayCtx(),
+ store: createNonResponsesSourceStore(API_KEY_ID),
+ headers: new Headers(),
+ });
+
+ await collectEvents(assertResultType(result, 'events').events); // NOTE(review): presumably drains the stream so the provider stub has run before the assertions - confirm
+
+ assertEquals(lastCandidatesCall.model, 'claude-opus-4-7'); // candidate enumeration saw the target id, not 'claude-fast'
+ const observed = capturedBodies[0]!; // first (and only expected) provider call
+ assertEquals(observed.output_config?.effort, 'high'); // from rules.reasoning.effort
+ assertEquals(observed.thinking?.budget_tokens, 2048); // from rules.reasoning.budget_tokens
+ // The serviceTier=fast → speed=fast bridge lands the alias rule on
+ // Anthropic's native Fast Mode field.
+ assertEquals(observed.speed, 'fast');
+});
diff --git a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
index 22ec01ee1..0bf10177a 100644
--- a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
@@ -25,6 +25,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/errors.ts b/packages/gateway/src/data-plane/chat/responses/errors.ts
index 215f218a0..1af6c42d0 100644
--- a/packages/gateway/src/data-plane/chat/responses/errors.ts
+++ b/packages/gateway/src/data-plane/chat/responses/errors.ts
@@ -34,5 +34,7 @@ export const renderResponsesFailure = (
return openAiErrorResult(404, appendFailedUpstreams(`Model ${failure.model} is not available on any configured upstream.`, failure.failedUpstreams));
case 'model-unsupported':
return openAiErrorResult(400, appendFailedUpstreams(`Model ${failure.model} does not support the /responses endpoint.`, failure.failedUpstreams));
+ case 'alias-no-target-available':
+ return openAiErrorResult(404, `alias '${failure.aliasName}' has ${failure.targetCount} target(s); none currently map to an enabled upstream binding`);
}
};
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
index 914711316..a1db87741 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
index fc422467e..3dc2ad972 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
index 4210d34ba..b5b7bed38 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 9618d2f02..b9bc85ed8 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
index 16c220950..1505ea5bb 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
@@ -45,6 +45,7 @@ const stubCtx = (overrides: { abortSignal?: AbortSignal } = {}): GatewayCtx => (
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
@@ -160,6 +161,7 @@ const performanceFor = (modelKey: string) => ({
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
});
const upstreamCyberPolicyError = (message: string): ExecuteResult> => ({
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
index d48ea7d2b..1842ff08b 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
@@ -348,6 +348,7 @@ const makeGatewayCtx = (apiKeyId: string = 'k1'): GatewayCtx => ({
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
@@ -4497,6 +4498,7 @@ test('downstream AbortSignal threads through to provider search / fetchPage and
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
index e10d24fa7..94859eb9c 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
@@ -144,6 +144,7 @@ const gatewayCtx = (): GatewayCtx => ({
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
index 284004632..f2513a078 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
@@ -56,6 +56,7 @@ const gatewayCtx = (): GatewayCtx => ({
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
index 08534e87b..31624e32a 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
index 9a51ac553..06e39fa79 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
index 8b2638334..1d13c81cf 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
@@ -1,6 +1,8 @@
import { renderResponsesFailure } from './errors.ts';
import type { StatefulResponsesStore } from './items/store.ts';
import { planResponsesRouting } from './routing.ts';
+import { AliasNoTargetAvailableError, aliasFailureFromError } from '../../model-aliases/resolve.ts';
+import { resolveAndApplyAliasForResponses } from '../../model-aliases/serve-integration.ts';
import { enumerateProviderCandidates, type ChatCandidate } from '../shared/candidates.ts';
import type { GatewayCtx } from '../shared/gateway-ctx.ts';
import type { ModelEndpoints, ProtocolFrame } from '@floway-dev/protocols/common';
@@ -88,6 +90,12 @@ export const prepareResponsesServePlan = async (args: {
}): Promise => {
const { payload, ctx, store, pickTarget } = args;
const prepared = await expandPreviousResponseId(payload, store);
+ try {
+ await resolveAndApplyAliasForResponses(prepared, ctx);
+ } catch (error) {
+ if (error instanceof AliasNoTargetAvailableError) return { kind: 'failure', result: renderResponsesFailure(aliasFailureFromError(error)) };
+ throw error;
+ }
const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
upstreamIds: ctx.upstreamIds,
model: prepared.model,
diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
index f60be9a2a..548c49b8b 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
@@ -23,11 +23,13 @@ import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-d
// `model-missing` failure tests queue an empty list and expect `sawModel:
// false` so the serve renders 404 rather than 400.
const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
+const lastCandidatesCall: { model?: string } = {};
vi.mock('../shared/candidates.ts', async importOriginal => {
const original = await importOriginal();
return {
...original,
- enumerateProviderCandidates: vi.fn(async () => {
+ enumerateProviderCandidates: vi.fn(async (args: { model: string }) => {
+ lastCandidatesCall.model = args.model;
const next = candidatesQueue.shift();
if (next === undefined) throw new Error('serve_test: no candidates enqueued');
return next;
@@ -35,6 +37,22 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
};
});
+// Mock the alias resolver so a test can inject a resolution without standing up the full per-request
+// fetcher + registry stack. Queue entries are consumed FIFO: an object or null is returned, an Error is thrown.
+const aliasResolutionQueue: ({ targetModelId: string; rules: Record; aliasName: string } | null | Error)[] = [];
+vi.mock('../../model-aliases/resolve.ts', async importOriginal => {
+ const original = await importOriginal();
+ return {
+ ...original, // keep every other real export of the module
+ resolveAlias: vi.fn(async () => {
+ if (aliasResolutionQueue.length === 0) return null; // nothing queued: the mock answers null
+ const next = aliasResolutionQueue.shift()!;
+ if (next instanceof Error) throw next; // queued errors surface as a rejected resolver call
+ return next;
+ }),
+ };
+});
+
const { responsesServe } = await import('./serve.ts');
const { expandPreviousResponseId } = await import('./serve-prep.ts');
@@ -57,6 +75,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: () => {},
c: stubAuthedContext(),
requestStartedAt: 0,
@@ -646,3 +665,37 @@ test('generate treats compaction_trigger-bearing input as compaction: snapshot r
if (!Array.isArray(receivedInput)) throw new Error('expected the wire input to be an array');
assertEquals((receivedInput.at(-1) as { type?: unknown })?.type, 'compaction_trigger');
});
+
+test('alias resolution swaps the inbound model id for the target and overlays rules onto the Responses IR', async () => {
+ installRepo();
+ aliasResolutionQueue.push({ // consumed by the mocked `resolveAlias` on its next call
+ targetModelId: 'gpt-5.4',
+ rules: { reasoning: { effort: 'high', summary: 'detailed' }, verbosity: 'medium', serviceTier: 'priority' },
+ aliasName: 'gpt-fast',
+ });
+ const capturedBodies: ResponsesPayload[] = [];
+ const callResponses = vi.fn(async (_model: unknown, body: unknown): Promise> => {
+ capturedBodies.push(body as ResponsesPayload); // record what the upstream call would have received
+ return { ok: true, events: makeProtocolFrames([{ type: 'response.completed', sequence_number: 0, response: makeResponsesResult() }]), modelKey: 'gpt-5.4', headers: new Headers() };
+ });
+ queueCandidates([makeCandidate({ upstream: 'up_a', callResponses })]);
+
+ const result = await responsesServe.generate({
+ payload: makePayload({ model: 'gpt-fast' }), // the request names the alias, not the target
+ ctx: makeGatewayCtx(),
+ store: createResponsesHttpStore(API_KEY_ID, true),
+ headers: new Headers(),
+ });
+
+ assertEquals(result.type, 'events');
+ if (result.type !== 'events') throw new Error('unreachable');
+ await collectEvents(result.events); // drain the stream before inspecting what was captured
+
+ // The resolved target id, not the alias name, reaches candidate enumeration.
+ assertEquals(lastCandidatesCall.model, 'gpt-5.4');
+ const observed = capturedBodies[0]!;
+ assertEquals(observed.reasoning?.effort, 'high'); // the four rule values below arrive in their Responses-shaped fields
+ assertEquals(observed.reasoning?.summary, 'detailed');
+ assertEquals(observed.text?.verbosity, 'medium');
+ assertEquals(observed.service_tier, 'priority');
+});
diff --git a/packages/gateway/src/data-plane/chat/shared/errors.ts b/packages/gateway/src/data-plane/chat/shared/errors.ts
index 7a98db1db..896ff1a65 100644
--- a/packages/gateway/src/data-plane/chat/shared/errors.ts
+++ b/packages/gateway/src/data-plane/chat/shared/errors.ts
@@ -9,7 +9,14 @@ export type ChatServeFailure =
| { readonly kind: 'model-missing'; readonly model: string; readonly failedUpstreams?: readonly string[] }
| { readonly kind: 'model-unsupported'; readonly model: string; readonly failedUpstreams?: readonly string[] }
| { readonly kind: 'item-not-found'; readonly itemId: string }
- | { readonly kind: 'routing-unavailable'; readonly message: string };
+ | { readonly kind: 'routing-unavailable'; readonly message: string }
+ // Alias name resolved, but no entry in its targets list currently maps to
+ // an enabled upstream binding that exposes the inbound endpoint group.
+ // Rendered as a 404 carrying the canonical
+ // `alias 'NAME' has N target(s); none currently map to an enabled
+ // upstream binding` message — every protocol's renderer treats this as a
+ // model-not-found surface.
+ | { readonly kind: 'alias-no-target-available'; readonly aliasName: string; readonly targetCount: number };
class ChatServeFailureError extends Error {
readonly failure: ChatServeFailure;
diff --git a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
index 9cdbab0c8..4ef208a42 100644
--- a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
+++ b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
@@ -27,6 +27,13 @@ export interface GatewayCtx {
// `finalizeGatewayResponse` short-circuits the dump tee and returns the
// response untouched.
readonly dump: DumpAccumulator | null;
+ // Headers staged by the gateway during request processing (e.g. the
+ // `x-floway-alias` header the alias resolver stamps when it picked a
+ // target). `finalizeGatewayResponse` writes each entry onto the outbound
+ // response just before returning it, so the headers ride along regardless
+ // of whether the responder built the Response via Hono's streamSSE,
+ // `Response.json`, or a raw `new Response(...)`.
+ readonly responseHeaders: Headers;
}
export interface CreateGatewayCtxOptions {
@@ -73,12 +80,17 @@ export const createGatewayCtxFromHono = (c: AuthedContext, opts: CreateGatewayCt
runtimeLocation: colo,
currentColo: colo,
dump,
+ responseHeaders: new Headers(),
};
};
// Run the dump-accumulator's finalize tee on the outgoing Response. Every
// inbound HTTP wrapper returns its response through this seam so the dump
// pipeline applies uniformly across happy-path, error, and passthrough paths.
+// Gateway-staged response headers (today: `x-floway-alias`) are copied onto
+// the response first, via `Headers.set`, so a staged value replaces any
+// same-named header the responder wrote, however the body was built.
export const finalizeGatewayResponse = (ctx: GatewayCtx, response: Response): Response => {
+ for (const [name, value] of ctx.responseHeaders) response.headers.set(name, value); // NOTE(review): `set` throws on immutable headers (e.g. a Response straight from fetch) — confirm every responder passes a mutable one
return ctx.dump?.finalize(response) ?? response;
};
diff --git a/packages/gateway/src/data-plane/chat/shared/respond_test.ts b/packages/gateway/src/data-plane/chat/shared/respond_test.ts
index 9a5c506a6..58c8c454a 100644
--- a/packages/gateway/src/data-plane/chat/shared/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/respond_test.ts
@@ -44,6 +44,7 @@ const setup = (): Harness => {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
backgroundScheduler: promise => { background.push(promise); },
requestStartedAt,
c: stubAuthedContext(),
diff --git a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
index bdef8b1f1..756dccda8 100644
--- a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
@@ -30,6 +30,7 @@ const baseCtx = (overrides: Partial = {}): GatewayCtx => {
runtimeLocation: 'TEST',
currentColo: 'TEST',
dump: null,
+ responseHeaders: new Headers(),
abortSignal: downstream.signal,
downstreamAbortController: downstream,
backgroundScheduler: promise => { void promise; },
diff --git a/packages/gateway/src/data-plane/embeddings/serve.ts b/packages/gateway/src/data-plane/embeddings/serve.ts
index 9c33e6736..3d83cd1e9 100644
--- a/packages/gateway/src/data-plane/embeddings/serve.ts
+++ b/packages/gateway/src/data-plane/embeddings/serve.ts
@@ -5,6 +5,8 @@ import type { Context } from 'hono';
import { createGatewayCtxFromHono, finalizeGatewayResponse } from '../chat/shared/gateway-ctx.ts';
import { readRequestBody } from '../chat/shared/request-body.ts';
+import { AliasNoTargetAvailableError } from '../model-aliases/resolve.ts';
+import { resolveAliasForPassthrough } from '../model-aliases/serve-integration.ts';
import { passthroughApiError, passthroughServe } from '../shared/passthrough-serve.ts';
import { tokenUsageFromEmbeddingsBody } from '../shared/telemetry/usage.ts';
@@ -53,11 +55,21 @@ export const embeddings = async (c: Context): Promise => {
}
ctx.dump?.requestedModel(request.model);
+ let resolvedModel: string;
+ try {
+ resolvedModel = await resolveAliasForPassthrough(request.model, 'embedding', ctx);
+ } catch (error) {
+ if (error instanceof AliasNoTargetAvailableError) {
+ ctx.dump?.error('gateway');
+ return finalizeGatewayResponse(ctx, passthroughApiError(c, error.message, 404));
+ }
+ throw error;
+ }
const response = await passthroughServe({
c,
ctx,
sourceApi: '/embeddings',
- model: request.model,
+ model: resolvedModel,
bindingServesEndpoint: binding => binding.upstreamModel.endpoints.embeddings !== undefined,
call: async (binding, opts) => {
const { model: _model, ...body } = request.body;
diff --git a/packages/gateway/src/data-plane/images/serve.ts b/packages/gateway/src/data-plane/images/serve.ts
index 58f8a7a25..ed3c7891d 100644
--- a/packages/gateway/src/data-plane/images/serve.ts
+++ b/packages/gateway/src/data-plane/images/serve.ts
@@ -12,6 +12,8 @@ import type { Context } from 'hono';
import { createGatewayCtxFromHono, finalizeGatewayResponse } from '../chat/shared/gateway-ctx.ts';
import { readRequestBody } from '../chat/shared/request-body.ts';
+import { AliasNoTargetAvailableError } from '../model-aliases/resolve.ts';
+import { resolveAliasForPassthrough } from '../model-aliases/serve-integration.ts';
import { passthroughApiError, passthroughServe } from '../shared/passthrough-serve.ts';
import { tokenUsageFromImagesBody } from '../shared/telemetry/usage.ts';
@@ -52,11 +54,21 @@ export const imagesGenerations = async (c: Context): Promise => {
}
ctx.dump?.requestedModel(request.model);
+ let resolvedModel: string;
+ try {
+ resolvedModel = await resolveAliasForPassthrough(request.model, 'image', ctx);
+ } catch (error) {
+ if (error instanceof AliasNoTargetAvailableError) {
+ ctx.dump?.error('gateway');
+ return finalizeGatewayResponse(ctx, passthroughApiError(c, error.message, 404));
+ }
+ throw error;
+ }
const response = await passthroughServe({
c,
ctx,
sourceApi: '/images/generations',
- model: request.model,
+ model: resolvedModel,
bindingServesEndpoint: binding => binding.upstreamModel.endpoints.imagesGenerations !== undefined,
call: (binding, opts) => {
const { model: _model, ...body } = request.body;
@@ -91,11 +103,21 @@ export const imagesEdits = async (c: Context): Promise => {
}
ctx.dump?.requestedModel(modelRaw);
+ let resolvedModel: string;
+ try {
+ resolvedModel = await resolveAliasForPassthrough(modelRaw, 'image', ctx);
+ } catch (error) {
+ if (error instanceof AliasNoTargetAvailableError) {
+ ctx.dump?.error('gateway');
+ return finalizeGatewayResponse(ctx, passthroughApiError(c, error.message, 404));
+ }
+ throw error;
+ }
const response = await passthroughServe({
c,
ctx,
sourceApi: '/images/edits',
- model: modelRaw,
+ model: resolvedModel,
bindingServesEndpoint: binding => binding.upstreamModel.endpoints.imagesEdits !== undefined,
call: (binding, opts) => {
// ModelProvider.callImagesEdits takes ownership of the FormData and
diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts
new file mode 100644
index 000000000..9208d1492
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/apply.ts
@@ -0,0 +1,122 @@
+// Per-protocol rule overlay. Given a resolved alias's ChatAliasRules,
+// stamp the rule values onto the inbound IR. Alias rules are authoritative
+// — an existing IR field is OVERWRITTEN by a rule that names it. Rules the
+// target IR cannot express are silently dropped; the runtime never tries
+// to enum-gate a value against a model's advertised capabilities. The
+// catalog-warning surface lives in the dashboard.
+
+import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
+import type { ChatAliasRules } from '@floway-dev/protocols/common';
+import type { GeminiPayload } from '@floway-dev/protocols/gemini';
+import type { MessagesPayload } from '@floway-dev/protocols/messages';
+import type { ResponsesPayload } from '@floway-dev/protocols/responses';
+
+// Type guard: true when `rules.reasoning` is not undefined, so callers can destructure it directly. Only the
+// container is checked; each sub-key may still be absent. NOTE(review): `NonNullable` shows no type argument below — confirm.
+const hasReasoning = (rules: ChatAliasRules): rules is ChatAliasRules & { reasoning: NonNullable } =>
+ rules.reasoning !== undefined;
+
+export const applyChatRulesToChatCompletions = (body: ChatCompletionsPayload, rules: ChatAliasRules): void => { // mutates `body` in place; returns nothing
+ if (hasReasoning(rules)) {
+ const { effort, budget_tokens, adaptive, summary } = rules.reasoning;
+ if (effort !== undefined) body.reasoning_effort = effort; // every rule that is present overwrites its flat top-level field
+ if (budget_tokens !== undefined) body.thinking_budget = budget_tokens;
+ if (adaptive !== undefined) body.adaptive_thinking = adaptive; // written for both true and false
+ if (summary !== undefined) body.reasoning_summary = summary;
+ }
+ if (rules.verbosity !== undefined) body.verbosity = rules.verbosity; // absent rules leave the inbound value untouched
+ if (rules.serviceTier !== undefined) body.service_tier = rules.serviceTier;
+};
+
+export const applyChatRulesToResponses = (body: ResponsesPayload, rules: ChatAliasRules): void => { // mutates `body` in place; returns nothing
+ if (hasReasoning(rules)) {
+ const { effort, budget_tokens, adaptive, summary } = rules.reasoning;
+ if (effort !== undefined || summary !== undefined) { // only touch `body.reasoning` when a rule targets it
+ const existing = body.reasoning ?? {};
+ body.reasoning = {
+ ...existing, // keys the rules do not name survive
+ ...(effort !== undefined ? { effort } : {}),
+ ...(summary !== undefined ? { summary } : {}),
+ };
+ }
+ if (budget_tokens !== undefined) body.thinking_budget = budget_tokens; // budget / adaptive are written top-level, not under `reasoning`
+ if (adaptive !== undefined) body.adaptive_thinking = adaptive;
+ }
+ if (rules.verbosity !== undefined) {
+ body.text = { ...body.text, verbosity: rules.verbosity }; // verbosity nests under `text`; other `text` keys are kept
+ }
+ if (rules.serviceTier !== undefined) body.service_tier = rules.serviceTier;
+};
+
+export const applyChatRulesToMessages = (body: MessagesPayload, rules: ChatAliasRules): void => { // mutates `body` in place; returns nothing
+ if (hasReasoning(rules)) {
+ const { effort, budget_tokens, adaptive } = rules.reasoning; // `summary` is not read: this overlay drops it
+ // Anthropic stores explicit effort in `output_config.effort`; budget /
+ // adaptive ride on `thinking.*`. Splitting them so both can be set in
+ // the same overlay (effort fixed + budget pinned, e.g.) without one
+ // erasing the other.
+ if (effort !== undefined) {
+ body.output_config = { ...body.output_config, effort };
+ }
+ if (adaptive === true) { // adaptive takes precedence: a budget_tokens rule is ignored in this branch
+ body.thinking = { ...body.thinking, type: 'adaptive' }; // NOTE(review): the spread keeps an inbound `budget_tokens` next to type 'adaptive' — confirm upstream accepts that
+ } else if (budget_tokens !== undefined) { // also reached when adaptive is false or absent
+ body.thinking = { ...body.thinking, type: 'enabled', budget_tokens };
+ }
+ }
+ if (rules.verbosity !== undefined) body.verbosity = rules.verbosity;
+ if (rules.serviceTier !== undefined) {
+ // The cross-protocol bridge in translate maps `speed: 'fast'` ↔
+ // `service_tier: 'fast'`; on a native Messages target the alias rule
+ // `serviceTier: 'fast'` lands on `speed` so the upstream sees Fast Mode
+ // through its native field. Other tier values pass through on
+ // `service_tier` since Messages's native enum (`auto`/`standard_only`)
+ // doesn't model them.
+ if (rules.serviceTier === 'fast') {
+ body.speed = 'fast'; // an inbound `service_tier` is left as-is
+ } else {
+ body.service_tier = rules.serviceTier; // an inbound `speed` is left as-is
+ }
+ }
+};
+
+// Map the discrete `ReasoningEffort` presets onto Gemini's `thinkingLevel`
+// enum. Only `none` is renamed (to `minimal`); the other four tiers keep
+// their names. Anything outside the closed set is dropped — Gemini's wire
+// reads from a fixed enum and an unknown tier would just be rejected upstream.
+const GEMINI_THINKING_LEVEL_BY_EFFORT: Record = {
+ none: 'minimal',
+ low: 'low',
+ medium: 'medium',
+ high: 'high',
+ xhigh: 'xhigh',
+};
+
+export const applyChatRulesToGemini = (body: GeminiPayload, rules: ChatAliasRules): void => { // mutates `body` in place; returns nothing
+ if (hasReasoning(rules)) {
+ const { effort, budget_tokens, adaptive } = rules.reasoning; // `summary` is not read: this overlay drops it
+ // Gemini collapses the three reasoning controls onto one `thinkingConfig`
+ // sub-object. Adaptive wins by encoding budget=-1 (Gemini's adaptive
+ // sentinel); otherwise an explicit budget pins the count. Effort sets the
+ // level preset independently, so a budget and a level can coexist.
+ const thinkingConfig = { ...body.generationConfig?.thinkingConfig }; // shallow copy; starts empty when none was sent
+ if (adaptive === true) {
+ thinkingConfig.thinkingBudget = -1;
+ } else if (budget_tokens !== undefined) {
+ thinkingConfig.thinkingBudget = budget_tokens;
+ }
+ if (effort !== undefined) {
+ const level = GEMINI_THINKING_LEVEL_BY_EFFORT[effort];
+ if (level !== undefined) thinkingConfig.thinkingLevel = level; // an effort with no table entry is skipped
+ }
+ if (Object.keys(thinkingConfig).length > 0) { // avoid attaching an empty thinkingConfig
+ body.generationConfig = { ...body.generationConfig, thinkingConfig };
+ }
+ }
+ if (rules.verbosity !== undefined) {
+ body.generationConfig = { ...body.generationConfig, verbosity: rules.verbosity };
+ }
+ if (rules.serviceTier !== undefined) {
+ body.generationConfig = { ...body.generationConfig, serviceTier: rules.serviceTier };
+ }
+};
diff --git a/packages/gateway/src/data-plane/model-aliases/apply_test.ts b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
new file mode 100644
index 000000000..72d4f6b35
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
@@ -0,0 +1,198 @@
+// Behavioral coverage for the per-protocol rule overlay. Each protocol's
+// apply helper is exercised against an inbound payload IR; alias rules are
+// authoritative — an existing IR field is OVERWRITTEN by a matching rule
+// — and rules the IR cannot express are silently dropped.
+
+import { test } from 'vitest';
+
+import { applyChatRulesToChatCompletions, applyChatRulesToGemini, applyChatRulesToMessages, applyChatRulesToResponses } from './apply.ts';
+import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
+import type { GeminiPayload } from '@floway-dev/protocols/gemini';
+import type { MessagesPayload } from '@floway-dev/protocols/messages';
+import type { ResponsesPayload } from '@floway-dev/protocols/responses';
+import { assertEquals } from '@floway-dev/test-utils';
+
+const ccPayload = (overrides: Partial = {}): ChatCompletionsPayload => ({ // fixture: one user message on 'gpt-5.4'
+ model: 'gpt-5.4',
+ messages: [{ role: 'user', content: 'hi' }],
+ ...overrides, // spread last so a test's fields win over the defaults
+});
+
+const resPayload = (overrides: Partial = {}): ResponsesPayload => ({ // fixture: string input on 'gpt-5.4'
+ model: 'gpt-5.4',
+ input: 'hi',
+ ...overrides, // spread last so a test's fields win over the defaults
+});
+
+const msgPayload = (overrides: Partial = {}): MessagesPayload => ({ // fixture: one user message on 'claude-opus-4-7'
+ model: 'claude-opus-4-7',
+ max_tokens: 32,
+ messages: [{ role: 'user', content: 'hi' }],
+ ...overrides, // spread last so a test's fields win over the defaults
+});
+
+const gemPayload = (overrides: Partial = {}): GeminiPayload => ({ // fixture: one user turn; no `model` field is set here
+ contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
+ ...overrides, // spread last so a test's fields win over the defaults
+});
+
+// ── ChatCompletions ──
+
+test('chat-completions: empty rules leave the payload unchanged', () => {
+ const body = ccPayload({ reasoning_effort: 'high', verbosity: 'low', service_tier: 'priority' });
+ applyChatRulesToChatCompletions(body, {}); // `{}` carries no `reasoning`, `verbosity` or `serviceTier`
+ assertEquals(body.reasoning_effort, 'high');
+ assertEquals(body.verbosity, 'low');
+ assertEquals(body.service_tier, 'priority');
+});
+
+test('chat-completions: rules stamp every supported field onto the IR', () => {
+ const body = ccPayload(); // starts with none of the targeted fields set
+ applyChatRulesToChatCompletions(body, {
+ reasoning: { effort: 'high', budget_tokens: 1024, adaptive: true, summary: 'detailed' },
+ verbosity: 'low',
+ serviceTier: 'priority',
+ });
+ assertEquals(body.reasoning_effort, 'high');
+ assertEquals(body.thinking_budget, 1024);
+ assertEquals(body.adaptive_thinking, true);
+ assertEquals(body.reasoning_summary, 'detailed');
+ assertEquals(body.verbosity, 'low');
+ assertEquals(body.service_tier, 'priority');
+});
+
+test('chat-completions: alias rules overwrite existing IR fields', () => {
+ const body = ccPayload({ reasoning_effort: 'low', verbosity: 'high', service_tier: 'default' }); // every value differs from the rule below
+ applyChatRulesToChatCompletions(body, {
+ reasoning: { effort: 'xhigh' },
+ verbosity: 'low',
+ serviceTier: 'priority',
+ });
+ assertEquals(body.reasoning_effort, 'xhigh');
+ assertEquals(body.verbosity, 'low');
+ assertEquals(body.service_tier, 'priority');
+});
+
+// ── Responses ──
+
+test('responses: empty rules leave the payload unchanged', () => {
+ const body = resPayload({ reasoning: { effort: 'high' }, text: { verbosity: 'low' }, service_tier: 'priority' });
+ applyChatRulesToResponses(body, {}); // `{}` carries no `reasoning`, `verbosity` or `serviceTier`
+ assertEquals(body.reasoning?.effort, 'high');
+ assertEquals(body.text?.verbosity, 'low');
+ assertEquals(body.service_tier, 'priority');
+});
+
+test('responses: rules stamp every supported field onto the IR', () => {
+ const body = resPayload(); // starts with none of the targeted fields set
+ applyChatRulesToResponses(body, {
+ reasoning: { effort: 'high', budget_tokens: 1024, adaptive: true, summary: 'concise' },
+ verbosity: 'medium',
+ serviceTier: 'flex',
+ });
+ assertEquals(body.reasoning?.effort, 'high'); // effort + summary nest under `reasoning`
+ assertEquals(body.reasoning?.summary, 'concise');
+ assertEquals(body.thinking_budget, 1024); // budget + adaptive are top-level
+ assertEquals(body.adaptive_thinking, true);
+ assertEquals(body.text?.verbosity, 'medium'); // verbosity nests under `text`
+ assertEquals(body.service_tier, 'flex');
+});
+
+test('responses: alias rules overwrite existing reasoning + service_tier fields', () => {
+ const body = resPayload({ reasoning: { effort: 'low', summary: 'auto' }, service_tier: 'default', text: { verbosity: 'high' } }); // every value differs from the rule below
+ applyChatRulesToResponses(body, {
+ reasoning: { effort: 'xhigh', summary: 'detailed' },
+ verbosity: 'low',
+ serviceTier: 'priority',
+ });
+ assertEquals(body.reasoning?.effort, 'xhigh');
+ assertEquals(body.reasoning?.summary, 'detailed');
+ assertEquals(body.text?.verbosity, 'low');
+ assertEquals(body.service_tier, 'priority');
+});
+
+// ── Messages ──
+
+test('messages: empty rules leave the payload unchanged', () => {
+ const body = msgPayload({ output_config: { effort: 'high' }, thinking: { type: 'enabled', budget_tokens: 512 }, speed: 'fast' });
+ applyChatRulesToMessages(body, {}); // `{}` carries no `reasoning`, `verbosity` or `serviceTier`
+ assertEquals(body.output_config?.effort, 'high');
+ assertEquals(body.thinking?.budget_tokens, 512);
+ assertEquals(body.speed, 'fast');
+});
+
+test('messages: effort lands on output_config, budget+adaptive land on thinking', () => {
+ const body = msgPayload();
+ applyChatRulesToMessages(body, { // no `adaptive` rule is passed: only effort + budget are exercised here
+ reasoning: { effort: 'high', budget_tokens: 2048 },
+ verbosity: 'low',
+ });
+ assertEquals(body.output_config?.effort, 'high');
+ assertEquals(body.thinking?.type, 'enabled');
+ assertEquals(body.thinking?.budget_tokens, 2048);
+ assertEquals(body.verbosity, 'low');
+});
+
+test('messages: adaptive=true sets thinking.type=adaptive and ignores budget_tokens', () => {
+ const body = msgPayload();
+ applyChatRulesToMessages(body, { reasoning: { adaptive: true, budget_tokens: 4096 } });
+ assertEquals(body.thinking?.type, 'adaptive'); // only `type` is asserted; `budget_tokens` itself is not checked
+});
+
+test('messages: serviceTier=fast maps to speed=fast (cross-protocol bridge)', () => {
+ const body = msgPayload();
+ applyChatRulesToMessages(body, { serviceTier: 'fast' });
+ assertEquals(body.speed, 'fast');
+ assertEquals(body.service_tier, undefined); // the fixture starts without `service_tier`, so it stays unset
+});
+
+test('messages: non-fast serviceTier lands on service_tier directly', () => {
+ const body = msgPayload();
+ applyChatRulesToMessages(body, { serviceTier: 'priority' });
+ assertEquals(body.service_tier, 'priority');
+ assertEquals(body.speed, undefined); // the fixture starts without `speed`, so it stays unset
+});
+
+test('messages: alias rules overwrite existing thinking + output_config fields', () => {
+ const body = msgPayload({ output_config: { effort: 'low' }, thinking: { type: 'enabled', budget_tokens: 100 } }); // both values differ from the rule below
+ applyChatRulesToMessages(body, { reasoning: { effort: 'xhigh', budget_tokens: 9999 } });
+ assertEquals(body.output_config?.effort, 'xhigh');
+ assertEquals(body.thinking?.budget_tokens, 9999);
+});
+
+// ── Gemini ──
+
+test('gemini: empty rules leave the payload unchanged', () => {
+ const body = gemPayload({ generationConfig: { thinkingConfig: { thinkingBudget: 256 }, verbosity: 'low' } });
+ applyChatRulesToGemini(body, {}); // `{}` carries no `reasoning`, `verbosity` or `serviceTier`
+ assertEquals(body.generationConfig?.thinkingConfig?.thinkingBudget, 256);
+ assertEquals(body.generationConfig?.verbosity, 'low');
+});
+
+test('gemini: effort maps to thinkingLevel; budget lands on thinkingBudget', () => {
+ const body = gemPayload();
+ applyChatRulesToGemini(body, { // 'high' keeps its name; the renamed `none` -> 'minimal' case is not exercised in this file
+ reasoning: { effort: 'high', budget_tokens: 1024 },
+ verbosity: 'medium',
+ serviceTier: 'flex',
+ });
+ assertEquals(body.generationConfig?.thinkingConfig?.thinkingLevel, 'high');
+ assertEquals(body.generationConfig?.thinkingConfig?.thinkingBudget, 1024);
+ assertEquals(body.generationConfig?.verbosity, 'medium');
+ assertEquals(body.generationConfig?.serviceTier, 'flex');
+});
+
+test('gemini: adaptive=true encodes thinkingBudget=-1 and overrides any budget_tokens', () => {
+ const body = gemPayload();
+ applyChatRulesToGemini(body, { reasoning: { adaptive: true, budget_tokens: 9999 } });
+ assertEquals(body.generationConfig?.thinkingConfig?.thinkingBudget, -1); // -1, not 9999: the adaptive branch wins
+});
+
+test('gemini: alias rules overwrite existing generationConfig fields', () => {
+ const body = gemPayload({ generationConfig: { thinkingConfig: { thinkingBudget: 100, thinkingLevel: 'low' }, verbosity: 'high', serviceTier: 'default' } }); // every value differs from the rule below
+ applyChatRulesToGemini(body, { reasoning: { effort: 'xhigh', budget_tokens: 2048 }, verbosity: 'low', serviceTier: 'priority' });
+ assertEquals(body.generationConfig?.thinkingConfig?.thinkingLevel, 'xhigh');
+ assertEquals(body.generationConfig?.thinkingConfig?.thinkingBudget, 2048);
+ assertEquals(body.generationConfig?.verbosity, 'low');
+ assertEquals(body.generationConfig?.serviceTier, 'priority');
+});
diff --git a/packages/gateway/src/data-plane/model-aliases/resolve.ts b/packages/gateway/src/data-plane/model-aliases/resolve.ts
new file mode 100644
index 000000000..6ea5c73c0
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/resolve.ts
@@ -0,0 +1,132 @@
+// Alias resolver. Runs once per request, above prefix routing. The target
+// string it returns is fed verbatim back into the existing prefix-router
+// (enumerateModelInterpretations → resolveModelForProvider); alias names
+// never re-enter the alias layer, so recursion is impossible by
+// construction and the shadow-the-real-model pattern (an alias whose first
+// target is its own name) Just Works.
+
+import { createPerRequestFetcher } from '../../dial/per-request.ts';
+import type { ModelAliasesRepo, ModelAliasRecord } from '../../repo/types.ts';
+import { collectInterpretationOutcomes, enumerateModelInterpretations, listModelProviders } from '../providers/registry.ts';
+import type { BackgroundScheduler } from '@floway-dev/platform';
+import type { AliasKind, AliasRules, ModelEndpointKey } from '@floway-dev/protocols/common';
+
+// Endpoint family the inbound request belongs to. Mirrors `AliasKind` but
+// named in the data-plane vocabulary so the resolver argument site reads as
+// "this is the request's endpoint group", not "this is some alias".
+export type AliasEndpointKind = AliasKind;
+
+// The endpoints (`ModelEndpoints` keys) an inbound `AliasEndpointKind` will
+// accept. A target row is considered routable when it resolves to a binding
+// whose `endpoints` map contains any one of these keys. Chat aliases accept
+// any chat surface — the source serve will pick the actual upstream target
+// API when it runs.
+const ENDPOINTS_FOR_KIND: Record<AliasEndpointKind, readonly ModelEndpointKey[]> = {
+  chat: ['chatCompletions', 'responses', 'messages'],
+  embedding: ['embeddings'],
+  image: ['imagesGenerations', 'imagesEdits'],
+};
+
+export interface AliasResolution {
+ readonly targetModelId: string; // `target_model_id` of the target row that was picked
+ readonly rules: AliasRules; // rules carried by that same target row
+ // Original alias name, for the `x-floway-alias` response header and dump
+ // attribution.
+ readonly aliasName: string;
+}
+
+// Raised by `resolveAlias` when the alias record exists (and its kind
+// matches) but none of its targets currently resolves to an enabled upstream
+// binding exposing the inbound endpoint. Caught at each protocol's serve
+// seam and surfaced as a 404 in the protocol-specific error envelope.
+export class AliasNoTargetAvailableError extends Error {
+ readonly aliasName: string;
+ readonly targetCount: number; // configured targets on the alias, routable or not
+
+ constructor(aliasName: string, targetCount: number) {
+ super(`alias '${aliasName}' has ${targetCount} target(s); none currently map to an enabled upstream binding`);
+ this.name = 'AliasNoTargetAvailableError';
+ this.aliasName = aliasName;
+ this.targetCount = targetCount;
+ }
+}
+
+// Lifts an `AliasNoTargetAvailableError` into the `alias-no-target-available`
+// `ChatServeFailure` variant, copying the alias name and configured target
+// count, so a chat serve can route it through its existing failure renderer
+// without special-casing the error type.
+export const aliasFailureFromError = (error: AliasNoTargetAvailableError): { kind: 'alias-no-target-available'; aliasName: string; targetCount: number } => {
+  const { aliasName, targetCount } = error;
+  return { kind: 'alias-no-target-available', aliasName, targetCount };
+};
+
+interface ResolveAliasArgs {
+ readonly modelName: string; // inbound model name; looked up verbatim as the alias name
+ readonly endpointKind: AliasEndpointKind; // must equal the alias record's `kind` to match
+ // Upstream cap intersected from the per-user + per-api-key whitelists.
+ // null means unrestricted; matches the same parameter on
+ // `enumerateProviderCandidates` / `listModelProviders`.
+ readonly upstreamIds: readonly string[] | null;
+ readonly scheduler: BackgroundScheduler; // handed through to `collectInterpretationOutcomes`
+ readonly currentColo: string; // handed to `createPerRequestFetcher`
+ // Injected so tests can hand in a stub; the per-request ctx already owns
+ // a concrete one via `getRepo().modelAliases`.
+ readonly repo: ModelAliasesRepo;
+}
+
+// Reports true when `targetModelId` resolves to at least one enabled upstream
+// binding that exposes any endpoint the inbound `endpointKind` accepts.
+// Mirrors the resolution path `enumerateProviderCandidates` takes, narrowed
+// to a yes/no answer. `providers` and `fetcherForUpstream` do not depend on
+// the target, so the caller builds them once and passes them in.
+const candidateIsRoutable = async (
+  targetModelId: string,
+  endpointKind: AliasEndpointKind,
+  providers: Awaited<ReturnType<typeof listModelProviders>>,
+  fetcherForUpstream: Awaited<ReturnType<typeof createPerRequestFetcher>>,
+  scheduler: BackgroundScheduler,
+): Promise<boolean> => {
+  const interpretations = enumerateModelInterpretations(targetModelId, providers);
+  const { resolutions } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
+  const accepted = ENDPOINTS_FOR_KIND[endpointKind];
+  return resolutions.some(({ resolved }) =>
+    accepted.some(key => resolved.binding.upstreamModel.endpoints[key] !== undefined));
+};
+
+// Pre-pick the available pool ONCE. Order is preserved so
+// selection=first-available picks deterministically; selection=random picks
+// uniformly within whatever subset survived availability filtering.
+const buildAvailablePool = async (
+  record: ModelAliasRecord,
+  endpointKind: AliasEndpointKind,
+  upstreamIds: readonly string[] | null,
+  scheduler: BackgroundScheduler,
+  currentColo: string,
+): Promise<ModelAliasRecord['targets']> => {
+  // Loop-invariant inputs: one fetcher and one provider listing serve every target.
+  const [fetcherForUpstream, providers] = await Promise.all([createPerRequestFetcher(currentColo), listModelProviders(upstreamIds)]);
+  if (providers.length === 0) return []; // no provider in scope: nothing can be routable
+  const availability = await Promise.all(record.targets.map(target =>
+    candidateIsRoutable(target.target_model_id, endpointKind, providers, fetcherForUpstream, scheduler)));
+  return record.targets.filter((_, index) => availability[index]);
+};
+
+// Resolves `modelName` through the alias repo: null on a miss or kind mismatch; throws when no target is routable.
+export const resolveAlias = async (args: ResolveAliasArgs): Promise<AliasResolution | null> => {
+  const { modelName, endpointKind, upstreamIds, scheduler, currentColo, repo } = args;
+  const record = await repo.getByName(modelName);
+  if (!record) return null;
+
+  // Kind-mismatch is silent: the literal string falls through to prefix
+  // routing, which will 404 on its own if nothing in the catalog matches.
+  // Mirrors the "unknown model" surface a plain id would produce.
+  if (record.kind !== endpointKind) return null;
+
+  const pool = await buildAvailablePool(record, endpointKind, upstreamIds, scheduler, currentColo);
+  if (pool.length === 0) throw new AliasNoTargetAvailableError(record.name, record.targets.length);
+
+  const picked = record.selection === 'first-available'
+    ? pool[0]
+    : pool[Math.floor(Math.random() * pool.length)];
+  return { targetModelId: picked.target_model_id, rules: picked.rules, aliasName: record.name };
+};
diff --git a/packages/gateway/src/data-plane/model-aliases/resolve_test.ts b/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
new file mode 100644
index 000000000..428b339d1
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
@@ -0,0 +1,241 @@
+// Behavioral coverage for the alias resolver. Mocks `providers/registry.ts`
+// + the per-request fetcher so each test can hand-script which target
+// model ids look routable; the resolver itself runs unmocked, so its
+// filter logic (kind match, availability, selection strategy) is the
+// thing under test.
+
+import { test, vi } from 'vitest';
+
+import type { ModelAliasRecord, ModelAliasesRepo } from '../../repo/types.ts';
+import { stubAuthedContext } from '../../test-helpers/gateway-ctx.ts';
+import type { ModelInterpretation, ProviderModelResolution } from '../providers/registry.ts';
+import { directFetcher } from '@floway-dev/provider';
+import { assert, assertEquals, assertRejects } from '@floway-dev/test-utils';
+
+// Avoid the real `listModelProviders` (which reads the global repo) and the
+// real `collectInterpretationOutcomes` (which goes through the per-request
+// fetcher cache). The mocks let each test stamp which target_model_ids are
+// "routable" right now and which endpoints they expose.
+const routableModels = new Map<string, { endpoints: Record<string, unknown> }>();
+const ALWAYS_ROUTABLE_ENDPOINTS = { chatCompletions: {}, responses: {}, messages: {} };
+
+vi.mock('../providers/registry.ts', () => ({
+ listModelProviders: vi.fn(async () => [{ upstream: 'u_test', name: 'u_test', modelPrefix: null }]), // always one provider
+ enumerateModelInterpretations: vi.fn((modelId: string, providers: readonly { upstream: string }[]): ModelInterpretation[] =>
+ providers.map(p => ({ provider: p, lookupId: modelId } as unknown as ModelInterpretation))), // one interpretation per provider, id unchanged
+ collectInterpretationOutcomes: vi.fn(async (interpretations: readonly { provider: { upstream: string }; lookupId: string }[]) => ({
+ resolutions: interpretations
+ .filter(i => routableModels.has(i.lookupId)) // only ids registered in `routableModels` resolve
+ .map(i => ({
+ interpretation: i,
+ provider: i.provider,
+ resolved: {
+ id: i.lookupId,
+ model: { id: i.lookupId, endpoints: routableModels.get(i.lookupId)!.endpoints },
+ binding: { upstream: i.provider.upstream, upstreamModel: { id: i.lookupId, endpoints: routableModels.get(i.lookupId)!.endpoints } },
+ } as unknown as ProviderModelResolution,
+ })),
+ failedUpstreams: [],
+ })),
+}));
+
+vi.mock('../../dial/per-request.ts', () => ({
+ createPerRequestFetcher: vi.fn(async () => () => directFetcher), // every upstream gets `directFetcher`
+}));
+
+const { resolveAlias, AliasNoTargetAvailableError } = await import('./resolve.ts'); // module under test; the resolver itself is not mocked
+
+const stubRepoFor = (record: ModelAliasRecord | null): ModelAliasesRepo => ({
+  list: async () => (record ? [record] : []),
+  getByName: async name => (record?.name === name ? structuredClone(record) : null), // clone keeps the fixture untouched
+  insert: async () => { throw new Error('insert should not be called from resolver tests'); },
+  update: async () => { throw new Error('update should not be called from resolver tests'); },
+  delete: async () => false,
+  deleteAll: async () => {},
+});
+
+const aliasRecord = (overrides: Partial<ModelAliasRecord> = {}): ModelAliasRecord => ({
+  name: 'gpt-fast',
+  kind: 'chat',
+  selection: 'first-available',
+  displayName: null,
+  visibleInModelsList: true,
+  targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } }],
+  sortOrder: 0,
+  createdAt: '2026-06-26T00:00:00.000Z',
+  updatedAt: '2026-06-26T00:00:00.000Z',
+  ...overrides, // spread last so a test can replace any default field
+});
+
+const RESOLVE_DEFAULTS = {
+ endpointKind: 'chat' as const,
+ upstreamIds: null, // null means unrestricted
+ scheduler: () => {}, // no-op scheduler
+ currentColo: 'TEST',
+};
+
+const setRoutable = (...ids: string[]): void => {
+  routableModels.clear(); // replace, not extend: the catalog becomes exactly `ids`
+  ids.forEach(id => routableModels.set(id, { endpoints: ALWAYS_ROUTABLE_ENDPOINTS }));
+};
+
+// Silence the unused-ctx warning helpers
+void stubAuthedContext;
+
+test('returns null when no alias matches the inbound name', async () => {
+  // The repo holds no record at all, so the lookup misses even though a
+  // model is routable in the catalog.
+  setRoutable('gpt-5.4');
+  const repo = stubRepoFor(null);
+  const resolution = await resolveAlias({ ...RESOLVE_DEFAULTS, modelName: 'not-an-alias', repo });
+
+  assertEquals(resolution, null);
+});
+
+test('returns the target and rules when kind matches and a single target is available', async () => {
+  // Default fixture: one chat-kind target (`gpt-5.4`) carrying a low-effort rule.
+  setRoutable('gpt-5.4');
+  const repo = stubRepoFor(aliasRecord());
+  const resolution = await resolveAlias({ ...RESOLVE_DEFAULTS, modelName: 'gpt-fast', repo });
+
+  assert(resolution !== null);
+  const { targetModelId, aliasName, rules } = resolution;
+  assertEquals(targetModelId, 'gpt-5.4');
+  assertEquals(aliasName, 'gpt-fast');
+  assertEquals(rules, { reasoning: { effort: 'low' } });
+});
+
+test('returns null when the alias kind does not match the inbound endpoint group', async () => {
+  // The fixture alias is chat-kind; asking on the embedding surface must not
+  // match, even though its target is routable.
+  setRoutable('gpt-5.4');
+  const repo = stubRepoFor(aliasRecord());
+
+  const resolution = await resolveAlias({ ...RESOLVE_DEFAULTS, endpointKind: 'embedding', modelName: 'gpt-fast', repo });
+
+  assertEquals(resolution, null);
+});
+
+test('throws AliasNoTargetAvailableError when the alias exists but no target is currently routable', async () => {
+  setRoutable(); // catalog empty
+  const record = aliasRecord({
+    targets: [
+      { target_model_id: 'gpt-5.4', rules: {} },
+      { target_model_id: 'gpt-5.5', rules: {} },
+    ],
+  });
+  const repo = stubRepoFor(record);
+
+  // The message reports the configured target count (2), not the routable count (0).
+  await assertRejects(
+    () => resolveAlias({ ...RESOLVE_DEFAULTS, modelName: 'gpt-fast', repo }),
+    AliasNoTargetAvailableError,
+    "alias 'gpt-fast' has 2 target(s); none currently map to an enabled upstream binding",
+  );
+});
+
+test('first-available skips unroutable rows and picks the first available, not the first listed', async () => {
+  setRoutable('gpt-5.5'); // `gpt-5.4` is not in the catalog
+  const record = aliasRecord({
+    targets: [
+      { target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'high' } } },
+      { target_model_id: 'gpt-5.5', rules: { reasoning: { effort: 'low' } } },
+      { target_model_id: 'gpt-6', rules: {} },
+    ],
+  });
+
+  const resolution = await resolveAlias({ ...RESOLVE_DEFAULTS, modelName: 'gpt-fast', repo: stubRepoFor(record) });
+
+  // The winning row's rules travel with it: `low` belongs to gpt-5.5, not gpt-5.4.
+  assert(resolution !== null);
+  assertEquals(resolution.targetModelId, 'gpt-5.5');
+  assertEquals(resolution.rules, { reasoning: { effort: 'low' } });
+});
+
+test('random selection picks every available target across enough iterations', async () => {
+  setRoutable('a', 'b');
+  // The fixture is identical on every iteration, so the repo is built once
+  // instead of once per loop pass.
+  const repo = stubRepoFor(aliasRecord({
+    selection: 'random',
+    targets: [
+      { target_model_id: 'a', rules: {} },
+      { target_model_id: 'b', rules: {} },
+    ],
+  }));
+  const seen = new Set<string>();
+
+  for (let i = 0; i < 100 && seen.size < 2; i += 1) {
+    const result = await resolveAlias({ ...RESOLVE_DEFAULTS, modelName: 'gpt-fast', repo });
+    assert(result !== null);
+    seen.add(result.targetModelId);
+  }
+
+  // Two targets, both routable, 100 iterations: hitting only one is a
+  // (1/2)^99 fluke. Treat anything less than two distinct picks as a real
+  // regression in the selection logic, not coincidence.
+  assertEquals(seen.size, 2);
+});
+
+test('shadow pattern: alias whose first target equals its own name picks the real model when present', async () => {
+  setRoutable('codex-auto-review'); // the real model IS in the catalog
+  const shadow = aliasRecord({
+    name: 'codex-auto-review',
+    targets: [
+      { target_model_id: 'codex-auto-review', rules: {} },
+      { target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } },
+    ],
+  });
+
+  const resolution = await resolveAlias({ ...RESOLVE_DEFAULTS, modelName: 'codex-auto-review', repo: stubRepoFor(shadow) });
+
+  // First-available lands on the self-named row, so no fallback rules apply.
+  assert(resolution !== null);
+  assertEquals(resolution.targetModelId, 'codex-auto-review');
+  assertEquals(resolution.rules, {});
+});
+
+test('shadow pattern: alias falls back to the second target when the real model is not in the catalog', async () => {
+  setRoutable('gpt-5.4'); // only the fallback is routable
+  const shadow = aliasRecord({
+    name: 'codex-auto-review',
+    targets: [
+      { target_model_id: 'codex-auto-review', rules: {} },
+      { target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } },
+    ],
+  });
+
+  const resolution = await resolveAlias({ ...RESOLVE_DEFAULTS, modelName: 'codex-auto-review', repo: stubRepoFor(shadow) });
+
+  // The self-named row is skipped, so the fallback row and its rules win.
+  assert(resolution !== null);
+  assertEquals(resolution.targetModelId, 'gpt-5.4');
+  assertEquals(resolution.rules, { reasoning: { effort: 'low' } });
+});
+
+test('embedding-kind alias accepts only embedding endpoints', async () => {
+  routableModels.clear();
+  routableModels.set('text-embedding-3', { endpoints: { embeddings: {} } });
+  routableModels.set('gpt-5.4', { endpoints: ALWAYS_ROUTABLE_ENDPOINTS });
+
+  // Both calls go through the same embedding-kind alias on the embedding
+  // surface; only the single target row differs between them.
+  const resolveEmbedFast = (targetModelId: string) => resolveAlias({
+    ...RESOLVE_DEFAULTS,
+    endpointKind: 'embedding',
+    modelName: 'embed-fast',
+    repo: stubRepoFor(aliasRecord({
+      name: 'embed-fast',
+      kind: 'embedding',
+      targets: [{ target_model_id: targetModelId, rules: {} }],
+    })),
+  });
+
+  const okResult = await resolveEmbedFast('text-embedding-3');
+  assert(okResult !== null);
+  assertEquals(okResult.targetModelId, 'text-embedding-3');
+
+  // gpt-5.4 is in the catalog but only exposes chat endpoints, so it
+  // cannot satisfy an embedding-kind alias.
+  await assertRejects(() => resolveEmbedFast('gpt-5.4'), AliasNoTargetAvailableError);
+});
diff --git a/packages/gateway/src/data-plane/model-aliases/serve-integration.ts b/packages/gateway/src/data-plane/model-aliases/serve-integration.ts
new file mode 100644
index 000000000..f14308403
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/serve-integration.ts
@@ -0,0 +1,102 @@
+// Per-protocol alias preamble helpers. Each protocol's serve calls its
+// `resolveAndApplyAliasFor*` helper immediately after parsing the inbound
+// payload and before `enumerateProviderCandidates`. The helper:
+//   1. looks up the inbound model name in the alias repo,
+//   2. on a hit whose kind matches `chat`, picks one target row,
+//   3. stages the `x-floway-alias` response header on the gateway ctx,
+//   4. substitutes the resolved target_model_id for the model name (written
+//      to `payload.model`, or returned when the id is not in the body), and
+//   5. stamps the row's rules onto the payload (overwriting any matching field).
+//
+// When the inbound name is not an alias of kind=chat the payload is left
+// untouched; callers continue with the literal name and the catalog's miss
+// surface renders if nothing matches. Throws `AliasNoTargetAvailableError`
+// when the alias exists but every target is currently unroutable — caught
+// at the serve seam and rendered via the protocol's failure renderer.
+
+import { applyChatRulesToChatCompletions, applyChatRulesToGemini, applyChatRulesToMessages, applyChatRulesToResponses } from './apply.ts';
+import { resolveAlias, type AliasResolution } from './resolve.ts';
+import { getRepo } from '../../repo/index.ts';
+import type { GatewayCtx } from '../chat/shared/gateway-ctx.ts';
+import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
+import type { ChatAliasRules } from '@floway-dev/protocols/common';
+import type { GeminiPayload } from '@floway-dev/protocols/gemini';
+import type { MessagesPayload } from '@floway-dev/protocols/messages';
+import type { ResponsesPayload } from '@floway-dev/protocols/responses';
+
+const ALIAS_RESPONSE_HEADER = 'x-floway-alias';
+
+// Common preamble: resolve the alias against the request's chat endpoint
+// group and stage the response header. Returns the resolution (or null) so
+// the caller can apply rules through its protocol's overlay helper. The
+// chat-kind check lives inside the resolver — a kind mismatch silently
+// returns null here.
+const resolveChatAlias = async (modelName: string, ctx: GatewayCtx): Promise<AliasResolution | null> => {
+  const resolution = await resolveAlias({
+    modelName,
+    endpointKind: 'chat',
+    upstreamIds: ctx.upstreamIds,
+    scheduler: ctx.backgroundScheduler,
+    currentColo: ctx.currentColo,
+    repo: getRepo().modelAliases,
+  });
+  if (resolution !== null) ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, resolution.aliasName);
+  return resolution;
+};
+
+// Narrows the generic `AliasRules` union to `ChatAliasRules` for the
+// per-protocol overlays used by all four `resolveAndApplyAliasFor*` helpers.
+// Today every chat-kind alias target carries `ChatAliasRules` per the wire
+// schema, so the cast is unchecked at runtime but unavoidable here.
+const asChatRules = (rules: AliasResolution['rules']): ChatAliasRules => rules as ChatAliasRules;
+
+export const resolveAndApplyAliasForChatCompletions = async (payload: ChatCompletionsPayload, ctx: GatewayCtx): Promise<void> => {
+  const resolution = await resolveChatAlias(payload.model, ctx);
+  if (!resolution) return; // not a chat alias: payload stays as received
+  payload.model = resolution.targetModelId; // in-place substitution; the caller reads `payload.model` afterwards
+  applyChatRulesToChatCompletions(payload, asChatRules(resolution.rules));
+};
+
+export const resolveAndApplyAliasForResponses = async (payload: ResponsesPayload, ctx: GatewayCtx): Promise<void> => {
+  const resolution = await resolveChatAlias(payload.model, ctx);
+  if (!resolution) return; // not a chat alias: payload stays as received
+  payload.model = resolution.targetModelId; // in-place substitution; the caller reads `payload.model` afterwards
+  applyChatRulesToResponses(payload, asChatRules(resolution.rules));
+};
+
+export const resolveAndApplyAliasForMessages = async (payload: MessagesPayload, ctx: GatewayCtx): Promise<void> => {
+  const resolution = await resolveChatAlias(payload.model, ctx);
+  if (!resolution) return; // not a chat alias: payload stays as received
+  payload.model = resolution.targetModelId; // in-place substitution; the caller reads `payload.model` afterwards
+  applyChatRulesToMessages(payload, asChatRules(resolution.rules));
+};
+
+// Gemini's model id is carried on the URL path, not the body — the caller
+// passes it in alongside the payload and gets the resolved id back so it
+// can substitute into the candidate-enumeration call. The payload is still
+// mutated in place to overlay rules.
+export const resolveAndApplyAliasForGemini = async (model: string, payload: GeminiPayload, ctx: GatewayCtx): Promise<string> => {
+  const resolution = await resolveChatAlias(model, ctx);
+  if (!resolution) return model;
+  applyChatRulesToGemini(payload, asChatRules(resolution.rules));
+  return resolution.targetModelId;
+};
+
+// Passthrough endpoints (embeddings, images) don't carry rules today; the
+// resolver still runs to substitute the target id and stage the response
+// header. Returns the resolved target_model_id (or the original name on
+// miss). Throws `AliasNoTargetAvailableError` on the all-unroutable case
+// like the chat helpers do.
+export const resolveAliasForPassthrough = async (model: string, endpointKind: 'embedding' | 'image', ctx: GatewayCtx): Promise<string> => {
+  const resolution = await resolveAlias({
+    modelName: model,
+    endpointKind,
+    upstreamIds: ctx.upstreamIds,
+    scheduler: ctx.backgroundScheduler,
+    currentColo: ctx.currentColo,
+    repo: getRepo().modelAliases,
+  });
+  if (resolution === null) return model;
+  ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, resolution.aliasName);
+  return resolution.targetModelId;
+};
diff --git a/packages/gateway/src/test-helpers/gateway-ctx.ts b/packages/gateway/src/test-helpers/gateway-ctx.ts
index 43358b7fb..782d87370 100644
--- a/packages/gateway/src/test-helpers/gateway-ctx.ts
+++ b/packages/gateway/src/test-helpers/gateway-ctx.ts
@@ -24,5 +24,6 @@ export const mockGatewayCtx = (overrides: Partial = {}): GatewayCtx
dump: null,
backgroundScheduler: promise => { void promise; },
requestStartedAt: 0,
+ responseHeaders: new Headers(),
...overrides,
});
From 67ec5a2380de8778664abd4627585568c7e469e4 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 18:18:46 +0800
Subject: [PATCH 051/170] feat(aliases): /v1/models listing + protocol shape
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
Make operator-defined aliases visible in every model-listing surface
without changing how they resolve at request time. The path is the
same one a real catalog row takes — `PublicModel` + an extra
`aliasedFrom` block that an alias-aware UI uses to render the
alias-of relationship inline.
Protocol surface
- `PublicModelAliasedFrom` carries `name`, `kind`, `selection`, and
every configured `targets[]` row — including targets that the live
 catalog cannot currently serve. The dashboard needs the full
configuration so it can warn about unavailable targets without a
second control-plane round trip.
- Pure display helpers move next to `ChatAliasRules`:
`composeAliasDisplayName(targetId, rules)`,
`formatAliasRulesInline(rules)`, `formatAliasRuleBadges(rules)`.
All three walk one ordered builder so an alias configured with
`effort + verbosity` reads identically as a badge sequence, as a
comma-joined caption, and as the parenthesized suffix on the
derived display name. Boolean toggles render as dedicated words
(`adaptive`, `non-adaptive`, `mandatory reasoning`) so the inline
prose stays readable when several rules are set together.
Core synthesis
- `synthesizeListedAliases({ aliases, realModels })` is a pure
transformation: one `PublicModel` per visible alias, hidden aliases
(`visible_in_models_list = false`) dropped, entries sorted by
`(sort_order, name)`.
- Single-target aliases inherit their sole target's `chat`/`cost`
metadata, narrowed by rules — a fixed `reasoning.effort` collapses
the reported effort set to that one value, a fixed budget collapses
the reported range to a single point.
- Multi-target aliases use the INTERSECTION across every available
target. A capability survives only when every available target
declares it, so whichever target gets picked at request time is
guaranteed to support the catalog's claims. Unavailable targets
(not in `realModels`, or kind-mismatched) are excluded from the
intersection but still appear in `aliasedFrom.targets`.
- Aliases with no available targets still emit a row — capability
metadata stays absent so clients see no inherited claims; the
dashboard's no-target warning explains the situation.
Wire-up
- `loadModels` now loads the alias repo alongside `getModels` and
merges the two lists. Alias entries follow real entries; an alias
whose `name` collides with a real id replaces the real entry in
the final catalog, because two entries with the same `id` would
break OpenAI client deduplication and the operator added the alias
deliberately.
- The same merge is applied at `/api/models` (dashboard, where alias
rows surface an empty `upstreams: []` since they have no upstream
binding of their own) and `/v1beta/models` (Gemini, narrowed to
chat-kind entries).
Tests
- `aliases_test.ts` exercises every helper, including the
parenthesization rule (no rule = bare id, any rule = parens) and
the canonical field order.
- `alias-listing_test.ts` covers single-target narrowing,
multi-target intersection (including drop-on-mismatch and
unavailable-target subsets), the visibility gate, kind-mismatch
exclusion, the no-target row, the operator-set display-name
override, the alias-vs-real id collision, and the
`(sort_order, name)` ordering.
- `serve_test.ts` gains an integration case that asserts the merged
payload at `/v1/models` carries `aliasedFrom`, the alias entry
replaces the colliding real id, and hidden aliases stay out.
---
.../src/control-plane/models/routes.ts | 26 ++-
.../src/data-plane/models/alias-listing.ts | 201 ++++++++++++++++++
.../data-plane/models/alias-listing_test.ts | 186 ++++++++++++++++
.../gateway/src/data-plane/models/gemini.ts | 33 ++-
.../gateway/src/data-plane/models/load.ts | 21 +-
.../src/data-plane/models/load_test.ts | 5 +
.../gateway/src/data-plane/models/serve.ts | 3 +-
.../src/data-plane/models/serve_test.ts | 92 ++++++++
packages/protocols/src/common/aliases.ts | 49 +++++
packages/protocols/src/common/aliases_test.ts | 56 +++++
packages/protocols/src/common/models.ts | 18 ++
11 files changed, 675 insertions(+), 15 deletions(-)
create mode 100644 packages/gateway/src/data-plane/models/alias-listing.ts
create mode 100644 packages/gateway/src/data-plane/models/alias-listing_test.ts
create mode 100644 packages/protocols/src/common/aliases_test.ts
diff --git a/packages/gateway/src/control-plane/models/routes.ts b/packages/gateway/src/control-plane/models/routes.ts
index 3a6620496..752fe0105 100644
--- a/packages/gateway/src/control-plane/models/routes.ts
+++ b/packages/gateway/src/control-plane/models/routes.ts
@@ -1,10 +1,12 @@
import type { Context } from 'hono';
+import { synthesizeListedAliases } from '../../data-plane/models/alias-listing.ts';
import { toPublicModel } from '../../data-plane/models/load.ts';
import { MODEL_LISTING_FAILURE_MESSAGE } from '../../data-plane/models/shared.ts';
import { getModels } from '../../data-plane/providers/registry.ts';
import { createPerRequestFetcher } from '../../dial/per-request.ts';
import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
+import { getRepo } from '../../repo/index.ts';
import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
import { getCurrentColo } from '../../runtime/runtime-info.ts';
import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
@@ -15,7 +17,9 @@ import type { ResolvedModel, UpstreamProviderKind } from '@floway-dev/provider';
// `upstreams` lists every provider binding for this model as { kind, id, name }
// triples. A single model id can be served by mixed provider kinds (e.g. one
// azure deployment + one custom upstream both expose `gpt-5.5`), so a flat
-// `provider`/`upstream_ids` split would misrepresent that.
+// `provider`/`upstream_ids` split would misrepresent that. Alias-synthesized
+// rows carry an empty list — they do not bind to an upstream directly; their
+// targets live under `aliasedFrom`.
interface ControlPlaneModel extends PublicModel {
upstreams: { kind: UpstreamProviderKind; id: string; name: string }[];
}
@@ -36,12 +40,20 @@ export const controlPlaneModels = async (c: Context) => {
// API key, so this resolves to the user's per-user upstream cap: a user who
// has had an upstream removed must not see its models in the Models tab.
const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
- const models = await getModels(
- effectiveUpstreamIdsFromContext(c),
- fetcherForUpstream,
- backgroundSchedulerFromContext(c),
- );
- const data = models.map(toControlPlaneModel);
+ const [models, aliases] = await Promise.all([
+ getModels(
+ effectiveUpstreamIdsFromContext(c),
+ fetcherForUpstream,
+ backgroundSchedulerFromContext(c),
+ ),
+ getRepo().modelAliases.list(),
+ ]);
+ const aliasEntries = synthesizeListedAliases({ aliases, realModels: models });
+ const aliasIds = new Set(aliasEntries.map(entry => entry.id));
+ const data: ControlPlaneModel[] = [
+ ...models.filter(model => !aliasIds.has(model.id)).map(toControlPlaneModel),
+ ...aliasEntries.map(entry => ({ ...entry, upstreams: [] })),
+ ];
const response: ControlPlaneModelsResponse = {
object: 'list',
has_more: false,
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
new file mode 100644
index 000000000..70d97fdb3
--- /dev/null
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -0,0 +1,201 @@
+// Synthesizes the alias entries that join the real-model catalog inside
+// `/v1/models`. One PublicModel per visible alias — hidden aliases
+// (visible_in_models_list = false) are dropped from the listing while
+// remaining routable. The synthesized entry carries an `aliasedFrom` block
+// so an alias-aware UI (today: the dashboard) can render the alias-of
+// relationship without a second round trip.
+//
+// Capability metadata is the safe lower bound for the inbound request:
+// • single-target → the sole target's metadata, narrowed by the alias's
+// `rules` (a fixed reasoning effort collapses the reported effort set
+// to that one value, a fixed budget collapses the reported range to
+// a single point).
+// • multi-target → the intersection across every currently-available
+// target. A capability survives only when every target backing the
+// alias declares it; whichever target gets picked at request time is
+// then guaranteed to support whatever the catalog reported.
+//
+// "Available target" for intersection purposes means a target whose
+// `target_model_id` appears in `realModels` AND whose entry's `kind`
+// matches the alias's `kind`. Unavailable targets are excluded from the
+// intersection but still appear in `aliasedFrom.targets` so the dashboard
+// can show the full configuration.
+//
+// Collision: when an alias's `name` exactly equals a real model id, the
+// alias entry replaces the real entry in the final catalog. Two entries
+// with the same `id` would break OpenAI client deduplication; collapsing
+// to the alias entry preserves the operator's intent (the alias is the
+// reason both rows would have been present). The dashboard surfaces this
+// via a shadow warning in the alias editor; here it is purely a wire-shape
+// concern. The real entry is removed at the `loadModels` merge step.
+
+import type { ModelAliasRecord } from '../../repo/types.ts';
+import { composeAliasDisplayName } from '@floway-dev/protocols/common';
+import type { AliasTarget, ChatAliasRules, ChatModelInfo, PublicModel, PublicModelAliasedFrom } from '@floway-dev/protocols/common';
+import type { InternalModel } from '@floway-dev/provider';
+
+export interface ListedAliasInputs {
+ readonly aliases: readonly ModelAliasRecord[];
+ readonly realModels: readonly InternalModel[];
+}
+
+// Reads a target's rules as `ChatAliasRules`. The cast is unchecked: it relies on
+// the repo guaranteeing that rule shape matches the row's `kind` (chat rows carry
+// `ChatAliasRules`; embedding / image rows carry the empty record).
+const chatRules = (target: AliasTarget): ChatAliasRules => target.rules as ChatAliasRules;
+
+// Values of the first array that also occur in every other array, in first-array order; [] for no arrays.
+const intersectArrays = <T>(arrays: readonly (readonly T[])[]): T[] => {
+  const [head, ...tail] = arrays;
+  return head === undefined ? [] : head.filter(value => tail.every(other => other.includes(value)));
+};
+
+// Lower-bound chat metadata for a multi-target alias: a capability is kept only when
+// every entry in `chats` declares it and its intersected value is non-empty.
+// Returns undefined for an empty input or when no capability survives.
+const intersectChat = (chats: readonly ChatModelInfo[]): ChatModelInfo | undefined => {
+  if (chats.length === 0) return undefined;
+  const result: ChatModelInfo = {};
+
+  const modalityChats = chats.filter(c => c.modalities !== undefined);
+  if (modalityChats.length === chats.length) {
+    const input = intersectArrays(modalityChats.map(c => c.modalities!.input));
+    const output = intersectArrays(modalityChats.map(c => c.modalities!.output));
+    if (input.length > 0 || output.length > 0) result.modalities = { input, output };
+  }
+
+  const reasoningChats = chats.filter(c => c.reasoning !== undefined);
+  if (reasoningChats.length === chats.length) {
+    const reasoning: NonNullable<ChatModelInfo['reasoning']> = {};
+
+    const effortChats = reasoningChats.filter(c => c.reasoning!.effort !== undefined);
+    if (effortChats.length === reasoningChats.length) {
+      const supported = intersectArrays(effortChats.map(c => c.reasoning!.effort!.supported));
+      const defaults = new Set(effortChats.map(c => c.reasoning!.effort!.default));
+      // `default` is the agreed value when every target names the same one
+      // and it survives the supported intersection; otherwise it falls back
+      // to the first surviving supported value.
+      if (supported.length > 0) {
+        const agreedDefault = defaults.size === 1 ? [...defaults][0] : undefined;
+        reasoning.effort = agreedDefault !== undefined && supported.includes(agreedDefault)
+          ? { supported, default: agreedDefault }
+          : { supported, default: supported[0] };
+      }
+    }
+
+    const budgetChats = reasoningChats.filter(c => c.reasoning!.budget_tokens !== undefined);
+    if (budgetChats.length === reasoningChats.length) {
+      const mins = budgetChats.map(c => c.reasoning!.budget_tokens!.min).filter((v): v is number => v !== undefined);
+      const maxes = budgetChats.map(c => c.reasoning!.budget_tokens!.max).filter((v): v is number => v !== undefined);
+      const min = mins.length === budgetChats.length ? Math.max(...mins) : undefined;
+      const max = maxes.length === budgetChats.length ? Math.min(...maxes) : undefined;
+      // Drop the budget block entirely when the intersected window is
+      // empty (every caller would otherwise see a contradictory range).
+      if (!(min !== undefined && max !== undefined && min > max)) {
+        const budget: NonNullable<NonNullable<ChatModelInfo['reasoning']>['budget_tokens']> = {};
+        if (min !== undefined) budget.min = min;
+        if (max !== undefined) budget.max = max;
+        if (min !== undefined || max !== undefined) reasoning.budget_tokens = budget;
+      }
+    }
+
+    // Boolean flags survive only when every target reports the same defined value.
+    const adaptiveValues = new Set(reasoningChats.map(c => c.reasoning!.adaptive));
+    const [adaptive] = adaptiveValues;
+    if (adaptiveValues.size === 1 && adaptive !== undefined) reasoning.adaptive = adaptive;
+    const mandatoryValues = new Set(reasoningChats.map(c => c.reasoning!.mandatory));
+    const [mandatory] = mandatoryValues;
+    if (mandatoryValues.size === 1 && mandatory !== undefined) reasoning.mandatory = mandatory;
+
+    if (Object.keys(reasoning).length > 0) result.reasoning = reasoning;
+  }
+
+  return Object.keys(result).length > 0 ? result : undefined;
+};
+
+// Narrows a single target's chat metadata by that target's alias rules: a fixed effort or
+// budget collapses the reported set / range to that one value; everything else passes through.
+const narrowChatByRules = (chat: ChatModelInfo | undefined, target: AliasTarget): ChatModelInfo | undefined => {
+  if (chat === undefined) return undefined;
+  const rules = chatRules(target);
+  if (rules.reasoning === undefined) return chat;
+  const out: ChatModelInfo = { ...chat };
+  if (chat.reasoning !== undefined) { // rules never add a reasoning block the target lacks
+    const reasoning: NonNullable<ChatModelInfo['reasoning']> = { ...chat.reasoning };
+    if (rules.reasoning.effort !== undefined) {
+      const fixed = rules.reasoning.effort;
+      reasoning.effort = { supported: [fixed], default: fixed };
+    }
+    if (rules.reasoning.budget_tokens !== undefined) {
+      const fixed = rules.reasoning.budget_tokens;
+      reasoning.budget_tokens = { min: fixed, max: fixed };
+    }
+    out.reasoning = reasoning;
+  }
+  return out;
+};
+
+// Display-name precedence: operator-set name, then the derived single-target form, then the alias's own name.
+const deriveDisplayName = ({ displayName, name, targets }: ModelAliasRecord): string => {
+  if (displayName !== null) return displayName;
+  return targets.length === 1 ? composeAliasDisplayName(targets[0].target_model_id, targets[0].rules) : name;
+};
+
+// Projects an alias record onto the `aliasedFrom` block carried by its listing entry.
+const buildAliasedFrom = (alias: ModelAliasRecord): PublicModelAliasedFrom => {
+  const { name, kind, selection, targets } = alias;
+  // `targets` is passed through whole — every configured target, including
+  // ones the live catalog can not serve — so the dashboard can show the
+  // full configuration.
+  return { name, kind, selection, targets };
+};
+
+// Builds the catalog row for one alias. Capability metadata is taken from the "available"
+// targets — those present in `realModels` with the alias's kind: exactly one → its chat block
+// narrowed by that target's rules, plus its cost; several → the chat intersection (no cost).
+const synthesizeOne = (alias: ModelAliasRecord, realModels: readonly InternalModel[]): PublicModel => {
+  const realById = new Map(realModels.map(m => [m.id, m] as const));
+  const availableTargets: { target: AliasTarget; real: InternalModel }[] = [];
+  for (const target of alias.targets) {
+    const real = realById.get(target.target_model_id);
+    if (real !== undefined && real.kind === alias.kind) availableTargets.push({ target, real });
+  }
+
+  const entry: PublicModel = {
+    id: alias.name,
+    object: 'model',
+    type: 'model',
+    display_name: deriveDisplayName(alias),
+    limits: {},
+    kind: alias.kind,
+    aliasedFrom: buildAliasedFrom(alias),
+  };
+
+  if (availableTargets.length > 1) {
+    // Intersect only when every available target declares chat metadata; a
+    // half-declared block would leak the metadata of whichever subset carried it.
+    const chats = availableTargets.flatMap(({ real }) => (real.chat !== undefined ? [real.chat] : []));
+    const chat = chats.length === availableTargets.length ? intersectChat(chats) : undefined;
+    if (chat !== undefined) entry.chat = chat;
+    return entry;
+  }
+
+  // No backing target: the row is still emitted so the dashboard can show
+  // the alias with a no-target warning, but capability metadata stays
+  // absent so clients see no inherited claims.
+  const [sole] = availableTargets;
+  if (sole === undefined) return entry;
+
+  const chat = narrowChatByRules(sole.real.chat, sole.target);
+  if (chat !== undefined) entry.chat = chat;
+  if (sole.real.cost !== undefined) entry.cost = sole.real.cost;
+  return entry;
+};
+
+// Copy of `aliases` ordered by ascending sortOrder, ties broken by name (locale compare); the input is not mutated.
+const sortAliases = (aliases: readonly ModelAliasRecord[]): ModelAliasRecord[] => aliases.slice().sort((x, y) => (x.sortOrder - y.sortOrder) || x.name.localeCompare(y.name));
+
+// One PublicModel per visible alias, ordered by (sortOrder, name); aliases with visibleInModelsList = false are skipped.
+export const synthesizeListedAliases = (input: ListedAliasInputs): PublicModel[] => {
+  return sortAliases(input.aliases).flatMap(alias => (alias.visibleInModelsList ? [synthesizeOne(alias, input.realModels)] : []));
+};
diff --git a/packages/gateway/src/data-plane/models/alias-listing_test.ts b/packages/gateway/src/data-plane/models/alias-listing_test.ts
new file mode 100644
index 000000000..99ef91e73
--- /dev/null
+++ b/packages/gateway/src/data-plane/models/alias-listing_test.ts
@@ -0,0 +1,186 @@
+import { describe, expect, test } from 'vitest';
+
+import { synthesizeListedAliases } from './alias-listing.ts';
+import type { ModelAliasRecord } from '../../repo/types.ts';
+import type { InternalModel } from '@floway-dev/provider';
+
+const aliasFixture = (overrides: Partial<ModelAliasRecord> = {}): ModelAliasRecord => ({
+  name: 'gpt-fast',
+  kind: 'chat',
+  selection: 'first-available',
+  displayName: null,
+  visibleInModelsList: true,
+  targets: [{ target_model_id: 'gpt-5.4', rules: {} }],
+  sortOrder: 0,
+  createdAt: '2026-06-26T00:00:00.000Z',
+  updatedAt: '2026-06-26T00:00:00.000Z',
+  ...overrides, // caller-supplied fields win over the defaults above
+});
+
+const realModel = (overrides: Partial<InternalModel> & { id: string }): InternalModel => ({
+  kind: 'chat',
+  limits: {},
+  ...overrides, // `id` is mandatory; any other field may override the chat-kind defaults
+});
+
+describe('synthesizeListedAliases', () => {
+ test('single-target alias narrows reasoning.effort to the fixed value', () => {
+ const aliases = [aliasFixture({
+ name: 'gpt-fast',
+ targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } }],
+ })];
+ const realModels = [realModel({
+ id: 'gpt-5.4',
+ display_name: 'GPT 5.4',
+ chat: {
+ modalities: { input: ['text', 'image'], output: ['text'] },
+ reasoning: { effort: { supported: ['low', 'medium', 'high'], default: 'medium' } },
+ },
+ })];
+
+ const [entry] = synthesizeListedAliases({ aliases, realModels });
+ expect(entry.id).toBe('gpt-fast');
+ expect(entry.display_name).toBe('gpt-5.4 (low effort)');
+ expect(entry.chat?.reasoning?.effort).toEqual({ supported: ['low'], default: 'low' });
+ expect(entry.chat?.modalities).toEqual({ input: ['text', 'image'], output: ['text'] });
+ expect(entry.aliasedFrom).toEqual({
+ name: 'gpt-fast',
+ kind: 'chat',
+ selection: 'first-available',
+ targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } }],
+ });
+ });
+
+ test('single-target alias narrows reasoning.budget_tokens to a single point', () => {
+ const aliases = [aliasFixture({
+ targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { budget_tokens: 4096 } } }],
+ })];
+ const realModels = [realModel({
+ id: 'gpt-5.4',
+ chat: { reasoning: { budget_tokens: { min: 1024, max: 65536 } } },
+ })];
+ const [entry] = synthesizeListedAliases({ aliases, realModels });
+ expect(entry.chat?.reasoning?.budget_tokens).toEqual({ min: 4096, max: 4096 });
+ });
+
+ test('multi-target alias intersects chat.modalities across every target', () => {
+ const aliases = [aliasFixture({
+ name: 'smart-router',
+ targets: [
+ { target_model_id: 'a', rules: {} },
+ { target_model_id: 'b', rules: {} },
+ ],
+ })];
+ const realModels = [
+ realModel({ id: 'a', chat: { modalities: { input: ['text', 'image'], output: ['text'] } } }),
+ realModel({ id: 'b', chat: { modalities: { input: ['text'], output: ['text'] } } }),
+ ];
+ const [entry] = synthesizeListedAliases({ aliases, realModels });
+ expect(entry.id).toBe('smart-router');
+ expect(entry.display_name).toBe('smart-router');
+ expect(entry.chat?.modalities).toEqual({ input: ['text'], output: ['text'] });
+ });
+
+ test('multi-target intersection drops capabilities only one target declares', () => {
+ const aliases = [aliasFixture({
+ targets: [
+ { target_model_id: 'a', rules: {} },
+ { target_model_id: 'b', rules: {} },
+ ],
+ })];
+ const realModels = [
+ realModel({ id: 'a', chat: { reasoning: { effort: { supported: ['low'], default: 'low' } } } }),
+ realModel({ id: 'b', chat: {} }),
+ ];
+ const [entry] = synthesizeListedAliases({ aliases, realModels });
+ expect(entry.chat?.reasoning).toBeUndefined();
+ });
+
+ test('multi-target with an unavailable target intersects over the available subset', () => {
+ const aliases = [aliasFixture({
+ targets: [
+ { target_model_id: 'a', rules: {} },
+ { target_model_id: 'gone', rules: {} },
+ { target_model_id: 'b', rules: {} },
+ ],
+ })];
+ const realModels = [
+ realModel({ id: 'a', chat: { modalities: { input: ['text', 'image'], output: ['text'] } } }),
+ realModel({ id: 'b', chat: { modalities: { input: ['text'], output: ['text', 'image'] } } }),
+ ];
+ const [entry] = synthesizeListedAliases({ aliases, realModels });
+ expect(entry.chat?.modalities).toEqual({ input: ['text'], output: ['text'] });
+ // Every configured target — including the unavailable one — survives in aliasedFrom.
+ expect(entry.aliasedFrom?.targets.map(t => t.target_model_id)).toEqual(['a', 'gone', 'b']);
+ });
+
+ test('hidden alias is not emitted', () => {
+ const aliases = [aliasFixture({ visibleInModelsList: false })];
+ const realModels = [realModel({ id: 'gpt-5.4' })];
+ expect(synthesizeListedAliases({ aliases, realModels })).toEqual([]);
+ });
+
+ test('alias whose name collides with a real id is emitted (loadModels drops the duplicate real)', () => {
+ const aliases = [aliasFixture({
+ name: 'gpt-5.4',
+ targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } }],
+ })];
+ const realModels = [realModel({ id: 'gpt-5.4', display_name: 'GPT 5.4' })];
+ const entries = synthesizeListedAliases({ aliases, realModels });
+ expect(entries).toHaveLength(1);
+ expect(entries[0].id).toBe('gpt-5.4');
+ expect(entries[0].aliasedFrom?.name).toBe('gpt-5.4');
+ });
+
+ test('no available targets still emits an entry with no chat metadata', () => {
+ const aliases = [aliasFixture({
+ name: 'orphan',
+ targets: [{ target_model_id: 'missing', rules: {} }],
+ })];
+ const [entry] = synthesizeListedAliases({ aliases, realModels: [] });
+ expect(entry.id).toBe('orphan');
+ expect(entry.display_name).toBe('missing');
+ expect(entry.chat).toBeUndefined();
+ expect(entry.cost).toBeUndefined();
+ expect(entry.aliasedFrom?.targets).toEqual([{ target_model_id: 'missing', rules: {} }]);
+ });
+
+ test('sorts entries by (sort_order, name) so listing order stays stable', () => {
+ const aliases = [
+ aliasFixture({ name: 'late', sortOrder: 1 }),
+ aliasFixture({ name: 'mid-a', sortOrder: 0 }),
+ aliasFixture({ name: 'mid-b', sortOrder: 0 }),
+ ];
+ const realModels = [realModel({ id: 'gpt-5.4' })];
+ const ids = synthesizeListedAliases({ aliases, realModels }).map(entry => entry.id);
+ expect(ids).toEqual(['mid-a', 'mid-b', 'late']);
+ });
+
+ test('targets whose kind disagrees with the alias are not counted as available', () => {
+ const aliases = [aliasFixture({
+ kind: 'chat',
+ targets: [
+ { target_model_id: 'emb', rules: {} },
+ { target_model_id: 'chat', rules: {} },
+ ],
+ })];
+ const realModels = [
+ realModel({ id: 'emb', kind: 'embedding' }),
+ realModel({ id: 'chat', chat: { modalities: { input: ['text'], output: ['text'] } } }),
+ ];
+ const [entry] = synthesizeListedAliases({ aliases, realModels });
+ // Only the chat target backs the metadata — the embedding row never
+ // enters the intersection / narrowing path.
+ expect(entry.chat?.modalities).toEqual({ input: ['text'], output: ['text'] });
+ });
+
+ test('operator-set display_name wins over the derived form', () => {
+ const aliases = [aliasFixture({
+ displayName: 'My Fast GPT',
+ targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } }],
+ })];
+ const realModels = [realModel({ id: 'gpt-5.4' })];
+ const [entry] = synthesizeListedAliases({ aliases, realModels });
+ expect(entry.display_name).toBe('My Fast GPT');
+ });
+});
diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts
index ab9242bd1..b7b34bd4a 100644
--- a/packages/gateway/src/data-plane/models/gemini.ts
+++ b/packages/gateway/src/data-plane/models/gemini.ts
@@ -1,8 +1,11 @@
import type { Context } from 'hono';
+import { synthesizeListedAliases } from './alias-listing.ts';
import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts';
import { createPerRequestFetcher } from '../../dial/per-request.ts';
import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
+import { getRepo } from '../../repo/index.ts';
+import type { ModelAliasesRepo } from '../../repo/types.ts';
import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
import { getCurrentColo } from '../../runtime/runtime-info.ts';
import { geminiStatusForHttpStatus } from '../chat/gemini/errors.ts';
@@ -62,20 +65,40 @@ const geminiModelLoadError = (error: unknown): Response => {
return geminiError(502, error instanceof Error ? error.message : String(error));
};
+// Mirror loadModels: real models plus chat-kind alias entries, with alias
+// names winning id collisions.
const loadGeminiModels = async (
upstreamFilter: readonly string[] | null,
fetcherForUpstream: (upstreamId: string) => Fetcher,
scheduler: BackgroundScheduler,
+ aliasRepo: ModelAliasesRepo,
): Promise => {
- const models = await getModels(upstreamFilter, fetcherForUpstream, scheduler);
- // Only chat models are representable in the Gemini /models shape.
- return models.filter(model => model.kind === 'chat').map(toGeminiModel);
+ const [models, aliases] = await Promise.all([
+ getModels(upstreamFilter, fetcherForUpstream, scheduler),
+ aliasRepo.list(),
+ ]);
+ const chatModels = models.filter(model => model.kind === 'chat');
+ const aliasEntries = synthesizeListedAliases({ aliases, realModels: models })
+ .filter(entry => entry.kind === 'chat');
+ const aliasIds = new Set(aliasEntries.map(entry => entry.id));
+ const merged: InternalModel[] = [
+ ...chatModels.filter(model => !aliasIds.has(model.id)),
+ ...aliasEntries.map(entry => ({
+ id: entry.id,
+ display_name: entry.display_name,
+ limits: entry.limits,
+ kind: entry.kind,
+ ...(entry.cost !== undefined ? { cost: entry.cost } : {}),
+ ...(entry.chat !== undefined ? { chat: entry.chat } : {}),
+ })),
+ ];
+ return merged.map(toGeminiModel);
};
export const serveGeminiModels = async (c: Context): Promise<Response> => {
try {
const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
- return Response.json({ models: await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c)) });
+ return Response.json({ models: await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), getRepo().modelAliases) });
} catch (error) {
return geminiModelLoadError(error);
}
@@ -88,7 +111,7 @@ export const serveGeminiModelInfo = async (c: Context): Promise => {
const modelId = rawModelId.replace(/^models\//, '');
try {
const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
- const model = (await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c))).find(candidate => candidate.baseModelId === modelId || candidate.name === `models/${modelId}`);
+ const model = (await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), getRepo().modelAliases)).find(candidate => candidate.baseModelId === modelId || candidate.name === `models/${modelId}`);
if (!model) return geminiError(404, `Model not found: ${modelId}`);
return Response.json(model);
} catch (error) {
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index 027ba38db..e0a8b513a 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -1,3 +1,5 @@
+import { synthesizeListedAliases } from './alias-listing.ts';
+import type { ModelAliasesRepo } from '../../repo/types.ts';
import { getModels } from '../providers/registry.ts';
import type { BackgroundScheduler } from '@floway-dev/platform';
import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
@@ -22,13 +24,28 @@ export const toPublicModel = (model: InternalModel): PublicModel => {
return info;
};
+// Merge real-model entries with alias entries synthesized off the operator's
+// alias catalog. An alias whose `name` collides with a real model id wins —
+// two entries with the same `id` would break OpenAI client deduplication, and
+// the alias was added by the operator deliberately, so collapsing to it
+// preserves intent. `synthesizeListedAliases` already produces the alias entry;
+// the merge step drops the real entry with that id.
export const loadModels = async (
upstreamFilter: readonly string[] | null,
fetcherForUpstream: (upstreamId: string) => Fetcher,
scheduler: BackgroundScheduler,
+ aliasRepo: ModelAliasesRepo,
): Promise<PublicModelsResponse> => {
- const models = await getModels(upstreamFilter, fetcherForUpstream, scheduler);
- const data = models.map(toPublicModel);
+ const [realModels, aliases] = await Promise.all([
+ getModels(upstreamFilter, fetcherForUpstream, scheduler),
+ aliasRepo.list(),
+ ]);
+ const aliasEntries = synthesizeListedAliases({ aliases, realModels });
+ const aliasIds = new Set(aliasEntries.map(entry => entry.id));
+ const data: PublicModel[] = [
+ ...realModels.map(toPublicModel).filter(model => !aliasIds.has(model.id)),
+ ...aliasEntries,
+ ];
return {
object: 'list',
has_more: false,
diff --git a/packages/gateway/src/data-plane/models/load_test.ts b/packages/gateway/src/data-plane/models/load_test.ts
index 20560891f..39985c9c4 100644
--- a/packages/gateway/src/data-plane/models/load_test.ts
+++ b/packages/gateway/src/data-plane/models/load_test.ts
@@ -22,3 +22,8 @@ describe('toPublicModel', () => {
expect(toPublicModel({ ...base, chat }).chat).toEqual(chat);
});
});
+
+// The alias merge step inside `loadModels` (alias entries follow real
+// entries, alias names winning id collisions) is exercised through the
+// integration suite in `serve_test.ts` so the assertion observes the same
+// `/v1/models` payload a real client would see.
diff --git a/packages/gateway/src/data-plane/models/serve.ts b/packages/gateway/src/data-plane/models/serve.ts
index 9b8b510f9..a66755b6e 100644
--- a/packages/gateway/src/data-plane/models/serve.ts
+++ b/packages/gateway/src/data-plane/models/serve.ts
@@ -7,6 +7,7 @@ import { loadModels } from './load.ts';
import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts';
import { createPerRequestFetcher } from '../../dial/per-request.ts';
import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
+import { getRepo } from '../../repo/index.ts';
import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
import { getCurrentColo } from '../../runtime/runtime-info.ts';
import { ProviderModelsUnavailableError } from '@floway-dev/provider';
@@ -14,7 +15,7 @@ import { ProviderModelsUnavailableError } from '@floway-dev/provider';
export const models = async (c: Context) => {
try {
const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
- return Response.json(await loadModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c)));
+ return Response.json(await loadModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), getRepo().modelAliases));
} catch (e) {
// Upstream HTTP/parse failures squash to a generic message so we do not
// leak upstream identity. Other registry-thrown errors (e.g. the "no
diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index 1408f10a6..2be79e4c5 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -586,3 +586,95 @@ test('/v1/models returns the last real error when every account model load fails
},
);
});
+
+test('/v1/models appends visible aliases with their aliasedFrom block and folds alias-id collisions onto the alias entry', async () => {
+ const { repo, apiKey } = await setupAppTest();
+ await repo.modelAliases.deleteAll();
+ await repo.upstreams.save(buildCustomUpstreamRecord({
+ id: 'up_oai',
+ name: 'Test OpenAI',
+ sortOrder: 100,
+ config: {
+ baseUrl: 'https://oai.example.com',
+ authStyle: 'bearer',
+ apiKey: 'sk-test',
+ endpoints: { chatCompletions: {} },
+ },
+ }));
+ // Two aliases: one shadows a real id (`gpt-4o`) so the alias entry must
+ // replace the catalog entry; one points at a real id under a brand-new
+ // name (`gpt-fast`).
+ await repo.modelAliases.insert({
+ name: 'gpt-4o',
+ kind: 'chat',
+ selection: 'first-available',
+ displayName: null,
+ visibleInModelsList: true,
+ targets: [{ target_model_id: 'gpt-4o', rules: { reasoning: { effort: 'low' } } }],
+ sortOrder: 1,
+ createdAt: '2026-06-26T00:00:00.000Z',
+ updatedAt: '2026-06-26T00:00:00.000Z',
+ });
+ await repo.modelAliases.insert({
+ name: 'gpt-fast',
+ kind: 'chat',
+ selection: 'first-available',
+ displayName: 'Operator Fast',
+ visibleInModelsList: true,
+ targets: [{ target_model_id: 'gpt-4o-mini', rules: {} }],
+ sortOrder: 0,
+ createdAt: '2026-06-26T00:00:00.000Z',
+ updatedAt: '2026-06-26T00:00:00.000Z',
+ });
+ await repo.modelAliases.insert({
+ name: 'hidden-alias',
+ kind: 'chat',
+ selection: 'first-available',
+ displayName: null,
+ visibleInModelsList: false,
+ targets: [{ target_model_id: 'gpt-4o', rules: {} }],
+ sortOrder: 2,
+ createdAt: '2026-06-26T00:00:00.000Z',
+ updatedAt: '2026-06-26T00:00:00.000Z',
+ });
+
+ await withMockedFetch(
+ request => {
+ const url = new URL(request.url);
+ if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+ if (url.pathname === '/copilot_internal/v2/token') {
+ return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+ }
+ if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') {
+ return jsonResponse(copilotModels([]));
+ }
+ if (url.pathname === '/v1/models' && url.hostname === 'oai.example.com') {
+ return jsonResponse({ object: 'list', data: [{ id: 'gpt-4o' }, { id: 'gpt-4o-mini' }] });
+ }
+ throw new Error(`Unhandled fetch ${request.url}`);
+ },
+ async () => {
+ const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
+ assertEquals(response.status, 200);
+ const body = (await response.json()) as { data: Array<{ id: string; display_name: string; aliasedFrom?: { name: string; selection: string } }> };
+ const ids = body.data.map(model => model.id);
+
+ // Real `gpt-4o` is replaced by the alias of the same name. Alias
+ // entries are appended after the real-only entries, so `gpt-4o-mini`
+ // (still a real id) stays ahead of both visible aliases, and the
+ // hidden alias never appears.
+ assertEquals(ids.includes('gpt-4o-mini'), true);
+ assertEquals(ids.filter(id => id === 'gpt-4o').length, 1);
+ assertEquals(ids.includes('hidden-alias'), false);
+
+ const collided = body.data.find(model => model.id === 'gpt-4o')!;
+ assertEquals(collided.aliasedFrom?.name, 'gpt-4o');
+ assertEquals(collided.aliasedFrom?.selection, 'first-available');
+ assertEquals(collided.display_name, 'gpt-4o (low effort)');
+
+ const fast = body.data.find(model => model.id === 'gpt-fast')!;
+ assertEquals(fast.aliasedFrom?.name, 'gpt-fast');
+ assertEquals(fast.display_name, 'Operator Fast');
+ },
+ );
+});
diff --git a/packages/protocols/src/common/aliases.ts b/packages/protocols/src/common/aliases.ts
index ac212f91e..6958bd8e8 100644
--- a/packages/protocols/src/common/aliases.ts
+++ b/packages/protocols/src/common/aliases.ts
@@ -77,3 +77,52 @@ export interface ModelAlias {
created_at: string;
updated_at: string;
}
+
+// Inline-prose parts for an alias's rules, in the canonical field order. The
+// same builder backs `formatAliasRulesInline` (joins with `, ` for a single
+// summary string) and `formatAliasRuleBadges` (one badge per part). Keeping
+// every surface — inline copy, badge sequence, parenthesized suffix in the
+// derived display name — on a single ordered walk means an operator who
+// configures `effort + verbosity` sees them in the same order whether the
+// dashboard renders badges or a comma-joined caption.
+const aliasRulePartLabels = (rules: AliasRules): string[] => {
+  const { reasoning, verbosity, serviceTier } = rules as ChatAliasRules;
+  const candidates: (string | undefined)[] = [ // one slot per rule field, in canonical order
+    reasoning?.effort !== undefined ? `${reasoning.effort} effort` : undefined,
+    reasoning?.budget_tokens !== undefined ? `${reasoning.budget_tokens}tok budget` : undefined,
+    reasoning?.adaptive === true ? 'adaptive' : reasoning?.adaptive === false ? 'non-adaptive' : undefined,
+    reasoning?.mandatory === true ? 'mandatory reasoning' : undefined,
+    reasoning?.summary !== undefined ? `summary: ${reasoning.summary}` : undefined,
+    verbosity !== undefined ? `${verbosity} verbosity` : undefined,
+    serviceTier !== undefined ? `${serviceTier} tier` : undefined,
+  ];
+  return candidates.filter((part): part is string => part !== undefined);
+};
+
+// One badge per configured rule field, in the canonical order. `value` is
+// reserved for callers that want to render a separate value pill alongside
+// the label; today every part already self-describes through `label`, so
+// `value` stays undefined.
+export interface AliasRuleBadge {
+  label: string; // self-describing text for one rule part, e.g. "low effort"
+  value?: string; // reserved for a separate value pill; `formatAliasRuleBadges` leaves it unset
+}
+
+export const formatAliasRuleBadges = (rules: AliasRules): AliasRuleBadge[] =>
+  Array.from(aliasRulePartLabels(rules), label => ({ label })); // one label-only badge per part, canonical order
+
+// Single-string summary of an alias's rules: the same ordered parts the
+// badges use, joined with `, `. Returns the empty string when no rule
+// applies — callers should drop the line entirely rather than render blank.
+export const formatAliasRulesInline = (rules: AliasRules): string =>
+  aliasRulePartLabels(rules).join(', ');
+
+// Derived display name for a single-target alias whose operator did not set
+// `display_name`. Bare `target_model_id` when no rule is configured; with
+// rules, the inline summary is parenthesized. Multi-target aliases skip
+// this helper entirely — the listing falls back to the alias's own name
+// because no single target represents the alias.
+export const composeAliasDisplayName = (targetModelId: string, rules: AliasRules): string => {
+  const summary = formatAliasRulesInline(rules);
+  return summary.length > 0 ? `${targetModelId} (${summary})` : targetModelId; // bare id when no rule is set
+};
diff --git a/packages/protocols/src/common/aliases_test.ts b/packages/protocols/src/common/aliases_test.ts
new file mode 100644
index 000000000..719ff4ee1
--- /dev/null
+++ b/packages/protocols/src/common/aliases_test.ts
@@ -0,0 +1,56 @@
+import { describe, expect, test } from 'vitest';
+
+import { composeAliasDisplayName, formatAliasRuleBadges, formatAliasRulesInline } from './aliases.ts';
+
+describe('composeAliasDisplayName', () => { // derived display name for a single target
+ test('bare target id when no rules apply', () => {
+ expect(composeAliasDisplayName('gpt-5.4', {})).toBe('gpt-5.4');
+ });
+
+ test('parenthesizes the inline summary when a rule is set', () => {
+ expect(composeAliasDisplayName('gpt-5.4', { reasoning: { effort: 'low' } })).toBe('gpt-5.4 (low effort)');
+ });
+});
+
+describe('formatAliasRulesInline', () => { // comma-joined summary of the configured rule parts
+ test('returns empty string when no rule is set', () => {
+ expect(formatAliasRulesInline({})).toBe('');
+ });
+
+ test('joins configured parts in the canonical order', () => {
+ expect(formatAliasRulesInline({
+ reasoning: { effort: 'high' },
+ verbosity: 'low',
+ serviceTier: 'priority',
+ })).toBe('high effort, low verbosity, priority tier');
+ });
+
+ test('renders boolean reasoning toggles in their dedicated wording', () => {
+ expect(formatAliasRulesInline({
+ reasoning: { adaptive: false, mandatory: true, summary: 'concise' },
+ })).toBe('non-adaptive, mandatory reasoning, summary: concise');
+ });
+
+ test('emits adaptive when reasoning.adaptive is true and budget_tokens when set', () => {
+ expect(formatAliasRulesInline({
+ reasoning: { budget_tokens: 4096, adaptive: true }, // key order differs from the expected output order
+ })).toBe('4096tok budget, adaptive');
+ });
+});
+
+describe('formatAliasRuleBadges', () => { // badge objects, one per inline part
+ test('returns one badge per configured part in the canonical order', () => {
+ expect(formatAliasRuleBadges({
+ reasoning: { effort: 'high', budget_tokens: 2048 },
+ verbosity: 'medium',
+ })).toEqual([
+ { label: 'high effort' }, // badges carry `label` only; `value` is never set
+ { label: '2048tok budget' },
+ { label: 'medium verbosity' },
+ ]);
+ });
+
+ test('returns an empty array when no rule is set', () => {
+ expect(formatAliasRuleBadges({})).toEqual([]);
+ });
+});
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index 54243d56d..d12ab4d8b 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -1,3 +1,5 @@
+import type { AliasKind, AliasSelection, AliasTarget } from './aliases.ts';
+
// Disjoint billing dimensions a single request can be charged on. Every count
// keyed by these is non-overlapping: a prompt token is counted under exactly
// one of `input`, `input_cache_read`, `input_cache_write`,
@@ -113,6 +115,19 @@ export interface ChatModelInfo {
};
}
+// Alias provenance attached to a `/v1/models` entry that the gateway
+// synthesized from an operator-defined alias rather than fetched from an
+// upstream catalog. `targets` carries every configured target — including
+// targets the live catalog currently cannot serve — so the dashboard can
+// show the full configuration and warn about unavailable ones without a
+// second control-plane round trip.
+export interface PublicModelAliasedFrom {
+ name: string;
+ kind: AliasKind;
+ selection: AliasSelection;
+ targets: AliasTarget[]; // all configured targets, whether or not currently servable
+}
+
// Public DTO served at /v1/models and /models. Single superset shape — OpenAI's
// and Anthropic's /models field names do not overlap, so one payload satisfies
// both client shapes.
@@ -135,6 +150,9 @@ export interface PublicModel {
kind: ModelKind;
cost?: ModelPricing;
chat?: ChatModelInfo;
+ // Present only on entries the gateway synthesized from an operator-defined
+ // alias; absent for entries that came from an upstream catalog.
+ aliasedFrom?: PublicModelAliasedFrom;
}
export interface PublicModelsResponse {
From 68662865e793f382d94191e4e11413816a38c825 Mon Sep 17 00:00:00 2001
From: Menci
Date: Fri, 26 Jun 2026 18:46:14 +0800
Subject: [PATCH 052/170] feat(aliases): dashboard UI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit
End-to-end dashboard surface for the v2 alias system:
- useModelAliases composable: singleton store mirroring the
proxies/upstreams stores; loads /api/aliases once and exposes the live
list to every consumer.
- AliasesSettingsCard + AliasRow: Settings card slotted under the Proxies
card. Each row is a two-line block — display name (derived for
single-target aliases via composeAliasDisplayName, alias name for
multi-target, operator override always wins) over a single-line caption
that walks name · N targets · selection · optional "hidden from
/v1/models". The action cluster reserves a left-end slot for the
alias-level warning icon so the edit / delete buttons keep their
on-screen position whether or not a warning is firing.
- AliasEditDialog: top form (alias name, display name, kind, selection
segmented control), vertical AliasTargetRow stack with an Add target
button, alias-level shadow-warning card at the bottom, visibility
switch + Cancel/Save footer. Switching kind resets every target row's
rules to that kind's empty default. Save is gated on a non-empty,
non-colliding name, ≥1 target, and every target carrying a non-empty
target_model_id.
- AliasTargetRow: expandable card whose header is a borderless,
chevron-less Combobox (extended Combobox.vue with `borderless` +
`hideDropdownTrigger` props rather than one-off styles, so future inline
combobox surfaces reuse the same component) that suggests the live set
of non-alias model ids from useModels. The expanded body is the flat
chat-rule form (effort, budget, adaptive, summary, verbosity, service
tier) for chat-kind aliases and a one-line empty-state caption for the
other kinds. Rule-level warnings render inline under the annotated
field as a single line of text-amber-300; the model-level warning is a
yellow `!` icon at the left end of the row's action cluster.
- Shared warnings.ts: one place for catalog lookup
(findCatalogModel, realModelIds), rule-level warning derivation
(advertised effort levels, budget bounds, adaptive support),
model-level warning (target id resolution against the live catalog),
and alias-level shadow warning (name collides with a real id AND no
target references it). Both the Settings row and the dialog read the
same catalog through the same helpers.
- ModelInfoBar: alias-of badge sequence (`alias of: <target>, +N more`,
`selection: <selection>`) plus a rule-badge row that keeps the single-target
per-rule badges and collapses multi-target rules to one
`<field>: varies` pill per varying field.
- packages/ui/Combobox.vue: borderless + hideDropdownTrigger props for
use as an inline row title.
Tests: warnings_test for the helpers, AliasRow_test for title /
caption / action cluster, AliasTargetRow_test for combobox v-model,
edge-disabled controls, expand-on-toggle, model and rule warnings,
AliasEditDialog_test for the form bindings, kind-switch behaviour,
save gate, and shadow card, AliasesSettingsCard_test for the list,
empty state, add emit, and delete confirm flow.
---
apps/web/src/api/types.ts | 30 +++
.../components/alias-edit/AliasEditDialog.vue | 249 +++++++++++++++++
.../alias-edit/AliasEditDialog_test.ts | 168 ++++++++++++
.../components/alias-edit/AliasTargetRow.vue | 253 ++++++++++++++++++
.../alias-edit/AliasTargetRow_test.ts | 89 ++++++
.../web/src/components/alias-edit/warnings.ts | 129 +++++++++
.../components/alias-edit/warnings_test.ts | 156 +++++++++++
.../src/components/models/ModelInfoBar.vue | 48 +++-
apps/web/src/components/settings/AliasRow.vue | 101 +++++++
.../src/components/settings/AliasRow_test.ts | 120 +++++++++
.../settings/AliasesSettingsCard.vue | 71 +++++
.../settings/AliasesSettingsCard_test.ts | 105 ++++++++
apps/web/src/composables/useModelAliases.ts | 30 +++
apps/web/src/pages/dashboard/settings.vue | 29 +-
packages/ui/src/Combobox.vue | 28 +-
15 files changed, 1599 insertions(+), 7 deletions(-)
create mode 100644 apps/web/src/components/alias-edit/AliasEditDialog.vue
create mode 100644 apps/web/src/components/alias-edit/AliasEditDialog_test.ts
create mode 100644 apps/web/src/components/alias-edit/AliasTargetRow.vue
create mode 100644 apps/web/src/components/alias-edit/AliasTargetRow_test.ts
create mode 100644 apps/web/src/components/alias-edit/warnings.ts
create mode 100644 apps/web/src/components/alias-edit/warnings_test.ts
create mode 100644 apps/web/src/components/settings/AliasRow.vue
create mode 100644 apps/web/src/components/settings/AliasRow_test.ts
create mode 100644 apps/web/src/components/settings/AliasesSettingsCard.vue
create mode 100644 apps/web/src/components/settings/AliasesSettingsCard_test.ts
create mode 100644 apps/web/src/composables/useModelAliases.ts
diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts
index 088891e1b..3c3930ffb 100644
--- a/apps/web/src/api/types.ts
+++ b/apps/web/src/api/types.ts
@@ -1,7 +1,13 @@
// Control-plane DTOs the SPA consumes — serialized shapes the gateway emits at /api.
import type {
+ AliasKind,
+ AliasRules,
+ AliasSelection,
+ AliasTarget,
BillingDimension,
+ ChatAliasRules,
+ ModelAlias,
ModelEndpointKey,
ModelEndpoints,
ModelKind,
@@ -11,6 +17,7 @@ import type { AddressableForm, ModelPrefixConfig } from '@floway-dev/provider/mo
export type { BillingDimension, ModelEndpointKey, ModelEndpoints, ModelKind, ModelPricing };
export type { AddressableForm, ModelPrefixConfig };
+export type { AliasKind, AliasRules, AliasSelection, AliasTarget, ChatAliasRules, ModelAlias };
export type UpstreamProviderKind = 'custom' | 'azure' | 'copilot' | 'codex' | 'claude-code' | 'ollama';
@@ -340,6 +347,29 @@ export interface PublicModel {
endpoints?: Record;
cost?: ModelPricing;
kind?: ModelKind;
+ // Chat-only capability metadata sourced from the upstream model config.
+ // Mirrored from `@floway-dev/protocols/common`'s ChatModelInfo so the
+ // dashboard can render rule warnings against the live catalog without
+ // pulling the full protocol shape.
+ chat?: {
+ modalities?: { input: readonly ('text' | 'image')[]; output: readonly ('text' | 'image')[] };
+ reasoning?: {
+ effort?: { supported: readonly string[]; default: string };
+ budget_tokens?: { min?: number; max?: number };
+ adaptive?: boolean;
+ mandatory?: boolean;
+ };
+ };
+ // Alias provenance — present only on `/api/models` entries the gateway
+ // synthesized from an operator-defined alias. The dashboard uses this
+ // both to render alias-of badges on the Models page and to identify
+ // alias rows when computing the target-id suggestion list.
+ aliasedFrom?: {
+ name: string;
+ kind: AliasKind;
+ selection: AliasSelection;
+ targets: AliasTarget[];
+ };
}
export interface ControlPlaneModel extends PublicModel {
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
new file mode 100644
index 000000000..cb49f9055
--- /dev/null
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -0,0 +1,249 @@
+
+
+
+
+
+
+ {{ saveError }}
+
+
+
+
+
+
+ Kind
+ setKind(v as AliasKind)" />
+
+
+
Selection
+
+ First available
+ Random
+
+
+
+
+
+
+
Models
+ Add target
+
+
+
updateTarget(idx, next)"
+ @move-up="moveTarget(idx, -1)"
+ @move-down="moveTarget(idx, 1)"
+ @remove="removeTarget(idx)"
+ />
+
+
+
+
+ This alias name shadows a real model id:
+ {{ shadowWarning.shadowedId }}
+
+ ({{ shadowWarning.shadowedDisplayName }} ).
+
+ .
+
+
+
+
+
+ Visible in /v1/models
+
+
+ Cancel
+ Save
+
+
+
+
+
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
new file mode 100644
index 000000000..9dedb62af
--- /dev/null
+++ b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
@@ -0,0 +1,168 @@
+import { mount } from '@vue/test-utils';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import { nextTick, ref } from 'vue';
+
+import type { ChatAliasRules, ControlPlaneModel, ModelAlias } from '../../api/types.ts';
+
+// Mock the API client + composables so the dialog mounts without hitting the
+// network. The composables expose `ref`-based state — return the same shape
+// so the dialog reads the catalog and the alias list directly off these
+// stubs. The refs carry explicit element types so tests can assign fixtures
+// to `.value` (a bare `ref([])` would infer `never[]`).
+const aliasesRef = ref<ModelAlias[]>([]);
+const modelsRef = ref<ControlPlaneModel[]>([]);
+// Spies standing in for the alias POST (answers 201) and PUT (answers 200).
+const postSpy = vi.fn(async (_arg: unknown) => new Response(JSON.stringify({}), { status: 201 }));
+const putSpy = vi.fn(async (_arg: unknown) => new Response(JSON.stringify({}), { status: 200 }));
+
+vi.mock('../../composables/useModelAliases.ts', () => ({
+  useModelAliases: () => ({ aliases: aliasesRef, loading: ref(false), error: ref(null), load: async () => {} }),
+}));
+vi.mock('../../composables/useModels.ts', () => ({
+  useModelsStore: () => ({ models: modelsRef, loading: ref(false), error: ref(null), load: async () => {} }),
+}));
+vi.mock('../../api/client.ts', () => ({
+  useApi: () => ({
+    api: {
+      aliases: {
+        $post: (arg: unknown) => postSpy(arg),
+        ':name': { $put: (arg: unknown) => putSpy(arg) },
+      },
+    },
+  }),
+  // Stand-in for the real helper: resolves to `{ data }` for an ok response
+  // and to `{ error }` carrying the status otherwise. `T` is the caller's
+  // expected payload type; the JSON body is not validated here.
+  callApi: async <T>(fn: () => Promise<Response>) => {
+    const res = await fn();
+    if (!res.ok) return { error: { status: res.status, message: 'mock-error' } };
+    return { data: (await res.json()) as T };
+  },
+  authFetch: vi.fn(),
+}));
+
+// Import after mocks are registered.
+const { default: AliasEditDialog } = await import('./AliasEditDialog.vue');
+
+// Minimal catalog fixture: a model with the given id, an optional display
+// name, and a single upstream of kind 'custom'.
+const realModel = (id: string, display?: string): ControlPlaneModel => {
+  const model: ControlPlaneModel = {
+    id,
+    display_name: display,
+    upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }],
+  };
+  return model;
+};
+
+// Alias fixture: a visible, first-available chat alias with one rule-less
+// `gpt-5` target. `over` must supply `name` and may override any other
+// field; overrides are spread last so they always win.
+const baseAlias = (over: Partial<ModelAlias> & { name: string }): ModelAlias => ({
+  kind: 'chat',
+  selection: 'first-available',
+  display_name: null,
+  visible_in_models_list: true,
+  targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }],
+  sort_order: 0,
+  created_at: '2026-01-01T00:00:00Z',
+  updated_at: '2026-01-01T00:00:00Z',
+  ...over,
+});
+
+// Reka-UI's DialogPortal teleports content out of the wrapper. Read the
+// portal-rooted DOM by scanning document.body directly. The query helpers
+// are generic so each call site can name the concrete element type it
+// expects (needed to read `.value` / `.disabled` without casts).
+const portalText = () => document.body.textContent ?? '';
+const portalQuery = <T extends Element>(selector: string): T | null => document.body.querySelector<T>(selector);
+const portalQueryAll = <T extends Element>(selector: string): T[] => Array.from(document.body.querySelectorAll<T>(selector));
+
+beforeEach(() => {
+  aliasesRef.value = [];
+  modelsRef.value = [realModel('gpt-5', 'GPT 5'), realModel('claude')];
+  postSpy.mockClear();
+  putSpy.mockClear();
+});
+
+afterEach(() => {
+  // Reka-UI portals append to document.body; clear them between tests so
+  // subsequent assertions don't see stale content.
+  document.body.innerHTML = '';
+});
+
+describe('AliasEditDialog', () => {
+  it('starts create mode with one blank target row and seeds the form fields', async () => {
+    const w = mount(AliasEditDialog, { props: { open: true, record: null }, attachTo: document.body });
+    await nextTick();
+    expect(portalQueryAll('[aria-label="Toggle target row"]')).toHaveLength(1);
+    const inputs = portalQueryAll<HTMLInputElement>('input[type="text"]');
+    expect(inputs[0].value).toBe('');
+    w.unmount();
+  });
+
+  it('"Add target" appends a row', async () => {
+    const w = mount(AliasEditDialog, { props: { open: true, record: null }, attachTo: document.body });
+    await nextTick();
+    expect(portalQueryAll('[aria-label="Toggle target row"]')).toHaveLength(1);
+    const addBtn = portalQueryAll<HTMLButtonElement>('button').find(b => b.textContent?.trim() === 'Add target')!;
+    addBtn.click();
+    await nextTick();
+    expect(portalQueryAll('[aria-label="Toggle target row"]')).toHaveLength(2);
+    w.unmount();
+  });
+
+  it('renders the chat rule body when the kind is chat, and the empty-state caption when the kind is embedding', async () => {
+    const chat = mount(AliasEditDialog, {
+      props: { open: true, record: baseAlias({ name: 'a', targets: [{ target_model_id: 'gpt-5', rules: { reasoning: { effort: 'low' } } as ChatAliasRules }] }) },
+      attachTo: document.body,
+    });
+    await nextTick();
+    portalQuery<HTMLButtonElement>('button[aria-label="Toggle target row"]')!.click();
+    await nextTick();
+    expect(portalText()).toContain('Reasoning effort');
+    chat.unmount();
+    document.body.innerHTML = '';
+
+    const embed = mount(AliasEditDialog, {
+      props: { open: true, record: baseAlias({ name: 'e', kind: 'embedding', targets: [{ target_model_id: 'embed-1', rules: {} as never }] }) },
+      attachTo: document.body,
+    });
+    await nextTick();
+    portalQuery<HTMLButtonElement>('button[aria-label="Toggle target row"]')!.click();
+    await nextTick();
+    expect(portalText()).toContain('No per-target rules for this kind.');
+    expect(portalText()).not.toContain('Reasoning effort');
+    embed.unmount();
+  });
+
+  it('Save is disabled on empty name and on collision with another alias; enabled once the name is unique', async () => {
+    aliasesRef.value = [baseAlias({ name: 'existing' })];
+    // Seed the edit dialog with a valid target so the only validation knob
+    // under test is the alias name (the borderless combobox in the target
+    // row doesn't surface a plain HTMLInput we can drive from the test).
+    const w = mount(AliasEditDialog, {
+      props: {
+        open: true,
+        record: baseAlias({ name: '', targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }] }),
+      },
+      attachTo: document.body,
+    });
+    await nextTick();
+
+    const saveBtn = portalQueryAll<HTMLButtonElement>('button').find(b => b.textContent?.trim() === 'Save')!;
+    expect(saveBtn.disabled).toBe(true);
+
+    const nameInput = portalQueryAll<HTMLInputElement>('input[type="text"]')[0];
+    nameInput.value = 'existing';
+    nameInput.dispatchEvent(new Event('input', { bubbles: true }));
+    await nextTick();
+    expect(saveBtn.disabled).toBe(true);
+
+    nameInput.value = 'fresh';
+    nameInput.dispatchEvent(new Event('input', { bubbles: true }));
+    await nextTick();
+    expect(saveBtn.disabled).toBe(false);
+
+    w.unmount();
+  });
+
+  it('renders the shadow warning card when the alias name collides with a real model and no target references it', async () => {
+    const w = mount(AliasEditDialog, { props: { open: true, record: null }, attachTo: document.body });
+    await nextTick();
+
+    const nameInput = portalQueryAll<HTMLInputElement>('input[type="text"]')[0];
+    nameInput.value = 'gpt-5';
+    nameInput.dispatchEvent(new Event('input', { bubbles: true }));
+    await nextTick();
+
+    expect(portalText()).toContain('shadows a real model id');
+    // NOTE(review): this assertion previously matched a literal against
+    // document.body.innerHTML; checking the rendered text keeps it independent
+    // of the wrapping markup — confirm against the component template.
+    expect(portalText()).toContain('GPT 5');
+    w.unmount();
+  });
+});
diff --git a/apps/web/src/components/alias-edit/AliasTargetRow.vue b/apps/web/src/components/alias-edit/AliasTargetRow.vue
new file mode 100644
index 000000000..d3f21b4a0
--- /dev/null
+++ b/apps/web/src/components/alias-edit/AliasTargetRow.vue
@@ -0,0 +1,253 @@
+
+
+
+
+