From 726814b483e2444d584c9f81da2d3a11cb5f6ac2 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Thu, 25 Jun 2026 20:40:21 +0800
Subject: [PATCH 001/170] feat(aliases): add model_aliases table, types, and
 repo

Introduces the storage layer for the model-aliases data-plane feature.
The table is global, primary-keyed by alias name. Conflict resolution
is encoded as a CHECK-constrained TEXT column, freeform rule values
are stored as JSON, and the codex-auto-review seed entry lands with
the table.

loadAllAliases reads the full table per request (the table is
operator-managed and small; a cache layer is unnecessary for v0).
---
 .../gateway/migrations/0046_model_aliases.sql | 14 +++
 .../src/control-plane/model-aliases/repo.ts   | 37 ++++++++
 .../control-plane/model-aliases/repo_test.ts  | 92 +++++++++++++++++++
 .../src/control-plane/model-aliases/types.ts  | 26 ++++++
 4 files changed, 169 insertions(+)
 create mode 100644 packages/gateway/migrations/0046_model_aliases.sql
 create mode 100644 packages/gateway/src/control-plane/model-aliases/repo.ts
 create mode 100644 packages/gateway/src/control-plane/model-aliases/repo_test.ts
 create mode 100644 packages/gateway/src/control-plane/model-aliases/types.ts
diff --git a/packages/gateway/migrations/0046_model_aliases.sql b/packages/gateway/migrations/0046_model_aliases.sql
new file mode 100644
index 000000000..c934d77b6
--- /dev/null
+++ b/packages/gateway/migrations/0046_model_aliases.sql
@@ -0,0 +1,14 @@
+CREATE TABLE model_aliases (
+  alias TEXT PRIMARY KEY,
+  target_model_id TEXT NOT NULL,
+  upstream_ids_json TEXT NOT NULL DEFAULT '[]',
+  rules_json TEXT NOT NULL DEFAULT '{}',
+  visible_in_models_list INTEGER NOT NULL DEFAULT 1,
+  on_conflict TEXT NOT NULL DEFAULT 'real-only'
+    CHECK (on_conflict IN ('alias-only', 'real-only', 'both-real-first', 'both-alias-first')),
+  created_at INTEGER NOT NULL DEFAULT (unixepoch()),
+  updated_at INTEGER NOT NULL DEFAULT (unixepoch())
+);
+
+INSERT INTO model_aliases (alias, target_model_id, rules_json, on_conflict)
+VALUES ('codex-auto-review', 'gpt-5.4', '{"reasoning":{"effort":"low"}}', 'real-only');
diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts
new file mode 100644
index 000000000..70024e0cd
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/repo.ts
@@ -0,0 +1,56 @@
+import type { ModelAlias, OnConflict } from './types.ts';
+import type { SqlDatabase } from '@floway-dev/platform';
+
+// Raw shape of one model_aliases row as returned by the SELECT in loadAllAliases.
+interface ModelAliasRow {
+  alias: string;
+  target_model_id: string;
+  upstream_ids_json: string;
+  rules_json: string;
+  visible_in_models_list: number;
+  on_conflict: OnConflict;
+}
+
+// The model_aliases table is operator-managed and small (dozens of rows at
+// most), so the data plane reads the full table per request — no cache layer.
+// Rejects when any row carries a JSON column that is unparseable or mis-shaped.
+export const loadAllAliases = async (db: SqlDatabase): Promise<readonly ModelAlias[]> => {
+  const { results } = await db
+    .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict FROM model_aliases')
+    .all<ModelAliasRow>();
+  return results.map(toModelAlias);
+};
+
+// Converts one snake_case row into the camelCase ModelAlias, decoding both JSON
+// columns and turning the INTEGER visibility flag into a boolean.
+const toModelAlias = (row: ModelAliasRow): ModelAlias => ({
+  alias: row.alias,
+  targetModelId: row.target_model_id,
+  upstreamIds: parseJsonField(row.alias, 'upstream_ids_json', row.upstream_ids_json, isStringArray),
+  rules: parseJsonField(row.alias, 'rules_json', row.rules_json, isRulesObject),
+  visibleInModelsList: row.visible_in_models_list === 1,
+  onConflict: row.on_conflict,
+});
+
+const isStringArray = (value: unknown): value is string[] => Array.isArray(value) && value.every(item => typeof item === 'string');
+
+// Only the container shape is checked: rule values are freeform by design, so
+// individual keys and values are deliberately left unvalidated.
+const isRulesObject = (value: unknown): value is ModelAlias['rules'] => typeof value === 'object' && value !== null && !Array.isArray(value);
+
+// Parses one JSON column and checks its top-level shape. Syntactically valid
+// JSON of the wrong shape (e.g. `null`, or `{}` where an array is expected) is
+// rejected with the same message as a syntax error, instead of being cast
+// through and failing later in whatever code consumes the alias.
+const parseJsonField = <T>(alias: string, field: string, raw: string, isValid: (value: unknown) => value is T): T => {
+  let parsed: unknown;
+  try {
+    parsed = JSON.parse(raw);
+  } catch (cause) {
+    throw new Error(`Malformed model_aliases ${field} for ${alias}`, { cause });
+  }
+  if (!isValid(parsed)) {
+    throw new Error(`Malformed model_aliases ${field} for ${alias}`);
+  }
+  return parsed;
+};
diff --git a/packages/gateway/src/control-plane/model-aliases/repo_test.ts b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
new file mode 100644
index 000000000..a4da76fde
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
@@ -0,0 +1,92 @@
+import { test } from 'vitest';
+
+import { loadAllAliases } from './repo.ts';
+import { createSqliteTestDb } from '../../repo/test-sqlite.ts';
+import { assertEquals, assertRejects } from '@floway-dev/test-utils';
+
+test('loadAllAliases reads the seed row from a freshly migrated database', async () => {
+  const db = await createSqliteTestDb();
+
+  const aliases = await loadAllAliases(db);
+
+  assertEquals(aliases, [
+    {
+      alias: 'codex-auto-review',
+      targetModelId: 'gpt-5.4',
+      upstreamIds: [],
+      rules: { reasoning: { effort: 'low' } },
+      visibleInModelsList: true,
+      onConflict: 'real-only',
+    },
+  ]);
+});
+
+test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_models_list to a boolean', async () => {
+  const db = await createSqliteTestDb();
+  await db.exec('DELETE FROM model_aliases');
+  await db
+    .prepare(
+      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)',
+    )
+    .bind(
+      'opus-xhigh',
+      'claude-opus-4-6',
+      '["up_priority","up_secondary"]',
+      '{"reasoning":{"effort":"xhigh"},"anthropicBeta":["fine-grained-tool-streaming"]}',
+      0,
+      'alias-only',
+    )
+    .run();
+  await db
+    .prepare(
+      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)',
+    )
+    .bind('gpt-5-fast', 'gpt-5.4', '[]', '{"serviceTier":"priority"}', 1, 'both-alias-first')
+    .run();
+
+  const aliases = await loadAllAliases(db);
+  const byAlias = new Map(aliases.map(entry => [entry.alias, entry]));
+
+  assertEquals(byAlias.get('opus-xhigh'), {
+    alias: 'opus-xhigh',
+    targetModelId: 'claude-opus-4-6',
+    upstreamIds: ['up_priority', 'up_secondary'],
+    rules: { reasoning: { effort: 'xhigh' }, anthropicBeta: ['fine-grained-tool-streaming'] },
+    visibleInModelsList: false,
+    onConflict: 'alias-only',
+  });
+  assertEquals(byAlias.get('gpt-5-fast'), {
+    alias: 'gpt-5-fast',
+    targetModelId: 'gpt-5.4',
+    upstreamIds: [],
+    rules: { serviceTier: 'priority' },
+    visibleInModelsList: true,
+    onConflict: 'both-alias-first',
+  });
+});
+
+test('loadAllAliases surfaces malformed rules_json as a descriptive error', async () => {
+  const db = await createSqliteTestDb();
+  await db.exec('DELETE FROM model_aliases');
+  await db
+    .prepare(
+      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)',
+    )
+    .bind('bad-rules', 'gpt-5.4', '[]', '{not json', 1, 'real-only')
+    .run();
+
+  await assertRejects(() => loadAllAliases(db), Error, 'Malformed model_aliases rules_json for bad-rules');
+});
+
+test('loadAllAliases surfaces malformed upstream_ids_json as a descriptive error', async () => {
+  const db = await createSqliteTestDb();
+  await db.exec('DELETE FROM model_aliases');
+  await db
+    .prepare(
+      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)',
+    )
+    .bind('bad-upstreams', 'gpt-5.4', '[bad', '{}', 1, 'real-only')
+    .run();
+
+  await assertRejects(() => loadAllAliases(db), Error, 'Malformed model_aliases upstream_ids_json for bad-upstreams');
+});
diff --git a/packages/gateway/src/control-plane/model-aliases/types.ts b/packages/gateway/src/control-plane/model-aliases/types.ts
new file mode 100644
index 000000000..8e1bff467
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/types.ts
@@ -0,0 +1,26 @@
+// Closed set of request-time mode knobs an operator can lock on a matched
+// alias. Each value is freeform — the gateway does not enum-gate operator
+// input so values pass through to upstream verbatim.
+export type ModelAliasRules = {
+  readonly reasoning?: {
+    readonly effort?: string;
+    readonly budgetTokens?: number;
+    readonly adaptive?: boolean;
+    readonly summary?: string;
+  };
+  readonly verbosity?: string;
+  readonly serviceTier?: string;
+  readonly anthropicSpeed?: string;
+  readonly anthropicBeta?: readonly string[];
+};
+
+export type OnConflict = 'alias-only' | 'real-only' | 'both-real-first' | 'both-alias-first';
+
+export type ModelAlias = {
+  readonly alias: string;
+  readonly targetModelId: string;
+  readonly upstreamIds: readonly string[];
+  readonly rules: ModelAliasRules;
+  readonly visibleInModelsList: boolean;
+  readonly onConflict: OnConflict;
+};

From a4ac67e606297c4f988fa171f645647efa892b78 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Thu, 25 Jun 2026 20:53:03 +0800
Subject: [PATCH 002/170] feat(protocols): add Floway extension fields and
 per-upstream sanitizers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Each inbound protocol IR gains the closed set of mode-knob fields it
cannot natively express (thinking_budget, adaptive_thinking,
reasoning_summary on chat-completions; thinking_budget, adaptive_thinking
on responses; verbosity on messages; verbosity, serviceTier inside
generationConfig on gemini; anthropic_speed/anthropicSpeed and
anthropic_beta/anthropicBeta everywhere they apply).

The extensions are public — a client can set them directly and they
behave identically to alias-injected rules. The per-upstream sanitizer
strips any extension residue before the upstream call and emits one
log line per drop when given a trace context, so cross-protocol drops
are observable without leaking the field to upstream.
---
 .../src/data-plane/chat/shared/sanitize.ts    | 41 +++++++++++
 .../data-plane/chat/shared/sanitize_test.ts   | 73 +++++++++++++++++++
 packages/protocols/package.json               |  3 +-
 .../protocols/src/chat-completions/index.ts   | 10 +++
 packages/protocols/src/extensions/index.ts    | 16 ++++
 packages/protocols/src/gemini/index.ts        |  8 ++
 packages/protocols/src/index.ts               |  1 +
 packages/protocols/src/messages/index.ts      |  2 +
 packages/protocols/src/responses/index.ts     |  8 ++
 9 files changed, 161 insertions(+), 1 deletion(-)
 create mode 100644 packages/gateway/src/data-plane/chat/shared/sanitize.ts
 create mode 100644 packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
 create mode 100644 packages/protocols/src/extensions/index.ts

diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize.ts b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
new file mode 100644
index 000000000..918156d16
--- /dev/null
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
@@ -0,0 +1,41 @@
+import { FLOWAY_EXTENSION_FIELDS } from '@floway-dev/protocols/extensions';
+
+export interface SanitizeTraceCtx {
+  readonly aliasName?: string;
+  readonly emit: (line: { alias?: string; field: string; targetProtocol: string }) => void;
+}
+
+const stripKeys = (
+  body: Record<string, unknown>,
+  keys: readonly string[],
+  targetProtocol: string,
+  trace: SanitizeTraceCtx | undefined,
+  fieldPrefix: string = '',
+): void => {
+  // Removes each listed key found on `body` in place, reporting every drop to `trace` when one is given.
+  keys.forEach(key => {
+    if (!(key in body)) return;
+    delete body[key];
+    trace?.emit({ alias: trace.aliasName, field: `${fieldPrefix}${key}`, targetProtocol });
+  });
+};
+
+export const sanitizeForChatCompletionsUpstream = (body: Record<string, unknown>, trace?: SanitizeTraceCtx): void => {
+  stripKeys(body, FLOWAY_EXTENSION_FIELDS.chatCompletions, 'chat-completions', trace);
+};
+
+export const sanitizeForResponsesUpstream = (body: Record<string, unknown>, trace?: SanitizeTraceCtx): void => {
+  stripKeys(body, FLOWAY_EXTENSION_FIELDS.responses, 'responses', trace);
+};
+
+export const sanitizeForMessagesUpstream = (body: Record<string, unknown>, trace?: SanitizeTraceCtx): void => {
+  stripKeys(body, FLOWAY_EXTENSION_FIELDS.messages, 'messages', trace);
+};
+
+export const sanitizeForGeminiUpstream = (body: Record<string, unknown>, trace?: SanitizeTraceCtx): void => {
+  // Gemini extension keys sit both on the payload root and inside `generationConfig`, so both levels are stripped.
+  const { topLevel, generationConfig: nestedKeys } = FLOWAY_EXTENSION_FIELDS.gemini;
+  stripKeys(body, topLevel, 'gemini', trace);
+  const nested = body.generationConfig;
+  if (typeof nested === 'object' && nested !== null) stripKeys(nested as Record<string, unknown>, nestedKeys, 'gemini', trace, 'generationConfig.');
+};
diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
new file mode 100644
index 000000000..eebcd5d06
--- /dev/null
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
@@ -0,0 +1,73 @@
+import { test } from 'vitest';
+
+import {
+  sanitizeForChatCompletionsUpstream,
+  sanitizeForGeminiUpstream,
+  sanitizeForMessagesUpstream,
+  sanitizeForResponsesUpstream,
+  type SanitizeTraceCtx,
+} from './sanitize.ts';
+import { assertEquals } from '@floway-dev/test-utils';
+
+type TraceLine = { alias?: string; field: string; targetProtocol: string };
+
+const makeTrace = (aliasName?: string): { ctx: SanitizeTraceCtx; lines: TraceLine[] } => {
+  const lines: TraceLine[] = [];
+  return {
+    ctx: { aliasName, emit: line => lines.push(line) },
+    lines,
+  };
+};
+
+test('sanitizeForMessagesUpstream strips verbosity and emits one trace line', () => {
+  const body: Record<string, unknown> = { verbosity: 'low', model: 'x' };
+  const { ctx, lines } = makeTrace('codex-auto-review');
+  sanitizeForMessagesUpstream(body, ctx);
+  assertEquals(body, { model: 'x' });
+  assertEquals(lines, [{ alias: 'codex-auto-review', field: 'verbosity', targetProtocol: 'messages' }]);
+});
+
+test('sanitizeForChatCompletionsUpstream strips Floway extensions and leaves native fields', () => {
+  const body: Record<string, unknown> = {
+    thinking_budget: 4096,
+    anthropic_speed: 'fast',
+    reasoning_effort: 'high',
+    model: 'x',
+  };
+  const { ctx, lines } = makeTrace('alias-1');
+  sanitizeForChatCompletionsUpstream(body, ctx);
+  assertEquals(body, { reasoning_effort: 'high', model: 'x' });
+  assertEquals(lines.length, 2);
+  assertEquals(lines.every(l => l.alias === 'alias-1' && l.targetProtocol === 'chat-completions'), true);
+  const droppedFields = lines.map(l => l.field).sort();
+  assertEquals(droppedFields, ['anthropic_speed', 'thinking_budget']);
+});
+
+test('sanitizeForResponsesUpstream strips extensions without a trace context', () => {
+  const body: Record<string, unknown> = { adaptive_thinking: true, anthropic_beta: ['ctx-1m'] };
+  sanitizeForResponsesUpstream(body);
+  assertEquals(body, {});
+});
+
+test('sanitizeForGeminiUpstream walks top-level and generationConfig', () => {
+  const body: Record<string, unknown> = {
+    generationConfig: { verbosity: 'low', thinkingConfig: { thinkingBudget: 100 } },
+    anthropicSpeed: 'fast',
+  };
+  const { ctx, lines } = makeTrace('alias-g');
+  sanitizeForGeminiUpstream(body, ctx);
+  assertEquals(body, { generationConfig: { thinkingConfig: { thinkingBudget: 100 } } });
+  assertEquals(lines.length, 2);
+  const droppedFields = lines.map(l => l.field).sort();
+  assertEquals(droppedFields, ['anthropicSpeed', 'generationConfig.verbosity']);
+  assertEquals(lines.every(l => l.alias === 'alias-g' && l.targetProtocol === 'gemini'), true);
+});
+
+test('sanitizer is idempotent — a second run emits no additional traces', () => {
+  const body: Record<string, unknown> = { verbosity: 'low', model: 'x' };
+  const { ctx, lines } = makeTrace();
+  sanitizeForMessagesUpstream(body, ctx);
+  assertEquals(lines.length, 1);
+  sanitizeForMessagesUpstream(body, ctx);
+  assertEquals(lines.length, 1);
+});
diff --git a/packages/protocols/package.json b/packages/protocols/package.json
index 5ada835f0..1a8409de0 100644
--- a/packages/protocols/package.json
+++ b/packages/protocols/package.json
@@ -12,7 +12,8 @@
     "./messages": { "import": "./src/messages/index.ts", "types": "./src/messages/index.ts" },
     "./gemini": { "import": "./src/gemini/index.ts", "types": "./src/gemini/index.ts" },
     "./embeddings": { "import": "./src/embeddings/index.ts", "types": "./src/embeddings/index.ts" },
-    "./images": { "import": "./src/images/index.ts", "types": "./src/images/index.ts" }
+    "./images": { "import": "./src/images/index.ts", "types": "./src/images/index.ts" },
+    "./extensions": { "import": "./src/extensions/index.ts", "types": "./src/extensions/index.ts" }
   },
   "scripts": {
     "typecheck": "tsc --noEmit",
diff --git a/packages/protocols/src/chat-completions/index.ts b/packages/protocols/src/chat-completions/index.ts
index 582381555..64a62c91d 100644
--- a/packages/protocols/src/chat-completions/index.ts
+++ b/packages/protocols/src/chat-completions/index.ts
@@ -25,6 +25,16 @@ export interface ChatCompletionsPayload {
   tool_choice?: 'none' | 'auto' | 'required' | { type: 'function'; function: { name: string } } | null;
   /** Request usage stats in streaming responses */
   stream_options?: { include_usage: boolean } | null;
+  /** Floway protocol extension. Translated to Anthropic `thinking.budget_tokens` / Gemini `thinkingConfig.thinkingBudget` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  thinking_budget?: number;
+  /** Floway protocol extension. Translated to Anthropic `thinking.type: "adaptive"` / Gemini `thinkingConfig.thinkingBudget: -1` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  adaptive_thinking?: boolean;
+  /** Floway protocol extension. Translated to OpenAI Responses `reasoning.summary` / Anthropic `thinking.display` / Gemini `thinkingConfig.includeThoughts` when routed to those upstreams; dropped on OpenAI Chat targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  reasoning_summary?: string;
+  /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  anthropic_speed?: string;
+  /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  anthropic_beta?: readonly string[];
 }
 
 export interface ChatCompletionsTool {
diff --git a/packages/protocols/src/extensions/index.ts b/packages/protocols/src/extensions/index.ts
new file mode 100644
index 000000000..b6579ce2b
--- /dev/null
+++ b/packages/protocols/src/extensions/index.ts
@@ -0,0 +1,16 @@
+/**
+ * Closed enumeration of Floway protocol extension fields that the gateway
+ * adds to each inbound IR on top of the host protocol's own schema. The
+ * per-upstream sanitizer in the gateway reads this manifest to strip any
+ * extension residue before the upstream HTTP call. See
+ * docs/superpowers/specs/2026-06-25-model-aliases-design.md.
+ */
+export const FLOWAY_EXTENSION_FIELDS = {
+  chatCompletions: ['thinking_budget', 'adaptive_thinking', 'reasoning_summary', 'anthropic_speed', 'anthropic_beta'],
+  responses: ['thinking_budget', 'adaptive_thinking', 'anthropic_speed', 'anthropic_beta'],
+  messages: ['verbosity'],
+  gemini: {
+    topLevel: ['anthropicSpeed', 'anthropicBeta'],
+    generationConfig: ['verbosity', 'serviceTier'],
+  },
+} as const;
diff --git a/packages/protocols/src/gemini/index.ts b/packages/protocols/src/gemini/index.ts
index 1530fd2f5..ded7ebb36 100644
--- a/packages/protocols/src/gemini/index.ts
+++ b/packages/protocols/src/gemini/index.ts
@@ -6,6 +6,10 @@ export interface GeminiPayload {
   generationConfig?: GeminiGenerationConfig;
   safetySettings?: GeminiSafetySetting[];
   cachedContent?: string;
+  /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  anthropicSpeed?: string;
+  /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  anthropicBeta?: readonly string[];
 }
 
 export interface GeminiContent {
@@ -38,6 +42,10 @@ export interface GeminiGenerationConfig {
   responseMimeType?: string;
   responseSchema?: unknown;
   thinkingConfig?: GeminiThinkingConfig;
+  /** Floway protocol extension. Translated to OpenAI Chat `verbosity` / Responses `text.verbosity` when routed to those upstreams; dropped on Anthropic Messages and Gemini targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  verbosity?: string;
+  /** Floway protocol extension. Translated to OpenAI Chat `service_tier` / Responses `service_tier` / Anthropic `service_tier` when routed to those upstreams; dropped on Gemini targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  serviceTier?: string;
 }
 
 export interface GeminiThinkingConfig {
diff --git a/packages/protocols/src/index.ts b/packages/protocols/src/index.ts
index 981d4fda1..ceaa785f6 100644
--- a/packages/protocols/src/index.ts
+++ b/packages/protocols/src/index.ts
@@ -2,6 +2,7 @@ export * from './common/index.ts';
 export * from './completions/index.ts';
 export * from './chat-completions/index.ts';
 export * from './embeddings/index.ts';
+export * from './extensions/index.ts';
 export * from './gemini/index.ts';
 export * from './messages/index.ts';
 export * from './responses/index.ts';
diff --git a/packages/protocols/src/messages/index.ts b/packages/protocols/src/messages/index.ts
index 9689db240..94e44188e 100644
--- a/packages/protocols/src/messages/index.ts
+++ b/packages/protocols/src/messages/index.ts
@@ -56,6 +56,8 @@ export interface MessagesPayload {
   // protocol layer because the gateway treats `speed: 'fast'` as the canonical
   // client signal regardless of which upstream serves it.
   speed?: 'standard' | 'fast' | (string & {});
+  /** Floway protocol extension. Translated to OpenAI Chat `verbosity` / Responses `text.verbosity` when routed to those upstreams; dropped on Anthropic Messages and Gemini targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  verbosity?: string;
 }
 
 export interface MessagesSearchResultLocationCitation {
diff --git a/packages/protocols/src/responses/index.ts b/packages/protocols/src/responses/index.ts
index 8822c1f3d..8cf83f0ea 100644
--- a/packages/protocols/src/responses/index.ts
+++ b/packages/protocols/src/responses/index.ts
@@ -33,6 +33,14 @@ export interface ResponsesPayload {
   prompt_cache_key?: string | null;
   safety_identifier?: string | null;
   service_tier?: 'default' | 'auto' | 'flex' | 'priority' | 'scale' | (string & {}) | null;
+  /** Floway protocol extension. Translated to Anthropic `thinking.budget_tokens` / Gemini `thinkingConfig.thinkingBudget` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  thinking_budget?: number;
+  /** Floway protocol extension. Translated to Anthropic `thinking.type: "adaptive"` / Gemini `thinkingConfig.thinkingBudget: -1` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  adaptive_thinking?: boolean;
+  /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  anthropic_speed?: string;
+  /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  anthropic_beta?: readonly string[];
 }
 
 // Narrower payload for `/responses/compact`. The official endpoint accepts a

From e1891e1dddb03b519090e99fa1fa1f262a09a96e Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Thu, 25 Jun 2026 21:15:38 +0800
Subject: [PATCH 003/170] feat(translate): emit Floway extension fields to
 upstream slots
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Each translate pair now reads the inbound IR's native and Floway-extension
mode-knob fields and writes them to the upstream protocol's natural slot
per the model-aliases design table. Routing is purely by upstream wire
protocol; translate never branches on model version.

Coverage per rule:
- reasoning.effort: emitted onto OpenAI Chat reasoning_effort, Responses
  reasoning.effort, Anthropic output_config.effort, Gemini
  thinkingConfig.thinkingLevel (the inverse mappers stay where they were).
- reasoning.budgetTokens / reasoning.adaptive: emitted onto Anthropic
  thinking.{type:'enabled', budget_tokens} and thinking.{type:'adaptive'}
  via a shared via-messages helper; Gemini path keeps its native
  thinkingBudget handling.
- reasoning.summary: bidirectional Responses reasoning.summary ↔ Anthropic
  thinking.display mapping with concise|detailed → summarized, omitted →
  omitted, auto → upstream default; reverse picks concise as the
  Responses-side canonical form.
- verbosity: native fields on Chat and Responses (added now — the IR
  did not carry them yet), Floway extension on Messages and Gemini.
- serviceTier: passes through verbatim onto each protocol's service_tier
  slot; Messages' service_tier type relaxed to admit operator-typed
  values per the alias design's freeform contract.
- anthropicSpeed: emitted onto Anthropic Messages speed; dropped on
  non-Messages targets.
- anthropicBeta: translate cannot move it to the request header (the
  translate signature has no headers), so it is left as body residue
  and the gateway-side rule-apply pass owns header materialization in
  the next task; a mergeAnthropicBetaTokens helper lives in
  via-messages/ for that consumer.

Drop-side emission stays the per-upstream sanitizer's job; translate
emits only the non-drop cells of the table.

The shared reasoning_effort union (gemini-via/gemini.ts) extends to the
seven values the alias suggestion list publishes (none|minimal|low|
medium|high|xhigh|max) and stops collapsing minimal onto low.
---
 .../protocols/src/chat-completions/index.ts   |  5 +
 packages/protocols/src/gemini/index.ts        |  2 +-
 packages/protocols/src/messages/index.ts      |  2 +-
 packages/protocols/src/responses/index.ts     |  8 +-
 .../chat-completions-via-messages/request.ts  | 15 +++
 .../chat-completions-via-responses/request.ts | 15 ++-
 .../gemini-via-chat-completions/request.ts    |  5 +
 .../src/gemini-via-messages/request.ts        | 18 ++++
 .../src/gemini-via-responses/request.ts       | 27 ++++--
 .../messages-via-chat-completions/request.ts  |  2 +
 .../src/messages-via-responses/request.ts     | 20 +++-
 .../responses-via-chat-completions/request.ts |  1 +
 .../src/responses-via-messages/request.ts     | 29 +++++-
 .../responses-via-messages/request_test.ts    |  4 +-
 .../translate/src/shared/gemini-via/gemini.ts | 16 +++-
 .../shared/messages-via/reasoning-summary.ts  | 21 +++++
 .../via-messages/anthropic-extensions.ts      | 93 +++++++++++++++++++
 17 files changed, 262 insertions(+), 21 deletions(-)
 create mode 100644 packages/translate/src/shared/messages-via/reasoning-summary.ts
 create mode 100644 packages/translate/src/shared/via-messages/anthropic-extensions.ts

diff --git a/packages/protocols/src/chat-completions/index.ts b/packages/protocols/src/chat-completions/index.ts
index 64a62c91d..8804fd449 100644
--- a/packages/protocols/src/chat-completions/index.ts
+++ b/packages/protocols/src/chat-completions/index.ts
@@ -18,6 +18,11 @@ export interface ChatCompletionsPayload {
   parallel_tool_calls?: boolean | null;
   response_format?: Record<string, unknown> | null;
   reasoning_effort?: string | null;
+  // GPT-5-family response-length control. Native OpenAI Chat field; Floway
+  // mirrors it onto Responses `text.verbosity` and exposes it as an
+  // extension on Messages / Gemini IRs.
+  // Reference: https://platform.openai.com/docs/api-reference/chat/create
+  verbosity?: string | null;
   prompt_cache_key?: string | null;
   safety_identifier?: string | null;
   service_tier?: 'default' | 'auto' | 'flex' | 'priority' | 'scale' | (string & {}) | null;
diff --git a/packages/protocols/src/gemini/index.ts b/packages/protocols/src/gemini/index.ts
index ded7ebb36..c3e7e646a 100644
--- a/packages/protocols/src/gemini/index.ts
+++ b/packages/protocols/src/gemini/index.ts
@@ -50,7 +50,7 @@ export interface GeminiGenerationConfig {
 
 export interface GeminiThinkingConfig {
   thinkingBudget?: number;
-  thinkingLevel?: 'minimal' | 'low' | 'medium' | 'high' | string;
+  thinkingLevel?: 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' | 'max' | string;
   includeThoughts?: boolean;
 }
 
diff --git a/packages/protocols/src/messages/index.ts b/packages/protocols/src/messages/index.ts
index 94e44188e..663dcef24 100644
--- a/packages/protocols/src/messages/index.ts
+++ b/packages/protocols/src/messages/index.ts
@@ -49,7 +49,7 @@ export interface MessagesPayload {
     // no `json_object` variant.
     format?: { type: 'json_schema'; schema: Record<string, unknown> };
   };
-  service_tier?: 'auto' | 'standard_only';
+  service_tier?: 'auto' | 'standard_only' | (string & {});
   // https://docs.claude.com/en/build-with-claude/fast-mode — Fast Mode is
   // opt-in per request. Beta-only on the upstream wire (gated by
   // `anthropic-beta: fast-mode-2026-02-01`), but we expose the field at the
diff --git a/packages/protocols/src/responses/index.ts b/packages/protocols/src/responses/index.ts
index 8cf83f0ea..39af5e148 100644
--- a/packages/protocols/src/responses/index.ts
+++ b/packages/protocols/src/responses/index.ts
@@ -26,10 +26,14 @@ export interface ResponsesPayload {
   parallel_tool_calls?: boolean | null;
   reasoning?: {
     effort?: string;
-    summary?: 'detailed' | 'auto' | 'concise';
+    summary?: 'detailed' | 'auto' | 'concise' | (string & {});
   };
   include?: string[];
-  text?: { format?: Record<string, unknown> | null } | null;
+  // `text.verbosity` is a native GPT-5-family Responses field that controls
+  // response length; `text.format` carries structured-output schemas. Both
+  // ride on the same `text` object.
+  // Reference: https://platform.openai.com/docs/api-reference/responses/create
+  text?: { format?: Record<string, unknown> | null; verbosity?: string | null } | null;
   prompt_cache_key?: string | null;
   safety_identifier?: string | null;
   service_tier?: 'default' | 'auto' | 'flex' | 'priority' | 'scale' | (string & {}) | null;
diff --git a/packages/translate/src/chat-completions-via-messages/request.ts b/packages/translate/src/chat-completions-via-messages/request.ts
index a09bd44b9..82dd22c59 100644
--- a/packages/translate/src/chat-completions-via-messages/request.ts
+++ b/packages/translate/src/chat-completions-via-messages/request.ts
@@ -2,6 +2,7 @@ import { messagesThinkingBlockFromChatCompletionsScalarReasoning } from '../shar
 import { parseToolArgumentsObject } from '../shared/messages/tool-arguments.ts';
 import { applyLastMessageCacheBreakpoint, applyLastToolCacheBreakpoint } from '../shared/via-messages/cache-breakpoints.ts';
 import { fetchRemoteImage, type RemoteImageLoader, resolveImageUrlToMessagesImage } from '../shared/via-messages/remote-images.ts';
+import { buildMessagesThinkingFromExtensions } from '../shared/via-messages/anthropic-extensions.ts';
 import type { ChatCompletionsPayload, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsTool } from '@floway-dev/protocols/chat-completions';
 import { MESSAGES_FALLBACK_MAX_TOKENS, type MessagesAssistantContentBlock, type MessagesMessage, type MessagesPayload, type MessagesTextBlock, type MessagesUserContentBlock } from '@floway-dev/protocols/messages';
 
@@ -188,6 +189,17 @@ export const translateChatCompletionsToMessages = async (payload: ChatCompletion
   if (formatSchema) outputConfig.format = { type: 'json_schema', schema: formatSchema };
   const hasOutputConfig = Object.keys(outputConfig).length > 0;
 
+  // Materialize the Floway extension fields onto their Messages-natural
+  // slots. `anthropic_beta` is body-side residue that the per-upstream
+  // sanitizer strips after translation; the gateway-side rule-apply pass owns
+  // moving its value onto the outbound `anthropic-beta` header before the
+  // upstream call. See docs/superpowers/specs/2026-06-25-model-aliases-design.md.
+  const thinking = buildMessagesThinkingFromExtensions({
+    thinkingBudget: payload.thinking_budget,
+    adaptiveThinking: payload.adaptive_thinking,
+    reasoningSummary: payload.reasoning_summary,
+  });
+
   // Leave OpenAI `user` and generic metadata out of the Messages fallback instead
   // of treating them as a backchannel for Anthropic `metadata.user_id`.
   return {
@@ -205,6 +217,9 @@ export const translateChatCompletionsToMessages = async (payload: ChatCompletion
     ...(tools ? { tools } : {}),
     ...(payload.tool_choice != null ? { tool_choice: translateChatCompletionsToolChoice(payload.tool_choice) } : {}),
     ...(hasOutputConfig ? { output_config: outputConfig } : {}),
+    ...(thinking ? { thinking } : {}),
+    ...(payload.anthropic_speed != null ? { speed: payload.anthropic_speed } : {}),
+    ...(payload.service_tier != null ? { service_tier: payload.service_tier } : {}),
   };
 };
 
diff --git a/packages/translate/src/chat-completions-via-responses/request.ts b/packages/translate/src/chat-completions-via-responses/request.ts
index 6efd82c34..1865ff40b 100644
--- a/packages/translate/src/chat-completions-via-responses/request.ts
+++ b/packages/translate/src/chat-completions-via-responses/request.ts
@@ -112,6 +112,19 @@ export const translateChatCompletionsToResponses = (payload: ChatCompletionsPayl
 
   const responseTextConfig = payload.response_format === undefined ? undefined : payload.response_format === null ? null : { format: payload.response_format };
 
+  // `reasoning_summary` is the inbound CC extension that materializes onto
+  // the Responses-native `reasoning.summary` slot. Co-emit alongside
+  // `reasoning.effort` so a single `reasoning` object captures both knobs.
+  const reasoningEffort = payload.reasoning_effort != null ? payload.reasoning_effort : undefined;
+  const reasoningSummary = payload.reasoning_summary;
+  const reasoning =
+    reasoningEffort !== undefined || reasoningSummary !== undefined
+      ? {
+          ...(reasoningEffort !== undefined ? { effort: reasoningEffort } : {}),
+          ...(reasoningSummary !== undefined ? { summary: reasoningSummary } : {}),
+        }
+      : undefined;
+
   return {
     model: payload.model,
     input,
@@ -134,7 +147,7 @@ export const translateChatCompletionsToResponses = (payload: ChatCompletionsPayl
     // https://developers.openai.com/api/docs/guides/migrate-to-responses
     ...(payload.store !== undefined ? { store: payload.store } : {}),
     ...(payload.parallel_tool_calls !== undefined ? { parallel_tool_calls: payload.parallel_tool_calls } : {}),
-    ...(payload.reasoning_effort != null ? { reasoning: { effort: payload.reasoning_effort } } : {}),
+    ...(reasoning ? { reasoning } : {}),
     ...(responseTextConfig !== undefined ? { text: responseTextConfig } : {}),
     ...(payload.prompt_cache_key !== undefined ? { prompt_cache_key: payload.prompt_cache_key } : {}),
     ...(payload.safety_identifier !== undefined ? { safety_identifier: payload.safety_identifier } : {}),
diff --git a/packages/translate/src/gemini-via-chat-completions/request.ts b/packages/translate/src/gemini-via-chat-completions/request.ts
index 6f490dc25..07cc8bdce 100644
--- a/packages/translate/src/gemini-via-chat-completions/request.ts
+++ b/packages/translate/src/gemini-via-chat-completions/request.ts
@@ -188,6 +188,11 @@ const applyGenerationConfig = (request: ChatCompletionsPayload, generationConfig
 
   const reasoningEffort = geminiReasoningEffort(generationConfig.thinkingConfig);
   if (reasoningEffort) request.reasoning_effort = reasoningEffort;
+
+  // Extension fields landed on CC: `verbosity` flows verbatim; `serviceTier`
+  // crosses naming conventions (camelCase Gemini → snake_case OpenAI).
+  if (generationConfig.verbosity != null) request.verbosity = generationConfig.verbosity;
+  if (generationConfig.serviceTier != null) request.service_tier = generationConfig.serviceTier;
 };
 
 const buildTools = (payload: GeminiPayload): ChatCompletionsTool[] | undefined => {
diff --git a/packages/translate/src/gemini-via-messages/request.ts b/packages/translate/src/gemini-via-messages/request.ts
index 29dd7c066..91fbe0493 100644
--- a/packages/translate/src/gemini-via-messages/request.ts
+++ b/packages/translate/src/gemini-via-messages/request.ts
@@ -161,6 +161,14 @@ const applyThinkingConfig = (request: MessagesPayload, thinkingConfig?: GeminiTh
     }
   }
 
+  // `includeThoughts` materializes onto `thinking.display`: true → summarized
+  // (Anthropic redacts to a single-block summary), false → omitted (no
+  // thinking surface at all). Skipped when unset or when thinking is disabled.
+  if (thinkingConfig.includeThoughts !== undefined && request.thinking?.type !== 'disabled') {
+    const display = thinkingConfig.includeThoughts === true ? ('summarized' as const) : ('omitted' as const);
+    request.thinking = request.thinking ? { ...request.thinking, display } : { type: 'enabled', display };
+  }
+
   const effort = geminiThinkingLevelEffort(thinkingConfig);
   // Spread to merge with any output_config fields a sibling helper has
   // already written (e.g. structured-output `format` from
@@ -196,6 +204,11 @@ const applyGenerationConfig = (request: MessagesPayload, generationConfig: Gemin
     };
   }
 
+  // `serviceTier` extension flows verbatim onto the Messages-native slot;
+  // `verbosity` has no Anthropic equivalent and stays as inbound residue
+  // that the sanitizer strips after translation.
+  if (generationConfig.serviceTier != null) request.service_tier = generationConfig.serviceTier;
+
   applyThinkingConfig(request, generationConfig.thinkingConfig);
 };
 
@@ -260,6 +273,11 @@ export const buildTargetRequest = (
 
   applyGenerationConfig(request, payload.generationConfig, fallbackMaxOutputTokens);
 
+  // Top-level Gemini Floway extensions: `anthropicSpeed` is the only one
+  // with a Messages-natural slot. `anthropicBeta` is header-bound at the
+  // gateway boundary (Task 5) since translate functions do not own headers.
+  if (payload.anthropicSpeed != null) request.speed = payload.anthropicSpeed;
+
   const tools = buildTools(payload);
   if (tools) request.tools = tools;
   applyLastToolCacheBreakpoint(request.tools);
diff --git a/packages/translate/src/gemini-via-responses/request.ts b/packages/translate/src/gemini-via-responses/request.ts
index 62d67c827..df85e4729 100644
--- a/packages/translate/src/gemini-via-responses/request.ts
+++ b/packages/translate/src/gemini-via-responses/request.ts
@@ -132,6 +132,7 @@ const applyGenerationConfig = (request: ResponsesPayload, generationConfig?: Gem
 
   if (generationConfig.responseSchema !== undefined) {
     request.text = {
+      ...request.text,
       format: {
         type: 'json_schema',
         json_schema: {
@@ -141,16 +142,28 @@ const applyGenerationConfig = (request: ResponsesPayload, generationConfig?: Gem
       },
     };
   } else if (generationConfig.responseMimeType === 'application/json') {
-    request.text = { format: { type: 'json_object' } };
+    request.text = { ...request.text, format: { type: 'json_object' } };
   }
 
-  const effort = geminiReasoningEffort(generationConfig.thinkingConfig);
-  if (!effort) return;
+  // `verbosity` extension rides under `text` alongside the structured-output
+  // format, matching the native Responses placement.
+  if (generationConfig.verbosity != null) request.text = { ...request.text, verbosity: generationConfig.verbosity };
 
-  request.reasoning = {
-    effort,
-    ...(effort !== 'none' && generationConfig.thinkingConfig?.includeThoughts === true ? { summary: 'detailed' as const } : {}),
-  };
+  if (generationConfig.serviceTier != null) request.service_tier = generationConfig.serviceTier;
+
+  const effort = geminiReasoningEffort(generationConfig.thinkingConfig);
+  const summary =
+    generationConfig.thinkingConfig?.includeThoughts === true
+      ? ('detailed' as const)
+      : generationConfig.thinkingConfig?.includeThoughts === false
+        ? ('omitted' as const)
+        : undefined;
+  if (effort || summary !== undefined) {
+    request.reasoning = {
+      ...(effort ? { effort } : {}),
+      ...(summary !== undefined && effort !== 'none' ? { summary } : {}),
+    };
+  }
 };
 
 const buildTools = (payload: GeminiPayload): ResponsesTool[] | undefined => {
diff --git a/packages/translate/src/messages-via-chat-completions/request.ts b/packages/translate/src/messages-via-chat-completions/request.ts
index 76f5347d8..bd08e1b26 100644
--- a/packages/translate/src/messages-via-chat-completions/request.ts
+++ b/packages/translate/src/messages-via-chat-completions/request.ts
@@ -290,6 +290,8 @@ export const translateMessagesToChatCompletions = (payload: MessagesPayload): Ch
     tools: translateMessagesTools(clientTools),
     tool_choice: translateMessagesToolChoice(payload.tool_choice, clientTools),
     ...(responseFormat ? { response_format: responseFormat } : {}),
+    ...(payload.verbosity != null ? { verbosity: payload.verbosity } : {}),
+    ...(payload.service_tier != null ? { service_tier: payload.service_tier } : {}),
   };
 };
 
diff --git a/packages/translate/src/messages-via-responses/request.ts b/packages/translate/src/messages-via-responses/request.ts
index 7bb365cfe..b1c593443 100644
--- a/packages/translate/src/messages-via-responses/request.ts
+++ b/packages/translate/src/messages-via-responses/request.ts
@@ -1,6 +1,7 @@
 import { openAiJsonSchemaCoreFromMessagesFormat } from '../shared/messages/structured-output.ts';
 import { messagesReasoningBlockToResponsesReasoning } from '../shared/messages-and-responses/reasoning.ts';
 import { resolveMessagesReasoningEffort } from '../shared/messages-via/reasoning-effort.ts';
+import { mapAnthropicDisplayToSummary } from '../shared/messages-via/reasoning-summary.ts';
 import { normalizeMessagesToolInputSchema } from '../shared/messages-via/tool-schema.ts';
 import {
   type MessagesAssistantMessage,
@@ -207,15 +208,25 @@ export const translateMessagesToResponses = (payload: MessagesPayload): Response
   // Responses upstream may reject it. Translation stays pairwise and leaves
   // target-side validation to the selected upstream endpoint.
   const effort = resolveMessagesReasoningEffort(payload);
-  const reasoning = effort ? { effort } : undefined;
+  const display = payload.thinking?.display;
+  const summary = display !== undefined ? mapAnthropicDisplayToSummary(display) : undefined;
+  const reasoning =
+    effort !== undefined || summary !== undefined
+      ? {
+          ...(effort !== undefined ? { effort } : {}),
+          ...(summary !== undefined ? { summary } : {}),
+        }
+      : undefined;
   const clientTools = getClientTools(payload.tools);
   const instructions = translateSystemPrompt(payload.system);
   const jsonSchema = openAiJsonSchemaCoreFromMessagesFormat(payload.output_config?.format);
-  const text = jsonSchema ? { format: { type: 'json_schema' as const, ...jsonSchema } } : undefined;
+  const formatPart = jsonSchema ? { format: { type: 'json_schema' as const, ...jsonSchema } } : undefined;
+  const verbosityPart = payload.verbosity != null ? { verbosity: payload.verbosity } : undefined;
+  const text = formatPart || verbosityPart ? { ...formatPart, ...verbosityPart } : undefined;
 
   // Keep fallback semantics strict: do not synthesize `temperature: 1`,
-  // `store: false`, `parallel_tool_calls: true`, or `reasoning.summary` when the
-  // Messages source did not express those knobs.
+  // `store: false`, or `parallel_tool_calls: true` when the Messages source
+  // did not express those knobs.
   return {
     model: payload.model,
     input: translateMessagesInput(payload.messages),
@@ -229,6 +240,7 @@ export const translateMessagesToResponses = (payload: MessagesPayload): Response
     stream: true,
     ...(reasoning ? { reasoning } : {}),
     ...(text ? { text } : {}),
+    ...(payload.service_tier != null ? { service_tier: payload.service_tier } : {}),
   };
 };
 
diff --git a/packages/translate/src/responses-via-chat-completions/request.ts b/packages/translate/src/responses-via-chat-completions/request.ts
index 6d0230db3..e7d23486f 100644
--- a/packages/translate/src/responses-via-chat-completions/request.ts
+++ b/packages/translate/src/responses-via-chat-completions/request.ts
@@ -242,6 +242,7 @@ export const translateResponsesToChatCompletions = (payload: ResponsesPayload):
     ...(payload.prompt_cache_key !== undefined ? { prompt_cache_key: payload.prompt_cache_key } : {}),
     ...(payload.safety_identifier !== undefined ? { safety_identifier: payload.safety_identifier } : {}),
     ...(payload.reasoning?.effort != null ? { reasoning_effort: payload.reasoning.effort } : {}),
+    ...(payload.text?.verbosity != null ? { verbosity: payload.text.verbosity } : {}),
     ...(payload.service_tier !== undefined ? { service_tier: payload.service_tier } : {}),
     // Chat Completions has no request-level counterpart for Responses
     // `reasoning`; only explicit reasoning items survive this translation.
diff --git a/packages/translate/src/responses-via-messages/request.ts b/packages/translate/src/responses-via-messages/request.ts
index 54fcf93a6..786ce4a98 100644
--- a/packages/translate/src/responses-via-messages/request.ts
+++ b/packages/translate/src/responses-via-messages/request.ts
@@ -3,6 +3,7 @@ import { responsesReasoningToMessagesUpstreamBlock } from '../shared/messages-an
 import { buildCustomToolInputSchema } from '../shared/responses-via/custom-tool-wrap.ts';
 import { applyLastMessageCacheBreakpoint, applyLastToolCacheBreakpoint, EPHEMERAL_CACHE_CONTROL } from '../shared/via-messages/cache-breakpoints.ts';
 import { fetchRemoteImage, type RemoteImageLoader, resolveImageUrlToMessagesImage } from '../shared/via-messages/remote-images.ts';
+import { buildMessagesThinkingFromExtensions, mapSummaryToAnthropicDisplay } from '../shared/via-messages/anthropic-extensions.ts';
 import {
   MESSAGES_FALLBACK_MAX_TOKENS,
   type MessagesAssistantContentBlock,
@@ -331,6 +332,30 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op
   if (formatSchema) outputConfig.format = { type: 'json_schema', schema: formatSchema };
   const hasOutputConfig = Object.keys(outputConfig).length > 0;
 
+  // Native Responses → Messages: `reasoning.summary` materializes onto the
+  // Messages-native `thinking.display`. Extension-driven thinking
+  // (`thinking_budget`, `adaptive_thinking`) wins, with the display merged
+  // in, because the alias write-side validator pins facets one-at-a-time;
+  // next comes `effort: 'none'` → disabled. Only when neither applies and
+  // summary is the sole signal do we synthesize
+  // `thinking.{type:'enabled', display}` so the display reaches the wire.
+  const extensionThinking = buildMessagesThinkingFromExtensions({
+    thinkingBudget: payload.thinking_budget,
+    adaptiveThinking: payload.adaptive_thinking,
+  });
+  const disabledThinking = effort === 'none' ? { type: 'disabled' as const } : undefined;
+  const summaryDisplay = payload.reasoning?.summary !== undefined ? mapSummaryToAnthropicDisplay(payload.reasoning.summary) : undefined;
+  const fallbackDisplayThinking =
+    !extensionThinking && !disabledThinking && summaryDisplay !== undefined
+      ? { type: 'enabled' as const, display: summaryDisplay as NonNullable<MessagesPayload['thinking']>['display'] }
+      : undefined;
+  const thinkingFromExtensions = extensionThinking
+    ? summaryDisplay !== undefined
+      ? { ...extensionThinking, display: summaryDisplay as NonNullable<MessagesPayload['thinking']>['display'] }
+      : extensionThinking
+    : undefined;
+  const thinking = thinkingFromExtensions ?? disabledThinking ?? fallbackDisplayThinking;
+
   // Responses `metadata` is intentionally omitted on the Messages path;
   // not coerced into Anthropic metadata.user_id, prompt-cache, or safety
   // semantics.
@@ -344,8 +369,10 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op
     stream: true,
     tools,
     tool_choice: translateToolChoice(payload.tool_choice),
-    ...(effort === 'none' ? { thinking: { type: 'disabled' as const } } : {}),
+    ...(thinking ? { thinking } : {}),
     ...(hasOutputConfig ? { output_config: outputConfig } : {}),
+    ...(payload.anthropic_speed != null ? { speed: payload.anthropic_speed } : {}),
+    ...(payload.service_tier != null ? { service_tier: payload.service_tier } : {}),
   };
 
   return { target, customToolNames };
diff --git a/packages/translate/src/responses-via-messages/request_test.ts b/packages/translate/src/responses-via-messages/request_test.ts
index 9b690ccbf..2aedc14b7 100644
--- a/packages/translate/src/responses-via-messages/request_test.ts
+++ b/packages/translate/src/responses-via-messages/request_test.ts
@@ -6,7 +6,7 @@ import { MESSAGES_FALLBACK_MAX_TOKENS, type MessagesClientTool, type MessagesToo
 
 const stubRemoteImageLoader = (result: { mediaType: string | null; data: Uint8Array } | null) => () => Promise.resolve(result);
 
-test('translateResponsesToMessages maps reasoning.effort none to thinking.disabled', async () => {
+test('translateResponsesToMessages maps reasoning.effort none to thinking.disabled (summary ignored when reasoning is disabled)', async () => {
   const result = await translateResponsesToMessages({
     model: 'claude-test',
     input: [{ type: 'message', role: 'user', content: 'hi' }],
@@ -41,7 +41,7 @@ test('translateResponsesToMessages maps reasoning.effort directly to output_conf
     stream: null,
     store: false,
     parallel_tool_calls: true,
-    reasoning: { effort: 'minimal', summary: 'detailed' },
+    reasoning: { effort: 'minimal' },
   });
 
   assertEquals(result.target.output_config, { effort: 'minimal' });
diff --git a/packages/translate/src/shared/gemini-via/gemini.ts b/packages/translate/src/shared/gemini-via/gemini.ts
index a5b4993ea..99d8b4872 100644
--- a/packages/translate/src/shared/gemini-via/gemini.ts
+++ b/packages/translate/src/shared/gemini-via/gemini.ts
@@ -117,21 +117,33 @@ export const geminiFunctionResponsePart = (part: GeminiPart, ids: GeminiToolCall
   return { response, id: unmatched?.shift() ?? id };
 };
 
-export const geminiThinkingLevelEffort = (thinkingConfig?: GeminiThinkingConfig): 'low' | 'medium' | 'high' | undefined => {
+// Reasoning effort is freeform on the inbound IRs (per Goal 2: never gate
+// operator-typed values), but the gateway publishes a canonical closed set so
+// translate-side mappers can normalize without rewriting unknown values.
+// References:
+// - docs/superpowers/specs/2026-06-25-model-aliases-design.md (Translate Layer)
+export type ReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' | 'max';
+
+export const geminiThinkingLevelEffort = (thinkingConfig?: GeminiThinkingConfig): ReasoningEffort | undefined => {
   switch (thinkingConfig?.thinkingLevel) {
   case 'minimal':
+    return 'minimal';
   case 'low':
     return 'low';
   case 'medium':
     return 'medium';
   case 'high':
     return 'high';
+  case 'xhigh':
+    return 'xhigh';
+  case 'max':
+    return 'max';
   default:
     return undefined;
   }
 };
 
-export const geminiReasoningEffort = (thinkingConfig?: GeminiThinkingConfig): 'none' | 'low' | 'medium' | 'high' | null => {
+export const geminiReasoningEffort = (thinkingConfig?: GeminiThinkingConfig): ReasoningEffort | null => {
   if (!thinkingConfig) return null;
 
   if (thinkingConfig.thinkingBudget !== undefined) {
diff --git a/packages/translate/src/shared/messages-via/reasoning-summary.ts b/packages/translate/src/shared/messages-via/reasoning-summary.ts
new file mode 100644
index 000000000..6d12bab9b
--- /dev/null
+++ b/packages/translate/src/shared/messages-via/reasoning-summary.ts
@@ -0,0 +1,21 @@
+import type { MessagesThinkingDisplay } from '@floway-dev/protocols/messages';
+
+// Reverse of via-messages/anthropic-extensions.ts mapSummaryToAnthropicDisplay.
+// Anthropic's `summarized` collapses both `concise` and `detailed`; we pick
+// `concise` as the canonical reverse since it is Responses' more compact
+// summary mode and round-tripping through the gateway should not silently
+// inflate verbosity. `full` maps to `detailed` and `omitted` stays as-is.
+// Any other operator-typed value passes through verbatim so the Responses
+// upstream sees the original spelling and decides whether to accept it.
+export const mapAnthropicDisplayToSummary = (display: MessagesThinkingDisplay | string): string | undefined => {
+  switch (display) {
+  case 'summarized':
+    return 'concise';
+  case 'omitted':
+    return 'omitted';
+  case 'full':
+    return 'detailed';
+  default:
+    return display;
+  }
+};
diff --git a/packages/translate/src/shared/via-messages/anthropic-extensions.ts b/packages/translate/src/shared/via-messages/anthropic-extensions.ts
new file mode 100644
index 000000000..513db32ce
--- /dev/null
+++ b/packages/translate/src/shared/via-messages/anthropic-extensions.ts
@@ -0,0 +1,93 @@
+import type { MessagesPayload, MessagesThinkingDisplay } from '@floway-dev/protocols/messages';
+
+// Anthropic's structured `thinking.display` enumerates three modes; the
+// inbound IR's `reasoning_summary` extension and the Responses-native
+// `reasoning.summary` share an OpenAI-style {auto|concise|detailed|omitted}
+// vocabulary. The mapping collapses concise+detailed onto Anthropic's single
+// `summarized` mode (both surface a redacted summary, not the full chain),
+// `omitted` is the canonical hide-everything spelling, and `auto` returns
+// `undefined` so Anthropic's account-default takes over. Operator-typed
+// values that match neither vocabulary pass through verbatim — Anthropic
+// rejects unknown values at the wire, which is the explicit-failure path we
+// want per the alias design's no-enum-gating contract.
+export const mapSummaryToAnthropicDisplay = (summary: string): MessagesThinkingDisplay | string | undefined => {
+  switch (summary) {
+  case 'concise':
+  case 'detailed':
+    return 'summarized';
+  case 'omitted':
+    return 'omitted';
+  case 'auto':
+    return undefined;
+  default:
+    return summary;
+  }
+};
+
+// Merge a beta token list onto an existing `anthropic-beta` header value.
+// The header is a case-sensitive, comma-separated list per the Anthropic
+// docs; tokens are trimmed, empties dropped, and dedupe is by exact-match
+// equality (first occurrence wins) so operators can carry parallel tokens
+// that differ only by date suffix. Re-joined with `, ` so the wire shape
+// matches Anthropic's own examples and gateways like envoyproxy/ai-gateway.
+// References:
+// - https://platform.claude.com/docs/en/api/beta-headers
+// - https://github.com/envoyproxy/ai-gateway
+export const mergeAnthropicBetaTokens = (existing: string | null | undefined, additions: readonly string[]): string => {
+  const seen = new Set<string>();
+  const merged: string[] = [];
+  const collect = (token: string): void => {
+    const trimmed = token.trim();
+    if (!trimmed || seen.has(trimmed)) return;
+    seen.add(trimmed);
+    merged.push(trimmed);
+  };
+
+  if (existing) {
+    for (const token of existing.split(',')) collect(token);
+  }
+  for (const token of additions) collect(token);
+
+  return merged.join(', ');
+};
+
+// Materialize the Messages-bound `anthropic_beta` extension list onto an
+// outbound request's `anthropic-beta` header, mutating `headers` in place so
+// the caller (typically the gateway-side rule-apply pass) doesn't have to
+// re-parse and re-set the header itself. No-op when `additions` is empty.
+export const applyAnthropicBetaToHeaders = (headers: Headers, additions: readonly string[]): void => {
+  if (!additions.length) return;
+  const merged = mergeAnthropicBetaTokens(headers.get('anthropic-beta'), additions);
+  if (merged) headers.set('anthropic-beta', merged);
+};
+
+// Build a Messages `thinking` block from the Floway extension fields a
+// non-Messages inbound carries (`thinking_budget`, `adaptive_thinking`,
+// `reasoning_summary`). `adaptive_thinking: true` overrides `thinking_budget`
+// because the alias write-side validator enforces single-facet selection;
+// when both still arrive the adaptive choice wins.
+//
+// `reasoningSummary` is the OpenAI-style summary vocabulary
+// ({auto|concise|detailed|omitted} plus pass-through). With no budget or
+// adaptive signal it synthesizes `thinking.{type:'enabled', display}`, since
+// Anthropic would otherwise discard a display that has no thinking mode;
+// a lone `auto` maps to no display and the builder returns `undefined`.
+export const buildMessagesThinkingFromExtensions = (input: {
+  thinkingBudget?: number;
+  adaptiveThinking?: boolean;
+  reasoningSummary?: string;
+}): MessagesPayload['thinking'] | undefined => {
+  const display = input.reasoningSummary !== undefined ? mapSummaryToAnthropicDisplay(input.reasoningSummary) : undefined;
+  const displayPart = display !== undefined ? { display: display as MessagesThinkingDisplay } : {};
+
+  if (input.adaptiveThinking === true) {
+    return { type: 'adaptive', ...displayPart };
+  }
+  if (input.thinkingBudget !== undefined) {
+    return { type: 'enabled', budget_tokens: input.thinkingBudget, ...displayPart };
+  }
+  if (input.reasoningSummary !== undefined && display !== undefined) {
+    return { type: 'enabled', ...displayPart };
+  }
+  return undefined;
+};

From 6ea94045b2b46b0eb144827e2dd2df36e1d81d3d Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Thu, 25 Jun 2026 21:24:12 +0800
Subject: [PATCH 004/170] test(translate): cover Floway extension emission
 across all nine pairs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

One assertion per non-drop cell of the model-aliases translate-emission
table: each test sets a single inbound rule (native or extension) and
checks the upstream-natural slot is present with the value forwarded
verbatim. Each pair also gets a drop-side assertion that the residue
field does not leak into the translated body — the per-upstream
sanitizer is the actual stripper, but translate must not invent a
target field where the mapping table says drop.

Pre-existing responses-via-messages tests that paired effort with
reasoning.summary keep their summary input (so the disabled-precedence
behavior is still verified) but no longer assume summary is silently
discarded; the new contract surfaces it as thinking.display where the
upstream has a slot, and the disabled case continues to win.
---
 .../chat-completions-via-messages/request.ts  |  2 +-
 .../request_test.ts                           | 99 +++++++++++++++++++
 .../chat-completions-via-responses/request.ts |  2 +-
 .../request_test.ts                           | 53 ++++++++++
 .../request_test.ts                           | 56 +++++++++++
 .../src/gemini-via-messages/request_test.ts   | 65 ++++++++++++
 .../src/gemini-via-responses/request_test.ts  | 55 +++++++++++
 .../request_test.ts                           | 53 ++++++++++
 .../messages-via-responses/request_test.ts    | 80 +++++++++++++++
 .../request_test.ts                           | 52 ++++++++++
 .../src/responses-via-messages/request.ts     |  2 +-
 .../responses-via-messages/request_test.ts    | 51 ++++++++++
 12 files changed, 567 insertions(+), 3 deletions(-)

diff --git a/packages/translate/src/chat-completions-via-messages/request.ts b/packages/translate/src/chat-completions-via-messages/request.ts
index 82dd22c59..5e83a230b 100644
--- a/packages/translate/src/chat-completions-via-messages/request.ts
+++ b/packages/translate/src/chat-completions-via-messages/request.ts
@@ -1,8 +1,8 @@
 import { messagesThinkingBlockFromChatCompletionsScalarReasoning } from '../shared/chat-completions-and-messages/reasoning.ts';
 import { parseToolArgumentsObject } from '../shared/messages/tool-arguments.ts';
+import { buildMessagesThinkingFromExtensions } from '../shared/via-messages/anthropic-extensions.ts';
 import { applyLastMessageCacheBreakpoint, applyLastToolCacheBreakpoint } from '../shared/via-messages/cache-breakpoints.ts';
 import { fetchRemoteImage, type RemoteImageLoader, resolveImageUrlToMessagesImage } from '../shared/via-messages/remote-images.ts';
-import { buildMessagesThinkingFromExtensions } from '../shared/via-messages/anthropic-extensions.ts';
 import type { ChatCompletionsPayload, ChatCompletionsContentPart, ChatCompletionsMessage, ChatCompletionsTool } from '@floway-dev/protocols/chat-completions';
 import { MESSAGES_FALLBACK_MAX_TOKENS, type MessagesAssistantContentBlock, type MessagesMessage, type MessagesPayload, type MessagesTextBlock, type MessagesUserContentBlock } from '@floway-dev/protocols/messages';
 
diff --git a/packages/translate/src/chat-completions-via-messages/request_test.ts b/packages/translate/src/chat-completions-via-messages/request_test.ts
index e0c04c65b..0fce785db 100644
--- a/packages/translate/src/chat-completions-via-messages/request_test.ts
+++ b/packages/translate/src/chat-completions-via-messages/request_test.ts
@@ -1161,3 +1161,102 @@ test('translateChatCompletionsToMessages rejects an unknown user content part ty
     'does not accept video_url content parts',
   );
 });
+
+// ── Floway extension emission ──
+
+test('translateChatCompletionsToMessages emits thinking_budget extension onto thinking.{enabled, budget_tokens}', async () => {
+  const result = await translateChatCompletionsToMessages(
+    mkPayload({
+      messages: [{ role: 'user', content: 'hi' }],
+      thinking_budget: 4096,
+    }),
+  );
+
+  assertEquals(result.thinking, { type: 'enabled', budget_tokens: 4096 });
+});
+
+test('translateChatCompletionsToMessages emits adaptive_thinking extension onto thinking.{adaptive} (wins over budget)', async () => {
+  const result = await translateChatCompletionsToMessages(
+    mkPayload({
+      messages: [{ role: 'user', content: 'hi' }],
+      thinking_budget: 4096,
+      adaptive_thinking: true,
+    }),
+  );
+
+  assertEquals(result.thinking, { type: 'adaptive' });
+});
+
+test('translateChatCompletionsToMessages maps reasoning_summary onto thinking.display via concise|detailed → summarized', async () => {
+  const concise = await translateChatCompletionsToMessages(mkPayload({ messages: [{ role: 'user', content: 'hi' }], reasoning_summary: 'concise' }));
+  const detailed = await translateChatCompletionsToMessages(mkPayload({ messages: [{ role: 'user', content: 'hi' }], reasoning_summary: 'detailed' }));
+  const omitted = await translateChatCompletionsToMessages(mkPayload({ messages: [{ role: 'user', content: 'hi' }], reasoning_summary: 'omitted' }));
+  const auto = await translateChatCompletionsToMessages(mkPayload({ messages: [{ role: 'user', content: 'hi' }], reasoning_summary: 'auto' }));
+
+  assertEquals(concise.thinking, { type: 'enabled', display: 'summarized' });
+  assertEquals(detailed.thinking, { type: 'enabled', display: 'summarized' });
+  assertEquals(omitted.thinking, { type: 'enabled', display: 'omitted' });
+  // `auto` maps to no `display` value so Anthropic's account default applies;
+  // with no budget/adaptive signal there is no thinking block to attach it to.
+  assertEquals(auto.thinking, undefined);
+});
+
+test('translateChatCompletionsToMessages merges reasoning_summary onto budget-driven thinking block', async () => {
+  const result = await translateChatCompletionsToMessages(
+    mkPayload({
+      messages: [{ role: 'user', content: 'hi' }],
+      thinking_budget: 2048,
+      reasoning_summary: 'concise',
+    }),
+  );
+
+  assertEquals(result.thinking, { type: 'enabled', budget_tokens: 2048, display: 'summarized' });
+});
+
+test('translateChatCompletionsToMessages emits anthropic_speed onto Messages speed', async () => {
+  const result = await translateChatCompletionsToMessages(
+    mkPayload({
+      messages: [{ role: 'user', content: 'hi' }],
+      anthropic_speed: 'fast',
+    }),
+  );
+
+  assertEquals(result.speed, 'fast');
+});
+
+test('translateChatCompletionsToMessages forwards service_tier verbatim', async () => {
+  const result = await translateChatCompletionsToMessages(
+    mkPayload({
+      messages: [{ role: 'user', content: 'hi' }],
+      service_tier: 'priority',
+    }),
+  );
+
+  assertEquals(result.service_tier, 'priority');
+});
+
+test('translateChatCompletionsToMessages does not emit Messages-protocol fields when the extension is unset', async () => {
+  const result = await translateChatCompletionsToMessages(
+    mkPayload({
+      messages: [{ role: 'user', content: 'hi' }],
+    }),
+  );
+
+  assertEquals(result.thinking, undefined);
+  assertEquals(result.speed, undefined);
+  assertEquals(result.service_tier, undefined);
+});
+
+test('translateChatCompletionsToMessages leaves anthropic_beta as inbound residue (header injection is the gateway-side rule-apply step)', async () => {
+  const result = await translateChatCompletionsToMessages(
+    mkPayload({
+      messages: [{ role: 'user', content: 'hi' }],
+      anthropic_beta: ['fast-mode-2026-02-01', 'context-1m-2025-08-07'],
+    }),
+  );
+
+  // The translated body must not echo the OpenAI-family `anthropic_beta`
+  // field; the per-upstream sanitizer is responsible for stripping any
+  // residue, and the rule-apply pass handles the outbound header.
+  assertEquals('anthropic_beta' in result, false);
+});
diff --git a/packages/translate/src/chat-completions-via-responses/request.ts b/packages/translate/src/chat-completions-via-responses/request.ts
index 1865ff40b..8d33b9e8a 100644
--- a/packages/translate/src/chat-completions-via-responses/request.ts
+++ b/packages/translate/src/chat-completions-via-responses/request.ts
@@ -115,7 +115,7 @@ export const translateChatCompletionsToResponses = (payload: ChatCompletionsPayl
   // `reasoning_summary` is the inbound CC extension that materializes onto
   // the Responses-native `reasoning.summary` slot. Co-emit alongside
   // `reasoning.effort` so a single `reasoning` object captures both knobs.
-  const reasoningEffort = payload.reasoning_effort != null ? payload.reasoning_effort : undefined;
+  const reasoningEffort = payload.reasoning_effort ?? undefined;
   const reasoningSummary = payload.reasoning_summary;
   const reasoning =
     reasoningEffort !== undefined || reasoningSummary !== undefined
diff --git a/packages/translate/src/chat-completions-via-responses/request_test.ts b/packages/translate/src/chat-completions-via-responses/request_test.ts
index 3c753a35c..137df562f 100644
--- a/packages/translate/src/chat-completions-via-responses/request_test.ts
+++ b/packages/translate/src/chat-completions-via-responses/request_test.ts
@@ -431,3 +431,56 @@ test('translateChatCompletionsToResponses rejects an unknown message role', () =
     'does not accept function messages',
   );
 });
+
+// ── Floway extension emission ──
+
+test('translateChatCompletionsToResponses maps reasoning_summary onto reasoning.summary', () => {
+  const result = translateChatCompletionsToResponses({
+    model: 'gpt-test',
+    messages: [{ role: 'user', content: 'hi' }],
+    reasoning_summary: 'detailed',
+  });
+
+  assertEquals(result.reasoning, { summary: 'detailed' });
+});
+
+test('translateChatCompletionsToResponses co-emits reasoning_effort and reasoning_summary on the same reasoning object', () => {
+  const result = translateChatCompletionsToResponses({
+    model: 'gpt-test',
+    messages: [{ role: 'user', content: 'hi' }],
+    reasoning_effort: 'xhigh',
+    reasoning_summary: 'concise',
+  });
+
+  assertEquals(result.reasoning, { effort: 'xhigh', summary: 'concise' });
+});
+
+test('translateChatCompletionsToResponses leaves Messages-only extensions as inbound residue', () => {
+  const result = translateChatCompletionsToResponses({
+    model: 'gpt-test',
+    messages: [{ role: 'user', content: 'hi' }],
+    thinking_budget: 4096,
+    adaptive_thinking: true,
+    anthropic_speed: 'fast',
+    anthropic_beta: ['fast-mode-2026-02-01'],
+  });
+
+  // Responses has no slot for any of these; the sanitizer strips the
+  // residue. Translate must not invent a target field.
+  assertEquals('thinking_budget' in result, false);
+  assertEquals('adaptive_thinking' in result, false);
+  assertEquals('anthropic_speed' in result, false);
+  assertEquals('anthropic_beta' in result, false);
+});
+
+test('translateChatCompletionsToResponses passes a fully extension-free payload through unchanged from prior behavior', () => {
+  const result = translateChatCompletionsToResponses({
+    model: 'gpt-test',
+    messages: [{ role: 'user', content: 'hi' }],
+    reasoning_effort: 'medium',
+    service_tier: 'priority',
+  });
+
+  assertEquals(result.reasoning, { effort: 'medium' });
+  assertEquals(result.service_tier, 'priority');
+});
diff --git a/packages/translate/src/gemini-via-chat-completions/request_test.ts b/packages/translate/src/gemini-via-chat-completions/request_test.ts
index 7b9c8403f..45f98c146 100644
--- a/packages/translate/src/gemini-via-chat-completions/request_test.ts
+++ b/packages/translate/src/gemini-via-chat-completions/request_test.ts
@@ -477,3 +477,59 @@ test('buildTargetRequest rejects a part with no recognized content field', () =>
     'has no recognized content',
   );
 });
+
+// ── Floway extension emission ──
+
+test('buildTargetRequest emits generationConfig.verbosity onto Chat verbosity', () => {
+  const result = buildTargetRequest(
+    { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { verbosity: 'low' } },
+    'gpt-test',
+  );
+
+  assertEquals(result.verbosity, 'low');
+});
+
+test('buildTargetRequest emits generationConfig.serviceTier onto Chat service_tier (camelCase → snake_case)', () => {
+  const result = buildTargetRequest(
+    { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { serviceTier: 'priority' } },
+    'gpt-test',
+  );
+
+  assertEquals(result.service_tier, 'priority');
+});
+
+test('buildTargetRequest drops top-level Anthropic extensions (anthropicSpeed, anthropicBeta) on Chat', () => {
+  const result = buildTargetRequest(
+    {
+      contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
+      anthropicSpeed: 'fast',
+      anthropicBeta: ['fast-mode-2026-02-01'],
+    },
+    'gpt-test',
+  );
+
+  assertEquals('anthropicSpeed' in result, false);
+  assertEquals('anthropic_speed' in result, false);
+  assertEquals('speed' in result, false);
+  assertEquals('anthropicBeta' in result, false);
+  assertEquals('anthropic_beta' in result, false);
+});
+
+test('buildTargetRequest extends reasoning_effort enum to recognize xhigh and max', () => {
+  const xhigh = buildTargetRequest(
+    { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { thinkingLevel: 'xhigh' } } },
+    'gpt-test',
+  );
+  const max = buildTargetRequest(
+    { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { thinkingLevel: 'max' } } },
+    'gpt-test',
+  );
+  const minimal = buildTargetRequest(
+    { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { thinkingLevel: 'minimal' } } },
+    'gpt-test',
+  );
+
+  assertEquals(xhigh.reasoning_effort, 'xhigh');
+  assertEquals(max.reasoning_effort, 'max');
+  assertEquals(minimal.reasoning_effort, 'minimal');
+});
diff --git a/packages/translate/src/gemini-via-messages/request_test.ts b/packages/translate/src/gemini-via-messages/request_test.ts
index 2bfd96510..b10339a49 100644
--- a/packages/translate/src/gemini-via-messages/request_test.ts
+++ b/packages/translate/src/gemini-via-messages/request_test.ts
@@ -405,3 +405,68 @@ test('buildTargetRequest rejects a part with no recognized content field', () =>
     'has no recognized content',
   );
 });
+
+// ── Floway extension emission ──
+
+test('buildTargetRequest emits top-level anthropicSpeed onto Messages speed', () => {
+  const result = buildTargetRequest(
+    { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], anthropicSpeed: 'fast' },
+    'claude-test',
+    noOptions,
+  );
+
+  assertEquals(result.speed, 'fast');
+});
+
+test('buildTargetRequest emits generationConfig.serviceTier onto Messages service_tier', () => {
+  const result = buildTargetRequest(
+    { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { serviceTier: 'priority' } },
+    'claude-test',
+    noOptions,
+  );
+
+  assertEquals(result.service_tier, 'priority');
+});
+
+test('buildTargetRequest maps includeThoughts onto thinking.display (true → summarized, false → omitted)', () => {
+  const summarized = buildTargetRequest(
+    { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { includeThoughts: true } } },
+    'claude-test',
+    noOptions,
+  );
+  const omitted = buildTargetRequest(
+    { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { includeThoughts: false } } },
+    'claude-test',
+    noOptions,
+  );
+
+  assertEquals(summarized.thinking, { type: 'enabled', display: 'summarized' });
+  assertEquals(omitted.thinking, { type: 'enabled', display: 'omitted' });
+});
+
+test('buildTargetRequest drops verbosity extension on Messages (no slot)', () => {
+  const result = buildTargetRequest(
+    { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { verbosity: 'low' } },
+    'claude-test',
+    noOptions,
+  );
+
+  assertEquals('verbosity' in result, false);
+});
+
+test('buildTargetRequest leaves anthropicBeta as inbound residue for the gateway header pass', () => {
+  const result = buildTargetRequest(
+    {
+      contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
+      anthropicBeta: ['fast-mode-2026-02-01'],
+    },
+    'claude-test',
+    noOptions,
+  );
+
+  // Translate cannot move it to a header; the gateway-side rule-apply pass
+  // (Task 5) materializes anthropicBeta into the outbound anthropic-beta
+  // header. The body must not echo it.
+  assertEquals('anthropicBeta' in result, false);
+  assertEquals('anthropic_beta' in result, false);
+});
diff --git a/packages/translate/src/gemini-via-responses/request_test.ts b/packages/translate/src/gemini-via-responses/request_test.ts
index e140d715c..66476f0d8 100644
--- a/packages/translate/src/gemini-via-responses/request_test.ts
+++ b/packages/translate/src/gemini-via-responses/request_test.ts
@@ -411,3 +411,58 @@ test('buildTargetRequest rejects a part with no recognized content field', () =>
     'has no recognized content',
   );
 });
+
+// ── Floway extension emission ──
+
+test('buildTargetRequest emits generationConfig.verbosity onto text.verbosity', () => {
+  const result = buildTargetRequest(
+    { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { verbosity: 'medium' } },
+    'gpt-test',
+  );
+
+  assertEquals(result.text?.verbosity, 'medium');
+});
+
+test('buildTargetRequest emits generationConfig.serviceTier onto Responses service_tier', () => {
+  const result = buildTargetRequest(
+    { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { serviceTier: 'priority' } },
+    'gpt-test',
+  );
+
+  assertEquals(result.service_tier, 'priority');
+});
+
+test('buildTargetRequest maps includeThoughts onto reasoning.summary (true → detailed, false → omitted)', () => {
+  const withSummary = buildTargetRequest(
+    {
+      contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
+      generationConfig: { thinkingConfig: { thinkingLevel: 'high', includeThoughts: true } },
+    },
+    'gpt-test',
+  );
+  const withoutSummary = buildTargetRequest(
+    {
+      contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
+      generationConfig: { thinkingConfig: { thinkingLevel: 'high', includeThoughts: false } },
+    },
+    'gpt-test',
+  );
+
+  assertEquals(withSummary.reasoning, { effort: 'high', summary: 'detailed' });
+  assertEquals(withoutSummary.reasoning, { effort: 'high', summary: 'omitted' });
+});
+
+test('buildTargetRequest drops top-level Anthropic extensions on Responses', () => {
+  const result = buildTargetRequest(
+    {
+      contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
+      anthropicSpeed: 'fast',
+      anthropicBeta: ['fast-mode-2026-02-01'],
+    },
+    'gpt-test',
+  );
+
+  assertEquals('anthropicSpeed' in result, false);
+  assertEquals('anthropic_speed' in result, false);
+  assertEquals('anthropicBeta' in result, false);
+});
diff --git a/packages/translate/src/messages-via-chat-completions/request_test.ts b/packages/translate/src/messages-via-chat-completions/request_test.ts
index a76ec42b1..50f326860 100644
--- a/packages/translate/src/messages-via-chat-completions/request_test.ts
+++ b/packages/translate/src/messages-via-chat-completions/request_test.ts
@@ -480,3 +480,56 @@ test('translateMessagesToChatCompletions rejects an unknown message role', () =>
     'does not accept role tool',
   );
 });
+
+// ── Floway extension emission ──
+
+test('translateMessagesToChatCompletions emits verbosity extension verbatim', () => {
+  const result = translateMessagesToChatCompletions({
+    model: 'gpt-test',
+    max_tokens: 256,
+    messages: [{ role: 'user', content: 'hi' }],
+    verbosity: 'low',
+  });
+
+  assertEquals(result.verbosity, 'low');
+});
+
+test('translateMessagesToChatCompletions forwards service_tier verbatim', () => {
+  const result = translateMessagesToChatCompletions({
+    model: 'gpt-test',
+    max_tokens: 256,
+    messages: [{ role: 'user', content: 'hi' }],
+    service_tier: 'priority',
+  });
+
+  assertEquals(result.service_tier, 'priority');
+});
+
+test('translateMessagesToChatCompletions drops Anthropic-only knobs that have no Chat-completions slot', () => {
+  const result = translateMessagesToChatCompletions({
+    model: 'gpt-test',
+    max_tokens: 256,
+    messages: [{ role: 'user', content: 'hi' }],
+    thinking: { type: 'enabled', budget_tokens: 4096, display: 'summarized' },
+    speed: 'fast',
+  });
+
+  // Only the OpenAI-canonical effort axis survives; budget_tokens, display,
+  // and speed have no Chat-completions equivalent and the translate function
+  // emits nothing for them. (The sanitizer would strip anything anyway.)
+  assertEquals(result.reasoning_effort, 'medium');
+  assertEquals('thinking_budget' in result, false);
+  assertEquals('reasoning_summary' in result, false);
+  assertEquals('speed' in result, false);
+  assertEquals('anthropic_speed' in result, false);
+});
+
+test('translateMessagesToChatCompletions does not emit verbosity when the extension is unset', () => {
+  const result = translateMessagesToChatCompletions({
+    model: 'gpt-test',
+    max_tokens: 256,
+    messages: [{ role: 'user', content: 'hi' }],
+  });
+
+  assertEquals('verbosity' in result, false);
+});
diff --git a/packages/translate/src/messages-via-responses/request_test.ts b/packages/translate/src/messages-via-responses/request_test.ts
index 6a6e3fa94..2846f1d39 100644
--- a/packages/translate/src/messages-via-responses/request_test.ts
+++ b/packages/translate/src/messages-via-responses/request_test.ts
@@ -502,3 +502,83 @@ test('translateMessagesToResponses rejects an unknown message role', () => {
     'does not accept role tool',
   );
 });
+
+// ── Floway extension emission ──
+
+test('translateMessagesToResponses emits verbosity onto text.verbosity', () => {
+  const result = translateMessagesToResponses({
+    model: 'gpt-test',
+    max_tokens: 256,
+    messages: [{ role: 'user', content: 'hi' }],
+    verbosity: 'medium',
+  });
+
+  assertEquals(result.text?.verbosity, 'medium');
+});
+
+test('translateMessagesToResponses co-emits verbosity with json_schema format under text', () => {
+  const result = translateMessagesToResponses({
+    model: 'gpt-test',
+    max_tokens: 256,
+    messages: [{ role: 'user', content: 'hi' }],
+    verbosity: 'low',
+    output_config: { format: { type: 'json_schema', schema: { type: 'object', properties: {} } } },
+  });
+
+  assertEquals(result.text?.verbosity, 'low');
+  assertEquals(result.text?.format?.type, 'json_schema');
+});
+
+test('translateMessagesToResponses maps thinking.display onto reasoning.summary (summarized → concise, omitted → omitted, full → detailed)', () => {
+  const summarized = translateMessagesToResponses({
+    model: 'gpt-test',
+    max_tokens: 256,
+    messages: [{ role: 'user', content: 'hi' }],
+    thinking: { type: 'enabled', display: 'summarized' },
+  });
+  const omitted = translateMessagesToResponses({
+    model: 'gpt-test',
+    max_tokens: 256,
+    messages: [{ role: 'user', content: 'hi' }],
+    thinking: { type: 'enabled', display: 'omitted' },
+  });
+  const full = translateMessagesToResponses({
+    model: 'gpt-test',
+    max_tokens: 256,
+    messages: [{ role: 'user', content: 'hi' }],
+    thinking: { type: 'enabled', display: 'full' },
+  });
+
+  assertEquals(summarized.reasoning?.summary, 'concise');
+  assertEquals(omitted.reasoning?.summary, 'omitted');
+  assertEquals(full.reasoning?.summary, 'detailed');
+});
+
+test('translateMessagesToResponses forwards service_tier verbatim', () => {
+  const result = translateMessagesToResponses({
+    model: 'gpt-test',
+    max_tokens: 256,
+    messages: [{ role: 'user', content: 'hi' }],
+    service_tier: 'priority',
+  });
+
+  assertEquals(result.service_tier, 'priority');
+});
+
+test('translateMessagesToResponses drops Anthropic-only mode knobs the Responses wire cannot express', () => {
+  const result = translateMessagesToResponses({
+    model: 'gpt-test',
+    max_tokens: 256,
+    messages: [{ role: 'user', content: 'hi' }],
+    thinking: { type: 'enabled', budget_tokens: 4096 },
+    speed: 'fast',
+  });
+
+  // budget_tokens, adaptive, speed, anthropic-beta have no Responses slot;
+  // translate emits nothing for them. The sanitizer drops residue.
+  assertEquals('thinking_budget' in result, false);
+  assertEquals('adaptive_thinking' in result, false);
+  assertEquals('anthropic_speed' in result, false);
+  assertEquals('anthropic_beta' in result, false);
+  assertEquals('speed' in result, false);
+});
diff --git a/packages/translate/src/responses-via-chat-completions/request_test.ts b/packages/translate/src/responses-via-chat-completions/request_test.ts
index f708f5929..448222f45 100644
--- a/packages/translate/src/responses-via-chat-completions/request_test.ts
+++ b/packages/translate/src/responses-via-chat-completions/request_test.ts
@@ -1455,3 +1455,55 @@ test('translateResponsesToChatCompletions maps multimodal function_call_output i
     { type: 'image_url', image_url: { url: 'data:image/png;base64,AQID', detail: 'high' } },
   ]);
 });
+
+// ── Floway extension emission ──
+
+test('translateResponsesToChatCompletions maps text.verbosity onto verbosity', () => {
+  const result = translateResponsesToChatCompletions({
+    model: 'gpt-test',
+    input: [{ type: 'message', role: 'user', content: 'hi' }],
+    text: { verbosity: 'low' },
+  });
+
+  assertEquals(result.target.verbosity, 'low');
+});
+
+test('translateResponsesToChatCompletions co-emits reasoning.effort onto reasoning_effort and service_tier verbatim', () => {
+  const result = translateResponsesToChatCompletions({
+    model: 'gpt-test',
+    input: [{ type: 'message', role: 'user', content: 'hi' }],
+    reasoning: { effort: 'xhigh' },
+    service_tier: 'priority',
+  });
+
+  assertEquals(result.target.reasoning_effort, 'xhigh');
+  assertEquals(result.target.service_tier, 'priority');
+});
+
+test('translateResponsesToChatCompletions leaves Messages-only extensions as inbound residue (CC has no slot)', () => {
+  const result = translateResponsesToChatCompletions({
+    model: 'gpt-test',
+    input: [{ type: 'message', role: 'user', content: 'hi' }],
+    thinking_budget: 4096,
+    adaptive_thinking: true,
+    anthropic_speed: 'fast',
+    anthropic_beta: ['fast-mode-2026-02-01'],
+  });
+
+  assertEquals('thinking_budget' in result.target, false);
+  assertEquals('adaptive_thinking' in result.target, false);
+  assertEquals('anthropic_speed' in result.target, false);
+  assertEquals('anthropic_beta' in result.target, false);
+});
+
+test('translateResponsesToChatCompletions drops reasoning.summary (Chat has no slot)', () => {
+  const result = translateResponsesToChatCompletions({
+    model: 'gpt-test',
+    input: [{ type: 'message', role: 'user', content: 'hi' }],
+    reasoning: { effort: 'medium', summary: 'concise' },
+  });
+
+  assertEquals(result.target.reasoning_effort, 'medium');
+  // Chat Completions has no slot for reasoning.summary; ensure no surrogate field is invented.
+  assertEquals('reasoning_summary' in result.target, false);
+});
diff --git a/packages/translate/src/responses-via-messages/request.ts b/packages/translate/src/responses-via-messages/request.ts
index 786ce4a98..504ca45fa 100644
--- a/packages/translate/src/responses-via-messages/request.ts
+++ b/packages/translate/src/responses-via-messages/request.ts
@@ -1,9 +1,9 @@
 import { parseToolArgumentsObject } from '../shared/messages/tool-arguments.ts';
 import { responsesReasoningToMessagesUpstreamBlock } from '../shared/messages-and-responses/reasoning.ts';
 import { buildCustomToolInputSchema } from '../shared/responses-via/custom-tool-wrap.ts';
+import { buildMessagesThinkingFromExtensions, mapSummaryToAnthropicDisplay } from '../shared/via-messages/anthropic-extensions.ts';
 import { applyLastMessageCacheBreakpoint, applyLastToolCacheBreakpoint, EPHEMERAL_CACHE_CONTROL } from '../shared/via-messages/cache-breakpoints.ts';
 import { fetchRemoteImage, type RemoteImageLoader, resolveImageUrlToMessagesImage } from '../shared/via-messages/remote-images.ts';
-import { buildMessagesThinkingFromExtensions, mapSummaryToAnthropicDisplay } from '../shared/via-messages/anthropic-extensions.ts';
 import {
   MESSAGES_FALLBACK_MAX_TOKENS,
   type MessagesAssistantContentBlock,
diff --git a/packages/translate/src/responses-via-messages/request_test.ts b/packages/translate/src/responses-via-messages/request_test.ts
index 2aedc14b7..f36ff89f6 100644
--- a/packages/translate/src/responses-via-messages/request_test.ts
+++ b/packages/translate/src/responses-via-messages/request_test.ts
@@ -645,3 +645,54 @@ test('translateResponsesToMessages keeps payload.instructions as the Messages to
   assertEquals(result.target.messages[0], { role: 'system', content: 'mid-array note' });
   assertEquals(result.target.messages[1].role, 'user');
 });
+
+// ── Floway extension emission ──
+
+const minimalResponsesPayload = (overrides: Record<string, unknown>) => ({
+  model: 'claude-test' as const,
+  input: [{ type: 'message' as const, role: 'user' as const, content: 'hi' }],
+  ...overrides,
+});
+
+test('translateResponsesToMessages emits thinking_budget onto thinking.{enabled, budget_tokens}', async () => {
+  const result = await translateResponsesToMessages(minimalResponsesPayload({ thinking_budget: 8192 }));
+  assertEquals(result.target.thinking, { type: 'enabled', budget_tokens: 8192 });
+});
+
+test('translateResponsesToMessages emits adaptive_thinking onto thinking.{adaptive}', async () => {
+  const result = await translateResponsesToMessages(minimalResponsesPayload({ adaptive_thinking: true }));
+  assertEquals(result.target.thinking, { type: 'adaptive' });
+});
+
+test('translateResponsesToMessages maps reasoning.summary onto thinking.display (concise|detailed → summarized, omitted → omitted)', async () => {
+  const concise = await translateResponsesToMessages(minimalResponsesPayload({ reasoning: { effort: 'high', summary: 'concise' } }));
+  const detailed = await translateResponsesToMessages(minimalResponsesPayload({ reasoning: { effort: 'high', summary: 'detailed' } }));
+  const omitted = await translateResponsesToMessages(minimalResponsesPayload({ reasoning: { effort: 'high', summary: 'omitted' } }));
+
+  assertEquals(concise.target.thinking, { type: 'enabled', display: 'summarized' });
+  assertEquals(detailed.target.thinking, { type: 'enabled', display: 'summarized' });
+  assertEquals(omitted.target.thinking, { type: 'enabled', display: 'omitted' });
+});
+
+test('translateResponsesToMessages emits anthropic_speed onto speed', async () => {
+  const result = await translateResponsesToMessages(minimalResponsesPayload({ anthropic_speed: 'fast' }));
+  assertEquals(result.target.speed, 'fast');
+});
+
+test('translateResponsesToMessages forwards service_tier verbatim', async () => {
+  const result = await translateResponsesToMessages(minimalResponsesPayload({ service_tier: 'priority' }));
+  assertEquals(result.target.service_tier, 'priority');
+});
+
+test('translateResponsesToMessages leaves anthropic_beta as inbound residue for the gateway header pass', async () => {
+  const result = await translateResponsesToMessages(minimalResponsesPayload({ anthropic_beta: ['fast-mode-2026-02-01'] }));
+  assertEquals('anthropic_beta' in result.target, false);
+});
+
+test('translateResponsesToMessages emission stack: budget + summary writes display onto the budget-driven block', async () => {
+  const result = await translateResponsesToMessages(minimalResponsesPayload({
+    thinking_budget: 2048,
+    reasoning: { effort: 'medium', summary: 'concise' },
+  }));
+  assertEquals(result.target.thinking, { type: 'enabled', budget_tokens: 2048, display: 'summarized' });
+});

From d7e9fe0d2c0a4d7a62cef83ff4101fa170a41187 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Thu, 25 Jun 2026 21:54:09 +0800
Subject: [PATCH 005/170] feat(gateway): weave alias matching into model
 resolution fan-out
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

enumerateModelInterpretations now matches each (provider, lookupId) pair
against the global alias table (post-prefix-strip, semantic P). Per the
matched alias's onConflict, the fan-out pushes either the alias-rewrite
interpretation, the real-name interpretation, or both (in either order).
A post-resolution prune drops the alias-rewrite interpretation when the
real-name interpretation resolved under onConflict=real-only — the
alias-rewrite is kept when the real lookup misses, so an empty upstream
catalog falls back to the alias's target id.

The aliasRules and aliasName ride through into a new ChatCandidate
wrapper type so downstream attempt logic can apply the rules and set
the x-floway-alias response header without polluting the
@floway-dev/provider package. RoutingDecision and classifyResponsesItemAffinity
become generic over the candidate type to carry alias metadata across
the affinity walk without re-deriving it.

modelAliases is added to the central Repo interface so each chat
serve.ts call site reaches it through getRepo() — the same pattern
the other operator-managed config tables follow.
---
 .../chat/chat-completions/routing.ts          |   4 +-
 .../data-plane/chat/chat-completions/serve.ts |   3 +
 .../src/data-plane/chat/gemini/routing.ts     |   4 +-
 .../src/data-plane/chat/gemini/serve.ts       |   5 +
 .../src/data-plane/chat/messages/routing.ts   |   4 +-
 .../src/data-plane/chat/messages/serve.ts     |   5 +
 .../chat/responses/items/affinity.ts          |  12 +-
 .../src/data-plane/chat/responses/routing.ts  |   4 +-
 .../data-plane/chat/responses/serve-prep.ts   |   7 +-
 .../src/data-plane/chat/shared/candidates.ts  |  41 +++-
 .../data-plane/chat/shared/candidates_test.ts |  12 +
 .../src/data-plane/chat/shared/routing.ts     |  12 +-
 .../src/data-plane/model-aliases/match.ts     |  19 ++
 .../data-plane/model-aliases/match_test.ts    |  54 +++++
 .../src/data-plane/providers/registry.ts      | 123 ++++++++++-
 .../src/data-plane/providers/registry_test.ts | 208 +++++++++++++++++-
 packages/gateway/src/repo/memory.ts           |  19 ++
 packages/gateway/src/repo/sql.ts              |  13 ++
 packages/gateway/src/repo/types.ts            |   8 +
 19 files changed, 515 insertions(+), 42 deletions(-)
 create mode 100644 packages/gateway/src/data-plane/model-aliases/match.ts
 create mode 100644 packages/gateway/src/data-plane/model-aliases/match_test.ts

diff --git a/packages/gateway/src/data-plane/chat/chat-completions/routing.ts b/packages/gateway/src/data-plane/chat/chat-completions/routing.ts
index 381feaea9..efcc380d6 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/routing.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/routing.ts
@@ -1,13 +1,13 @@
 import { classifyResponsesItemAffinity } from '../responses/items/affinity.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
+import type { ChatCandidate } from '../shared/candidates.ts';
 import type { RoutingDecision } from '../shared/routing.ts';
 import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
 import { chatCompletionsViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
 
 export const planChatCompletionsRouting = async (input: {
   readonly payload: ChatCompletionsPayload;
-  readonly candidates: readonly ProviderCandidate[];
+  readonly candidates: readonly ChatCandidate[];
   readonly store: StatefulResponsesStore;
 }): Promise<RoutingDecision> =>
   await classifyResponsesItemAffinity({
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
index 73e8c1afd..5d27541f9 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
@@ -1,6 +1,7 @@
 import { chatCompletionsAttempt } from './attempt.ts';
 import { renderChatCompletionsFailure } from './errors.ts';
 import { planChatCompletionsRouting } from './routing.ts';
+import { getRepo } from '../../../repo/index.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -18,9 +19,11 @@ export interface ChatCompletionsServeGenerateArgs {
 export const chatCompletionsServe = {
   generate: async (args: ChatCompletionsServeGenerateArgs): Promise<ExecuteResult<ProtocolFrame<ChatCompletionsStreamEvent>>> => {
     const { payload, ctx, store, headers } = args;
+    const aliases = await getRepo().modelAliases.loadAll();
     const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
       upstreamIds: ctx.upstreamIds,
       model: payload.model,
+      aliases,
       pickTarget: endpoints =>
         endpoints.chatCompletions ? 'chat-completions'
           : endpoints.messages ? 'messages'
diff --git a/packages/gateway/src/data-plane/chat/gemini/routing.ts b/packages/gateway/src/data-plane/chat/gemini/routing.ts
index 2d5e37d87..28e353c59 100644
--- a/packages/gateway/src/data-plane/chat/gemini/routing.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/routing.ts
@@ -1,6 +1,6 @@
 import { classifyResponsesItemAffinity } from '../responses/items/affinity.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
+import type { ChatCandidate } from '../shared/candidates.ts';
 import type { RoutingDecision } from '../shared/routing.ts';
 import type { GeminiPayload } from '@floway-dev/protocols/gemini';
 import { geminiViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
@@ -9,7 +9,7 @@ export type GeminiRoutingDecision = RoutingDecision;
 
 export const planGeminiRouting = async (input: {
   readonly payload: GeminiPayload;
-  readonly candidates: readonly ProviderCandidate[];
+  readonly candidates: readonly ChatCandidate[];
   readonly store: StatefulResponsesStore;
 }): Promise<GeminiRoutingDecision> =>
   await classifyResponsesItemAffinity({
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts
index f5daa1d86..840da62c7 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts
@@ -1,6 +1,7 @@
 import { geminiAttempt } from './attempt.ts';
 import { renderGeminiFailure } from './errors.ts';
 import { planGeminiRouting } from './routing.ts';
+import { getRepo } from '../../../repo/index.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -30,9 +31,11 @@ export interface GeminiServeCountTokensArgs {
 export const geminiServe = {
   generate: async (args: GeminiServeGenerateArgs): Promise<ExecuteResult<ProtocolFrame<GeminiStreamEvent>>> => {
     const { payload, ctx, store, model, headers } = args;
+    const aliases = await getRepo().modelAliases.loadAll();
     const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
       upstreamIds: ctx.upstreamIds,
       model,
+      aliases,
       // Gemini has no native upstream target in the provider API; prefer
       // Chat Completions, then Messages, then Responses.
       pickTarget: endpoints => endpoints.chatCompletions ? 'chat-completions' : endpoints.messages ? 'messages' : endpoints.responses ? 'responses' : null,
@@ -60,9 +63,11 @@ export const geminiServe = {
 
   countTokens: async (args: GeminiServeCountTokensArgs): Promise<ExecuteResult<ProtocolFrame<GeminiStreamEvent>> | PlainResult> => {
     const { payload, ctx, store, model, headers } = args;
+    const aliases = await getRepo().modelAliases.loadAll();
     const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
       upstreamIds: ctx.upstreamIds,
       model,
+      aliases,
       // Gemini countTokens has no native upstream support; only providers
       // exposing the Messages endpoint qualify because we translate Gemini
       // → Messages and call Messages count_tokens upstream.
diff --git a/packages/gateway/src/data-plane/chat/messages/routing.ts b/packages/gateway/src/data-plane/chat/messages/routing.ts
index d6de52107..e9783625c 100644
--- a/packages/gateway/src/data-plane/chat/messages/routing.ts
+++ b/packages/gateway/src/data-plane/chat/messages/routing.ts
@@ -1,6 +1,6 @@
 import { classifyResponsesItemAffinity } from '../responses/items/affinity.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
+import type { ChatCandidate } from '../shared/candidates.ts';
 import type { RoutingDecision } from '../shared/routing.ts';
 import type { MessagesPayload } from '@floway-dev/protocols/messages';
 import { messagesViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
@@ -9,7 +9,7 @@ export type MessagesRoutingDecision = RoutingDecision;
 
 export const planMessagesRouting = async (input: {
   readonly payload: MessagesPayload;
-  readonly candidates: readonly ProviderCandidate[];
+  readonly candidates: readonly ChatCandidate[];
   readonly store: StatefulResponsesStore;
 }): Promise<MessagesRoutingDecision> =>
   await classifyResponsesItemAffinity({
diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts
index ae9bb5d6c..719091768 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve.ts
@@ -1,6 +1,7 @@
 import { messagesAttempt } from './attempt.ts';
 import { renderMessagesFailure } from './errors.ts';
 import { planMessagesRouting } from './routing.ts';
+import { getRepo } from '../../../repo/index.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -25,9 +26,11 @@ export interface MessagesServeCountTokensArgs {
 export const messagesServe = {
   generate: async (args: MessagesServeGenerateArgs): Promise<ExecuteResult<ProtocolFrame<MessagesStreamEvent>>> => {
     const { payload, ctx, store, headers } = args;
+    const aliases = await getRepo().modelAliases.loadAll();
     const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
       upstreamIds: ctx.upstreamIds,
       model: payload.model,
+      aliases,
       pickTarget: endpoints =>
         endpoints.messages ? 'messages'
           : endpoints.responses ? 'responses'
@@ -57,9 +60,11 @@ export const messagesServe = {
 
   countTokens: async (args: MessagesServeCountTokensArgs): Promise<ExecuteResult<ProtocolFrame<MessagesStreamEvent>> | PlainResult> => {
     const { payload, ctx, store, headers } = args;
+    const aliases = await getRepo().modelAliases.loadAll();
     const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
       upstreamIds: ctx.upstreamIds,
       model: payload.model,
+      aliases,
       pickTarget: endpoints => endpoints.messages ? 'messages' : null,
       scheduler: ctx.backgroundScheduler,
       currentColo: ctx.currentColo,
diff --git a/packages/gateway/src/data-plane/chat/responses/items/affinity.ts b/packages/gateway/src/data-plane/chat/responses/items/affinity.ts
index 8fbc1a146..d5b5e5e47 100644
--- a/packages/gateway/src/data-plane/chat/responses/items/affinity.ts
+++ b/packages/gateway/src/data-plane/chat/responses/items/affinity.ts
@@ -98,10 +98,10 @@ const collectStoredResponsesItemRefs = async <TSourceItems>(
   return references;
 };
 
-const orderCandidatesByStoredResponsesAffinity = (
-  candidates: readonly ProviderCandidate[],
+const orderCandidatesByStoredResponsesAffinity = <T extends ProviderCandidate>(
+  candidates: readonly T[],
   preferredUpstreamIds: ReadonlySet<string>,
-): readonly ProviderCandidate[] => {
+): readonly T[] => {
   const preferred = [...preferredUpstreamIds].reverse();
   if (preferred.length === 0) return candidates;
 
@@ -113,17 +113,17 @@ const orderCandidatesByStoredResponsesAffinity = (
   return [...preferredCandidates, ...remainingCandidates];
 };
 
-export const classifyResponsesItemAffinity = async <TSourceItems>(input: {
+export const classifyResponsesItemAffinity = async <TSourceItems, TCandidate extends ProviderCandidate>(input: {
   sourceItems: TSourceItems;
   view: ResponsesItemsView<TSourceItems>;
   store: StatefulResponsesStore;
-  candidates: readonly ProviderCandidate[];
+  candidates: readonly TCandidate[];
   // Items the caller will stage as inputs after the affinity walk; passed
   // here so `loadInputItems` can pre-load any stored row whose content hash
   // matches one of them. Without this, a duplicate user message resent on
   // a later turn cannot be reused — it would mint a fresh row each time.
   inputItemsToStage?: readonly ResponsesInputItem[];
-}): Promise<RoutingDecision> => {
+}): Promise<RoutingDecision<TCandidate>> => {
   const { sourceItems, view, store, candidates, inputItemsToStage } = input;
   await store.loadInputItems({
     sourceItems,
diff --git a/packages/gateway/src/data-plane/chat/responses/routing.ts b/packages/gateway/src/data-plane/chat/responses/routing.ts
index 519e95bfe..05661aa69 100644
--- a/packages/gateway/src/data-plane/chat/responses/routing.ts
+++ b/packages/gateway/src/data-plane/chat/responses/routing.ts
@@ -1,5 +1,5 @@
 import { classifyResponsesItemAffinity } from './items/affinity.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
+import type { ChatCandidate } from '../shared/candidates.ts';
 import type { RoutingDecision } from '../shared/routing.ts';
 import type { StatefulResponsesStore } from './items/store.ts';
 import type { ResponsesInputItem, ResponsesPayload } from '@floway-dev/protocols/responses';
@@ -7,7 +7,7 @@ import { responsesItemsView } from '@floway-dev/translate/via-responses/response
 
 export const planResponsesRouting = async (input: {
   readonly payload: ResponsesPayload;
-  readonly candidates: readonly ProviderCandidate[];
+  readonly candidates: readonly ChatCandidate[];
   readonly store: StatefulResponsesStore;
 }): Promise<RoutingDecision> => {
   // A bare-string input is wrapped into a synthetic user message for staging;
diff --git a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
index 96096ee29..ec4a48afa 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
@@ -1,7 +1,8 @@
 import { renderResponsesFailure } from './errors.ts';
 import type { StatefulResponsesStore } from './items/store.ts';
 import { planResponsesRouting } from './routing.ts';
-import { enumerateProviderCandidates, type ProviderCandidate } from '../shared/candidates.ts';
+import { getRepo } from '../../../repo/index.ts';
+import { enumerateProviderCandidates, type ChatCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ModelEndpoints, ProtocolFrame } from '@floway-dev/protocols/common';
 import type { ResponsesInputItem, ResponsesPayload, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
@@ -72,7 +73,7 @@ const stageUserInputItems = async (input: ResponsesPayload['input'], store: Stat
 
 export type ResponsesServePlan =
   | { readonly kind: 'failure'; readonly result: ExecuteResult<ProtocolFrame<ResponsesStreamEvent>> }
-  | { readonly kind: 'ready'; readonly prepared: ResponsesPayload; readonly candidate: ProviderCandidate };
+  | { readonly kind: 'ready'; readonly prepared: ResponsesPayload; readonly candidate: ChatCandidate };
 
 // Runs the shared serve-side prep both `responsesServe.generate` and
 // `responsesServe.compact` need before dispatching to `responsesAttempt`:
@@ -88,9 +89,11 @@ export const prepareResponsesServePlan = async (args: {
 }): Promise<ResponsesServePlan> => {
   const { payload, ctx, store, pickTarget } = args;
   const prepared = await expandPreviousResponseId(payload, store);
+  const aliases = await getRepo().modelAliases.loadAll();
   const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
     upstreamIds: ctx.upstreamIds,
     model: prepared.model,
+    aliases,
     pickTarget,
     scheduler: ctx.backgroundScheduler,
     currentColo: ctx.currentColo,
diff --git a/packages/gateway/src/data-plane/chat/shared/candidates.ts b/packages/gateway/src/data-plane/chat/shared/candidates.ts
index 86b02e721..7a58f9e08 100644
--- a/packages/gateway/src/data-plane/chat/shared/candidates.ts
+++ b/packages/gateway/src/data-plane/chat/shared/candidates.ts
@@ -1,3 +1,4 @@
+import type { ModelAlias, ModelAliasRules } from '../../../control-plane/model-aliases/types.ts';
 import { createPerRequestFetcher } from '../../../dial/per-request.ts';
 import { collectInterpretationOutcomes, enumerateModelInterpretations, listModelProviders } from '../../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
@@ -6,6 +7,18 @@ import type { ChatTargetApi, ProviderCandidate } from '@floway-dev/provider';
 
 export type { ProviderCandidate };
 
+// Wrapper around `ProviderCandidate` that carries the matched alias's
+// operator-locked request-time rules and the alias name. The wrapper lives
+// here (in the gateway) rather than on `ProviderCandidate` itself to keep
+// the `@floway-dev/provider` package unaware of the gateway's alias
+// concept. Downstream attempt logic narrows the candidate when it needs
+// to apply rules or stamp the `x-floway-alias` response header; passthrough
+// consumers continue to treat the candidate as a plain `ProviderCandidate`.
+export type ChatCandidate = ProviderCandidate & {
+  readonly aliasRules?: ModelAliasRules;
+  readonly aliasName?: string;
+};
+
 // Returns the candidates that satisfy both the model resolution and the
 // target-endpoint pick, plus a `sawModel` flag that distinguishes the
 // "model is missing entirely" failure from "model exists but does not
@@ -13,11 +26,16 @@ export type { ProviderCandidate };
 // whose catalog fetch rejected this round so the caller's failure
 // renderer can surface them parenthetically.
 export const enumerateProviderCandidates = async ({
-  upstreamIds, model, pickTarget, scheduler, currentColo,
+  upstreamIds, model, aliases, pickTarget, scheduler, currentColo,
 }: {
   // null = unrestricted; empty list = no providers visible.
   upstreamIds: readonly string[] | null;
   model: string;
+  // Operator-managed alias table loaded by the caller (typically via
+  // `getRepo().modelAliases.loadAll()`). The fan-out matches each
+  // (provider, lookupId) interpretation against this list; an empty list
+  // is a valid input and produces only literal interpretations.
+  aliases: readonly ModelAlias[];
   pickTarget: (endpoints: ModelEndpoints) => ChatTargetApi | null;
   // Threaded into `resolveModelForProvider` so the per-upstream catalog
   // lookup hits the SWR-cached `fetchUpstreamModelsCached` instead of
@@ -27,7 +45,7 @@ export const enumerateProviderCandidates = async ({
   // into the per-request fetcher so colo-scoped fallback entries can be
   // honoured at dial time.
   currentColo: string;
-}): Promise<{ readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean; readonly failedUpstreams: readonly string[] }> => {
+}): Promise<{ readonly candidates: readonly ChatCandidate[]; readonly sawModel: boolean; readonly failedUpstreams: readonly string[] }> => {
   const fetcherForUpstream = await createPerRequestFetcher(currentColo);
   const providers = await listModelProviders(upstreamIds);
 
@@ -39,17 +57,28 @@ export const enumerateProviderCandidates = async ({
   // `resolveModelForRequest`; first-viable-wins ordering follows configured
   // sort_order across upstreams, with the unprefixed interpretation pushed
   // before the prefixed one within a single upstream.
-  const interpretations = enumerateModelInterpretations(model, providers);
+  //
+  // Alias matching runs inside `enumerateModelInterpretations`: each
+  // (provider, lookupId) pair is checked against the alias table and the
+  // matched alias's `onConflict` decides what to push. The alias-rewrite
+  // metadata rides out alongside each resolved candidate so the attempt
+  // layer can apply the locked rules.
+  const interpretations = enumerateModelInterpretations(model, providers, aliases);
   const { resolutions, failedUpstreams } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
 
-  const candidates: ProviderCandidate[] = [];
+  const candidates: ChatCandidate[] = [];
   let sawModel = false;
 
-  for (const { provider, resolved } of resolutions) {
+  for (const { interpretation, provider, resolved } of resolutions) {
     sawModel = true;
     const targetApi = pickTarget(resolved.binding.upstreamModel.endpoints);
     if (!targetApi) continue;
-    candidates.push({ provider, binding: resolved.binding, targetApi, fetcher: fetcherForUpstream(provider.upstream) });
+    const base: ProviderCandidate = { provider, binding: resolved.binding, targetApi, fetcher: fetcherForUpstream(provider.upstream) };
+    candidates.push(
+      interpretation.aliasRules !== undefined
+        ? { ...base, aliasRules: interpretation.aliasRules, aliasName: interpretation.aliasName }
+        : base,
+    );
   }
 
   return { candidates, sawModel, failedUpstreams };
diff --git a/packages/gateway/src/data-plane/chat/shared/candidates_test.ts b/packages/gateway/src/data-plane/chat/shared/candidates_test.ts
index 627b631d5..381d395b9 100644
--- a/packages/gateway/src/data-plane/chat/shared/candidates_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/candidates_test.ts
@@ -54,6 +54,7 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates, sawModel } = await enumerateProviderCandidates({
       upstreamIds: null,
+      aliases: [],
       model: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
@@ -74,6 +75,7 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates, sawModel } = await enumerateProviderCandidates({
       upstreamIds: null,
+      aliases: [],
       model: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
@@ -94,6 +96,7 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates, sawModel } = await enumerateProviderCandidates({
       upstreamIds: null,
+      aliases: [],
       model: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
@@ -113,6 +116,7 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates } = await enumerateProviderCandidates({
       upstreamIds: null,
+      aliases: [],
       model: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
@@ -133,6 +137,7 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates } = await enumerateProviderCandidates({
       upstreamIds: ['up_c', 'up_a'],
+      aliases: [],
       model: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
@@ -155,6 +160,7 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates } = await enumerateProviderCandidates({
       upstreamIds: null,
+      aliases: [],
       model: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
@@ -172,6 +178,7 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates: msgCandidates } = await enumerateProviderCandidates({
       upstreamIds: null,
+      aliases: [],
       model: 'test-model',
       pickTarget: pickMessagesOrResponses,
       scheduler: testScheduler,
@@ -182,6 +189,7 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates: resCandidates } = await enumerateProviderCandidates({
       upstreamIds: null,
+      aliases: [],
       model: 'test-model',
       pickTarget: pickResponses,
       scheduler: testScheduler,
@@ -198,6 +206,7 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates: anyCandidates } = await enumerateProviderCandidates({
       upstreamIds: null,
+      aliases: [],
       model: 'test-model',
       pickTarget: pickAny,
       scheduler: testScheduler,
@@ -208,6 +217,7 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates: msgCandidates, sawModel } = await enumerateProviderCandidates({
       upstreamIds: null,
+      aliases: [],
       model: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
@@ -245,6 +255,7 @@ describe('enumerateProviderCandidates', () => {
       async () => {
         const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
           upstreamIds: null,
+          aliases: [],
           model: 'test-model',
           pickTarget: pickMessages,
           scheduler: testScheduler,
@@ -288,6 +299,7 @@ describe('enumerateProviderCandidates', () => {
       async () => {
         const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
           upstreamIds: null,
+          aliases: [],
           model: 'test-model',
           pickTarget: pickMessages,
           scheduler: testScheduler,
diff --git a/packages/gateway/src/data-plane/chat/shared/routing.ts b/packages/gateway/src/data-plane/chat/shared/routing.ts
index 96785da70..392c50cf5 100644
--- a/packages/gateway/src/data-plane/chat/shared/routing.ts
+++ b/packages/gateway/src/data-plane/chat/shared/routing.ts
@@ -1,6 +1,12 @@
-import type { ProviderCandidate } from './candidates.ts';
+import type { ChatCandidate, ProviderCandidate } from './candidates.ts';
 import type { ChatServeFailure } from './errors.ts';
 
-export type RoutingDecision =
-  | { readonly kind: 'success'; readonly candidates: readonly ProviderCandidate[] }
+// Generic over the candidate type so call sites that hand in `ChatCandidate`
+// receive a decision whose surviving candidates retain the alias metadata.
+// The candidate filtering and ordering inside routing is shape-agnostic —
+// it touches `binding.upstream` and `binding.supportsResponsesItemReference`
+// only — so the generic narrows naturally from `ChatCandidate` back out
+// without re-deriving the alias fields.
+export type RoutingDecision<T extends ProviderCandidate = ChatCandidate> =
+  | { readonly kind: 'success'; readonly candidates: readonly T[] }
   | { readonly kind: 'failure'; readonly failure: ChatServeFailure };
diff --git a/packages/gateway/src/data-plane/model-aliases/match.ts b/packages/gateway/src/data-plane/model-aliases/match.ts
new file mode 100644
index 000000000..f297d1a50
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/match.ts
@@ -0,0 +1,19 @@
+import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
+
+export interface AliasMatchResult {
+  readonly alias: ModelAlias;
+}
+
+// Finds the first alias whose name equals the (post-prefix-strip) `lookupId`,
+// then applies its `upstreamIds` filter: empty means "match any upstream", a
+// non-empty filter must include `upstreamId`. Later same-named entries are not tried.
+export const matchAlias = (
+  lookupId: string,
+  upstreamId: string,
+  aliases: readonly ModelAlias[],
+): AliasMatchResult | undefined => {
+  const hit = aliases.find(a => a.alias === lookupId);
+  if (!hit) return undefined;
+  if (hit.upstreamIds.length > 0 && !hit.upstreamIds.includes(upstreamId)) return undefined;
+  return { alias: hit };
+};
diff --git a/packages/gateway/src/data-plane/model-aliases/match_test.ts b/packages/gateway/src/data-plane/model-aliases/match_test.ts
new file mode 100644
index 000000000..b1dd8ff2f
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/match_test.ts
@@ -0,0 +1,54 @@
+import { describe, expect, test } from 'vitest';
+
+import { matchAlias } from './match.ts';
+import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
+
+const make = (overrides: Partial<ModelAlias>): ModelAlias => ({
+  alias: 'a',
+  targetModelId: 't',
+  upstreamIds: [],
+  rules: {},
+  visibleInModelsList: true,
+  onConflict: 'real-only',
+  ...overrides,
+});
+
+describe('matchAlias', () => {
+  test('matches by exact lookupId when alias has no upstream filter', () => {
+    const aliases = [make({ alias: 'codex-auto-review', targetModelId: 'gpt-5.4' })];
+    expect(matchAlias('codex-auto-review', 'up-1', aliases)?.alias.alias).toBe('codex-auto-review');
+  });
+
+  test('does not match when lookupId differs', () => {
+    const aliases = [make({ alias: 'codex-auto-review' })];
+    expect(matchAlias('something-else', 'up-1', aliases)).toBeUndefined();
+  });
+
+  test('respects upstreamIds allowlist (member matches)', () => {
+    const aliases = [make({ alias: 'a', upstreamIds: ['up-1', 'up-2'] })];
+    expect(matchAlias('a', 'up-1', aliases)).toBeDefined();
+    expect(matchAlias('a', 'up-2', aliases)).toBeDefined();
+  });
+
+  test('respects upstreamIds allowlist (non-member misses)', () => {
+    const aliases = [make({ alias: 'a', upstreamIds: ['up-1'] })];
+    expect(matchAlias('a', 'up-3', aliases)).toBeUndefined();
+  });
+
+  test('empty upstreamIds means match-any', () => {
+    const aliases = [make({ alias: 'a', upstreamIds: [] })];
+    expect(matchAlias('a', 'anywhere', aliases)).toBeDefined();
+  });
+
+  test('returns the first matching alias entry verbatim', () => {
+    const aliases = [
+      make({ alias: 'a', targetModelId: 'first', rules: { reasoning: { effort: 'low' } } }),
+      make({ alias: 'a', targetModelId: 'second' }),
+    ];
+    expect(matchAlias('a', 'up-x', aliases)?.alias).toEqual(aliases[0]);
+  });
+
+  test('returns undefined for an empty alias list', () => {
+    expect(matchAlias('a', 'up-x', [])).toBeUndefined();
+  });
+});
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index 79a4f1c5a..8ca75c518 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -1,5 +1,7 @@
 import { fetchUpstreamModelsCached } from './models-cache.ts';
+import type { ModelAlias, ModelAliasRules } from '../../control-plane/model-aliases/types.ts';
 import { getRepo } from '../../repo/index.ts';
+import { matchAlias } from '../model-aliases/match.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import { type ModelEndpointKey, type ModelEndpoints, kindForEndpoints } from '@floway-dev/protocols/common';
 import type { InternalModel, ModelProviderInstance, ProviderModelRecord, ResolvedModel, Fetcher, UpstreamModel, UpstreamProviderKind, UpstreamRecord } from '@floway-dev/provider';
@@ -289,8 +291,22 @@ export interface ModelInterpretation {
   provider: ModelProviderInstance;
   // The bare id to query the upstream's catalog with. Equals the inbound
   // model id for the unprefixed surface; equals `inbound.slice(prefix.length)`
-  // for the prefixed surface.
+  // for the prefixed surface. For an alias-rewrite interpretation it equals
+  // the matched alias's `targetModelId`.
   lookupId: string;
+  // Operator-locked request-time rules carried alongside an alias-rewrite
+  // interpretation. Set only when this interpretation is the alias-rewrite
+  // half of a matched alias; the real-name interpretation in the same
+  // `conflictGroup` (and every non-aliased interpretation) leaves this
+  // undefined.
+  aliasRules?: ModelAliasRules;
+  // The alias name as authored by the operator. Set in lockstep with
+  // `aliasRules` and carried out for the `x-floway-alias` response header.
+  aliasName?: string;
+  // Identity-keyed group shared by the two interpretations a single
+  // `onConflict: 'real-only'` alias emits. The post-resolution prune uses
+  // this to drop the alias-rewrite member when both halves resolved.
+  conflictGroup?: { readonly originalLookupId: string };
 }
 
 // Expands one inbound model id into every (provider, catalog-lookup-id) pair
@@ -298,44 +314,102 @@ export interface ModelInterpretation {
 // when the inbound id literally equals one of the public-id surfaces the
 // upstream advertises (bare and/or prefixed, per `modelPrefix.addressable`).
 // The unprefixed interpretation is always pushed first when both apply.
+//
+// Each (provider, lookupId) candidate is then matched against the global
+// alias table — semantic P, post-prefix-strip — and the matched alias's
+// `onConflict` decides whether to push only the alias-rewrite
+// interpretation or both it and the real-name one (in either order).
+// Enumeration never consults the upstream catalog: both interpretations
+// are emitted even if the upstream exposes neither id, and resolution
+// simply drops whichever half misses.
 export const enumerateModelInterpretations = (
   modelId: string,
   providers: readonly ModelProviderInstance[],
+  aliases: readonly ModelAlias[],
 ): ModelInterpretation[] => {
   const out: ModelInterpretation[] = [];
   for (const provider of providers) {
     const cfg = provider.modelPrefix;
     if (cfg === null || cfg.addressable.includes('unprefixed')) {
-      out.push({ provider, lookupId: modelId });
+      pushInterpretation(out, provider, modelId, aliases);
     }
     if (cfg !== null && cfg.addressable.includes('prefixed') && modelId.startsWith(cfg.prefix)) {
-      out.push({ provider, lookupId: modelId.slice(cfg.prefix.length) });
+      pushInterpretation(out, provider, modelId.slice(cfg.prefix.length), aliases);
     }
   }
   return out;
 };
 
+const pushInterpretation = (
+  out: ModelInterpretation[],
+  provider: ModelProviderInstance,
+  lookupId: string,
+  aliases: readonly ModelAlias[],
+): void => {
+  const hit = matchAlias(lookupId, provider.upstream, aliases);
+  if (!hit) {
+    out.push({ provider, lookupId });
+    return;
+  }
+  const { alias } = hit;
+  const aliasInterp: ModelInterpretation = {
+    provider,
+    lookupId: alias.targetModelId,
+    aliasRules: alias.rules,
+    aliasName: alias.alias,
+  };
+  const realInterp: ModelInterpretation = { provider, lookupId };
+  switch (alias.onConflict) {
+  case 'alias-only':
+    out.push(aliasInterp);
+    return;
+  case 'real-only': {
+    // Both halves enter the resolution pass; the post-resolution prune
+    // drops the alias-rewrite member when the real-name resolved too.
+    // Identity-keyed group so the prune step can rejoin them without
+    // re-deriving an alias key.
+    const group = { originalLookupId: lookupId };
+    out.push({ ...realInterp, conflictGroup: group });
+    out.push({ ...aliasInterp, conflictGroup: group });
+    return;
+  }
+  case 'both-real-first':
+    out.push(realInterp);
+    out.push(aliasInterp);
+    return;
+  case 'both-alias-first':
+    out.push(aliasInterp);
+    out.push(realInterp);
+    return;
+  }
+};
+
 // Fan out per-interpretation against the SWR cache and collect the resolved
 // matches plus a deduped list of upstreams whose catalog fetch rejected.
 // Shared by `resolveModelForRequest` and `enumerateProviderCandidates`; the
 // per-caller divergence (passthrough vs LLM-candidate shape) happens after
 // this returns. Cancellation (`AbortError`) propagates so the per-request
 // abort signal cannot be masked by a slow upstream's rejection.
+//
+// Each successful resolution carries its source `interpretation` back to
+// the caller so the alias-rewrite metadata (`aliasRules`, `aliasName`)
+// rides through to the candidate, and so the `real-only` post-resolution
+// prune can rejoin the two halves of a conflict group.
 export const collectInterpretationOutcomes = async (
   interpretations: readonly ModelInterpretation[],
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
 ): Promise<{
-  resolutions: Array<{ provider: ModelProviderInstance; resolved: ProviderModelResolution }>;
+  resolutions: Array<{ interpretation: ModelInterpretation; provider: ModelProviderInstance; resolved: ProviderModelResolution }>;
   failedUpstreams: string[];
 }> => {
-  const settled = await Promise.allSettled(interpretations.map(({ provider, lookupId }) =>
-    resolveModelForProvider(provider, lookupId, fetcherForUpstream(provider.upstream), scheduler)
-      .then(resolved => ({ provider, resolved }))));
+  const settled = await Promise.allSettled(interpretations.map(interpretation =>
+    resolveModelForProvider(interpretation.provider, interpretation.lookupId, fetcherForUpstream(interpretation.provider.upstream), scheduler)
+      .then(resolved => ({ interpretation, resolved }))));
 
   const failedUpstreams: string[] = [];
   const failedSeen = new Set<string>();
-  const resolutions: Array<{ provider: ModelProviderInstance; resolved: ProviderModelResolution }> = [];
+  const resolutions: Array<{ interpretation: ModelInterpretation; provider: ModelProviderInstance; resolved: ProviderModelResolution }> = [];
 
   for (const [index, result] of settled.entries()) {
     if (result.status === 'rejected') {
@@ -350,12 +424,36 @@ export const collectInterpretationOutcomes = async (
       }
       continue;
     }
-    const { provider, resolved } = result.value;
+    const { interpretation, resolved } = result.value;
     if (!resolved) continue;
-    resolutions.push({ provider, resolved });
+    resolutions.push({ interpretation, provider: interpretation.provider, resolved });
   }
 
-  return { resolutions, failedUpstreams };
+  // `onConflict: 'real-only'`: when both halves of a conflict group
+  // resolved, drop the alias-rewrite half so the real-name match is the
+  // only one downstream sees. When only the alias-rewrite half resolved
+  // (the upstream has no model named after the alias itself), keep it —
+  // the operator's intent is to fall back to the alias when no real model
+  // collides.
+  const droppedInterpretations = new Set<ModelInterpretation>();
+  const byGroup = new Map<{ readonly originalLookupId: string }, ModelInterpretation[]>();
+  for (const { interpretation } of resolutions) {
+    const group = interpretation.conflictGroup;
+    if (!group) continue;
+    const list = byGroup.get(group) ?? [];
+    list.push(interpretation);
+    byGroup.set(group, list);
+  }
+  for (const members of byGroup.values()) {
+    if (members.length < 2) continue;
+    const aliasRewriteMember = members.find(i => i.aliasRules !== undefined);
+    if (aliasRewriteMember) droppedInterpretations.add(aliasRewriteMember);
+  }
+
+  return {
+    resolutions: resolutions.filter(r => !droppedInterpretations.has(r.interpretation)),
+    failedUpstreams,
+  };
 };
 
 export const resolveModelForRequest = async (
@@ -363,13 +461,14 @@ export const resolveModelForRequest = async (
   upstreamFilter: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
+  aliases: readonly ModelAlias[] = [],
 ): Promise<ModelResolution> => {
   const providers = await listModelProviders(upstreamFilter);
   if (providers.length === 0) {
     throw new Error(NO_UPSTREAM_CONFIGURED_MESSAGE);
   }
 
-  const interpretations = enumerateModelInterpretations(modelId, providers);
+  const interpretations = enumerateModelInterpretations(modelId, providers, aliases);
   const { resolutions, failedUpstreams } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
   return { matches: resolutions.map(r => r.resolved), failedUpstreams };
 };
diff --git a/packages/gateway/src/data-plane/providers/registry_test.ts b/packages/gateway/src/data-plane/providers/registry_test.ts
index c330abfee..d470f9169 100644
--- a/packages/gateway/src/data-plane/providers/registry_test.ts
+++ b/packages/gateway/src/data-plane/providers/registry_test.ts
@@ -2,6 +2,7 @@ import { describe, expect, test } from 'vitest';
 
 import { clearInFlightForTesting } from './models-cache.ts';
 import { compareModelIds, enumerateModelInterpretations, getInternalModels, listModelProviders, resolveModelForProvider, resolveModelForRequest } from './registry.ts';
+import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
 import { buildCopilotUpstreamRecord, buildCustomUpstreamRecord, copilotModels, setupAppTest } from '../../test-helpers.ts';
 import { directFetcher, type ModelProviderInstance } from '@floway-dev/provider';
 import { createCopilotProvider } from '@floway-dev/provider-copilot';
@@ -609,20 +610,20 @@ describe('enumerateModelInterpretations', () => {
     // A: no prefix, bare always accepted. B: prefixed-only addressable — bare
     // is not accepted. C: dual-addressable, bare accepted; the prefixed form
     // does not apply because `gpt-4o` does not start with `cx/`.
-    assertEquals(shape(enumerateModelInterpretations('gpt-4o', [A, B, C])), [
+    assertEquals(shape(enumerateModelInterpretations('gpt-4o', [A, B, C], [])), [
       { upstream: 'A', lookupId: 'gpt-4o' },
       { upstream: 'C', lookupId: 'gpt-4o' },
     ]);
   });
 
   test('prefix-only-addressable upstream strips the prefix when it matches', () => {
-    assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [B])), [
+    assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [B], [])), [
       { upstream: 'B', lookupId: 'gpt-4o' },
     ]);
   });
 
   test('prefix-only-addressable upstream is silent when the prefix does not match', () => {
-    assertEquals(enumerateModelInterpretations('gpt-4o', [B]), []);
+    assertEquals(enumerateModelInterpretations('gpt-4o', [B], []), []);
   });
 
   test('dual-addressable upstream produces two interpretations when the prefix matches', () => {
@@ -633,7 +634,7 @@ describe('enumerateModelInterpretations', () => {
       upstream: 'D', name: 'd',
       modelPrefix: { prefix: 'or/', addressable: ['unprefixed', 'prefixed'], listed: ['prefixed'] },
     });
-    assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [D])), [
+    assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [D], [])), [
       { upstream: 'D', lookupId: 'or/gpt-4o' },
       { upstream: 'D', lookupId: 'gpt-4o' },
     ]);
@@ -653,7 +654,7 @@ describe('enumerateModelInterpretations', () => {
       modelPrefix: { prefix: 'aa/bb/', addressable: ['prefixed'], listed: ['prefixed'] },
     });
     const Z = fakeProvider({ upstream: 'Z', name: 'z', modelPrefix: null });
-    assertEquals(shape(enumerateModelInterpretations('aa/bb/gpt-5', [X, Y, Z])), [
+    assertEquals(shape(enumerateModelInterpretations('aa/bb/gpt-5', [X, Y, Z], [])), [
       { upstream: 'X', lookupId: 'bb/gpt-5' },
       { upstream: 'Y', lookupId: 'gpt-5' },
       { upstream: 'Z', lookupId: 'aa/bb/gpt-5' },
@@ -906,3 +907,200 @@ describe('catalog listing under modelPrefix', () => {
     );
   });
 });
+
+// Synthetic-catalog alias matching against a single provider. Verifies that
+// each `onConflict` mode emits the right interpretation shape from
+// `enumerateModelInterpretations`. The downstream `collectInterpretationOutcomes`
+// pass is exercised in the e2e suite below.
+describe('enumerateModelInterpretations with alias matching', () => {
+  const provider = fakeProvider({ upstream: 'U', name: 'u', modelPrefix: null });
+
+  const makeAlias = (over: Partial<ModelAlias>): ModelAlias => ({
+    alias: 'codex-auto-review',
+    targetModelId: 'gpt-5.4',
+    upstreamIds: [],
+    rules: { reasoning: { effort: 'low' } },
+    visibleInModelsList: true,
+    onConflict: 'real-only',
+    ...over,
+  });
+
+  test('alias-only emits exactly the alias-rewrite interpretation, with rules', () => {
+    const aliases = [makeAlias({ onConflict: 'alias-only' })];
+    const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
+    assertEquals(out.length, 1);
+    assertEquals(out[0].lookupId, 'gpt-5.4');
+    assertEquals(out[0].aliasRules, { reasoning: { effort: 'low' } });
+    assertEquals(out[0].aliasName, 'codex-auto-review');
+    assertEquals(out[0].conflictGroup, undefined);
+  });
+
+  test('real-only emits both halves, tagged with a shared conflictGroup', () => {
+    const aliases = [makeAlias({ onConflict: 'real-only' })];
+    const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
+    assertEquals(out.length, 2);
+    // Real first, alias second — the prune step removes the alias when
+    // real resolved, so real-first keeps the natural iteration order.
+    assertEquals(out[0].lookupId, 'codex-auto-review');
+    assertEquals(out[0].aliasRules, undefined);
+    assertEquals(out[1].lookupId, 'gpt-5.4');
+    assertEquals(out[1].aliasRules, { reasoning: { effort: 'low' } });
+    expect(out[0].conflictGroup).toBeDefined();
+    expect(out[0].conflictGroup).toBe(out[1].conflictGroup);
+  });
+
+  test('both-real-first emits real then alias, neither group-tagged', () => {
+    const aliases = [makeAlias({ onConflict: 'both-real-first' })];
+    const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
+    assertEquals(out.length, 2);
+    assertEquals(out[0].lookupId, 'codex-auto-review');
+    assertEquals(out[0].aliasRules, undefined);
+    assertEquals(out[1].lookupId, 'gpt-5.4');
+    assertEquals(out[1].aliasRules, { reasoning: { effort: 'low' } });
+    assertEquals(out[0].conflictGroup, undefined);
+    assertEquals(out[1].conflictGroup, undefined);
+  });
+
+  test('both-alias-first emits alias then real, neither group-tagged', () => {
+    const aliases = [makeAlias({ onConflict: 'both-alias-first' })];
+    const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
+    assertEquals(out.length, 2);
+    assertEquals(out[0].lookupId, 'gpt-5.4');
+    assertEquals(out[0].aliasRules, { reasoning: { effort: 'low' } });
+    assertEquals(out[1].lookupId, 'codex-auto-review');
+    assertEquals(out[1].aliasRules, undefined);
+  });
+
+  test('upstreamIds filter skips the alias on providers outside the allowlist', () => {
+    const aliases = [makeAlias({ onConflict: 'alias-only', upstreamIds: ['OTHER'] })];
+    const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
+    // The alias only applies to OTHER, so this provider sees a literal
+    // (no-rewrite) interpretation.
+    assertEquals(out.length, 1);
+    assertEquals(out[0].lookupId, 'codex-auto-review');
+    assertEquals(out[0].aliasRules, undefined);
+  });
+
+  test('prefix-strip happens before alias matching (semantic P)', () => {
+    // Configure the provider with a prefix; the inbound `cx/codex-auto-review`
+    // strips to `codex-auto-review` and matches the alias. The alias-rewrite
+    // interpretation carries the target id `gpt-5.4`.
+    const prefixedProvider = fakeProvider({
+      upstream: 'P', name: 'p',
+      modelPrefix: { prefix: 'cx/', addressable: ['prefixed'], listed: ['prefixed'] },
+    });
+    const aliases = [makeAlias({ onConflict: 'alias-only' })];
+    const out = enumerateModelInterpretations('cx/codex-auto-review', [prefixedProvider], aliases);
+    assertEquals(out.length, 1);
+    assertEquals(out[0].lookupId, 'gpt-5.4');
+    assertEquals(out[0].aliasName, 'codex-auto-review');
+  });
+});
+
+// E2E coverage of the post-resolution prune. Uses a real Azure-backed
+// catalog (resolved without HTTP) so the conflict pruning behavior is
+// observed end-to-end via `resolveModelForRequest`.
+describe('resolveModelForRequest applies alias onConflict pruning', () => {
+  // Helper that stages a single Azure upstream exposing both the real
+  // alias-named model and the alias's target model.
+  const stageBothNamesUpstream = async (): Promise<void> => {
+    const { repo } = await setupAppTest();
+    await repo.upstreams.deleteAll();
+    await repo.upstreams.save({
+      id: 'up_a',
+      provider: 'azure',
+      name: 'A',
+      enabled: true,
+      sortOrder: 1,
+      createdAt: '2026-05-21T00:00:00.000Z',
+      updatedAt: '2026-05-21T00:00:00.000Z',
+      config: {
+        endpoint: 'https://a.openai.azure.com',
+        apiKey: 'az-key',
+        models: [
+          { upstreamModelId: 'codex-auto-review', endpoints: { chatCompletions: {} } },
+          { upstreamModelId: 'gpt-5.4', endpoints: { chatCompletions: {} } },
+        ],
+      },
+      flagOverrides: {},
+      disabledPublicModelIds: [],
+      proxyFallbackList: [],
+      modelPrefix: null,
+      state: null,
+    });
+  };
+
+  // Helper that stages a single Azure upstream exposing ONLY the alias's
+  // target model (no real `codex-auto-review` collision).
+  const stageTargetOnlyUpstream = async (): Promise<void> => {
+    const { repo } = await setupAppTest();
+    await repo.upstreams.deleteAll();
+    await repo.upstreams.save({
+      id: 'up_a',
+      provider: 'azure',
+      name: 'A',
+      enabled: true,
+      sortOrder: 1,
+      createdAt: '2026-05-21T00:00:00.000Z',
+      updatedAt: '2026-05-21T00:00:00.000Z',
+      config: {
+        endpoint: 'https://a.openai.azure.com',
+        apiKey: 'az-key',
+        models: [
+          { upstreamModelId: 'gpt-5.4', endpoints: { chatCompletions: {} } },
+        ],
+      },
+      flagOverrides: {},
+      disabledPublicModelIds: [],
+      proxyFallbackList: [],
+      modelPrefix: null,
+      state: null,
+    });
+  };
+
+  const aliasOf = (onConflict: ModelAlias['onConflict']): ModelAlias => ({
+    alias: 'codex-auto-review',
+    targetModelId: 'gpt-5.4',
+    upstreamIds: [],
+    rules: { reasoning: { effort: 'low' } },
+    visibleInModelsList: true,
+    onConflict,
+  });
+
+  test('alias-only resolves to a single match against the alias target id', async () => {
+    await stageBothNamesUpstream();
+    const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('alias-only')]);
+    assertEquals(resolved.matches.length, 1);
+    assertEquals(resolved.matches[0].id, 'gpt-5.4');
+  });
+
+  test('real-only drops the alias-rewrite resolution when the real-name resolves too', async () => {
+    await stageBothNamesUpstream();
+    const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('real-only')]);
+    assertEquals(resolved.matches.length, 1);
+    assertEquals(resolved.matches[0].id, 'codex-auto-review');
+  });
+
+  test('real-only keeps the alias-rewrite resolution when the real-name catalog lookup misses', async () => {
+    await stageTargetOnlyUpstream();
+    const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('real-only')]);
+    assertEquals(resolved.matches.length, 1);
+    assertEquals(resolved.matches[0].id, 'gpt-5.4');
+  });
+
+  test('both-real-first resolves to two matches, real first', async () => {
+    await stageBothNamesUpstream();
+    const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('both-real-first')]);
+    assertEquals(resolved.matches.length, 2);
+    assertEquals(resolved.matches[0].id, 'codex-auto-review');
+    assertEquals(resolved.matches[1].id, 'gpt-5.4');
+  });
+
+  test('both-alias-first resolves to two matches, alias first', async () => {
+    await stageBothNamesUpstream();
+    const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('both-alias-first')]);
+    assertEquals(resolved.matches.length, 2);
+    assertEquals(resolved.matches[0].id, 'gpt-5.4');
+    assertEquals(resolved.matches[1].id, 'codex-auto-review');
+  });
+});
diff --git a/packages/gateway/src/repo/memory.ts b/packages/gateway/src/repo/memory.ts
index 5a85dba39..85f01b621 100644
--- a/packages/gateway/src/repo/memory.ts
+++ b/packages/gateway/src/repo/memory.ts
@@ -13,6 +13,7 @@ import type {
   ApiKeyRepo,
   BackoffRow,
   CachedModelsRow,
+  ModelAliasesRepo,
   ModelsCacheRepo,
   PerformanceDimensions,
   PerformanceErrorSample,
@@ -39,6 +40,7 @@ import type {
   UsersRepo,
 } from './types.ts';
 import { serializeStoredState } from './upstream-json.ts';
+import type { ModelAlias } from '../control-plane/model-aliases/types.ts';
 import { latencyBucketForMs } from '../shared/performance-histogram.ts';
 import { generateSessionToken } from '../shared/session-tokens.ts';
 import { assertWebSearchProviderName } from '../shared/web-search-providers.ts';
@@ -896,6 +898,7 @@ export class InMemoryRepo implements Repo {
   proxyBackoffs: ProxyBackoffRepo;
   responsesItems: ResponsesItemsRepo;
   responsesSnapshots: ResponsesSnapshotsRepo;
+  modelAliases: ModelAliasesRepo;
 
   constructor() {
     this.users = new MemoryUsersRepo();
@@ -911,5 +914,21 @@ export class InMemoryRepo implements Repo {
     this.proxyBackoffs = new MemoryProxyBackoffRepo();
     this.responsesItems = new MemoryResponsesItemsRepo();
     this.responsesSnapshots = new MemoryResponsesSnapshotsRepo();
+    this.modelAliases = new MemoryModelAliasesRepo();
+  }
+}
+
+// Test-only in-memory backing for the alias table. The list starts empty
+// and can be reseeded via `setAll` so tests exercising alias-resolution
+// behavior do not depend on a live SQL database.
+export class MemoryModelAliasesRepo implements ModelAliasesRepo {
+  private rows: readonly ModelAlias[] = [];
+
+  loadAll(): Promise<readonly ModelAlias[]> {
+    return Promise.resolve(this.rows);
+  }
+
+  setAll(rows: readonly ModelAlias[]): void {
+    this.rows = rows;
   }
 }
diff --git a/packages/gateway/src/repo/sql.ts b/packages/gateway/src/repo/sql.ts
index b716d07e4..109b35024 100644
--- a/packages/gateway/src/repo/sql.ts
+++ b/packages/gateway/src/repo/sql.ts
@@ -7,6 +7,7 @@ import type {
   ApiKeyRepo,
   BackoffRow,
   CachedModelsRow,
+  ModelAliasesRepo,
   ModelsCacheRepo,
   PerformanceDimensions,
   PerformanceErrorSample,
@@ -34,6 +35,8 @@ import type {
   UsersRepo,
 } from './types.ts';
 import { serializeStoredConfig, serializeStoredState } from './upstream-json.ts';
+import { loadAllAliases } from '../control-plane/model-aliases/repo.ts';
+import type { ModelAlias } from '../control-plane/model-aliases/types.ts';
 import { latencyBucketForMs } from '../shared/performance-histogram.ts';
 import { generateSessionToken } from '../shared/session-tokens.ts';
 import { assertWebSearchProviderName } from '../shared/web-search-providers.ts';
@@ -1599,6 +1602,7 @@ export class SqlRepo implements Repo {
   proxyBackoffs: ProxyBackoffRepo;
   responsesItems: ResponsesItemsRepo;
   responsesSnapshots: ResponsesSnapshotsRepo;
+  modelAliases: ModelAliasesRepo;
 
   constructor(db: SqlDatabase) {
     this.users = new SqlUsersRepo(db);
@@ -1614,5 +1618,14 @@ export class SqlRepo implements Repo {
     this.proxyBackoffs = new SqlProxyBackoffRepo(db);
     this.responsesItems = new SqlResponsesItemsRepo(db);
     this.responsesSnapshots = new SqlResponsesSnapshotsRepo(db);
+    this.modelAliases = new SqlModelAliasesRepo(db);
+  }
+}
+
+class SqlModelAliasesRepo implements ModelAliasesRepo {
+  constructor(private db: SqlDatabase) {}
+
+  loadAll(): Promise<readonly ModelAlias[]> {
+    return loadAllAliases(this.db);
   }
 }
diff --git a/packages/gateway/src/repo/types.ts b/packages/gateway/src/repo/types.ts
index 0341d41ef..d282aaa98 100644
--- a/packages/gateway/src/repo/types.ts
+++ b/packages/gateway/src/repo/types.ts
@@ -1,3 +1,4 @@
+import type { ModelAlias } from '../control-plane/model-aliases/types.ts';
 import type { HistogramBucket } from '../shared/performance-histogram.ts';
 import type { WebSearchProviderName } from '../shared/web-search-providers.ts';
 import type { BillingDimension, ModelPricing } from '@floway-dev/protocols/common';
@@ -332,4 +333,11 @@ export interface Repo {
   proxyBackoffs: ProxyBackoffRepo;
   responsesItems: ResponsesItemsRepo;
   responsesSnapshots: ResponsesSnapshotsRepo;
+  modelAliases: ModelAliasesRepo;
+}
+
+// Operator-managed alias table; small (dozens of rows at most) and read
+// per request, so the repo deliberately exposes only a full-table fetch.
+export interface ModelAliasesRepo {
+  loadAll(): Promise<readonly ModelAlias[]>;
 }

From 8305153a58fac34afd42b8c92f5a1346a83c67b4 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Thu, 25 Jun 2026 23:12:35 +0800
Subject: [PATCH 006/170] feat(gateway): apply alias rules, synthesize
 /v1/models entries, set x-floway-alias
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

applyAliasRulesTo<InboundProtocol> writes rule values into each inbound
IR's native slot when the protocol supports the concept and the Floway
extension slot otherwise. Alias values override user-supplied values per
the operator-locked semantics in Goal 3 of the design.

/v1/models appends alias entries with aliasedFrom carrying the target,
upstream filter, rules, and conflict mode. Aliases with
visibleInModelsList=false are omitted; aliases whose targets are
unreachable are still listed — operator-declared, no silent hide. The
Gemini /v1beta/models surface mirrors the same alias-listing policy.

The x-floway-alias response header carries the matched alias name on
every call served via an alias, giving callers a no-mode-required debug
hook for understanding routing.

Per-upstream sanitizers run just before each upstream HTTP call,
emitting one drop-trace line per stripped extension field with the
matched alias name attached. The same sanitize emission point fires for
client-sent extension residue regardless of alias provenance.

Embeddings, images, and /v1/completions thread aliases through
resolveModelForRequest so alias-name resolution still rewrites the
target id; rules don't apply to these passthrough endpoints (no protocol
slots) but the matched alias name still rides out on the response
header, and one drop trace line per declared rule lands so an operator
can confirm the rewrite ran.

Side touches:
- ChatCandidate replaces ProviderCandidate on every chat attempt arg
  type, restoring the alias-metadata propagation the routing layer
  already preserves.
- GatewayCtx grows a per-request responseHeaders bag; the http wrappers
  flush it onto the outgoing Response through a new
  finalizeGatewayResponse helper that also routes through the dump
  accumulator.
- ProviderModelResolution gains an optional aliasName; passthrough
  callers read it directly off the resolved match.
- pushInterpretation's onConflict switch grows an assertNever default.
---
 .../src/control-plane/model-aliases/repo.ts   |   4 +-
 .../control-plane/model-aliases/repo_test.ts  |  35 +--
 .../src/control-plane/model-aliases/types.ts  |   4 +
 .../chat/chat-completions/attempt.ts          |  10 +-
 .../chat/chat-completions/attempt_test.ts     |   1 +
 .../data-plane/chat/chat-completions/http.ts  |   6 +-
 .../demote-developer-to-system_test.ts        |   1 +
 .../demote-interleaved-system-to-user_test.ts |   1 +
 ...le-reasoning-on-forced-tool-choice_test.ts |   1 +
 .../include-usage-stream-options_test.ts      |   1 +
 .../interceptors/normalize-usage_test.ts      |   1 +
 .../vendor-deepseek-normalize_test.ts         |   1 +
 .../vendor-kimi-normalize_test.ts             |   1 +
 .../vendor-qwen-normalize_test.ts             |   1 +
 .../data-plane/chat/chat-completions/serve.ts |   6 +
 .../chat/chat-completions/serve_test.ts       |   1 +
 .../src/data-plane/chat/gemini/attempt.ts     |   6 +-
 .../data-plane/chat/gemini/attempt_test.ts    |   1 +
 .../src/data-plane/chat/gemini/http.ts        |  10 +-
 .../strip-safety-settings_test.ts             |   1 +
 .../strip-unsupported-part-fields_test.ts     |   1 +
 .../strip-unsupported-tools_test.ts           |   1 +
 .../suppress-thought-parts_test.ts            |   1 +
 .../data-plane/chat/gemini/respond_test.ts    |   1 +
 .../src/data-plane/chat/gemini/serve.ts       |   8 +
 .../src/data-plane/chat/gemini/serve_test.ts  |   1 +
 .../src/data-plane/chat/messages/attempt.ts   |  28 ++-
 .../data-plane/chat/messages/attempt_test.ts  |   1 +
 .../src/data-plane/chat/messages/http.ts      |   8 +-
 .../src/data-plane/chat/messages/http_test.ts |  65 +++++-
 .../demote-interleaved-system-to-user_test.ts |   1 +
 ...le-reasoning-on-forced-tool-choice_test.ts |   1 +
 .../strip-billing-attribution_test.ts         |   1 +
 .../interceptors/web-search-shim_test.ts      |   1 +
 .../data-plane/chat/messages/respond_test.ts  |   1 +
 .../src/data-plane/chat/messages/serve.ts     |  13 ++
 .../data-plane/chat/messages/serve_test.ts    |   1 +
 .../src/data-plane/chat/responses/attempt.ts  |  15 +-
 .../data-plane/chat/responses/attempt_test.ts |   1 +
 .../src/data-plane/chat/responses/http.ts     |  14 +-
 .../canonicalize-encrypted-content_test.ts    |   1 +
 .../demote-developer-to-system_test.ts        |   1 +
 .../demote-interleaved-system-to-user_test.ts |   1 +
 ...le-reasoning-on-forced-tool-choice_test.ts |   1 +
 .../interceptors/retry-cyber-policy_test.ts   |   1 +
 .../interceptors/server-tool-shim_test.ts     |   2 +
 .../image-generation-integration_test.ts      |   1 +
 .../server-tools/image-generation.ts          |   9 +-
 .../server-tools/image-generation_test.ts     |   1 +
 .../vendor-deepseek-normalize_test.ts         |   1 +
 .../vendor-qwen-normalize_test.ts             |   1 +
 .../src/data-plane/chat/responses/serve.ts    |  12 +
 .../data-plane/chat/responses/serve_test.ts   |   1 +
 .../src/data-plane/chat/shared/gateway-ctx.ts |  23 +-
 .../data-plane/chat/shared/respond_test.ts    |   1 +
 .../src/data-plane/chat/shared/sanitize.ts    |  10 +
 .../chat/shared/upstream-telemetry_test.ts    |   1 +
 .../src/data-plane/model-aliases/apply.ts     | 105 +++++++++
 .../data-plane/model-aliases/apply_test.ts    | 218 ++++++++++++++++++
 .../data-plane/model-aliases/match_test.ts    |   1 +
 .../gateway/src/data-plane/models/gemini.ts   |  29 ++-
 .../gateway/src/data-plane/models/load.ts     |  39 +++-
 .../gateway/src/data-plane/models/serve.ts    |   4 +-
 .../src/data-plane/models/serve_test.ts       | 178 ++++++++++++++
 .../src/data-plane/providers/registry.ts      |  18 +-
 .../src/data-plane/providers/registry_test.ts |   2 +
 .../data-plane/shared/passthrough-serve.ts    |  40 +++-
 .../gateway/src/test-helpers/gateway-ctx.ts   |   1 +
 packages/protocols/src/common/models.ts       |  24 ++
 packages/translate/package.json               |   3 +-
 70 files changed, 918 insertions(+), 69 deletions(-)
 create mode 100644 packages/gateway/src/data-plane/model-aliases/apply.ts
 create mode 100644 packages/gateway/src/data-plane/model-aliases/apply_test.ts

diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts
index 70024e0cd..4c13cd09b 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo.ts
@@ -8,13 +8,14 @@ interface ModelAliasRow {
   rules_json: string;
   visible_in_models_list: number;
   on_conflict: OnConflict;
+  created_at: number;
 }
 
 // The model_aliases table is operator-managed and small (dozens of rows at
 // most), so the data plane reads the full table per request — no cache layer.
 export const loadAllAliases = async (db: SqlDatabase): Promise<readonly ModelAlias[]> => {
   const { results } = await db
-    .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict FROM model_aliases')
+    .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at FROM model_aliases')
     .all<ModelAliasRow>();
   return results.map(toModelAlias);
 };
@@ -26,6 +27,7 @@ const toModelAlias = (row: ModelAliasRow): ModelAlias => ({
   rules: parseJsonField<ModelAlias['rules']>(row.alias, 'rules_json', row.rules_json),
   visibleInModelsList: row.visible_in_models_list === 1,
   onConflict: row.on_conflict,
+  createdAt: row.created_at,
 });
 
 const parseJsonField = <T>(alias: string, field: string, raw: string): T => {
diff --git a/packages/gateway/src/control-plane/model-aliases/repo_test.ts b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
index a4da76fde..ff1efa046 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo_test.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
@@ -8,17 +8,21 @@ test('loadAllAliases reads the seed row from a freshly migrated database', async
   const db = await createSqliteTestDb();
 
   const aliases = await loadAllAliases(db);
-
-  assertEquals(aliases, [
-    {
-      alias: 'codex-auto-review',
-      targetModelId: 'gpt-5.4',
-      upstreamIds: [],
-      rules: { reasoning: { effort: 'low' } },
-      visibleInModelsList: true,
-      onConflict: 'real-only',
-    },
-  ]);
+  assertEquals(aliases.length, 1);
+  const [seed] = aliases;
+  // `createdAt` rides off the migration's `DEFAULT (unixepoch())`, so the
+  // exact value is wall-clock dependent. Assert structurally that it landed
+  // as a number and strip it before comparing the rest of the row.
+  assertEquals(typeof seed.createdAt, 'number');
+  const { createdAt: _createdAt, ...withoutTimestamp } = seed;
+  assertEquals(withoutTimestamp, {
+    alias: 'codex-auto-review',
+    targetModelId: 'gpt-5.4',
+    upstreamIds: [],
+    rules: { reasoning: { effort: 'low' } },
+    visibleInModelsList: true,
+    onConflict: 'real-only',
+  });
 });
 
 test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_models_list to a boolean', async () => {
@@ -26,7 +30,7 @@ test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_mo
   await db.exec('DELETE FROM model_aliases');
   await db
     .prepare(
-      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)',
+      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)',
     )
     .bind(
       'opus-xhigh',
@@ -35,13 +39,14 @@ test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_mo
       '{"reasoning":{"effort":"xhigh"},"anthropicBeta":["fine-grained-tool-streaming"]}',
       0,
       'alias-only',
+      1_700_000_000,
     )
     .run();
   await db
     .prepare(
-      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)',
+      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)',
     )
-    .bind('gpt-5-fast', 'gpt-5.4', '[]', '{"serviceTier":"priority"}', 1, 'both-alias-first')
+    .bind('gpt-5-fast', 'gpt-5.4', '[]', '{"serviceTier":"priority"}', 1, 'both-alias-first', 1_700_000_001)
     .run();
 
   const aliases = await loadAllAliases(db);
@@ -54,6 +59,7 @@ test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_mo
     rules: { reasoning: { effort: 'xhigh' }, anthropicBeta: ['fine-grained-tool-streaming'] },
     visibleInModelsList: false,
     onConflict: 'alias-only',
+    createdAt: 1_700_000_000,
   });
   assertEquals(byAlias.get('gpt-5-fast'), {
     alias: 'gpt-5-fast',
@@ -62,6 +68,7 @@ test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_mo
     rules: { serviceTier: 'priority' },
     visibleInModelsList: true,
     onConflict: 'both-alias-first',
+    createdAt: 1_700_000_001,
   });
 });
 
diff --git a/packages/gateway/src/control-plane/model-aliases/types.ts b/packages/gateway/src/control-plane/model-aliases/types.ts
index 8e1bff467..7594ceff6 100644
--- a/packages/gateway/src/control-plane/model-aliases/types.ts
+++ b/packages/gateway/src/control-plane/model-aliases/types.ts
@@ -23,4 +23,8 @@ export type ModelAlias = {
   readonly rules: ModelAliasRules;
   readonly visibleInModelsList: boolean;
   readonly onConflict: OnConflict;
+  // Unix epoch seconds stamped at row insertion. Surfaced on the
+  // `/v1/models` synthesized alias entry so callers see when an alias was
+  // declared, matching the `created` semantics of the real entries.
+  readonly createdAt: number;
 };
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts b/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts
index 5862192e5..71c3a8288 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts
@@ -5,9 +5,10 @@ import { responsesAttempt } from '../responses/attempt.ts';
 import { rewriteStoredResponsesItemsForCandidate } from '../responses/items/rewrite.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { providerStreamResultToExecuteResult, buildUpstreamCallOptions } from '../shared/attempt-helpers.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
+import type { ChatCandidate } from '../shared/candidates.ts';
 import { tryCatchChatServeFailure } from '../shared/errors.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
+import { createSanitizeTraceCtx, sanitizeForChatCompletionsUpstream } from '../shared/sanitize.ts';
 import { traverseTranslation } from '../shared/translate-traverse.ts';
 import { createUpstreamLatencyRecorder } from '../shared/upstream-telemetry.ts';
 import { runInterceptors } from '@floway-dev/interceptor';
@@ -21,7 +22,7 @@ export interface ChatCompletionsAttemptArgs {
   readonly payload: ChatCompletionsPayload;
   readonly ctx: GatewayCtx;
   readonly store: StatefulResponsesStore;
-  readonly candidate: ProviderCandidate;
+  readonly candidate: ChatCandidate;
   readonly headers: Headers;
 }
 
@@ -67,7 +68,7 @@ export const chatCompletionsAttempt = {
 const rewriteOrRenderChatCompletionsFailure = async (
   payload: ChatCompletionsPayload,
   store: StatefulResponsesStore,
-  candidate: ProviderCandidate,
+  candidate: ChatCandidate,
 ): Promise<{ payload: ChatCompletionsPayload; failure?: undefined } | { payload?: undefined; failure: ExecuteResult<ProtocolFrame<ChatCompletionsStreamEvent>> & { type: 'api-error' } }> => {
   try {
     const rewrittenMessages = await rewriteStoredResponsesItemsForCandidate(
@@ -98,10 +99,11 @@ const rewriteOrRenderChatCompletionsFailure = async (
 const callChatCompletionsAsExecuteResult = async (
   payload: ChatCompletionsPayload,
   ctx: GatewayCtx,
-  candidate: ProviderCandidate,
+  candidate: ChatCandidate,
   headers: Headers,
 ): Promise<ExecuteResult<ProtocolFrame<ChatCompletionsStreamEvent>>> => {
   const { model: _model, ...body } = payload;
+  sanitizeForChatCompletionsUpstream(body as Record<string, unknown>, createSanitizeTraceCtx(candidate.aliasName));
   const recorder = createUpstreamLatencyRecorder();
   const providerResult = await candidate.binding.provider.callChatCompletions(
     candidate.binding.upstreamModel,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
index 747a93de5..62b814359 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
@@ -23,6 +23,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/http.ts b/packages/gateway/src/data-plane/chat/chat-completions/http.ts
index a46d537ed..a22a86543 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/http.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/http.ts
@@ -3,7 +3,7 @@ import { chatCompletionsServe } from './serve.ts';
 import type { AuthedContext } from '../../../middleware/auth.ts';
 import { inboundHeadersForUpstream } from '../../shared/inbound-headers.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
-import { createGatewayCtxFromHono, type GatewayCtx } from '../shared/gateway-ctx.ts';
+import { createGatewayCtxFromHono, finalizeGatewayResponse, type GatewayCtx } from '../shared/gateway-ctx.ts';
 import { readRequestBody, type RequestBody } from '../shared/request-body.ts';
 import { providerModelsUnavailableResponse } from '../shared/upstream-models-error.ts';
 import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
@@ -24,7 +24,7 @@ const respondWithInternalError = async (c: AuthedContext, error: unknown, reques
   const effectiveCtx = ctx ?? createGatewayCtxFromHono(c, { wantsStream: false, requestBody });
   const result = internalErrorResult(502, toInternalDebugError(error));
   const { response } = await respondChatCompletions(c, result, false, false, effectiveCtx);
-  return (effectiveCtx.dump?.finalize(response) ?? response);
+  return finalizeGatewayResponse(effectiveCtx, response);
 };
 
 export const chatCompletionsHttp = {
@@ -44,7 +44,7 @@ export const chatCompletionsHttp = {
       const store = createNonResponsesSourceStore(ctx.apiKeyId);
       const result = await chatCompletionsServe.generate({ payload, ctx, store, headers: inboundHeadersForUpstream(c) });
       const { response } = await respondChatCompletions(c, result, wantsStream, includeUsageChunk, ctx);
-      return (ctx.dump?.finalize(response) ?? response);
+      return finalizeGatewayResponse(ctx, response);
     } catch (error) {
       return await respondWithInternalError(c, error, requestBody, ctx);
     }
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
index 1d3c8252e..83d9bccb7 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
index c7f560cb3..156389a46 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 7ba1962fd..e46726510 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
index 1a28fef4a..e3e4147a2 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
index 0969e8d8e..0b6fed4f1 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
index d72f890f5..81be2c3ab 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
@@ -20,6 +20,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
index 74de17c38..1cfc304b7 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
index 4f7197da8..0506a1e25 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
index 5d27541f9..1347dd6bd 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
@@ -2,6 +2,7 @@ import { chatCompletionsAttempt } from './attempt.ts';
 import { renderChatCompletionsFailure } from './errors.ts';
 import { planChatCompletionsRouting } from './routing.ts';
 import { getRepo } from '../../../repo/index.ts';
+import { applyAliasRulesToChatCompletions } from '../../model-aliases/apply.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -47,6 +48,11 @@ export const chatCompletionsServe = {
           : { kind: 'model-missing', model: payload.model, failedUpstreams },
       );
     }
+    // Apply operator-locked alias rules to the inbound IR before the
+    // attempt runs its interceptor chain. The matching `x-floway-alias`
+    // header rides out via ctx.responseHeaders.
+    if (candidate.aliasRules) applyAliasRulesToChatCompletions(payload, candidate.aliasRules);
+    if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
     return await chatCompletionsAttempt.generate({ payload, ctx, store, candidate, headers });
   },
 };
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
index 616b2ba66..402803203 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
@@ -49,6 +49,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/attempt.ts b/packages/gateway/src/data-plane/chat/gemini/attempt.ts
index 1d120ca00..880b0d62a 100644
--- a/packages/gateway/src/data-plane/chat/gemini/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/attempt.ts
@@ -6,7 +6,7 @@ import { chatCompletionsAttempt } from '../chat-completions/attempt.ts';
 import { messagesAttempt } from '../messages/attempt.ts';
 import { responsesAttempt } from '../responses/attempt.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
+import type { ChatCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import { traverseTranslation } from '../shared/translate-traverse.ts';
 import { runInterceptors } from '@floway-dev/interceptor';
@@ -19,7 +19,7 @@ export interface GeminiAttemptGenerateArgs {
   readonly payload: GeminiPayload;
   readonly ctx: GatewayCtx;
   readonly store: StatefulResponsesStore;
-  readonly candidate: ProviderCandidate;
+  readonly candidate: ChatCandidate;
   readonly headers: Headers;
 }
 
@@ -27,7 +27,7 @@ export interface GeminiAttemptCountTokensArgs {
   readonly payload: GeminiPayload;
   readonly ctx: GatewayCtx;
   readonly store: StatefulResponsesStore;
-  readonly candidate: ProviderCandidate;
+  readonly candidate: ChatCandidate;
   readonly headers: Headers;
 }
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
index 9d08e557f..29a4e9bc5 100644
--- a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
@@ -24,6 +24,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/http.ts b/packages/gateway/src/data-plane/chat/gemini/http.ts
index 35a6e921f..8126d13e7 100644
--- a/packages/gateway/src/data-plane/chat/gemini/http.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/http.ts
@@ -3,7 +3,7 @@ import { geminiServe } from './serve.ts';
 import type { AuthedContext } from '../../../middleware/auth.ts';
 import { inboundHeadersForUpstream } from '../../shared/inbound-headers.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
-import { createGatewayCtxFromHono, type GatewayCtx } from '../shared/gateway-ctx.ts';
+import { createGatewayCtxFromHono, finalizeGatewayResponse, type GatewayCtx } from '../shared/gateway-ctx.ts';
 import { readRequestBody, type RequestBody } from '../shared/request-body.ts';
 import type { GeminiContent, GeminiPayload } from '@floway-dev/protocols/gemini';
 import { internalErrorResult, ProviderModelsUnavailableError, toInternalDebugError } from '@floway-dev/provider';
@@ -66,11 +66,11 @@ const respondWithGeminiError = async (
       body: new TextEncoder().encode(body),
     };
     const { response } = await respondGemini(c, apiErrorResult, wantsStream, ctx);
-    return (ctx.dump?.finalize(response) ?? response);
+    return finalizeGatewayResponse(ctx, response);
   }
   const internalResult = internalErrorResult(500, toInternalDebugError(error));
   const { response } = await respondGemini(c, internalResult, wantsStream, ctx);
-  return (ctx.dump?.finalize(response) ?? response);
+  return finalizeGatewayResponse(ctx, response);
 };
 
 // Single entry for `/v1beta/models/:modelAction`. Splits the model and action
@@ -97,7 +97,7 @@ const runGeminiGenerate = async (c: AuthedContext, model: string, wantsStream: b
   try {
     const result = await geminiServe.generate({ payload, ctx, store, model, headers: inboundHeadersForUpstream(c) });
     const { response } = await respondGemini(c, result, wantsStream, ctx);
-    return (ctx.dump?.finalize(response) ?? response);
+    return finalizeGatewayResponse(ctx, response);
   } catch (error) {
     return await respondWithGeminiError(c, error, ctx, wantsStream);
   }
@@ -113,7 +113,7 @@ const runGeminiCountTokens = async (c: AuthedContext, model: string): Promise<Re
   try {
     const result = await geminiServe.countTokens({ payload, ctx, store, model, headers: inboundHeadersForUpstream(c) });
     const { response } = await respondGemini(c, result, false, ctx);
-    return (ctx.dump?.finalize(response) ?? response);
+    return finalizeGatewayResponse(ctx, response);
   } catch (error) {
     return await respondWithGeminiError(c, error, ctx, false);
   }
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
index c0b39f699..d4b48ac3a 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
index 6a4608cf4..3b02b63f8 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
index 7cbfc4593..6a2c20ef7 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
index e526bb9e3..eb67a0092 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
index a5c887c14..31981b544 100644
--- a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
@@ -26,6 +26,7 @@ const ctx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts
index 840da62c7..e1f61d628 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts
@@ -2,6 +2,7 @@ import { geminiAttempt } from './attempt.ts';
 import { renderGeminiFailure } from './errors.ts';
 import { planGeminiRouting } from './routing.ts';
 import { getRepo } from '../../../repo/index.ts';
+import { applyAliasRulesToGemini } from '../../model-aliases/apply.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -58,6 +59,11 @@ export const geminiServe = {
         'generate',
       );
     }
+    // Operator-locked alias rules apply to the Gemini IR before the attempt
+    // runs; the matching `x-floway-alias` header rides out via
+    // ctx.responseHeaders.
+    if (candidate.aliasRules) applyAliasRulesToGemini(payload, candidate.aliasRules);
+    if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
     return await geminiAttempt.generate({ payload, ctx, store, candidate, headers });
   },
 
@@ -90,6 +96,8 @@ export const geminiServe = {
         'countTokens',
       );
     }
+    if (candidate.aliasRules) applyAliasRulesToGemini(payload, candidate.aliasRules);
+    if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
     return await geminiAttempt.countTokens({ payload, ctx, store, candidate, headers });
   },
 };
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
index 42945a6d8..070d44471 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
@@ -48,6 +48,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/messages/attempt.ts b/packages/gateway/src/data-plane/chat/messages/attempt.ts
index e67387b85..3dcde67c7 100644
--- a/packages/gateway/src/data-plane/chat/messages/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/messages/attempt.ts
@@ -6,10 +6,11 @@ import { responsesAttempt } from '../responses/attempt.ts';
 import { rewriteStoredResponsesItemsForCandidate } from '../responses/items/rewrite.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { providerStreamResultToExecuteResult, buildUpstreamCallOptions } from '../shared/attempt-helpers.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
+import type { ChatCandidate } from '../shared/candidates.ts';
 import { tryCatchChatServeFailure } from '../shared/errors.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import { plainResultFromResponse } from '../shared/respond.ts';
+import { sanitizeForMessagesUpstream, createSanitizeTraceCtx } from '../shared/sanitize.ts';
 import { traverseTranslation } from '../shared/translate-traverse.ts';
 import { createUpstreamLatencyRecorder } from '../shared/upstream-telemetry.ts';
 import { runInterceptors } from '@floway-dev/interceptor';
@@ -17,13 +18,14 @@ import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesMessage, MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
 import { type ExecuteResult, type PlainResult } from '@floway-dev/provider';
 import { translateMessagesViaChatCompletions, translateMessagesViaResponses } from '@floway-dev/translate';
+import { applyAnthropicBetaToHeaders } from '@floway-dev/translate/via-messages/anthropic-extensions';
 import { messagesViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
 
 export interface MessagesAttemptGenerateArgs {
   readonly payload: MessagesPayload;
   readonly ctx: GatewayCtx;
   readonly store: StatefulResponsesStore;
-  readonly candidate: ProviderCandidate;
+  readonly candidate: ChatCandidate;
   readonly headers: Headers;
 }
 
@@ -31,7 +33,7 @@ export interface MessagesAttemptCountTokensArgs {
   readonly payload: MessagesPayload;
   readonly ctx: GatewayCtx;
   readonly store: StatefulResponsesStore;
-  readonly candidate: ProviderCandidate;
+  readonly candidate: ChatCandidate;
   readonly headers: Headers;
 }
 
@@ -48,12 +50,21 @@ export const messagesAttempt = {
     return await runInterceptors(invocation, ctx, messagesInterceptors, async () => {
       if (candidate.targetApi === 'messages') {
         const { model: _model, ...body } = invocation.payload;
+        // The candidate's `anthropicBeta` alias rule merges onto the
+        // anthropic-beta header (the wire path; the body slot is rejected
+        // by the http entry). Body extensions are stripped just before the
+        // upstream call, after every interceptor has had its say.
+        const outgoingHeaders = new Headers(invocation.headers);
+        if (candidate.aliasRules?.anthropicBeta?.length) {
+          applyAnthropicBetaToHeaders(outgoingHeaders, candidate.aliasRules.anthropicBeta);
+        }
+        sanitizeForMessagesUpstream(body as Record<string, unknown>, createSanitizeTraceCtx(candidate.aliasName));
         const recorder = createUpstreamLatencyRecorder();
         const providerResult = await candidate.binding.provider.callMessages(
           candidate.binding.upstreamModel,
           body,
           ctx.abortSignal,
-          buildUpstreamCallOptions(candidate, ctx, recorder.record, invocation.headers),
+          buildUpstreamCallOptions(candidate, ctx, recorder.record, outgoingHeaders),
         );
         return await providerStreamResultToExecuteResult(providerResult, candidate, ctx, recorder);
       }
@@ -98,11 +109,16 @@ export const messagesAttempt = {
     const recorder = createUpstreamLatencyRecorder();
     const response = await runInterceptors(invocation, ctx, messagesCountTokensInterceptors, async () => {
       const { model: _model, ...body } = invocation.payload;
+      const outgoingHeaders = new Headers(invocation.headers);
+      if (candidate.aliasRules?.anthropicBeta?.length) {
+        applyAnthropicBetaToHeaders(outgoingHeaders, candidate.aliasRules.anthropicBeta);
+      }
+      sanitizeForMessagesUpstream(body as Record<string, unknown>, createSanitizeTraceCtx(candidate.aliasName));
       const { response } = await candidate.binding.provider.callMessagesCountTokens(
         candidate.binding.upstreamModel,
         body,
         ctx.abortSignal,
-        buildUpstreamCallOptions(candidate, ctx, recorder.record, invocation.headers),
+        buildUpstreamCallOptions(candidate, ctx, recorder.record, outgoingHeaders),
       );
       return response;
     });
@@ -124,7 +140,7 @@ export const messagesAttempt = {
 const rewriteOrRenderMessagesFailure = async (
   payload: MessagesPayload,
   store: StatefulResponsesStore,
-  candidate: ProviderCandidate,
+  candidate: ChatCandidate,
 ): Promise<{ payload: MessagesPayload; failure?: undefined } | { payload?: undefined; failure: ExecuteResult<ProtocolFrame<MessagesStreamEvent>> & { type: 'api-error' } }> => {
   try {
     const rewrittenMessages = await rewriteStoredResponsesItemsForCandidate(
diff --git a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
index 2cd89323e..f9192e289 100644
--- a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
@@ -23,6 +23,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/messages/http.ts b/packages/gateway/src/data-plane/chat/messages/http.ts
index 8dfc4dc6e..b138a9ccf 100644
--- a/packages/gateway/src/data-plane/chat/messages/http.ts
+++ b/packages/gateway/src/data-plane/chat/messages/http.ts
@@ -3,7 +3,7 @@ import { messagesServe } from './serve.ts';
 import type { AuthedContext } from '../../../middleware/auth.ts';
 import { inboundHeadersForUpstream } from '../../shared/inbound-headers.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
-import { createGatewayCtxFromHono, type GatewayCtx } from '../shared/gateway-ctx.ts';
+import { createGatewayCtxFromHono, finalizeGatewayResponse, type GatewayCtx } from '../shared/gateway-ctx.ts';
 import { readRequestBody, type RequestBody } from '../shared/request-body.ts';
 import { providerModelsUnavailableResponse } from '../shared/upstream-models-error.ts';
 import type { MessagesPayload } from '@floway-dev/protocols/messages';
@@ -44,7 +44,7 @@ const respondWithInternalError = async (c: AuthedContext, error: unknown, reques
   const effectiveCtx = ctx ?? createGatewayCtxFromHono(c, { wantsStream: false, requestBody });
   const result = internalErrorResult(502, toInternalDebugError(error));
   const { response } = await respondMessages(c, result, false, effectiveCtx);
-  return (effectiveCtx.dump?.finalize(response) ?? response);
+  return finalizeGatewayResponse(effectiveCtx, response);
 };
 
 const parsePayload = (requestBody: RequestBody): MessagesPayload =>
@@ -64,7 +64,7 @@ export const messagesHttp = {
       const store = createNonResponsesSourceStore(ctx.apiKeyId);
       const result = await messagesServe.generate({ payload, ctx, store, headers: inboundHeadersForUpstream(c) });
       const { response } = await respondMessages(c, result, wantsStream, ctx);
-      return (ctx.dump?.finalize(response) ?? response);
+      return finalizeGatewayResponse(ctx, response);
     } catch (error) {
       return await respondWithInternalError(c, error, requestBody, ctx);
     }
@@ -82,7 +82,7 @@ export const messagesHttp = {
       const store = createNonResponsesSourceStore(ctx.apiKeyId);
       const result = await messagesServe.countTokens({ payload, ctx, store, headers: inboundHeadersForUpstream(c) });
       const { response } = await respondMessages(c, result, false, ctx);
-      return (ctx.dump?.finalize(response) ?? response);
+      return finalizeGatewayResponse(ctx, response);
     } catch (error) {
       return await respondWithInternalError(c, error, requestBody, ctx);
     }
diff --git a/packages/gateway/src/data-plane/chat/messages/http_test.ts b/packages/gateway/src/data-plane/chat/messages/http_test.ts
index 844a05177..cf32509d2 100644
--- a/packages/gateway/src/data-plane/chat/messages/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/http_test.ts
@@ -5,13 +5,13 @@ import type { AuthVars } from '../../../middleware/auth.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import type { ApiKey, User } from '../../../repo/types.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
+import type { ChatCandidate } from '../shared/candidates.ts';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesStreamEvent } from '@floway-dev/protocols/messages';
 import { directFetcher, type ProviderCallResult, type ProviderStreamResult, type UpstreamCallOptions } from '@floway-dev/provider';
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
-const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
+const candidatesQueue: { readonly candidates: readonly ChatCandidate[]; readonly sawModel: boolean }[] = [];
 vi.mock('../shared/candidates.ts', async importOriginal => {
   const original = await importOriginal<typeof import('../shared/candidates.ts')>();
   return {
@@ -28,7 +28,7 @@ const { messagesHttp } = await import('./http.ts');
 
 const API_KEY_ID = 'key_messages_http_test';
 
-const queueCandidates = (candidates: readonly ProviderCandidate[], sawModel = candidates.length > 0): void => {
+const queueCandidates = (candidates: readonly ChatCandidate[], sawModel = candidates.length > 0): void => {
   candidatesQueue.push({ candidates, sawModel });
 };
 
@@ -104,7 +104,7 @@ const makeCandidate = (overrides: {
   upstream?: string;
   callMessages?: (model: unknown, body: unknown, signal?: AbortSignal, opts?: UpstreamCallOptions) => Promise<ProviderStreamResult<MessagesStreamEvent>>;
   callMessagesCountTokens?: (model: unknown, body: unknown, signal?: AbortSignal, opts?: UpstreamCallOptions) => Promise<ProviderCallResult>;
-} = {}): ProviderCandidate => {
+} = {}): ChatCandidate => {
   const upstream = overrides.upstream ?? 'up_test';
   const upstreamModel = stubUpstreamModel();
   const provider = stubProvider({
@@ -272,3 +272,60 @@ test('POST /v1/messages forwards upstream response headers end-to-end (non-strea
   assertEquals(response.headers.get('anthropic-ratelimit-unified-status'), 'allowed');
   assertEquals(response.headers.get('cf-ray'), 'cf_ray_e2e');
 });
+
+test('POST /v1/messages stamps x-floway-alias when the candidate is alias-matched', async () => {
+  installRepo();
+  const callMessages = vi.fn(async (): Promise<ProviderStreamResult<MessagesStreamEvent>> => ({
+    ok: true, events: makeProtocolFrames(makeMessagesEvents()), modelKey: 'k', headers: new Headers(),
+  }));
+  const candidate = makeCandidate({ callMessages });
+  queueCandidates([{ ...candidate, aliasRules: { reasoning: { effort: 'low' } }, aliasName: 'codex-auto-review' }]);
+  // Request the alias name as the model through the HTTP entry point.
+  const response = await makeApp().request('/v1/messages', {
+    method: 'POST',
+    headers: { 'content-type': 'application/json' },
+    body: JSON.stringify({ model: 'codex-auto-review', max_tokens: 32, messages: [{ role: 'user', content: 'hello' }] }),
+  });
+  // The aliasName carried by the queued candidate must come back as the x-floway-alias header.
+  assertEquals(response.status, 200);
+  assertEquals(response.headers.get('x-floway-alias'), 'codex-auto-review');
+});
+
+test('POST /v1/messages does not set x-floway-alias when no alias matched', async () => {
+  installRepo();
+  const callMessages = vi.fn(async (): Promise<ProviderStreamResult<MessagesStreamEvent>> => ({
+    ok: true, events: makeProtocolFrames(makeMessagesEvents()), modelKey: 'k', headers: new Headers(),
+  }));
+  queueCandidates([makeCandidate({ callMessages })]);
+  // The queued candidate is used as built by makeCandidate: no aliasName or aliasRules are added to it.
+  const response = await makeApp().request('/v1/messages', {
+    method: 'POST',
+    headers: { 'content-type': 'application/json' },
+    body: JSON.stringify({ model: 'test-model', max_tokens: 32, messages: [{ role: 'user', content: 'hello' }] }),
+  });
+  // The request must still succeed, and the alias header must be absent (Headers.get yields null).
+  assertEquals(response.status, 200);
+  assertEquals(response.headers.get('x-floway-alias'), null);
+});
+
+test('POST /v1/messages applies alias reasoning.effort onto output_config before upstream call', async () => {
+  installRepo();
+  const observedBodies: { output_config?: { effort?: string } }[] = [];
+  const callMessages = vi.fn(async (_model: unknown, body: unknown): Promise<ProviderStreamResult<MessagesStreamEvent>> => {
+    observedBodies.push(body as { output_config?: { effort?: string } });
+    return { ok: true, events: makeProtocolFrames(makeMessagesEvents()), modelKey: 'k', headers: new Headers() };
+  });
+  const candidate = makeCandidate({ callMessages });
+  queueCandidates([{ ...candidate, aliasRules: { reasoning: { effort: 'high' } }, aliasName: 'alias-x' }]);
+  // The inbound payload sets no output_config itself; callMessages records whatever body it is handed.
+  const response = await makeApp().request('/v1/messages', {
+    method: 'POST',
+    headers: { 'content-type': 'application/json' },
+    body: JSON.stringify({ model: 'alias-x', max_tokens: 32, messages: [{ role: 'user', content: 'hello' }] }),
+  });
+  // The alias rule's effort ('high') must show up as output_config.effort on the first recorded body.
+  assertEquals(response.status, 200);
+  const observed = observedBodies[0];
+  if (observed === undefined) throw new Error('expected callMessages to receive a body');
+  assertEquals(observed.output_config?.effort, 'high');
+});
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
index 7fcb07153..9df67c5c0 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 7ed0ca556..7b7045355 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
index 3c74ea4a9..8ae90e232 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
index 27c5cad83..dca97addd 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
@@ -58,6 +58,7 @@ const gatewayCtx = (apiKeyId: string = 'test-key'): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/messages/respond_test.ts b/packages/gateway/src/data-plane/chat/messages/respond_test.ts
index dfd3b10e3..79d0a9db6 100644
--- a/packages/gateway/src/data-plane/chat/messages/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/respond_test.ts
@@ -534,6 +534,7 @@ const makeRespondCtx = (): GatewayCtx => ({
   wantsStream: false,
   runtimeLocation: 'TEST',
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
   currentColo: 'TEST',
   dump: null,
diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts
index 719091768..30282afd4 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve.ts
@@ -2,6 +2,7 @@ import { messagesAttempt } from './attempt.ts';
 import { renderMessagesFailure } from './errors.ts';
 import { planMessagesRouting } from './routing.ts';
 import { getRepo } from '../../../repo/index.ts';
+import { applyAliasRulesToMessages } from '../../model-aliases/apply.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -55,6 +56,13 @@ export const messagesServe = {
         'generate',
       );
     }
+    // Operator-locked alias rules go onto the inbound IR before the attempt
+    // begins so the per-protocol interceptor chain (and any downstream
+    // translate pass) sees the already-injected fields. The matching
+    // `x-floway-alias` response header is staged on the gateway-stamped
+    // header set; the http wrapper flushes it onto the outgoing Response.
+    if (candidate.aliasRules) applyAliasRulesToMessages(payload, candidate.aliasRules);
+    if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
     return await messagesAttempt.generate({ payload, ctx, store, candidate, headers });
   },
 
@@ -84,6 +92,11 @@ export const messagesServe = {
         'countTokens',
       );
     }
+    // count_tokens carries the same alias semantics as generate — operator
+    // rules apply uniformly regardless of endpoint, and the response header
+    // rides out the same way.
+    if (candidate.aliasRules) applyAliasRulesToMessages(payload, candidate.aliasRules);
+    if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
     return await messagesAttempt.countTokens({ payload, ctx, store, candidate, headers });
   },
 };
diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
index 3bf0faace..734bad296 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
@@ -46,6 +46,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/responses/attempt.ts b/packages/gateway/src/data-plane/chat/responses/attempt.ts
index db7220990..c0fb6a902 100644
--- a/packages/gateway/src/data-plane/chat/responses/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/responses/attempt.ts
@@ -10,9 +10,10 @@ import { recordPerformanceLatency, requireRecordedDurationMs } from '../../share
 import { chatCompletionsAttempt } from '../chat-completions/attempt.ts';
 import { messagesAttempt } from '../messages/attempt.ts';
 import { providerStreamResultToExecuteResult, buildUpstreamCallOptions, telemetryModelIdentity } from '../shared/attempt-helpers.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
+import type { ChatCandidate } from '../shared/candidates.ts';
 import { tryCatchChatServeFailure } from '../shared/errors.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
+import { createSanitizeTraceCtx, sanitizeForResponsesUpstream } from '../shared/sanitize.ts';
 import { traverseTranslation } from '../shared/translate-traverse.ts';
 import { createUpstreamLatencyRecorder, recordUpstreamHttpFailure, upstreamPerformanceContext } from '../shared/upstream-telemetry.ts';
 import { runInterceptors } from '@floway-dev/interceptor';
@@ -26,7 +27,7 @@ export interface ResponsesAttemptGenerateArgs {
   readonly payload: ResponsesPayload;
   readonly ctx: GatewayCtx;
   readonly store: StatefulResponsesStore;
-  readonly candidate: ProviderCandidate;
+  readonly candidate: ChatCandidate;
   // Native HTTP/WS entry passes 'append'; the cross-protocol translation-in
   // path (another protocol's attempt translating into Responses) passes
   // 'none' so the outer source owns snapshot persistence.
@@ -38,7 +39,7 @@ export interface ResponsesAttemptCompactArgs {
   readonly payload: ResponsesPayload;
   readonly ctx: GatewayCtx;
   readonly store: StatefulResponsesStore;
-  readonly candidate: ProviderCandidate;
+  readonly candidate: ChatCandidate;
   readonly headers: Headers;
 }
 
@@ -148,7 +149,7 @@ type RewriteOutcome =
 const rewriteOrRenderFailure = async (
   payload: ResponsesPayload,
   store: StatefulResponsesStore,
-  candidate: ProviderCandidate,
+  candidate: ChatCandidate,
 ): Promise<RewriteOutcome> => {
   try {
     return await rewriteResponsesItemsForCandidate(payload, store, candidate);
@@ -185,12 +186,13 @@ const dispatchResponses = async (
   payload: ResponsesPayload,
   ctx: GatewayCtx,
   store: StatefulResponsesStore,
-  candidate: ProviderCandidate,
+  candidate: ChatCandidate,
   headers: Headers,
 ): Promise<ExecuteResult<ProtocolFrame<ResponsesStreamEvent>>> => {
   switch (candidate.targetApi) {
   case 'responses': {
     const { model: _model, ...body } = payload;
+    sanitizeForResponsesUpstream(body as Record<string, unknown>, createSanitizeTraceCtx(candidate.aliasName));
     const recorder = createUpstreamLatencyRecorder();
     const providerResult = await candidate.binding.provider.callResponses(
       candidate.binding.upstreamModel,
@@ -236,10 +238,11 @@ const dispatchResponses = async (
 const callResponsesCompactAsExecuteResult = async (
   payload: ResponsesPayload,
   ctx: GatewayCtx,
-  candidate: ProviderCandidate,
+  candidate: ChatCandidate,
   headers: Headers,
 ): Promise<ExecuteResult<ProtocolFrame<ResponsesStreamEvent>>> => {
   const { model: _model, stream: _stream, store: _store, ...body } = payload;
+  sanitizeForResponsesUpstream(body as Record<string, unknown>, createSanitizeTraceCtx(candidate.aliasName));
   const recorder = createUpstreamLatencyRecorder();
   const providerResult = await candidate.binding.provider.callResponsesCompact(
     candidate.binding.upstreamModel,
diff --git a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
index d698b5189..a952627b4 100644
--- a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
@@ -25,6 +25,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/responses/http.ts b/packages/gateway/src/data-plane/chat/responses/http.ts
index dc6e1694b..54c497a0a 100644
--- a/packages/gateway/src/data-plane/chat/responses/http.ts
+++ b/packages/gateway/src/data-plane/chat/responses/http.ts
@@ -5,7 +5,7 @@ import { responsesServe } from './serve.ts';
 import type { AuthedContext } from '../../../middleware/auth.ts';
 import { CODEX_AUTO_REVIEW_ALIAS, CODEX_AUTO_REVIEW_TARGET } from '../../codex/auto-review-alias.ts';
 import { inboundHeadersForUpstream } from '../../shared/inbound-headers.ts';
-import { createGatewayCtxFromHono, type GatewayCtx } from '../shared/gateway-ctx.ts';
+import { createGatewayCtxFromHono, finalizeGatewayResponse, type GatewayCtx } from '../shared/gateway-ctx.ts';
 import { readRequestBody, type RequestBody } from '../shared/request-body.ts';
 import { providerModelsUnavailableResponse } from '../shared/upstream-models-error.ts';
 import type { ResponsesPayload } from '@floway-dev/protocols/responses';
@@ -57,7 +57,7 @@ const respondWithInternalError = async (c: AuthedContext, error: unknown, reques
   const effectiveCtx = ctx ?? createGatewayCtxFromHono(c, { wantsStream: false, requestBody });
   const result = internalErrorResult(502, toInternalDebugError(error));
   const { response } = await respondResponses(c, result, false, effectiveCtx);
-  return (effectiveCtx.dump?.finalize(response) ?? response);
+  return finalizeGatewayResponse(effectiveCtx, response);
 };
 
 const parsePayload = (requestBody: RequestBody, stampReasoningEffort: boolean): ResponsesPayload =>
@@ -74,12 +74,12 @@ export const responsesHttp = {
       const store = createResponsesHttpStore(ctx.apiKeyId, payload.store ?? undefined);
       const result = await responsesServe.generate({ payload, ctx, store, snapshotMode: payload.store === false ? 'none' : 'append', headers: inboundHeadersForUpstream(c) });
       const { response } = await respondResponses(c, result, wantsStream, ctx);
-      return (ctx.dump?.finalize(response) ?? response);
+      return finalizeGatewayResponse(ctx, response);
     } catch (error) {
       if (error instanceof PreviousResponseNotFoundError) {
         const response = previousResponseNotFoundResponse(error.previousResponseId);
         ctx?.dump?.error('gateway');
-        return (ctx?.dump?.finalize(response) ?? response);
+        return ctx ? finalizeGatewayResponse(ctx, response) : response;
       }
       return await respondWithInternalError(c, error, requestBody, ctx);
     }
@@ -96,15 +96,15 @@ export const responsesHttp = {
       if (result.type === 'result') {
         ctx.dump?.success(result.modelIdentity, result.usage);
         const compactResponse = Response.json(result.result);
-        return (ctx.dump?.finalize(compactResponse) ?? compactResponse);
+        return finalizeGatewayResponse(ctx, compactResponse);
       }
       const { response } = await respondResponses(c, result, false, ctx);
-      return (ctx.dump?.finalize(response) ?? response);
+      return finalizeGatewayResponse(ctx, response);
     } catch (error) {
       if (error instanceof PreviousResponseNotFoundError) {
         const response = previousResponseNotFoundResponse(error.previousResponseId);
         ctx?.dump?.error('gateway');
-        return (ctx?.dump?.finalize(response) ?? response);
+        return ctx ? finalizeGatewayResponse(ctx, response) : response;
       }
       return await respondWithInternalError(c, error, requestBody, ctx);
     }
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
index de97faf65..f904f85d5 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
index 0a705fd12..a193d01db 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
index e32752001..ae1fc3970 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 7c4ee2f6b..dcddbd6c8 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
index 98fa7fdcb..3cf947b01 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
@@ -45,6 +45,7 @@ const stubCtx = (overrides: { abortSignal?: AbortSignal } = {}): GatewayCtx => (
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
   ...overrides,
 });
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
index f9d5cf7b2..6688dcec7 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
@@ -348,6 +348,7 @@ const makeGatewayCtx = (apiKeyId: string = 'k1'): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
@@ -4496,6 +4497,7 @@ test('downstream AbortSignal threads through to provider search / fetchPage and
     currentColo: 'TEST',
     dump: null,
     backgroundScheduler: () => {},
+    responseHeaders: new Headers(),
     requestStartedAt: 0,
     abortSignal: controller.signal,
   };
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
index 908b18489..46e30b043 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
@@ -144,6 +144,7 @@ const gatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts
index a66fe9995..77904bb63 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts
@@ -1,4 +1,5 @@
 import { createPerRequestFetcher } from '../../../../../dial/per-request.ts';
+import { getRepo } from '../../../../../repo/index.ts';
 import { sleep } from '../../../../../shared/sleep.ts';
 import { resolveModelForRequest } from '../../../../providers/registry.ts';
 import { appendFailedUpstreams } from '../../../../shared/failed-upstreams.ts';
@@ -535,7 +536,13 @@ const resolveImageBinding = async (
   const endpointPath = isEdit ? '/images/edits' : '/images/generations';
   let resolution;
   try {
-    resolution = await resolveModelForRequest(state.config.model, state.upstreamIds, fetcherForUpstream, state.backgroundScheduler);
+    // The image-generation server-tool runs inside a Responses request; the
+    // outer request's matched alias (if any) has already stamped the
+    // response header. Threading aliases here keeps the second
+    // resolveModelForRequest (for the image tool's own model id) consistent
+    // with how the outer LLM call resolved its candidate.
+    const aliases = await getRepo().modelAliases.loadAll();
+    resolution = await resolveModelForRequest(state.config.model, state.upstreamIds, fetcherForUpstream, state.backgroundScheduler, aliases);
   } catch (e) {
     return { ok: false, error: serverError(e) };
   }
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
index 515b4ef6c..da94b3068 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
@@ -56,6 +56,7 @@ const gatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
index fbed0749c..7db1b6360 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
index 45de471f4..23afe8462 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/responses/serve.ts b/packages/gateway/src/data-plane/chat/responses/serve.ts
index bed8b6df3..e66a2a29a 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve.ts
@@ -2,6 +2,7 @@ import { responsesAttempt } from './attempt.ts';
 import type { ResponsesAttemptResult } from './interceptors/types.ts';
 import type { ResponsesSnapshotMode, StatefulResponsesStore } from './items/store.ts';
 import { prepareResponsesServePlan } from './serve-prep.ts';
+import { applyAliasRulesToResponses } from '../../model-aliases/apply.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { ResponsesPayload, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
@@ -46,6 +47,11 @@ export const responsesServe = {
               : null,
     });
     if (plan.kind === 'failure') return plan.result;
+    // Operator-locked alias rules apply to the prepared inbound IR before
+    // the attempt runs; the `x-floway-alias` header rides out via
+    // ctx.responseHeaders.
+    if (plan.candidate.aliasRules) applyAliasRulesToResponses(plan.prepared, plan.candidate.aliasRules);
+    if (plan.candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', plan.candidate.aliasName);
     const effectiveSnapshotMode: ResponsesSnapshotMode = snapshotMode !== 'none' && containsCompactionTrigger(plan.prepared.input)
       ? 'replace'
       : snapshotMode;
@@ -62,6 +68,12 @@ export const responsesServe = {
       pickTarget: endpoints => endpoints.responses ? 'responses' : null,
     });
     if (plan.kind === 'failure') return plan.result;
+    // Alias rules also apply on the compact path. The upstream compact
+    // endpoint silently drops fields like `reasoning` it does not honor;
+    // applying uniformly keeps the operator's intent expressed at the
+    // inbound boundary regardless of which endpoint runs.
+    if (plan.candidate.aliasRules) applyAliasRulesToResponses(plan.prepared, plan.candidate.aliasRules);
+    if (plan.candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', plan.candidate.aliasName);
     return await responsesAttempt.compact({ payload: plan.prepared, ctx, store, candidate: plan.candidate, headers });
   },
 };
diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
index 389771d69..07369504d 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
@@ -57,6 +57,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
index 21f14d66d..0e199e403 100644
--- a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
+++ b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
@@ -21,10 +21,17 @@ export interface GatewayCtx {
   // provider-call boundary.
   readonly runtimeLocation: string;
   readonly currentColo: string;
-  // Null when the api key has no retention configured, in which case the
-  // respond layer's `ctx.dump?.X(...)` calls collapse to no-ops and
-  // `ctx.dump?.finalize(response) ?? response` returns the response unchanged.
+  // Null when the api key has no retention configured, in which case
+  // `finalizeGatewayResponse` short-circuits the dump tee and returns the
+  // response untouched (headers from `responseHeaders` are still applied).
   readonly dump: DumpAccumulator | null;
+  // Per-request response-header staging. The data-plane writes alias-aware
+  // and similar non-upstream headers here mid-request; the inbound HTTP
+  // wrapper merges them onto the final outgoing Response before
+  // `dump?.finalize`. Mutable on purpose — the serve layer owns the
+  // chosen candidate and is the right seam for stamping the
+  // `x-floway-alias` header.
+  readonly responseHeaders: Headers;
 }
 
 export interface CreateGatewayCtxOptions {
@@ -70,5 +77,32 @@ export const createGatewayCtxFromHono = (c: AuthedContext, opts: CreateGatewayCt
     runtimeLocation: colo,
     currentColo: colo,
     dump,
+    responseHeaders: new Headers(),
   };
 };
+
+// Stamp the headers staged on `ctx.responseHeaders` onto the outgoing Response,
+// then hand the result to the dump accumulator's `finalize` when one is
+// configured. Every inbound HTTP wrapper is meant to return through this seam
+// so alias and other gateway-stamped headers ride out uniformly across
+// happy-path, error, and passthrough paths.
+//
+// A Response that came straight from `fetch()` has an immutable header list,
+// and `headers.set` on it throws a TypeError. Staged values were already
+// validated when they were written to `ctx.responseHeaders`, so a TypeError
+// here means the target is immutable: rebuild a mutable copy over the same
+// body stream and stamp that instead of failing the request.
+export const finalizeGatewayResponse = (ctx: GatewayCtx, response: Response): Response => {
+  const stamp = (target: Response): Response => {
+    for (const [name, value] of ctx.responseHeaders) target.headers.set(name, value);
+    return target;
+  };
+  let stamped: Response;
+  try {
+    stamped = stamp(response);
+  } catch (error) {
+    if (!(error instanceof TypeError)) throw error;
+    stamped = stamp(new Response(response.body, response));
+  }
+  return ctx.dump?.finalize(stamped) ?? stamped;
+};
diff --git a/packages/gateway/src/data-plane/chat/shared/respond_test.ts b/packages/gateway/src/data-plane/chat/shared/respond_test.ts
index e57f52ae4..fd506b083 100644
--- a/packages/gateway/src/data-plane/chat/shared/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/respond_test.ts
@@ -45,6 +45,7 @@ const setup = (): Harness => {
       dump: null,
       backgroundScheduler: promise => { background.push(promise); },
       requestStartedAt,
+      responseHeaders: new Headers(),
     }),
   };
 };
diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize.ts b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
index 918156d16..832f8f41d 100644
--- a/packages/gateway/src/data-plane/chat/shared/sanitize.ts
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
@@ -5,6 +5,16 @@ export interface SanitizeTraceCtx {
   readonly emit: (line: { alias?: string; field: string; targetProtocol: string }) => void;
 }
 
+// Builds the default per-request sanitize trace. Every line handed to `emit`
+// is written with `console.warn` under the `floway.alias.drop` tag, JSON-encoded.
+// A matched alias's name is carried on the returned ctx as `aliasName` so
+// callers can attribute dropped fields to it; when no alias matched the key is
+// omitted altogether (never set to `undefined`).
+export const createSanitizeTraceCtx = (aliasName: string | undefined): SanitizeTraceCtx => {
+  const attribution = aliasName === undefined ? {} : { aliasName };
+  return { ...attribution, emit: line => console.warn('floway.alias.drop', JSON.stringify(line)) };
+};
+
 const stripKeys = (
   body: Record<string, unknown>,
   keys: readonly string[],
diff --git a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
index d1d10a103..374cab4fe 100644
--- a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
@@ -24,6 +24,7 @@ const baseCtx = (overrides: Partial<GatewayCtx> = {}): GatewayCtx => {
     apiKeyId: 'key_1',
     upstreamIds: null,
     wantsStream: true,
+    responseHeaders: new Headers(),
     requestStartedAt: 0,
     runtimeLocation: 'TEST',
     currentColo: 'TEST',
diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts
new file mode 100644
index 000000000..9a2bb5950
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/apply.ts
@@ -0,0 +1,105 @@
+import type { ModelAliasRules } from '../../control-plane/model-aliases/types.ts';
+import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
+import type { GeminiPayload } from '@floway-dev/protocols/gemini';
+import type { MessagesPayload, MessagesThinkingDisplay } from '@floway-dev/protocols/messages';
+import type { ResponsesPayload } from '@floway-dev/protocols/responses';
+import { mapSummaryToAnthropicDisplay } from '@floway-dev/translate/via-messages/anthropic-extensions';
+
+// Each function writes the alias rules into the inbound IR's slot best suited
+// to the host protocol: native when the protocol can express the concept,
+// extension otherwise. Writes overwrite any user-supplied value — aliases are
+// operator-locked per Goal 3. Mapping table is the single source of truth in
+// docs/superpowers/specs/2026-06-25-model-aliases-design.md.
+
+export const applyAliasRulesToChatCompletions = (payload: ChatCompletionsPayload, rules: ModelAliasRules): void => {
+  const { reasoning, verbosity, serviceTier, anthropicSpeed, anthropicBeta } = rules;
+  // Only effort maps to a native reasoning field; budget/adaptive/summary use extension slots.
+  if (reasoning?.effort !== undefined) payload.reasoning_effort = reasoning.effort;
+  if (reasoning?.budgetTokens !== undefined) payload.thinking_budget = reasoning.budgetTokens;
+  if (reasoning?.adaptive === true) payload.adaptive_thinking = true;
+  if (reasoning?.summary !== undefined) payload.reasoning_summary = reasoning.summary;
+  if (verbosity !== undefined) payload.verbosity = verbosity;
+  if (serviceTier !== undefined) payload.service_tier = serviceTier;
+  if (anthropicSpeed !== undefined) payload.anthropic_speed = anthropicSpeed;
+  if (anthropicBeta?.length) payload.anthropic_beta = [...anthropicBeta];
+};
+
+export const applyAliasRulesToResponses = (payload: ResponsesPayload, rules: ModelAliasRules): void => {
+  const { reasoning, verbosity, serviceTier, anthropicSpeed, anthropicBeta } = rules;
+  // Native slots: reasoning.{effort, summary}, text.verbosity, service_tier. The rest
+  // (thinking_budget, adaptive_thinking, anthropic_speed, anthropic_beta) are extensions.
+  if (reasoning?.effort !== undefined) payload.reasoning = { ...payload.reasoning, effort: reasoning.effort };
+  if (reasoning?.summary !== undefined) payload.reasoning = { ...payload.reasoning, summary: reasoning.summary };
+  if (reasoning?.budgetTokens !== undefined) payload.thinking_budget = reasoning.budgetTokens;
+  if (reasoning?.adaptive === true) payload.adaptive_thinking = true;
+  if (verbosity !== undefined) payload.text = { ...payload.text, verbosity };
+  if (serviceTier !== undefined) payload.service_tier = serviceTier;
+  if (anthropicSpeed !== undefined) payload.anthropic_speed = anthropicSpeed;
+  if (anthropicBeta?.length) payload.anthropic_beta = [...anthropicBeta];
+};
+
+export const applyAliasRulesToMessages = (payload: MessagesPayload, rules: ModelAliasRules): void => {
+  // Messages expresses effort, thinking, speed and service_tier natively, so
+  // verbosity is the only extension slot written here. `anthropicBeta` is
+  // intentionally not written to the body: it is a wire header, and the attempt
+  // layer is expected to merge it from `candidate.aliasRules` (not visible here).
+  const { reasoning, verbosity, serviceTier, anthropicSpeed } = rules;
+  const effort = reasoning?.effort;
+  if (effort !== undefined) payload.output_config = { ...payload.output_config, effort };
+
+  // A rule carrying both adaptive and budgetTokens should have been rejected
+  // by the write-side validator; if one slips through, adaptive wins to stay
+  // in line with the adaptive-first translate-layer policy.
+  const budgetTokens = reasoning?.budgetTokens;
+  if (reasoning?.adaptive === true) {
+    payload.thinking = { type: 'adaptive' };
+  } else if (budgetTokens !== undefined) {
+    payload.thinking = { type: 'enabled', budget_tokens: budgetTokens };
+  }
+
+  // `display` needs a `thinking.type` next to it (Anthropic discards it
+  // otherwise), so when nothing has populated `thinking` yet an `enabled`
+  // shell is synthesized to carry it, mirroring
+  // `buildMessagesThinkingFromExtensions`. Unmappable summaries are skipped.
+  const summary = reasoning?.summary;
+  const display = summary === undefined ? undefined : mapSummaryToAnthropicDisplay(summary);
+  if (display !== undefined) {
+    payload.thinking = { ...(payload.thinking ?? { type: 'enabled' as const }), display: display as MessagesThinkingDisplay };
+  }
+  if (verbosity !== undefined) payload.verbosity = verbosity;
+  if (serviceTier !== undefined) payload.service_tier = serviceTier;
+  if (anthropicSpeed !== undefined) payload.speed = anthropicSpeed;
+};
+
+export const applyAliasRulesToGemini = (payload: GeminiPayload, rules: ModelAliasRules): void => {
+  // Reasoning knobs land on the native generationConfig.thinkingConfig;
+  // verbosity and serviceTier are extension slots under generationConfig; the
+  // anthropic_* knobs are top-level extension slots (presumably where the
+  // gemini-via-messages translator reads them; that code is not visible here).
+  const { reasoning, verbosity, serviceTier, anthropicSpeed, anthropicBeta } = rules;
+  const adaptive = reasoning?.adaptive === true;
+  const touchesThinking = adaptive
+    || [reasoning?.effort, reasoning?.budgetTokens, reasoning?.summary].some(knob => knob !== undefined);
+
+  // generationConfig (and thinkingConfig) are shallow-copied before being
+  // written, and only when a rule actually targets them.
+  if (touchesThinking || verbosity !== undefined || serviceTier !== undefined) {
+    const nextConfig = { ...payload.generationConfig };
+    if (touchesThinking) {
+      const nextThinking = { ...nextConfig.thinkingConfig };
+      if (reasoning?.effort !== undefined) nextThinking.thinkingLevel = reasoning.effort;
+      if (reasoning?.budgetTokens !== undefined) nextThinking.thinkingBudget = reasoning.budgetTokens;
+      // Runs after the budgetTokens write, so adaptive (-1) wins when both are set.
+      if (adaptive) nextThinking.thinkingBudget = -1;
+      // Gemini only has a boolean here: 'omitted' turns thoughts off, any
+      // other summary value turns them on.
+      if (reasoning?.summary !== undefined) nextThinking.includeThoughts = reasoning.summary !== 'omitted';
+      nextConfig.thinkingConfig = nextThinking;
+    }
+    if (verbosity !== undefined) nextConfig.verbosity = verbosity;
+    if (serviceTier !== undefined) nextConfig.serviceTier = serviceTier;
+    payload.generationConfig = nextConfig;
+  }
+  if (anthropicSpeed !== undefined) payload.anthropicSpeed = anthropicSpeed;
+  if (anthropicBeta?.length) payload.anthropicBeta = [...anthropicBeta];
+};
diff --git a/packages/gateway/src/data-plane/model-aliases/apply_test.ts b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
new file mode 100644
index 000000000..e05f40c91
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
@@ -0,0 +1,218 @@
+import { describe, expect, test } from 'vitest';
+
+import {
+  applyAliasRulesToChatCompletions,
+  applyAliasRulesToGemini,
+  applyAliasRulesToMessages,
+  applyAliasRulesToResponses,
+} from './apply.ts';
+import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
+import type { GeminiPayload } from '@floway-dev/protocols/gemini';
+import type { MessagesPayload } from '@floway-dev/protocols/messages';
+import type { ResponsesPayload } from '@floway-dev/protocols/responses';
+
+// Structurally minimal payload builders, one per protocol. The apply functions
+// only touch alias-rule slots, so each test layers just its own `patch` on top.
+const cc = (patch: Partial<ChatCompletionsPayload> = {}): ChatCompletionsPayload => ({ model: 'x', messages: [], ...patch });
+const resp = (patch: Partial<ResponsesPayload> = {}): ResponsesPayload => ({ model: 'x', input: 'hi', ...patch });
+const msg = (patch: Partial<MessagesPayload> = {}): MessagesPayload => ({ model: 'x', messages: [], max_tokens: 1, ...patch });
+const gem = (patch: Partial<GeminiPayload> = {}): GeminiPayload => ({ ...patch });
+
+describe('applyAliasRulesToChatCompletions', () => {
+  test('writes effort to native reasoning_effort and overrides user value', () => {
+    const body = cc({ reasoning_effort: 'low' });
+    applyAliasRulesToChatCompletions(body, { reasoning: { effort: 'high' } });
+    expect(body.reasoning_effort).toBe('high');
+  });
+
+  test('writes budgetTokens to extension thinking_budget', () => {
+    const body = cc();
+    applyAliasRulesToChatCompletions(body, { reasoning: { budgetTokens: 4096 } });
+    expect(body.thinking_budget).toBe(4096);
+  });
+
+  test('writes adaptive to extension adaptive_thinking', () => {
+    const body = cc();
+    applyAliasRulesToChatCompletions(body, { reasoning: { adaptive: true } });
+    expect(body.adaptive_thinking).toBe(true);
+  });
+
+  test('writes summary to extension reasoning_summary', () => {
+    const body = cc();
+    applyAliasRulesToChatCompletions(body, { reasoning: { summary: 'detailed' } });
+    expect(body.reasoning_summary).toBe('detailed');
+  });
+
+  test('writes verbosity, serviceTier, anthropicSpeed, anthropicBeta to their slots', () => {
+    const body = cc();
+    applyAliasRulesToChatCompletions(body, {
+      verbosity: 'low', serviceTier: 'flex', anthropicSpeed: 'fast', anthropicBeta: ['ctx-1m'],
+    });
+    expect(body.verbosity).toBe('low');
+    expect(body.service_tier).toBe('flex');
+    expect(body.anthropic_speed).toBe('fast');
+    expect(body.anthropic_beta).toEqual(['ctx-1m']);
+  });
+
+  test('leaves payload untouched when rules carry no fields', () => {
+    const body = cc({ reasoning_effort: 'medium', verbosity: 'high' });
+    applyAliasRulesToChatCompletions(body, {});
+    expect(body.reasoning_effort).toBe('medium');
+    expect(body.verbosity).toBe('high');
+  });
+});
+
+describe('applyAliasRulesToResponses', () => {
+  test('writes effort to native reasoning.effort and overrides user value', () => {
+    const body = resp({ reasoning: { effort: 'low' } });
+    applyAliasRulesToResponses(body, { reasoning: { effort: 'high' } });
+    expect(body.reasoning?.effort).toBe('high');
+  });
+
+  test('writes summary to native reasoning.summary', () => {
+    const body = resp();
+    applyAliasRulesToResponses(body, { reasoning: { summary: 'detailed' } });
+    expect(body.reasoning?.summary).toBe('detailed');
+  });
+
+  test('writes budgetTokens to extension thinking_budget', () => {
+    const body = resp();
+    applyAliasRulesToResponses(body, { reasoning: { budgetTokens: 4096 } });
+    expect(body.thinking_budget).toBe(4096);
+  });
+
+  test('writes adaptive to extension adaptive_thinking', () => {
+    const body = resp();
+    applyAliasRulesToResponses(body, { reasoning: { adaptive: true } });
+    expect(body.adaptive_thinking).toBe(true);
+  });
+
+  test('writes verbosity to native text.verbosity, preserving format', () => {
+    const body = resp({ text: { format: { type: 'json_object' } } });
+    applyAliasRulesToResponses(body, { verbosity: 'low' });
+    expect(body.text?.verbosity).toBe('low');
+    expect(body.text?.format).toEqual({ type: 'json_object' });
+  });
+
+  test('writes serviceTier to native service_tier', () => {
+    const body = resp();
+    applyAliasRulesToResponses(body, { serviceTier: 'flex' });
+    expect(body.service_tier).toBe('flex');
+  });
+
+  test('writes anthropicSpeed / anthropicBeta to extension slots', () => {
+    const body = resp();
+    applyAliasRulesToResponses(body, { anthropicSpeed: 'fast', anthropicBeta: ['ctx-1m'] });
+    expect(body.anthropic_speed).toBe('fast');
+    expect(body.anthropic_beta).toEqual(['ctx-1m']);
+  });
+});
+
+describe('applyAliasRulesToMessages', () => {
+  test('writes effort to native output_config.effort', () => {
+    const body = msg();
+    applyAliasRulesToMessages(body, { reasoning: { effort: 'high' } });
+    expect(body.output_config?.effort).toBe('high');
+  });
+
+  test('writes budgetTokens to thinking.enabled', () => {
+    const body = msg();
+    applyAliasRulesToMessages(body, { reasoning: { budgetTokens: 4096 } });
+    expect(body.thinking).toEqual({ type: 'enabled', budget_tokens: 4096 });
+  });
+
+  test('writes adaptive to thinking.type=adaptive', () => {
+    const body = msg();
+    applyAliasRulesToMessages(body, { reasoning: { adaptive: true } });
+    expect(body.thinking).toEqual({ type: 'adaptive' });
+  });
+
+  test('writes summary to thinking.display (mapped from OpenAI vocabulary)', () => {
+    const body = msg({ thinking: { type: 'enabled', budget_tokens: 1024 } });
+    applyAliasRulesToMessages(body, { reasoning: { summary: 'detailed' } });
+    expect(body.thinking).toEqual({ type: 'enabled', budget_tokens: 1024, display: 'summarized' });
+  });
+
+  test('writes anthropicSpeed to native speed', () => {
+    const body = msg();
+    applyAliasRulesToMessages(body, { anthropicSpeed: 'fast' });
+    expect(body.speed).toBe('fast');
+  });
+
+  test('writes serviceTier to native service_tier', () => {
+    const body = msg();
+    applyAliasRulesToMessages(body, { serviceTier: 'priority' });
+    expect(body.service_tier).toBe('priority');
+  });
+
+  test('writes verbosity to the extension slot', () => {
+    const body = msg();
+    applyAliasRulesToMessages(body, { verbosity: 'low' });
+    expect(body.verbosity).toBe('low');
+  });
+
+  test('adaptive overrides budgetTokens when both arrive on the same call', () => {
+    // The write-side validator is meant to reject this pairing; should it get
+    // through anyway, adaptive must win to match the translate-layer policy.
+    const body = msg();
+    applyAliasRulesToMessages(body, { reasoning: { budgetTokens: 1024, adaptive: true } });
+    expect(body.thinking).toEqual({ type: 'adaptive' });
+  });
+});
+
+describe('applyAliasRulesToGemini', () => {
+  test('writes effort to generationConfig.thinkingConfig.thinkingLevel', () => {
+    const body = gem();
+    applyAliasRulesToGemini(body, { reasoning: { effort: 'high' } });
+    expect(body.generationConfig?.thinkingConfig?.thinkingLevel).toBe('high');
+  });
+
+  test('writes budgetTokens to generationConfig.thinkingConfig.thinkingBudget', () => {
+    const body = gem();
+    applyAliasRulesToGemini(body, { reasoning: { budgetTokens: 4096 } });
+    expect(body.generationConfig?.thinkingConfig?.thinkingBudget).toBe(4096);
+  });
+
+  test('writes adaptive to generationConfig.thinkingConfig.thinkingBudget = -1', () => {
+    const body = gem();
+    applyAliasRulesToGemini(body, { reasoning: { adaptive: true } });
+    expect(body.generationConfig?.thinkingConfig?.thinkingBudget).toBe(-1);
+  });
+
+  test('writes summary to generationConfig.thinkingConfig.includeThoughts when not omitted', () => {
+    const body = gem();
+    applyAliasRulesToGemini(body, { reasoning: { summary: 'detailed' } });
+    expect(body.generationConfig?.thinkingConfig?.includeThoughts).toBe(true);
+  });
+
+  test('writes summary=omitted to generationConfig.thinkingConfig.includeThoughts=false', () => {
+    const body = gem();
+    applyAliasRulesToGemini(body, { reasoning: { summary: 'omitted' } });
+    expect(body.generationConfig?.thinkingConfig?.includeThoughts).toBe(false);
+  });
+
+  test('writes verbosity to generationConfig.verbosity extension', () => {
+    const body = gem();
+    applyAliasRulesToGemini(body, { verbosity: 'low' });
+    expect(body.generationConfig?.verbosity).toBe('low');
+  });
+
+  test('writes serviceTier to generationConfig.serviceTier extension', () => {
+    const body = gem();
+    applyAliasRulesToGemini(body, { serviceTier: 'flex' });
+    expect(body.generationConfig?.serviceTier).toBe('flex');
+  });
+
+  test('writes anthropicSpeed / anthropicBeta to top-level extension slots', () => {
+    const body = gem();
+    applyAliasRulesToGemini(body, { anthropicSpeed: 'fast', anthropicBeta: ['ctx-1m'] });
+    expect(body.anthropicSpeed).toBe('fast');
+    expect(body.anthropicBeta).toEqual(['ctx-1m']);
+  });
+
+  test('preserves existing thinkingConfig entries when adding a new one', () => {
+    const body = gem({ generationConfig: { thinkingConfig: { thinkingBudget: 1024 } } });
+    applyAliasRulesToGemini(body, { reasoning: { summary: 'detailed' } });
+    expect(body.generationConfig?.thinkingConfig).toEqual({ thinkingBudget: 1024, includeThoughts: true });
+  });
+});
diff --git a/packages/gateway/src/data-plane/model-aliases/match_test.ts b/packages/gateway/src/data-plane/model-aliases/match_test.ts
index b1dd8ff2f..7252078c9 100644
--- a/packages/gateway/src/data-plane/model-aliases/match_test.ts
+++ b/packages/gateway/src/data-plane/model-aliases/match_test.ts
@@ -10,6 +10,7 @@ const make = (overrides: Partial<ModelAlias>): ModelAlias => ({
   rules: {},
   visibleInModelsList: true,
   onConflict: 'real-only',
+  createdAt: 0,
   ...overrides,
 });
 
diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts
index 93f4b6822..33dbefa40 100644
--- a/packages/gateway/src/data-plane/models/gemini.ts
+++ b/packages/gateway/src/data-plane/models/gemini.ts
@@ -1,8 +1,10 @@
 import type { Context } from 'hono';
 
 import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts';
+import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
+import { getRepo } from '../../repo/index.ts';
 import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
 import { getCurrentColo } from '../../runtime/runtime-info.ts';
 import { geminiStatusForHttpStatus } from '../chat/gemini/errors.ts';
@@ -66,16 +68,36 @@ const loadGeminiModels = async (
   upstreamFilter: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
+  aliases: readonly ModelAlias[],
 ): Promise<GeminiModel[]> => {
   const models = await getInternalModels(upstreamFilter, fetcherForUpstream, scheduler);
   // Only chat models are representable in the Gemini /models shape.
-  return models.filter(model => model.kind === 'chat').map(toGeminiModel);
+  const realChatEntries = models.filter(model => model.kind === 'chat').map(toGeminiModel);
+  // Visible aliases append in `loadAllAliases` order; the Gemini surface
+  // carries no `aliasedFrom` extension (Gemini's `Model` resource is closed)
+  // so the entry advertises the alias id plus the target's display fields.
+  const byId = new Map<string, InternalModel>(models.map(m => [m.id, m]));
+  const aliasEntries: GeminiModel[] = [];
+  for (const alias of aliases) {
+    if (!alias.visibleInModelsList) continue;
+    const target = byId.get(alias.targetModelId);
+    if (target && target.kind !== 'chat') continue;
+    aliasEntries.push(toGeminiModel({
+      ...(target ?? {} as InternalModel),
+      id: alias.alias,
+      display_name: alias.alias,
+      kind: 'chat',
+      limits: target?.limits ?? {},
+    }));
+  }
+  return [...realChatEntries, ...aliasEntries];
 };
 
 export const serveGeminiModels = async (c: Context): Promise<Response> => {
   try {
     const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
-    return Response.json({ models: await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c)) });
+    const aliases = await getRepo().modelAliases.loadAll();
+    return Response.json({ models: await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), aliases) });
   } catch (error) {
     return geminiModelLoadError(error);
   }
@@ -88,7 +110,8 @@ export const serveGeminiModelInfo = async (c: Context): Promise<Response> => {
   const modelId = rawModelId.replace(/^models\//, '');
   try {
     const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
-    const model = (await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c))).find(candidate => candidate.baseModelId === modelId || candidate.name === `models/${modelId}`);
+    const aliases = await getRepo().modelAliases.loadAll();
+    const model = (await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), aliases)).find(candidate => candidate.baseModelId === modelId || candidate.name === `models/${modelId}`);
     if (!model) return geminiError(404, `Model not found: ${modelId}`);
     return Response.json(model);
   } catch (error) {
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index eed33c9de..585b5d638 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -1,3 +1,4 @@
+import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
 import { getInternalModels } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
@@ -21,12 +22,48 @@ export const toPublicModel = (model: InternalModel): PublicModel => {
   return info;
 };
 
+// Build the synthetic /models row for one alias. `limits` and `kind` come from
+// the target when `byId` resolves it and default to `{}` / 'chat' otherwise, so
+// a row is produced even for an unresolved target. `owned_by` falls back in
+// order: target owner, the alias's first upstream id, then the literal 'floway'.
+export const toPublicModelFromAlias = (alias: ModelAlias, byId: ReadonlyMap<string, InternalModel>): PublicModel => {
+  const resolved = byId.get(alias.targetModelId);
+  const { alias: aliasId, createdAt, targetModelId, upstreamIds, rules, onConflict } = alias;
+  const row: PublicModel = {
+    id: aliasId,
+    object: 'model',
+    type: 'model',
+    display_name: aliasId,
+    limits: resolved?.limits ? { ...resolved.limits } : {},
+    kind: resolved?.kind ?? 'chat',
+    created: createdAt,
+    created_at: new Date(createdAt * 1000).toISOString(),
+    aliasedFrom: {
+      targetModelId,
+      upstreamIds,
+      rules,
+      onConflict,
+    },
+  };
+  row.owned_by = resolved?.owned_by ?? upstreamIds[0] ?? 'floway';
+  return row;
+};
+
 export const loadModels = async (
   upstreamFilter: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
+  aliases: readonly ModelAlias[],
 ): Promise<PublicModelsResponse> => {
-  const data = (await getInternalModels(upstreamFilter, fetcherForUpstream, scheduler)).map(toPublicModel);
+  const internal = await getInternalModels(upstreamFilter, fetcherForUpstream, scheduler);
+  const realEntries = internal.map(toPublicModel);
+  const byId = new Map<string, InternalModel>(internal.map(m => [m.id, m]));
+  // Visible aliases append in `loadAllAliases` order, after every real entry.
+  // The spec's no-silent-hide policy keeps disabled-target aliases visible —
+  // the user-facing failure on call is the canonical signal, not the
+  // listing.
+  const aliasEntries = aliases.filter(a => a.visibleInModelsList).map(a => toPublicModelFromAlias(a, byId));
+  const data = [...realEntries, ...aliasEntries];
   return {
     object: 'list',
     has_more: false,
diff --git a/packages/gateway/src/data-plane/models/serve.ts b/packages/gateway/src/data-plane/models/serve.ts
index 9b8b510f9..60736266b 100644
--- a/packages/gateway/src/data-plane/models/serve.ts
+++ b/packages/gateway/src/data-plane/models/serve.ts
@@ -7,6 +7,7 @@ import { loadModels } from './load.ts';
 import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts';
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
+import { getRepo } from '../../repo/index.ts';
 import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
 import { getCurrentColo } from '../../runtime/runtime-info.ts';
 import { ProviderModelsUnavailableError } from '@floway-dev/provider';
@@ -14,7 +15,8 @@ import { ProviderModelsUnavailableError } from '@floway-dev/provider';
 export const models = async (c: Context) => {
   try {
     const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
-    return Response.json(await loadModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c)));
+    const aliases = await getRepo().modelAliases.loadAll();
+    return Response.json(await loadModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), aliases));
   } catch (e) {
     // Upstream HTTP/parse failures squash to a generic message so we do not
     // leak upstream identity. Other registry-thrown errors (e.g. the "no
diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index 1408f10a6..855eca5fa 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -1,5 +1,6 @@
 import { test } from 'vitest';
 
+import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
 import { buildCopilotUpstreamRecord, buildCustomUpstreamRecord, copilotModels, requestApp, setupAppTest } from '../../test-helpers.ts';
 import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
 import { jsonResponse, withMockedFetch, assertEquals } from '@floway-dev/test-utils';
@@ -586,3 +587,180 @@ test('/v1/models returns the last real error when every account model load fails
     },
   );
 });
+
+// /v1/models alias-listing coverage. Each test exercises one slice of the
+// spec's visibility contract: visible alias appears with `aliasedFrom`,
+// hidden alias does not appear, alias-with-disabled-target is still listed,
+// the `aliasedFrom` shape matches the spec byte-for-byte.
+test('/v1/models appends a visible alias with aliasedFrom after the real entries', async () => {
+  const { repo, apiKey } = await setupAppTest();
+
+  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+    {
+      alias: 'codex-auto-review',
+      targetModelId: 'gpt-5.4',
+      upstreamIds: [],
+      rules: { reasoning: { effort: 'low' } },
+      visibleInModelsList: true,
+      onConflict: 'real-only',
+      createdAt: 1_700_000_000,
+    },
+  ]);
+
+  await repo.upstreams.save(buildCustomUpstreamRecord({
+    id: 'up_oai',
+    name: 'Test OpenAI',
+    sortOrder: 100,
+    config: {
+      baseUrl: 'https://oai.example.com',
+      authStyle: 'bearer',
+      apiKey: 'sk-test',
+      endpoints: { chatCompletions: {} },
+    },
+  }));
+
+  await withMockedFetch(
+    request => {
+      const url = new URL(request.url);
+      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (url.pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({
+          token: 'copilot-access-token',
+          expires_at: 4102444800,
+          refresh_in: 3600,
+          endpoints: { api: 'https://api.individual.githubcopilot.com' },
+        });
+      }
+      if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') {
+        return jsonResponse(copilotModels([]));
+      }
+      if (url.pathname === '/v1/models' && url.hostname === 'oai.example.com') {
+        return jsonResponse({
+          object: 'list',
+          data: [{ id: 'gpt-5.4', owned_by: 'openai' }],
+        });
+      }
+      throw new Error(`Unhandled fetch ${request.url}`);
+    },
+    async () => {
+      const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
+      assertEquals(response.status, 200);
+      const body = await response.json() as { data: Array<{ id: string; owned_by?: string; aliasedFrom?: unknown }> };
+      const ids = body.data.map(m => m.id);
+      assertEquals(ids[ids.length - 1], 'codex-auto-review');
+      const aliasEntry = body.data.find(m => m.id === 'codex-auto-review');
+      if (!aliasEntry) throw new Error('expected codex-auto-review alias entry');
+      assertEquals(aliasEntry.aliasedFrom, {
+        targetModelId: 'gpt-5.4',
+        upstreamIds: [],
+        rules: { reasoning: { effort: 'low' } },
+        onConflict: 'real-only',
+      });
+      assertEquals(aliasEntry.owned_by, 'openai');
+    },
+  );
+});
+
+test('/v1/models omits aliases marked visibleInModelsList=false', async () => {
+  const { repo, apiKey } = await setupAppTest();
+
+  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+    {
+      alias: 'hidden-alias',
+      targetModelId: 'gpt-5.4',
+      upstreamIds: [],
+      rules: {},
+      visibleInModelsList: false,
+      onConflict: 'real-only',
+      createdAt: 0,
+    },
+  ]);
+
+  await repo.upstreams.save(buildCustomUpstreamRecord({
+    id: 'up_oai',
+    name: 'Test OpenAI',
+    sortOrder: 100,
+    config: {
+      baseUrl: 'https://oai.example.com',
+      authStyle: 'bearer',
+      apiKey: 'sk-test',
+      endpoints: { chatCompletions: {} },
+    },
+  }));
+
+  await withMockedFetch(
+    request => {
+      const url = new URL(request.url);
+      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (url.pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+      }
+      if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') {
+        return jsonResponse(copilotModels([]));
+      }
+      if (url.pathname === '/v1/models' && url.hostname === 'oai.example.com') {
+        return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
+      }
+      throw new Error(`Unhandled fetch ${request.url}`);
+    },
+    async () => {
+      const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
+      const body = await response.json() as { data: Array<{ id: string }> };
+      assertEquals(body.data.map(m => m.id).includes('hidden-alias'), false);
+    },
+  );
+});
+
+test('/v1/models lists an alias whose target is not present on any upstream (no silent hide)', async () => {
+  const { repo, apiKey } = await setupAppTest();
+
+  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+    {
+      alias: 'orphan-alias',
+      targetModelId: 'never-resolves',
+      upstreamIds: ['up_oai'],
+      rules: {},
+      visibleInModelsList: true,
+      onConflict: 'real-only',
+      createdAt: 0,
+    },
+  ]);
+
+  await repo.upstreams.save(buildCustomUpstreamRecord({
+    id: 'up_oai',
+    name: 'Test OpenAI',
+    sortOrder: 100,
+    config: {
+      baseUrl: 'https://oai.example.com',
+      authStyle: 'bearer',
+      apiKey: 'sk-test',
+      endpoints: { chatCompletions: {} },
+    },
+  }));
+
+  await withMockedFetch(
+    request => {
+      const url = new URL(request.url);
+      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (url.pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+      }
+      if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') {
+        return jsonResponse(copilotModels([]));
+      }
+      if (url.pathname === '/v1/models' && url.hostname === 'oai.example.com') {
+        return jsonResponse({ object: 'list', data: [] });
+      }
+      throw new Error(`Unhandled fetch ${request.url}`);
+    },
+    async () => {
+      const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
+      const body = await response.json() as { data: Array<{ id: string; aliasedFrom?: { targetModelId: string }; owned_by?: string }> };
+      const orphan = body.data.find(m => m.id === 'orphan-alias');
+      if (!orphan) throw new Error('expected orphan-alias entry');
+      assertEquals(orphan.aliasedFrom?.targetModelId, 'never-resolves');
+      // No matching real entry → owner falls back to the alias's primary upstream id.
+      assertEquals(orphan.owned_by, 'up_oai');
+    },
+  );
+});
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index 8ca75c518..e29df1c6a 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -285,6 +285,11 @@ export interface ProviderModelResolution {
   id: string;
   model: UpstreamModel;
   binding: ProviderModelRecord;
+  // Set when this resolution came from an alias-rewrite interpretation. The
+  // gateway-side passthrough callers (embeddings/images/completions) stamp
+  // this onto the `x-floway-alias` response header so alias-served calls are
+  // observable without enabling any extra mode.
+  aliasName?: string;
 }
 
 export interface ModelInterpretation {
@@ -381,6 +386,10 @@ const pushInterpretation = (
     out.push(aliasInterp);
     out.push(realInterp);
     return;
+  default: {
+    const exhaustive: never = alias.onConflict;
+    throw new Error(`pushInterpretation: unhandled onConflict '${exhaustive as string}'`);
+  }
   }
 };
 
@@ -470,7 +479,14 @@ export const resolveModelForRequest = async (
 
   const interpretations = enumerateModelInterpretations(modelId, providers, aliases);
   const { resolutions, failedUpstreams } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
-  return { matches: resolutions.map(r => r.resolved), failedUpstreams };
+  // Project each resolution's alias-rewrite interpretation onto the
+  // returned ProviderModelResolution so passthrough callers can stamp the
+  // `x-floway-alias` header without re-deriving the match.
+  const matches: ProviderModelResolution[] = resolutions.map(r =>
+    r.interpretation.aliasName !== undefined
+      ? { ...r.resolved, aliasName: r.interpretation.aliasName }
+      : r.resolved);
+  return { matches, failedUpstreams };
 };
 
 export const resolveModelForProvider = async (
diff --git a/packages/gateway/src/data-plane/providers/registry_test.ts b/packages/gateway/src/data-plane/providers/registry_test.ts
index d470f9169..9823aeb98 100644
--- a/packages/gateway/src/data-plane/providers/registry_test.ts
+++ b/packages/gateway/src/data-plane/providers/registry_test.ts
@@ -922,6 +922,7 @@ describe('enumerateModelInterpretations with alias matching', () => {
     rules: { reasoning: { effort: 'low' } },
     visibleInModelsList: true,
     onConflict: 'real-only',
+    createdAt: 0,
     ...over,
   });
 
@@ -1065,6 +1066,7 @@ describe('resolveModelForRequest applies alias onConflict pruning', () => {
     rules: { reasoning: { effort: 'low' } },
     visibleInModelsList: true,
     onConflict,
+    createdAt: 0,
   });
 
   test('alias-only resolves to a single match against the alias target id', async () => {
diff --git a/packages/gateway/src/data-plane/shared/passthrough-serve.ts b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
index 1add1a115..6c917fb86 100644
--- a/packages/gateway/src/data-plane/shared/passthrough-serve.ts
+++ b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
@@ -21,6 +21,7 @@ import { createUpstreamLatencyRecorder, recordPerformanceError, recordPerformanc
 import { recordTokenUsage } from './telemetry/usage.ts';
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import type { AuthedContext } from '../../middleware/auth.ts';
+import { getRepo } from '../../repo/index.ts';
 import type { TokenUsage } from '../../repo/types.ts';
 import type { GatewayCtx } from '../chat/shared/gateway-ctx.ts';
 import { type StreamCompletion, writeSSEFrames } from '../chat/shared/stream/sse.ts';
@@ -117,6 +118,31 @@ interface PassthroughServeContext {
 export const passthroughApiError = (c: Context, message: string, status: ContentfulStatusCode): Response =>
   c.json({ error: { message, type: 'api_error' } }, status);
 
+// Emit one trace line per rule field present on the matched alias when the
+// inbound endpoint has no slot for the rule. The passthrough endpoints
+// (embeddings, images, /v1/completions) carry no Floway-extension fields
+// so a non-empty `rules` object is structurally dropped before the upstream
+// call; emitting one trace line per knob gives an operator the same signal
+// the chat sanitizers do.
+const traceDroppedAliasRulesForPassthrough = (
+  aliasName: string,
+  aliases: readonly { alias: string; rules: Record<string, unknown> }[],
+  sourceApi: PassthroughServeApiName,
+): void => {
+  const matched = aliases.find(a => a.alias === aliasName);
+  if (!matched) return;
+  const rules = matched.rules as { reasoning?: Record<string, unknown>; verbosity?: unknown; serviceTier?: unknown; anthropicSpeed?: unknown; anthropicBeta?: readonly unknown[] };
+  const fields: string[] = [];
+  if (rules.reasoning) for (const key of Object.keys(rules.reasoning)) fields.push(`reasoning.${key}`);
+  if (rules.verbosity !== undefined) fields.push('verbosity');
+  if (rules.serviceTier !== undefined) fields.push('serviceTier');
+  if (rules.anthropicSpeed !== undefined) fields.push('anthropicSpeed');
+  if (rules.anthropicBeta?.length) fields.push('anthropicBeta');
+  for (const field of fields) {
+    console.warn('floway.alias.drop', JSON.stringify({ alias: aliasName, field, targetProtocol: sourceApi }));
+  }
+};
+
 export const passthroughServe = async (input: PassthroughServeContext): Promise<Response> => {
   const { c, ctx, sourceApi, model, bindingServesEndpoint, call, response: responseHandling } = input;
   const requestStartedAt = performance.now();
@@ -124,12 +150,20 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
 
   try {
     const fetcherForUpstream = await createPerRequestFetcher(ctx.currentColo);
+    // Aliases pass through so a `(model, lookupId)` interpretation can rewrite
+    // to the alias's target id even for non-LLM-shaped endpoints. The alias
+    // rules themselves never apply here — the inbound payload (embeddings,
+    // images, /v1/completions) has no protocol-extension slots for the rule
+    // knobs. We still surface the matched alias name on the
+    // `x-floway-alias` response header and trace one log line per dropped
+    // rule so an operator can confirm the rewrite ran.
+    const aliases = await getRepo().modelAliases.loadAll();
     // Each match is one (upstream, upstream-catalog id) pair that interprets
     // the inbound public id. Iteration order follows configured sort_order
     // across upstreams, with the unprefixed interpretation pushed before the
     // prefixed one within a single upstream. The first match whose binding
     // satisfies the endpoint capability wins.
-    const { matches, failedUpstreams } = await resolveModelForRequest(model, ctx.upstreamIds, fetcherForUpstream, ctx.backgroundScheduler);
+    const { matches, failedUpstreams } = await resolveModelForRequest(model, ctx.upstreamIds, fetcherForUpstream, ctx.backgroundScheduler, aliases);
     if (matches.length === 0) {
       ctx.dump?.error('gateway');
       return passthroughApiError(c, appendFailedUpstreams(`Model ${model} is not available on any configured upstream.`, failedUpstreams), 404);
@@ -137,6 +171,10 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
 
     for (const match of matches) {
       if (!bindingServesEndpoint(match.binding)) continue;
+      if (match.aliasName !== undefined) {
+        ctx.responseHeaders.set('x-floway-alias', match.aliasName);
+        traceDroppedAliasRulesForPassthrough(match.aliasName, aliases, sourceApi);
+      }
 
       const recorder = createUpstreamLatencyRecorder();
       const { response, modelKey } = await call(match.binding, {
diff --git a/packages/gateway/src/test-helpers/gateway-ctx.ts b/packages/gateway/src/test-helpers/gateway-ctx.ts
index 3a19af303..3ebf1f474 100644
--- a/packages/gateway/src/test-helpers/gateway-ctx.ts
+++ b/packages/gateway/src/test-helpers/gateway-ctx.ts
@@ -15,5 +15,6 @@ export const mockGatewayCtx = (overrides: Partial<GatewayCtx> = {}): GatewayCtx
   dump: null,
   backgroundScheduler: promise => { void promise; },
   requestStartedAt: 0,
+  responseHeaders: new Headers(),
   ...overrides,
 });
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index fddc80318..383e0ffe0 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -110,6 +110,30 @@ export interface PublicModel {
   };
   kind: ModelKind;
   cost?: ModelPricing;
+  // Floway protocol extension. Present on synthesized alias entries the
+  // gateway appends to the listing. Clients that do not know about the
+  // field ignore it; alias-aware clients (dashboard, CLI shims) render the
+  // alias's target id and rules from this payload directly.
+  // See docs/superpowers/specs/2026-06-25-model-aliases-design.md.
+  aliasedFrom?: PublicModelAliasedFrom;
+}
+
+export interface PublicModelAliasedFrom {
+  targetModelId: string;
+  upstreamIds: readonly string[];
+  rules: {
+    reasoning?: {
+      effort?: string;
+      budgetTokens?: number;
+      adaptive?: boolean;
+      summary?: string;
+    };
+    verbosity?: string;
+    serviceTier?: string;
+    anthropicSpeed?: string;
+    anthropicBeta?: readonly string[];
+  };
+  onConflict: 'alias-only' | 'real-only' | 'both-real-first' | 'both-alias-first';
 }
 
 export interface PublicModelsResponse {
diff --git a/packages/translate/package.json b/packages/translate/package.json
index 462bf2d1c..e18d7f564 100644
--- a/packages/translate/package.json
+++ b/packages/translate/package.json
@@ -5,7 +5,8 @@
   "type": "module",
   "exports": {
     ".": { "import": "./src/index.ts", "types": "./src/index.ts" },
-    "./via-responses/responses-items": { "import": "./src/shared/via-responses/responses-items.ts", "types": "./src/shared/via-responses/responses-items.ts" }
+    "./via-responses/responses-items": { "import": "./src/shared/via-responses/responses-items.ts", "types": "./src/shared/via-responses/responses-items.ts" },
+    "./via-messages/anthropic-extensions": { "import": "./src/shared/via-messages/anthropic-extensions.ts", "types": "./src/shared/via-messages/anthropic-extensions.ts" }
   },
   "scripts": {
     "typecheck": "tsc --noEmit"

From 8f2ce38b7a896c093fb397b71dd681c04ab3d59a Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Thu, 25 Jun 2026 23:55:54 +0800
Subject: [PATCH 007/170] fix(aliases): address final review (header on
 passthrough, streaming-safe, idempotent seed, ordered listing)

Final-review fix wave on top of the model-aliases data-plane series. Each
finding from the whole-branch review is addressed; one shim is kept and
documented per the reviewer's option-B recommendation.

- Critical #1: `/v1/embeddings`, `/v1/images/*`, and `/v1/completions`
  returned the response through the legacy `ctx.dump?.finalize` pattern
  instead of `finalizeGatewayResponse`, so the `x-floway-alias` header
  the passthrough scaffold stamped on the per-ctx bag was silently
  dropped. Route all three call sites through `finalizeGatewayResponse`
  for a uniform finalize seam.

- Important #4: Make the `x-floway-alias` stamp streaming-safe by
  introducing `stageGatewayResponseHeader(ctx, name, value)`, which
  writes the header to BOTH Hono's `c.header` (the documented knob that
  survives `streamSSE`'s internal `c.newResponse`) and the per-ctx
  `responseHeaders` bag that `finalizeGatewayResponse` merges onto
  non-streaming responses built with the Web `Response.json`. The chat
  serve.ts layers (messages, gemini, responses, chat-completions) and
  passthrough-serve all go through this helper, eliminating the
  reliance on post-construction `response.headers.set` for streaming.

- Important #3: Add coverage in `gemini_test.ts` that a visible alias
  appears in `/v1beta/models` as a synthetic Gemini model entry with
  the expected name, displayName, and supportedGenerationMethods. The
  prior code path was untested; a refactor of `loadGeminiModels` would
  not have been caught.

- Important #2: Keep the pre-alias-table `rewriteResponsesEntryModelAlias`
  shim that swaps `codex-auto-review` -> `gpt-5.4` before the matcher
  runs (option B from the review). Add a code comment above it
  explaining the carveout: the seeded alias is `on_conflict='real-only'`,
  so on a Codex upstream that exposes a real `codex-auto-review` model
  the alias would otherwise lose to the real model, breaking parity
  with Codex CLI's native behavior. The shim is temporary pending a
  deliberate Codex behavior change.

- Minor #6: Switch the `0046_model_aliases.sql` seed `INSERT` to
  `INSERT OR IGNORE` so a fresh local-dev replay doesn't trip the
  PRIMARY KEY uniqueness check.

- Minor #8: Add `ORDER BY alias` to `loadAllAliases` so the `/v1/models`
  listing emits alias entries deterministically across runtimes.

The unit-test fan-out reflects adding `c: AuthedContext` to `GatewayCtx`
so the serve layer can call Hono's `c.header` directly. Test stubs go
through the shared `stubAuthedContext` helper.
---
 .../gateway/migrations/0046_model_aliases.sql |  2 +-
 .../src/control-plane/model-aliases/repo.ts   |  4 +-
 .../chat/chat-completions/attempt_test.ts     |  2 +
 .../demote-developer-to-system_test.ts        |  2 +
 .../demote-interleaved-system-to-user_test.ts |  2 +
 ...le-reasoning-on-forced-tool-choice_test.ts |  2 +
 .../include-usage-stream-options_test.ts      |  2 +
 .../interceptors/normalize-usage_test.ts      |  2 +
 .../vendor-deepseek-normalize_test.ts         |  2 +
 .../vendor-kimi-normalize_test.ts             |  2 +
 .../vendor-qwen-normalize_test.ts             |  2 +
 .../data-plane/chat/chat-completions/serve.ts |  6 +-
 .../chat/chat-completions/serve_test.ts       |  2 +
 .../data-plane/chat/gemini/attempt_test.ts    |  2 +
 .../strip-safety-settings_test.ts             |  2 +
 .../strip-unsupported-part-fields_test.ts     |  2 +
 .../strip-unsupported-tools_test.ts           |  2 +
 .../suppress-thought-parts_test.ts            |  2 +
 .../data-plane/chat/gemini/respond_test.ts    |  2 +
 .../src/data-plane/chat/gemini/serve.ts       |  9 +--
 .../src/data-plane/chat/gemini/serve_test.ts  |  2 +
 .../data-plane/chat/messages/attempt_test.ts  |  2 +
 .../demote-interleaved-system-to-user_test.ts |  2 +
 ...le-reasoning-on-forced-tool-choice_test.ts |  2 +
 .../strip-billing-attribution_test.ts         |  2 +
 .../interceptors/web-search-shim_test.ts      |  2 +
 .../data-plane/chat/messages/respond_test.ts  |  2 +
 .../src/data-plane/chat/messages/serve.ts     |  9 +--
 .../data-plane/chat/messages/serve_test.ts    |  2 +
 .../data-plane/chat/responses/attempt_test.ts |  2 +
 .../src/data-plane/chat/responses/http.ts     | 17 ++++++
 .../canonicalize-encrypted-content_test.ts    |  2 +
 .../demote-developer-to-system_test.ts        |  2 +
 .../demote-interleaved-system-to-user_test.ts |  2 +
 ...le-reasoning-on-forced-tool-choice_test.ts |  2 +
 .../interceptors/retry-cyber-policy_test.ts   |  2 +
 .../interceptors/server-tool-shim_test.ts     |  3 +
 .../image-generation-integration_test.ts      |  2 +
 .../server-tools/image-generation_test.ts     |  2 +
 .../vendor-deepseek-normalize_test.ts         |  2 +
 .../vendor-qwen-normalize_test.ts             |  2 +
 .../src/data-plane/chat/responses/serve.ts    |  9 +--
 .../data-plane/chat/responses/serve_test.ts   |  2 +
 .../src/data-plane/chat/shared/gateway-ctx.ts | 39 +++++++++---
 .../data-plane/chat/shared/respond_test.ts    |  2 +
 .../chat/shared/upstream-telemetry_test.ts    |  2 +
 .../src/data-plane/completions/serve.ts       |  7 +--
 .../src/data-plane/completions/serve_test.ts  | 47 +++++++++++++++
 .../src/data-plane/embeddings/serve.ts        |  7 +--
 .../src/data-plane/embeddings/serve_test.ts   | 56 +++++++++++++++++
 .../gateway/src/data-plane/images/serve.ts    | 15 ++---
 .../src/data-plane/images/serve_test.ts       | 60 +++++++++++++++++++
 .../src/data-plane/models/gemini_test.ts      | 46 ++++++++++++++
 .../data-plane/shared/passthrough-serve.ts    |  9 ++-
 .../gateway/src/test-helpers/gateway-ctx.ts   | 10 ++++
 55 files changed, 384 insertions(+), 45 deletions(-)

diff --git a/packages/gateway/migrations/0046_model_aliases.sql b/packages/gateway/migrations/0046_model_aliases.sql
index c934d77b6..d76687bbd 100644
--- a/packages/gateway/migrations/0046_model_aliases.sql
+++ b/packages/gateway/migrations/0046_model_aliases.sql
@@ -10,5 +10,5 @@ CREATE TABLE model_aliases (
   updated_at INTEGER NOT NULL DEFAULT (unixepoch())
 );
 
-INSERT INTO model_aliases (alias, target_model_id, rules_json, on_conflict)
+INSERT OR IGNORE INTO model_aliases (alias, target_model_id, rules_json, on_conflict)
 VALUES ('codex-auto-review', 'gpt-5.4', '{"reasoning":{"effort":"low"}}', 'real-only');
diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts
index 4c13cd09b..a7cfd963f 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo.ts
@@ -13,9 +13,11 @@ interface ModelAliasRow {
 
 // The model_aliases table is operator-managed and small (dozens of rows at
 // most), so the data plane reads the full table per request — no cache layer.
+// `ORDER BY alias` makes the read deterministic so `/v1/models` and friends
+// emit alias entries in a stable, operator-predictable order across runtimes.
 export const loadAllAliases = async (db: SqlDatabase): Promise<readonly ModelAlias[]> => {
   const { results } = await db
-    .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at FROM model_aliases')
+    .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at FROM model_aliases ORDER BY alias')
     .all<ModelAliasRow>();
   return results.map(toModelAlias);
 };
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
index 62b814359..29f9d1b1b 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
@@ -3,6 +3,7 @@ import { test, vi } from 'vitest';
 import { chatCompletionsAttempt } from './attempt.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
 import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -23,6 +24,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
index 83d9bccb7..b84aea40f 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import { withDemoteDeveloperToSystem } from './demote-developer-to-system.ts';
 import type { ChatCompletionsInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
 import { eventResult } from '@floway-dev/provider';
@@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
index 156389a46..4ecb21e72 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import { withInterleavedSystemDemotedToUser } from './demote-interleaved-system-to-user.ts';
 import type { ChatCompletionsInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
 import { eventResult } from '@floway-dev/provider';
@@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index e46726510..0ff191572 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import { withReasoningDisabledOnForcedToolChoice } from './disable-reasoning-on-forced-tool-choice.ts';
 import type { ChatCompletionsInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
 import { eventResult } from '@floway-dev/provider';
@@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
index e3e4147a2..f8d0c33c5 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import { withUsageStreamOptionsIncluded } from './include-usage-stream-options.ts';
 import type { ChatCompletionsInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
 import { eventResult } from '@floway-dev/provider';
@@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
index 0b6fed4f1..b9dcb1b9c 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import { withUsageNormalized } from './normalize-usage.ts';
 import type { ChatCompletionsInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -16,6 +17,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
index 81be2c3ab..7f96ee5f2 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import type { ChatCompletionsInvocation } from './types.ts';
 import { withVendorDeepseekChatCompletionsNormalize } from './vendor-deepseek-normalize.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -20,6 +21,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
index 1cfc304b7..2547a30b4 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import type { ChatCompletionsInvocation } from './types.ts';
 import { withVendorKimiChatCompletionsNormalize } from './vendor-kimi-normalize.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import { eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -16,6 +17,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
index 0506a1e25..e374ea3b4 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import type { ChatCompletionsInvocation } from './types.ts';
 import { withVendorQwenChatCompletionsNormalize } from './vendor-qwen-normalize.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
 import { eventResult } from '@floway-dev/provider';
@@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
index 1347dd6bd..28e10cfd5 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
@@ -6,6 +6,7 @@ import { applyAliasRulesToChatCompletions } from '../../model-aliases/apply.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
+import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { ExecuteResult } from '@floway-dev/provider';
@@ -50,9 +51,10 @@ export const chatCompletionsServe = {
     }
     // Apply operator-locked alias rules to the inbound IR before the
     // attempt runs its interceptor chain. The matching `x-floway-alias`
-    // header rides out via ctx.responseHeaders.
+    // header is staged via Hono's `c.header` so it survives `streamSSE`'s
+    // internal `c.newResponse`.
     if (candidate.aliasRules) applyAliasRulesToChatCompletions(payload, candidate.aliasRules);
-    if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
+    if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
     return await chatCompletionsAttempt.generate({ payload, ctx, store, candidate, headers });
   },
 };
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
index 402803203..37441a5ba 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
@@ -2,6 +2,7 @@ import { test, vi } from 'vitest';
 
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
 import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -49,6 +50,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
diff --git a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
index 29a4e9bc5..bc7c7949b 100644
--- a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
@@ -3,6 +3,7 @@ import { test, vi } from 'vitest';
 import { geminiAttempt } from './attempt.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
 import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -24,6 +25,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
index d4b48ac3a..98d15dd39 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
@@ -1,6 +1,7 @@
 import { test } from 'vitest';
 
 import { stripSafetySettings } from './strip-safety-settings.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
@@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
index 3b02b63f8..e74d45a4f 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
@@ -1,6 +1,7 @@
 import { test } from 'vitest';
 
 import { stripUnsupportedPartFields } from './strip-unsupported-part-fields.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
@@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
index 6a2c20ef7..689ee6d0e 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
@@ -1,6 +1,7 @@
 import { test } from 'vitest';
 
 import { stripUnsupportedTools } from './strip-unsupported-tools.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
@@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
index eb67a0092..33e49a791 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
@@ -1,6 +1,7 @@
 import { test } from 'vitest';
 
 import { suppressThoughtParts } from './suppress-thought-parts.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import { eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
 import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
@@ -15,6 +16,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
index 31981b544..61e0132a7 100644
--- a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
@@ -2,6 +2,7 @@ import { Hono } from 'hono';
 import { test } from 'vitest';
 
 import { respondGemini } from './respond.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import { eventFrame } from '@floway-dev/protocols/common';
@@ -26,6 +27,7 @@ const ctx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts
index e1f61d628..7d1cf345e 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts
@@ -6,6 +6,7 @@ import { applyAliasRulesToGemini } from '../../model-aliases/apply.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
+import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
 import type { ExecuteResult, PlainResult } from '@floway-dev/provider';
@@ -60,10 +61,10 @@ export const geminiServe = {
       );
     }
     // Operator-locked alias rules apply to the Gemini IR before the attempt
-    // runs; the matching `x-floway-alias` header rides out via
-    // ctx.responseHeaders.
+    // runs; the matching `x-floway-alias` header is staged via Hono's
+    // `c.header` so it survives `streamSSE`'s internal `c.newResponse`.
     if (candidate.aliasRules) applyAliasRulesToGemini(payload, candidate.aliasRules);
-    if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
+    if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
     return await geminiAttempt.generate({ payload, ctx, store, candidate, headers });
   },
 
@@ -97,7 +98,7 @@ export const geminiServe = {
       );
     }
     if (candidate.aliasRules) applyAliasRulesToGemini(payload, candidate.aliasRules);
-    if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
+    if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
     return await geminiAttempt.countTokens({ payload, ctx, store, candidate, headers });
   },
 };
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
index 070d44471..3ef8114e8 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
@@ -2,6 +2,7 @@ import { test, vi } from 'vitest';
 
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
 import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -48,6 +49,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
diff --git a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
index f9192e289..41a96f0de 100644
--- a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
@@ -3,6 +3,7 @@ import { test, vi } from 'vitest';
 import { messagesAttempt } from './attempt.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
 import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -23,6 +24,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
index 9df67c5c0..bf29636a7 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import { demoteInterleavedSystemToUser } from './demote-interleaved-system-to-user.ts';
 import type { MessagesInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
@@ -16,6 +17,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 7b7045355..9db2fe856 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import { withReasoningDisabledOnForcedToolChoice } from './disable-reasoning-on-forced-tool-choice.ts';
 import type { MessagesInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
@@ -16,6 +17,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
index 8ae90e232..c08720c71 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import { stripBillingAttribution } from './strip-billing-attribution.ts';
 import type { MessagesInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
@@ -16,6 +17,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
index dca97addd..8ddfa03e7 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
@@ -13,6 +13,7 @@ import {
 } from './web-search-shim.ts';
 import { initRepo } from '../../../../repo/index.ts';
 import { InMemoryRepo } from '../../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import { DEFAULT_SEARCH_CONFIG } from '../../../tools/web-search/search-config.ts';
 import type { WebSearchProvider, WebSearchProviderResult } from '../../../tools/web-search/types.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
@@ -58,6 +59,7 @@ const gatewayCtx = (apiKeyId: string = 'test-key'): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
diff --git a/packages/gateway/src/data-plane/chat/messages/respond_test.ts b/packages/gateway/src/data-plane/chat/messages/respond_test.ts
index 79d0a9db6..0caef38b6 100644
--- a/packages/gateway/src/data-plane/chat/messages/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/respond_test.ts
@@ -4,6 +4,7 @@ import { test } from 'vitest';
 import { createMessagesStreamUsageState, respondMessages, tokenUsageFromMessagesFrame } from './respond.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesStreamEvent } from '@floway-dev/protocols/messages';
@@ -534,6 +535,7 @@ const makeRespondCtx = (): GatewayCtx => ({
   wantsStream: false,
   runtimeLocation: 'TEST',
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
   currentColo: 'TEST',
diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts
index 30282afd4..8b0b337f1 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve.ts
@@ -6,6 +6,7 @@ import { applyAliasRulesToMessages } from '../../model-aliases/apply.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
+import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
 import type { ExecuteResult, PlainResult } from '@floway-dev/provider';
@@ -59,10 +60,10 @@ export const messagesServe = {
     // Operator-locked alias rules go onto the inbound IR before the attempt
     // begins so the per-protocol interceptor chain (and any downstream
     // translate pass) sees the already-injected fields. The matching
-    // `x-floway-alias` response header is staged on the gateway-stamped
-    // header set; the http wrapper flushes it onto the outgoing Response.
+    // `x-floway-alias` header is staged via Hono's `c.header` so it
+    // survives `streamSSE`'s internal `c.newResponse`.
     if (candidate.aliasRules) applyAliasRulesToMessages(payload, candidate.aliasRules);
-    if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
+    if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
     return await messagesAttempt.generate({ payload, ctx, store, candidate, headers });
   },
 
@@ -96,7 +97,7 @@ export const messagesServe = {
     // rules apply uniformly regardless of endpoint, and the response header
     // rides out the same way.
     if (candidate.aliasRules) applyAliasRulesToMessages(payload, candidate.aliasRules);
-    if (candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', candidate.aliasName);
+    if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
     return await messagesAttempt.countTokens({ payload, ctx, store, candidate, headers });
   },
 };
diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
index 734bad296..bb1dfcfb0 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
@@ -2,6 +2,7 @@ import { test, vi } from 'vitest';
 
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
 import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -46,6 +47,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
diff --git a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
index a952627b4..ceba118d7 100644
--- a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
@@ -7,6 +7,7 @@ import { createResponsesHttpStore } from './items/store.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import type { StoredResponsesItem } from '../../../repo/types.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -25,6 +26,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
diff --git a/packages/gateway/src/data-plane/chat/responses/http.ts b/packages/gateway/src/data-plane/chat/responses/http.ts
index 54c497a0a..9346d5576 100644
--- a/packages/gateway/src/data-plane/chat/responses/http.ts
+++ b/packages/gateway/src/data-plane/chat/responses/http.ts
@@ -16,6 +16,23 @@ import { internalErrorResult, toInternalDebugError } from '@floway-dev/provider'
 // performance telemetry, and usage accounting all see the real model name
 // (and the `low` reasoning effort the alias implies — generate only;
 // compact carries no `reasoning` field).
+//
+// This shim predates the operator-managed alias table seeded by migration
+// `0046_model_aliases.sql`. The two paths overlap on `/v1/responses` —
+// rewriting at this entry swaps the inbound `model` to `gpt-5.4` BEFORE the
+// alias matcher in `enumerateProviderCandidates` runs, so the alias row
+// never matches for this surface. The carveout is deliberate: the seeded
+// alias is stored with `on_conflict='real-only'`, which means on a Codex
+// upstream that exposes a real `codex-auto-review` model the alias would
+// silently lose to the real id and the `reasoning.effort=low` rule would
+// never apply — breaking parity with Codex CLI's native auto-review
+// behavior. Other inbound surfaces (`/v1/messages`, `/v1/chat/completions`,
+// `/v1beta/…`) carry no entry-level shim and reach the alias matcher
+// unchanged; they observe `real-only` semantics as designed.
+//
+// The shim is a temporary carveout pending a follow-up that either deletes it
+// after a deliberate Codex behavior change (e.g. switching the seeded alias to
+// `on_conflict='both-alias-first'`) or migrates this surface to the alias table.
 const rewriteResponsesEntryModelAlias = (payload: ResponsesPayload, stampReasoningEffort: boolean): ResponsesPayload => {
   if (payload.model !== CODEX_AUTO_REVIEW_ALIAS) return payload;
   if (!stampReasoningEffort) return { ...payload, model: CODEX_AUTO_REVIEW_TARGET };
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
index f904f85d5..444ce069d 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import { withReasoningEncryptedContentCanonicalized } from './canonicalize-encrypted-content.ts';
 import type { ResponsesInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
 import { eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -17,6 +18,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
index a193d01db..a2951ef03 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import { withDemoteDeveloperToSystem } from './demote-developer-to-system.ts';
 import type { ResponsesInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
 import { doneFrame } from '@floway-dev/protocols/common';
@@ -17,6 +18,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
index ae1fc3970..f4f26c112 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import { withInterleavedSystemDemotedToUser } from './demote-interleaved-system-to-user.ts';
 import type { ResponsesInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
 import { doneFrame } from '@floway-dev/protocols/common';
@@ -17,6 +18,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index dcddbd6c8..ea7b872ab 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import { withReasoningDisabledOnForcedToolChoice } from './disable-reasoning-on-forced-tool-choice.ts';
 import type { ResponsesInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
 import { doneFrame } from '@floway-dev/protocols/common';
@@ -17,6 +18,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
index 3cf947b01..5b9bb4c91 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import { withCyberPolicyRetried } from './retry-cyber-policy.ts';
 import type { ResponsesInvocation } from './types.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
 import { eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -45,6 +46,7 @@ const stubCtx = (overrides: { abortSignal?: AbortSignal } = {}): GatewayCtx => (
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
   ...overrides,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
index 6688dcec7..ff2e0f910 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
@@ -16,6 +16,7 @@ import { SHIM_TOOL_NAME, webSearchServerTool } from './server-tools/web-search.t
 import type { ResponsesInterceptor, ResponsesInvocation } from './types.ts';
 import { initRepo } from '../../../../repo/index.ts';
 import { InMemoryRepo } from '../../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import { resolveConfiguredWebSearchProvider } from '../../../tools/web-search/provider.ts';
 import type {
   ConfiguredWebSearchProvider,
@@ -348,6 +349,7 @@ const makeGatewayCtx = (apiKeyId: string = 'k1'): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
@@ -4497,6 +4499,7 @@ test('downstream AbortSignal threads through to provider search / fetchPage and
     currentColo: 'TEST',
     dump: null,
     backgroundScheduler: () => {},
+    c: stubAuthedContext(),
     responseHeaders: new Headers(),
     requestStartedAt: 0,
     abortSignal: controller.signal,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
index 46e30b043..8f1abec4c 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
@@ -2,6 +2,7 @@ import { beforeEach, test, vi } from 'vitest';
 
 import { initRepo } from '../../../../../repo/index.ts';
 import { InMemoryRepo } from '../../../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../../shared/gateway-ctx.ts';
 import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../../items/store.ts';
 import type { ResponsesInvocation } from '../types.ts';
@@ -144,6 +145,7 @@ const gatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
index da94b3068..16f0415e1 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
@@ -19,6 +19,7 @@ import {
 } from './image-generation.ts';
 import { initRepo } from '../../../../../repo/index.ts';
 import { InMemoryRepo } from '../../../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../../shared/gateway-ctx.ts';
 import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../../items/store.ts';
 import type { ResponsesInvocation } from '../types.ts';
@@ -56,6 +57,7 @@ const gatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
index 7db1b6360..c873f9225 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import type { ResponsesInvocation } from './types.ts';
 import { withVendorDeepseekResponsesNormalize } from './vendor-deepseek-normalize.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
 import { doneFrame } from '@floway-dev/protocols/common';
@@ -17,6 +18,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
index 23afe8462..6417306ff 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import type { ResponsesInvocation } from './types.ts';
 import { withVendorQwenResponsesNormalize } from './vendor-qwen-normalize.ts';
+import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
 import { doneFrame } from '@floway-dev/protocols/common';
@@ -17,6 +18,7 @@ const stubCtx: GatewayCtx = {
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
diff --git a/packages/gateway/src/data-plane/chat/responses/serve.ts b/packages/gateway/src/data-plane/chat/responses/serve.ts
index e66a2a29a..81035f20d 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve.ts
@@ -4,6 +4,7 @@ import type { ResponsesSnapshotMode, StatefulResponsesStore } from './items/stor
 import { prepareResponsesServePlan } from './serve-prep.ts';
 import { applyAliasRulesToResponses } from '../../model-aliases/apply.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
+import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { ResponsesPayload, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
 import type { ExecuteResult } from '@floway-dev/provider';
@@ -48,10 +49,10 @@ export const responsesServe = {
     });
     if (plan.kind === 'failure') return plan.result;
     // Operator-locked alias rules apply to the prepared inbound IR before
-    // the attempt runs; the `x-floway-alias` header rides out via
-    // ctx.responseHeaders.
+    // the attempt runs; `x-floway-alias` is staged both via Hono's `c.header`
+    // (survives `streamSSE`'s `c.newResponse`) and on `ctx.responseHeaders`.
     if (plan.candidate.aliasRules) applyAliasRulesToResponses(plan.prepared, plan.candidate.aliasRules);
-    if (plan.candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', plan.candidate.aliasName);
+    if (plan.candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', plan.candidate.aliasName);
     const effectiveSnapshotMode: ResponsesSnapshotMode = snapshotMode !== 'none' && containsCompactionTrigger(plan.prepared.input)
       ? 'replace'
       : snapshotMode;
@@ -73,7 +74,7 @@ export const responsesServe = {
     // applying uniformly keeps the operator's intent expressed at the
     // inbound boundary regardless of which endpoint runs.
     if (plan.candidate.aliasRules) applyAliasRulesToResponses(plan.prepared, plan.candidate.aliasRules);
-    if (plan.candidate.aliasName) ctx.responseHeaders.set('x-floway-alias', plan.candidate.aliasName);
+    if (plan.candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', plan.candidate.aliasName);
     return await responsesAttempt.compact({ payload: plan.prepared, ctx, store, candidate: plan.candidate, headers });
   },
 };
diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
index 07369504d..b42f5bdbf 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
@@ -5,6 +5,7 @@ import { createResponsesHttpStore, MemoryStatefulResponsesBacking, LayeredStatef
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import type { StoredResponsesItem, StoredResponsesSnapshot } from '../../../repo/types.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
@@ -57,6 +58,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   currentColo: 'TEST',
   dump: null,
   backgroundScheduler: () => {},
+  c: stubAuthedContext(),
   responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
diff --git a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
index 0e199e403..ad0ef2169 100644
--- a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
+++ b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
@@ -6,6 +6,14 @@ import { getCurrentColo } from '../../../runtime/runtime-info.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 
 export interface GatewayCtx {
+  // The inbound Hono context. Carried so the serve layer can stage
+  // response headers via `c.header(name, value)` — the Hono-documented
+  // knob that survives `streamSSE`'s internal `c.newResponse` for the
+  // streaming surfaces. For non-streaming surfaces that build the
+  // outgoing Response via the Web `Response.json` static method (which
+  // bypasses Hono's context), the same value also lands on
+  // `responseHeaders` so `finalizeGatewayResponse` can stamp it.
+  readonly c: AuthedContext;
   readonly apiKeyId: string;
   readonly upstreamIds: readonly string[] | null;
   readonly abortSignal?: AbortSignal;
@@ -23,14 +31,14 @@ export interface GatewayCtx {
   readonly currentColo: string;
   // Null when the api key has no retention configured, in which case
   // `finalizeGatewayResponse` short-circuits the dump tee and returns the
-  // response untouched (headers from `responseHeaders` are still applied).
+  // response untouched (entries from `responseHeaders` are still applied).
   readonly dump: DumpAccumulator | null;
-  // Per-request response-header staging. The data-plane writes alias-aware
-  // and similar non-upstream headers here mid-request; the inbound HTTP
-  // wrapper merges them onto the final outgoing Response before
-  // `dump?.finalize`. Mutable on purpose — the serve layer owns the
-  // chosen candidate and is the right seam for stamping the
-  // `x-floway-alias` header.
+  // Per-request response-header staging for the non-streaming and error
+  // paths that build their Response via the Web `Response.json` static method
+  // rather than through Hono's `c.json`/`streamSSE`. The serve layer writes
+  // gateway-stamped headers (e.g. `x-floway-alias`) here in lockstep with
+  // its `ctx.c.header(...)` call; `finalizeGatewayResponse` then merges
+  // them onto the outgoing Response.
   readonly responseHeaders: Headers;
 }
 
@@ -67,6 +75,7 @@ export const createGatewayCtxFromHono = (c: AuthedContext, opts: CreateGatewayCt
   if (opts.model !== undefined) dump?.requestedModel(opts.model);
   const colo = getCurrentColo(c.req.raw);
   return {
+    c,
     apiKeyId: apiKey.id,
     upstreamIds,
     abortSignal: controller?.signal,
@@ -81,10 +90,22 @@ export const createGatewayCtxFromHono = (c: AuthedContext, opts: CreateGatewayCt
   };
 };
 
+// Stage one gateway response header so it lands on the outgoing Response
+// regardless of which builder produced it. Calls Hono's `c.header` (the
+// only knob that survives `streamSSE`'s internal `c.newResponse`) AND
+// stages on the per-ctx `responseHeaders` bag that `finalizeGatewayResponse`
+// merges onto Web-`Response.json`-built non-streaming responses.
+export const stageGatewayResponseHeader = (ctx: GatewayCtx, name: string, value: string): void => {
+  ctx.c.header(name, value);
+  ctx.responseHeaders.set(name, value);
+};
+
 // Apply ctx-stamped response headers onto the outgoing Response and then run
 // the dump-accumulator's finalize tee. Every inbound HTTP wrapper returns its
-// response through this seam so alias and other gateway-stamped headers ride
-// out uniformly across happy-path, error, and passthrough paths.
+// response through this seam so gateway-stamped headers ride out uniformly
+// across happy-path, error, and passthrough paths — including the
+// non-streaming surfaces that build their Response via Web `Response.json`
+// rather than Hono's `c.json`.
 export const finalizeGatewayResponse = (ctx: GatewayCtx, response: Response): Response => {
   for (const [name, value] of ctx.responseHeaders) response.headers.set(name, value);
   return ctx.dump?.finalize(response) ?? response;
diff --git a/packages/gateway/src/data-plane/chat/shared/respond_test.ts b/packages/gateway/src/data-plane/chat/shared/respond_test.ts
index fd506b083..738bb6399 100644
--- a/packages/gateway/src/data-plane/chat/shared/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/respond_test.ts
@@ -4,6 +4,7 @@ import type { GatewayCtx } from './gateway-ctx.ts';
 import { SourceStreamState, recordPerformance, recordUsage } from './respond.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import type { PerformanceTelemetryContext, TelemetryModelIdentity } from '@floway-dev/provider';
 import { assertEquals } from '@floway-dev/test-utils';
 
@@ -45,6 +46,7 @@ const setup = (): Harness => {
       dump: null,
       backgroundScheduler: promise => { background.push(promise); },
       requestStartedAt,
+      c: stubAuthedContext(),
       responseHeaders: new Headers(),
     }),
   };
diff --git a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
index 374cab4fe..d3e9ad8b0 100644
--- a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
@@ -4,6 +4,7 @@ import type { GatewayCtx } from './gateway-ctx.ts';
 import { withUpstreamTelemetry } from './upstream-telemetry.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
+import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesStreamEvent } from '@floway-dev/protocols/messages';
 import type { PerformanceTelemetryContext } from '@floway-dev/provider';
@@ -24,6 +25,7 @@ const baseCtx = (overrides: Partial<GatewayCtx> = {}): GatewayCtx => {
     apiKeyId: 'key_1',
     upstreamIds: null,
     wantsStream: true,
+    c: stubAuthedContext(),
     responseHeaders: new Headers(),
     requestStartedAt: 0,
     runtimeLocation: 'TEST',
diff --git a/packages/gateway/src/data-plane/completions/serve.ts b/packages/gateway/src/data-plane/completions/serve.ts
index 2ab560c4b..d72ed42c2 100644
--- a/packages/gateway/src/data-plane/completions/serve.ts
+++ b/packages/gateway/src/data-plane/completions/serve.ts
@@ -10,7 +10,7 @@ import type { Context } from 'hono';
 
 import { tokenUsageFromCompletionsUsage } from './usage.ts';
 import type { TokenUsage } from '../../repo/types.ts';
-import { createGatewayCtxFromHono } from '../chat/shared/gateway-ctx.ts';
+import { createGatewayCtxFromHono, finalizeGatewayResponse } from '../chat/shared/gateway-ctx.ts';
 import { readRequestBody } from '../chat/shared/request-body.ts';
 import { passthroughApiError, passthroughServe } from '../shared/passthrough-serve.ts';
 import { isOpenAIUsageOnlyEventShape, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -65,8 +65,7 @@ export const completions = async (c: Context): Promise<Response> => {
   });
   if (request.type === 'invalid') {
     ctx.dump?.error('gateway');
-    const response = passthroughApiError(c, request.message, 400);
-    return (ctx.dump?.finalize(response) ?? response);
+    return finalizeGatewayResponse(ctx, passthroughApiError(c, request.message, 400));
   }
 
   ctx.dump?.requestedModel(request.model);
@@ -115,5 +114,5 @@ export const completions = async (c: Context): Promise<Response> => {
           },
         },
   });
-  return (ctx.dump?.finalize(response) ?? response);
+  return finalizeGatewayResponse(ctx, response);
 };
diff --git a/packages/gateway/src/data-plane/completions/serve_test.ts b/packages/gateway/src/data-plane/completions/serve_test.ts
index 1cd2263bb..1dd90c6ed 100644
--- a/packages/gateway/src/data-plane/completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/completions/serve_test.ts
@@ -2,6 +2,7 @@ import { test } from 'vitest';
 
 import { initDumpBroker, initDumpStore } from '../../dump/registry.ts';
 import { installDumpStubs } from '../../dump/test-fixtures.ts';
+import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
 import { buildCustomUpstreamRecord, flushAsyncWork, requestApp, setupAppTest } from '../../test-helpers.ts';
 import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
 import { assertEquals, assertExists, jsonResponse, withMockedFetch } from '@floway-dev/test-utils';
@@ -397,3 +398,49 @@ test('/v1/completions streaming records usage row, request_total+upstream_succes
     assertEquals(frames[3]?.type, 'done');
   }
 });
+
+// Alias header coverage for /v1/completions: the matched alias name rides
+// out on `x-floway-alias`. Non-streaming path uses passthrough's `json`
+// branch; the streaming path stamps the same header via Hono's `c.header`
+// before `streamSSE` builds the response.
+test('/v1/completions stamps x-floway-alias when the request hits an aliased model', async () => {
+  const { apiKey, repo } = await setupAppTest();
+  await registerCompletionsUpstream(repo);
+  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+    {
+      alias: 'completions-alias',
+      targetModelId: 'davinci-002',
+      upstreamIds: [],
+      rules: {},
+      visibleInModelsList: true,
+      onConflict: 'real-only',
+      createdAt: 0,
+    },
+  ]);
+
+  await withMockedFetch(
+    request => {
+      const url = new URL(request.url);
+      if (url.hostname === 'passthrough.example.com' && url.pathname === '/v1/completions') {
+        return jsonResponse({
+          id: 'cmpl_resp',
+          object: 'text_completion',
+          created: 1,
+          model: 'davinci-002',
+          choices: [{ index: 0, text: ' world', finish_reason: 'stop' }],
+          usage: { prompt_tokens: 5, completion_tokens: 1, total_tokens: 6 },
+        });
+      }
+      throw new Error(`Unhandled fetch ${request.url}`);
+    },
+    async () => {
+      const response = await requestApp('/v1/completions', {
+        method: 'POST',
+        headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
+        body: JSON.stringify({ model: 'completions-alias', prompt: 'hello' }),
+      });
+      assertEquals(response.status, 200);
+      assertEquals(response.headers.get('x-floway-alias'), 'completions-alias');
+    },
+  );
+});
diff --git a/packages/gateway/src/data-plane/embeddings/serve.ts b/packages/gateway/src/data-plane/embeddings/serve.ts
index 6262546e5..9c33e6736 100644
--- a/packages/gateway/src/data-plane/embeddings/serve.ts
+++ b/packages/gateway/src/data-plane/embeddings/serve.ts
@@ -3,7 +3,7 @@
 
 import type { Context } from 'hono';
 
-import { createGatewayCtxFromHono } from '../chat/shared/gateway-ctx.ts';
+import { createGatewayCtxFromHono, finalizeGatewayResponse } from '../chat/shared/gateway-ctx.ts';
 import { readRequestBody } from '../chat/shared/request-body.ts';
 import { passthroughApiError, passthroughServe } from '../shared/passthrough-serve.ts';
 import { tokenUsageFromEmbeddingsBody } from '../shared/telemetry/usage.ts';
@@ -49,8 +49,7 @@ export const embeddings = async (c: Context): Promise<Response> => {
   const request = prepareEmbeddingsRequest(requestBody.bytes);
   if (request.type === 'invalid') {
     ctx.dump?.error('gateway');
-    const response = passthroughApiError(c, request.message, 400);
-    return (ctx.dump?.finalize(response) ?? response);
+    return finalizeGatewayResponse(ctx, passthroughApiError(c, request.message, 400));
   }
 
   ctx.dump?.requestedModel(request.model);
@@ -66,5 +65,5 @@ export const embeddings = async (c: Context): Promise<Response> => {
     },
     response: { format: 'json', extractBilling: tokenUsageFromEmbeddingsBody },
   });
-  return (ctx.dump?.finalize(response) ?? response);
+  return finalizeGatewayResponse(ctx, response);
 };
diff --git a/packages/gateway/src/data-plane/embeddings/serve_test.ts b/packages/gateway/src/data-plane/embeddings/serve_test.ts
index bf86dc9a7..c6c44f61b 100644
--- a/packages/gateway/src/data-plane/embeddings/serve_test.ts
+++ b/packages/gateway/src/data-plane/embeddings/serve_test.ts
@@ -1,5 +1,6 @@
 import { test } from 'vitest';
 
+import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
 import { buildCustomUpstreamRecord, copilotModels, flushAsyncWork, requestApp, setupAppTest } from '../../test-helpers.ts';
 import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
 import { jsonResponse, withMockedFetch, assertEquals, assertExists } from '@floway-dev/test-utils';
@@ -495,3 +496,58 @@ test('/v1/embeddings rejects malformed body at the provider-independent boundary
     },
   );
 });
+
+// Critical alias header coverage for the passthrough surface: the matched
+// alias name must ride out on `x-floway-alias` so downstream observers can
+// tell a real-model hit from an alias-routed one. Goes through Hono's
+// `c.header` in `passthroughServe`, mirroring the chat path.
+test('/v1/embeddings stamps x-floway-alias when the request hits an aliased model', async () => {
+  const { apiKey, repo } = await setupAppTest();
+  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+    {
+      alias: 'embed-alias',
+      targetModelId: 'text-embedding-real',
+      upstreamIds: [],
+      rules: {},
+      visibleInModelsList: true,
+      onConflict: 'real-only',
+      createdAt: 0,
+    },
+  ]);
+
+  await withMockedFetch(
+    request => {
+      const url = new URL(request.url);
+      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (url.pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({
+          token: 'copilot-access-token',
+          expires_at: 4102444800,
+          refresh_in: 3600,
+          endpoints: { api: 'https://api.individual.githubcopilot.com' },
+        });
+      }
+      if (url.pathname === '/models') {
+        return jsonResponse(copilotModels([{ id: 'text-embedding-real', supported_endpoints: ['/embeddings'] }]));
+      }
+      if (url.pathname === '/embeddings') {
+        return jsonResponse({
+          object: 'list',
+          model: 'text-embedding-real',
+          data: [{ object: 'embedding', index: 0, embedding: [0.1] }],
+          usage: { prompt_tokens: 1, total_tokens: 1 },
+        });
+      }
+      throw new Error(`Unhandled fetch ${request.url}`);
+    },
+    async () => {
+      const response = await requestApp('/v1/embeddings', {
+        method: 'POST',
+        headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
+        body: JSON.stringify({ model: 'embed-alias', input: 'hello' }),
+      });
+      assertEquals(response.status, 200);
+      assertEquals(response.headers.get('x-floway-alias'), 'embed-alias');
+    },
+  );
+});
diff --git a/packages/gateway/src/data-plane/images/serve.ts b/packages/gateway/src/data-plane/images/serve.ts
index 405b29b03..58f8a7a25 100644
--- a/packages/gateway/src/data-plane/images/serve.ts
+++ b/packages/gateway/src/data-plane/images/serve.ts
@@ -10,7 +10,7 @@
 
 import type { Context } from 'hono';
 
-import { createGatewayCtxFromHono } from '../chat/shared/gateway-ctx.ts';
+import { createGatewayCtxFromHono, finalizeGatewayResponse } from '../chat/shared/gateway-ctx.ts';
 import { readRequestBody } from '../chat/shared/request-body.ts';
 import { passthroughApiError, passthroughServe } from '../shared/passthrough-serve.ts';
 import { tokenUsageFromImagesBody } from '../shared/telemetry/usage.ts';
@@ -48,8 +48,7 @@ export const imagesGenerations = async (c: Context): Promise<Response> => {
   const request = prepareImagesGenerationsRequest(requestBody.bytes);
   if (request.type === 'invalid') {
     ctx.dump?.error('gateway');
-    const response = passthroughApiError(c, request.message, 400);
-    return (ctx.dump?.finalize(response) ?? response);
+    return finalizeGatewayResponse(ctx, passthroughApiError(c, request.message, 400));
   }
 
   ctx.dump?.requestedModel(request.model);
@@ -65,7 +64,7 @@ export const imagesGenerations = async (c: Context): Promise<Response> => {
     },
     response: { format: 'json', extractBilling: tokenUsageFromImagesBody },
   });
-  return (ctx.dump?.finalize(response) ?? response);
+  return finalizeGatewayResponse(ctx, response);
 };
 
 export const imagesEdits = async (c: Context): Promise<Response> => {
@@ -82,15 +81,13 @@ export const imagesEdits = async (c: Context): Promise<Response> => {
     // parser's error text. The wording is enough for a client to know
     // they sent the wrong content type or a malformed body.
     ctx.dump?.error('gateway');
-    const response = passthroughApiError(c, 'Image edits request body must be a valid multipart/form-data payload.', 400);
-    return (ctx.dump?.finalize(response) ?? response);
+    return finalizeGatewayResponse(ctx, passthroughApiError(c, 'Image edits request body must be a valid multipart/form-data payload.', 400));
   }
 
   const modelRaw = form.get('model');
   if (typeof modelRaw !== 'string' || modelRaw.length === 0) {
     ctx.dump?.error('gateway');
-    const response = passthroughApiError(c, 'Image edits request body must include a model field.', 400);
-    return (ctx.dump?.finalize(response) ?? response);
+    return finalizeGatewayResponse(ctx, passthroughApiError(c, 'Image edits request body must include a model field.', 400));
   }
 
   ctx.dump?.requestedModel(modelRaw);
@@ -115,5 +112,5 @@ export const imagesEdits = async (c: Context): Promise<Response> => {
     },
     response: { format: 'json', extractBilling: tokenUsageFromImagesBody },
   });
-  return (ctx.dump?.finalize(response) ?? response);
+  return finalizeGatewayResponse(ctx, response);
 };
diff --git a/packages/gateway/src/data-plane/images/serve_test.ts b/packages/gateway/src/data-plane/images/serve_test.ts
index 85b5f1adf..f241ad89d 100644
--- a/packages/gateway/src/data-plane/images/serve_test.ts
+++ b/packages/gateway/src/data-plane/images/serve_test.ts
@@ -1,5 +1,6 @@
 import { test } from 'vitest';
 
+import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
 import { buildCustomUpstreamRecord, copilotModels, flushAsyncWork, requestApp, setupAppTest } from '../../test-helpers.ts';
 import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
 import { jsonResponse, withMockedFetch, assertEquals, assertExists } from '@floway-dev/test-utils';
@@ -233,3 +234,62 @@ test('/v1/images/edits forwards a multipart request through an Azure model and r
   const usageRows = await repo.usage.listAll();
   assertEquals(usageRows.some(row => row.model === 'gpt-image-2' && row.tokens.input === 7 && row.tokens.output === 11), true);
 });
+
+// Alias header coverage for /v1/images/generations: an alias whose target is
+// an image-generation model must surface its name on `x-floway-alias` for
+// downstream observability.
+test('/v1/images/generations stamps x-floway-alias when the request hits an aliased model', async () => {
+  const { apiKey, repo } = await setupAppTest();
+  clearInProcessCopilotTokenCache();
+  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+    {
+      alias: 'image-alias',
+      targetModelId: 'gpt-image-2',
+      upstreamIds: [],
+      rules: {},
+      visibleInModelsList: true,
+      onConflict: 'real-only',
+      createdAt: 0,
+    },
+  ]);
+  await repo.upstreams.save(buildCustomUpstreamRecord({
+    id: 'up_images',
+    name: 'Custom Image Provider',
+    sortOrder: 100,
+    config: {
+      baseUrl: 'https://images.example.com',
+      authStyle: 'bearer',
+      apiKey: 'sk-images',
+      endpoints: {},
+    },
+  }));
+
+  await withMockedFetch(
+    request => {
+      const url = new URL(request.url);
+      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (url.pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+      }
+      if (url.hostname === 'api.individual.githubcopilot.com' && url.pathname === '/models') {
+        return jsonResponse(copilotModels([{ id: 'copilot-chat', supported_endpoints: ['/chat/completions'] }]));
+      }
+      if (url.hostname === 'images.example.com' && url.pathname === '/v1/models') {
+        return jsonResponse({ data: [{ id: 'gpt-image-2' }] });
+      }
+      if (url.hostname === 'images.example.com' && url.pathname === '/v1/images/generations') {
+        return jsonResponse({ data: [{ b64_json: 'aGVsbG8=' }] });
+      }
+      throw new Error(`Unhandled fetch ${request.url}`);
+    },
+    async () => {
+      const response = await requestApp('/v1/images/generations', {
+        method: 'POST',
+        headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
+        body: JSON.stringify({ model: 'image-alias', prompt: 'hi' }),
+      });
+      assertEquals(response.status, 200);
+      assertEquals(response.headers.get('x-floway-alias'), 'image-alias');
+    },
+  );
+});
diff --git a/packages/gateway/src/data-plane/models/gemini_test.ts b/packages/gateway/src/data-plane/models/gemini_test.ts
index 7f4cedaec..0327bae33 100644
--- a/packages/gateway/src/data-plane/models/gemini_test.ts
+++ b/packages/gateway/src/data-plane/models/gemini_test.ts
@@ -1,5 +1,6 @@
 import { test } from 'vitest';
 
+import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
 import { buildCustomUpstreamRecord, copilotModels, requestApp, setupAppTest } from '../../test-helpers.ts';
 import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
 import { jsonResponse, withMockedFetch, assertEquals } from '@floway-dev/test-utils';
@@ -408,3 +409,48 @@ test('/v1beta/models hides malformed upstream response bodies', async () => {
     },
   );
 });
+
+// Gemini's `Model` resource is closed (no `aliasedFrom` extension), so the
+// `/v1beta/models` surface advertises an alias entry as a synthetic Gemini
+// model carrying the alias id and the target's display fields. This test
+// guards the synthetic shape — name, displayName, supportedGenerationMethods
+// — so a future refactor of `loadGeminiModels` cannot silently drop the
+// alias entries.
+test('/v1beta/models appends visible aliases as synthetic Gemini model entries', async () => {
+  const { repo, apiKey } = await setupAppTest();
+
+  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+    {
+      alias: 'codex-auto-review',
+      targetModelId: 'gpt-gemini-list',
+      upstreamIds: [],
+      rules: { reasoning: { effort: 'low' } },
+      visibleInModelsList: true,
+      onConflict: 'real-only',
+      createdAt: 1_700_000_000,
+    },
+  ]);
+
+  await withMockedFetch(
+    request => {
+      const url = new URL(request.url);
+      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (url.pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+      }
+      if (url.pathname === '/models') {
+        return jsonResponse(copilotModels([{ id: 'gpt-gemini-list', display_name: 'GPT Gemini List' }]));
+      }
+      throw new Error(`Unhandled fetch ${request.url}`);
+    },
+    async () => {
+      const response = await requestApp('/v1beta/models', { headers: { 'x-api-key': apiKey.key } });
+      assertEquals(response.status, 200);
+      const body = await response.json() as { models: Array<{ name: string; displayName: string; supportedGenerationMethods: string[] }> };
+      const aliasEntry = body.models.find(m => m.name === 'models/codex-auto-review');
+      if (!aliasEntry) throw new Error('expected codex-auto-review alias entry');
+      assertEquals(aliasEntry.displayName, 'codex-auto-review');
+      assertEquals(aliasEntry.supportedGenerationMethods, ['generateContent', 'streamGenerateContent', 'countTokens']);
+    },
+  );
+});
diff --git a/packages/gateway/src/data-plane/shared/passthrough-serve.ts b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
index 6c917fb86..b566b582d 100644
--- a/packages/gateway/src/data-plane/shared/passthrough-serve.ts
+++ b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
@@ -24,6 +24,7 @@ import type { AuthedContext } from '../../middleware/auth.ts';
 import { getRepo } from '../../repo/index.ts';
 import type { TokenUsage } from '../../repo/types.ts';
 import type { GatewayCtx } from '../chat/shared/gateway-ctx.ts';
+import { stageGatewayResponseHeader } from '../chat/shared/gateway-ctx.ts';
 import { type StreamCompletion, writeSSEFrames } from '../chat/shared/stream/sse.ts';
 import { resolveModelForRequest } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
@@ -155,8 +156,10 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
     // rules themselves never apply here — the inbound payload (embeddings,
     // images, /v1/completions) has no protocol-extension slots for the rule
     // knobs. We still surface the matched alias name on the
-    // `x-floway-alias` response header and trace one log line per dropped
-    // rule so an operator can confirm the rewrite ran.
+    // `x-floway-alias` response header (staged via Hono's `c.header` so it
+    // survives `streamSSE`'s internal `c.newResponse` on the streaming
+    // `/v1/completions` path) and trace one log line per dropped rule so an
+    // operator can confirm the rewrite ran.
     const aliases = await getRepo().modelAliases.loadAll();
     // Each match is one (upstream, upstream-catalog id) pair that interprets
     // the inbound public id. Iteration order follows configured sort_order
@@ -172,7 +175,7 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
     for (const match of matches) {
       if (!bindingServesEndpoint(match.binding)) continue;
       if (match.aliasName !== undefined) {
-        ctx.responseHeaders.set('x-floway-alias', match.aliasName);
+        stageGatewayResponseHeader(ctx, 'x-floway-alias', match.aliasName);
         traceDroppedAliasRulesForPassthrough(match.aliasName, aliases, sourceApi);
       }
 
diff --git a/packages/gateway/src/test-helpers/gateway-ctx.ts b/packages/gateway/src/test-helpers/gateway-ctx.ts
index 3ebf1f474..047981ed1 100644
--- a/packages/gateway/src/test-helpers/gateway-ctx.ts
+++ b/packages/gateway/src/test-helpers/gateway-ctx.ts
@@ -1,4 +1,13 @@
 import type { GatewayCtx } from '../data-plane/chat/shared/gateway-ctx.ts';
+import type { AuthedContext } from '../middleware/auth.ts';
+
+// Minimal stub for the Hono `c` carried on `GatewayCtx`. Only `c.header`
+// is touched by the serve layer (to stamp `x-floway-alias`); unit tests
+// that don't exercise the alias branch never call it. Integration tests
+// that need real Hono behavior build the ctx via `createGatewayCtxFromHono`
+// against a real `makeApp()` request rather than going through this stub.
+export const stubAuthedContext = (): AuthedContext =>
+  ({ header: () => {} } as unknown as AuthedContext);
 
 // Shared minimal GatewayCtx for tests that exercise serve / respond /
 // interceptor code in isolation. Defaults satisfy every required field; pass
@@ -7,6 +16,7 @@ import type { GatewayCtx } from '../data-plane/chat/shared/gateway-ctx.ts';
 // construct one and spread `{ abortSignal: controller.signal,
 // downstreamAbortController: controller }` into the overrides.
 export const mockGatewayCtx = (overrides: Partial<GatewayCtx> = {}): GatewayCtx => ({
+  c: stubAuthedContext(),
   apiKeyId: 'key_test',
   upstreamIds: null,
   wantsStream: false,

From 17a7877c5fdcda16064bf9d972675583b5c69acb Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 01:38:34 +0800
Subject: [PATCH 008/170] revert(translate): restore pre-extension native field
 translation on *-via-messages
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Task 3 (`e1891e1d`) added synthesis of `thinking.display` from Responses-native
`reasoning.summary` and Gemini-native `thinkingConfig.includeThoughts`, plus a
new native-to-native `service_tier` carry on Responses → Messages. These are
NATIVE fields whose translation behavior the existing translation pairs had
already settled; the alias work should not have reshaped that contract.

Revert the native-field paths in:

- responses-via-messages: drop `reasoning.summary` → `thinking.display` and
  `service_tier` → `service_tier` propagation. Keep the new extension-field
  carries (`thinking_budget`, `adaptive_thinking`, `anthropic_speed`).
- gemini-via-messages: drop `thinkingConfig.includeThoughts` →
  `thinking.display` propagation. Keep `generationConfig.serviceTier`,
  `verbosity`, and top-level `anthropicSpeed` extension carries.

Tests that asserted the new native-field synthesis are removed; the existing
extension-field tests stay untouched.
---
 .../src/gemini-via-messages/request.ts        |  8 ------
 .../src/gemini-via-messages/request_test.ts   | 16 -----------
 .../src/responses-via-messages/request.ts     | 28 ++++++-------------
 .../responses-via-messages/request_test.ts    | 23 ---------------
 4 files changed, 8 insertions(+), 67 deletions(-)

diff --git a/packages/translate/src/gemini-via-messages/request.ts b/packages/translate/src/gemini-via-messages/request.ts
index 91fbe0493..c109da54d 100644
--- a/packages/translate/src/gemini-via-messages/request.ts
+++ b/packages/translate/src/gemini-via-messages/request.ts
@@ -161,14 +161,6 @@ const applyThinkingConfig = (request: MessagesPayload, thinkingConfig?: GeminiTh
     }
   }
 
-  // `includeThoughts` materializes onto `thinking.display`: true → summarized
-  // (Anthropic redacts to a single-block summary), false → omitted (no
-  // thinking surface at all). Skip when the source did not express either.
-  if (thinkingConfig.includeThoughts !== undefined && request.thinking?.type !== 'disabled') {
-    const display = thinkingConfig.includeThoughts === true ? ('summarized' as const) : ('omitted' as const);
-    request.thinking = request.thinking ? { ...request.thinking, display } : { type: 'enabled', display };
-  }
-
   const effort = geminiThinkingLevelEffort(thinkingConfig);
   // Spread to merge with any output_config fields a sibling helper has
   // already written (e.g. structured-output `format` from
diff --git a/packages/translate/src/gemini-via-messages/request_test.ts b/packages/translate/src/gemini-via-messages/request_test.ts
index b10339a49..555c12aa4 100644
--- a/packages/translate/src/gemini-via-messages/request_test.ts
+++ b/packages/translate/src/gemini-via-messages/request_test.ts
@@ -428,22 +428,6 @@ test('buildTargetRequest emits generationConfig.serviceTier onto Messages servic
   assertEquals(result.service_tier, 'priority');
 });
 
-test('buildTargetRequest maps includeThoughts onto thinking.display (true → summarized, false → omitted)', () => {
-  const summarized = buildTargetRequest(
-    { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { includeThoughts: true } } },
-    'claude-test',
-    noOptions,
-  );
-  const omitted = buildTargetRequest(
-    { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { includeThoughts: false } } },
-    'claude-test',
-    noOptions,
-  );
-
-  assertEquals(summarized.thinking, { type: 'enabled', display: 'summarized' });
-  assertEquals(omitted.thinking, { type: 'enabled', display: 'omitted' });
-});
-
 test('buildTargetRequest drops verbosity extension on Messages (no slot)', () => {
   const result = buildTargetRequest(
     { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { verbosity: 'low' } },
diff --git a/packages/translate/src/responses-via-messages/request.ts b/packages/translate/src/responses-via-messages/request.ts
index 504ca45fa..f16acb936 100644
--- a/packages/translate/src/responses-via-messages/request.ts
+++ b/packages/translate/src/responses-via-messages/request.ts
@@ -1,7 +1,7 @@
 import { parseToolArgumentsObject } from '../shared/messages/tool-arguments.ts';
 import { responsesReasoningToMessagesUpstreamBlock } from '../shared/messages-and-responses/reasoning.ts';
 import { buildCustomToolInputSchema } from '../shared/responses-via/custom-tool-wrap.ts';
-import { buildMessagesThinkingFromExtensions, mapSummaryToAnthropicDisplay } from '../shared/via-messages/anthropic-extensions.ts';
+import { buildMessagesThinkingFromExtensions } from '../shared/via-messages/anthropic-extensions.ts';
 import { applyLastMessageCacheBreakpoint, applyLastToolCacheBreakpoint, EPHEMERAL_CACHE_CONTROL } from '../shared/via-messages/cache-breakpoints.ts';
 import { fetchRemoteImage, type RemoteImageLoader, resolveImageUrlToMessagesImage } from '../shared/via-messages/remote-images.ts';
 import {
@@ -332,29 +332,18 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op
   if (formatSchema) outputConfig.format = { type: 'json_schema', schema: formatSchema };
   const hasOutputConfig = Object.keys(outputConfig).length > 0;
 
-  // Native Responses → Messages: `reasoning.summary` materializes onto the
-  // Messages-native `thinking.display`. Extension-driven thinking
-  // (`thinking_budget`, `adaptive_thinking`) takes precedence over the
-  // summary-only fallback because the alias write-side validator pins
-  // facets one-at-a-time; when neither extension is set and summary is the
-  // only signal, we synthesize `thinking.{type:'enabled', display}` so the
-  // display reaches the wire.
+  // Extension-driven thinking (`thinking_budget`, `adaptive_thinking`) wins
+  // over the native `effort === 'none'` disable, so the alias write-side
+  // facets that target the structured thinking slot survive the legacy
+  // disable shortcut. Native `reasoning.summary` and `service_tier` do not
+  // surface onto Messages — the Responses-native vocabulary keeps its
+  // pre-existing translation contract and rides the upstream sanitizer.
   const extensionThinking = buildMessagesThinkingFromExtensions({
     thinkingBudget: payload.thinking_budget,
     adaptiveThinking: payload.adaptive_thinking,
   });
   const disabledThinking = effort === 'none' ? { type: 'disabled' as const } : undefined;
-  const summaryDisplay = payload.reasoning?.summary !== undefined ? mapSummaryToAnthropicDisplay(payload.reasoning.summary) : undefined;
-  const fallbackDisplayThinking =
-    !extensionThinking && !disabledThinking && summaryDisplay !== undefined
-      ? { type: 'enabled' as const, display: summaryDisplay as NonNullable<MessagesPayload['thinking']>['display'] }
-      : undefined;
-  const thinkingFromExtensions = extensionThinking
-    ? summaryDisplay !== undefined
-      ? { ...extensionThinking, display: summaryDisplay as NonNullable<MessagesPayload['thinking']>['display'] }
-      : extensionThinking
-    : undefined;
-  const thinking = thinkingFromExtensions ?? disabledThinking ?? fallbackDisplayThinking;
+  const thinking = extensionThinking ?? disabledThinking;
 
   // Responses `metadata` is intentionally omitted on the Messages path;
   // not coerced into Anthropic metadata.user_id, prompt-cache, or safety
@@ -372,7 +361,6 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op
     ...(thinking ? { thinking } : {}),
     ...(hasOutputConfig ? { output_config: outputConfig } : {}),
     ...(payload.anthropic_speed != null ? { speed: payload.anthropic_speed } : {}),
-    ...(payload.service_tier != null ? { service_tier: payload.service_tier } : {}),
   };
 
   return { target, customToolNames };
diff --git a/packages/translate/src/responses-via-messages/request_test.ts b/packages/translate/src/responses-via-messages/request_test.ts
index f36ff89f6..57951048c 100644
--- a/packages/translate/src/responses-via-messages/request_test.ts
+++ b/packages/translate/src/responses-via-messages/request_test.ts
@@ -664,35 +664,12 @@ test('translateResponsesToMessages emits adaptive_thinking onto thinking.{adapti
   assertEquals(result.target.thinking, { type: 'adaptive' });
 });
 
-test('translateResponsesToMessages maps reasoning.summary onto thinking.display (concise|detailed → summarized, omitted → omitted)', async () => {
-  const concise = await translateResponsesToMessages(minimalResponsesPayload({ reasoning: { effort: 'high', summary: 'concise' } }));
-  const detailed = await translateResponsesToMessages(minimalResponsesPayload({ reasoning: { effort: 'high', summary: 'detailed' } }));
-  const omitted = await translateResponsesToMessages(minimalResponsesPayload({ reasoning: { effort: 'high', summary: 'omitted' } }));
-
-  assertEquals(concise.target.thinking, { type: 'enabled', display: 'summarized' });
-  assertEquals(detailed.target.thinking, { type: 'enabled', display: 'summarized' });
-  assertEquals(omitted.target.thinking, { type: 'enabled', display: 'omitted' });
-});
-
 test('translateResponsesToMessages emits anthropic_speed onto speed', async () => {
   const result = await translateResponsesToMessages(minimalResponsesPayload({ anthropic_speed: 'fast' }));
   assertEquals(result.target.speed, 'fast');
 });
 
-test('translateResponsesToMessages forwards service_tier verbatim', async () => {
-  const result = await translateResponsesToMessages(minimalResponsesPayload({ service_tier: 'priority' }));
-  assertEquals(result.target.service_tier, 'priority');
-});
-
 test('translateResponsesToMessages leaves anthropic_beta as inbound residue for the gateway header pass', async () => {
   const result = await translateResponsesToMessages(minimalResponsesPayload({ anthropic_beta: ['fast-mode-2026-02-01'] }));
   assertEquals('anthropic_beta' in result.target, false);
 });
-
-test('translateResponsesToMessages emission stack: budget + summary writes display onto the budget-driven block', async () => {
-  const result = await translateResponsesToMessages(minimalResponsesPayload({
-    thinking_budget: 2048,
-    reasoning: { effort: 'medium', summary: 'concise' },
-  }));
-  assertEquals(result.target.thinking, { type: 'enabled', budget_tokens: 2048, display: 'summarized' });
-});

From 6d13258431e2f2bb5508739f44d1082a38642e02 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 01:38:52 +0800
Subject: [PATCH 009/170] feat(aliases): drop responses entry shim; enumerate
 aliases per upstream + form
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two follow-up changes to the alias data-plane:

1. Remove the `/v1/responses` entry-level `codex-auto-review → gpt-5.4`
   rewrite shim. The seed alias in `0046_model_aliases.sql` now routes
   `codex-auto-review` everywhere through the normal matcher. On a Codex
   upstream that exposes a real `codex-auto-review`, `on_conflict=real-only`
   lets the real id win — Codex CLI callers wanting the previous shim
   behaviour must set `effort=low` themselves or pick a different
   `onConflict`. All other inbound surfaces are unchanged.

2. List aliases per-upstream and per-addressable-form in `/v1/models` and the
   Gemini `/v1beta/models` listing, instead of one synthetic entry per alias.
   Each visible alias now emits one entry per (provider, listed form) pair
   whose raw catalog can resolve the target, so dual-listed upstreams emit
   both `codex-auto-review` and `<prefix>/codex-auto-review`. Aliases whose
   target is not reachable from any upstream produce zero entries; the
   previous "no silent hide" rule no longer fits a per-upstream model.

   A new `display_name` column on `model_aliases` (migration `0047`) carries
   an operator-set label; the listing composes it as `${upstream}: ${alias
   displayName}` when set, or `${upstream}: ${target displayName}${rules
   summary}` otherwise. The rules-summary formatter and display-name
   composer live in `control-plane/model-aliases/display.ts` and are
   covered by unit tests.

   The shared per-upstream alias emission helper sits in
   `data-plane/models/alias-listing.ts` and is reused by both the OpenAI and
   Gemini listings. `getModelsForListing` exposes the per-upstream raw
   catalog alongside the merged public model list so we collect catalogs
   once per request even when many aliases need them.
---
 .../0047_model_aliases_display_name.sql       |   3 +
 .../control-plane/model-aliases/display.ts    |  39 ++++
 .../model-aliases/display_test.ts             |  75 +++++++
 .../src/control-plane/model-aliases/repo.ts   |   4 +-
 .../control-plane/model-aliases/repo_test.ts  |  23 +++
 .../src/control-plane/model-aliases/types.ts  |   5 +
 .../src/data-plane/chat/responses/http.ts     |  41 +---
 .../data-plane/chat/responses/http_test.ts    |  60 ------
 .../src/data-plane/models/alias-listing.ts    |  52 +++++
 .../gateway/src/data-plane/models/gemini.ts   |  39 ++--
 .../src/data-plane/models/gemini_test.ts      |   2 +-
 .../gateway/src/data-plane/models/load.ts     |  51 +++--
 .../src/data-plane/models/serve_test.ts       | 184 +++++++++++++++++-
 .../src/data-plane/providers/registry.ts      |  37 +++-
 14 files changed, 472 insertions(+), 143 deletions(-)
 create mode 100644 packages/gateway/migrations/0047_model_aliases_display_name.sql
 create mode 100644 packages/gateway/src/control-plane/model-aliases/display.ts
 create mode 100644 packages/gateway/src/control-plane/model-aliases/display_test.ts
 create mode 100644 packages/gateway/src/data-plane/models/alias-listing.ts

diff --git a/packages/gateway/migrations/0047_model_aliases_display_name.sql b/packages/gateway/migrations/0047_model_aliases_display_name.sql
new file mode 100644
index 000000000..9d21ed9a1
--- /dev/null
+++ b/packages/gateway/migrations/0047_model_aliases_display_name.sql
@@ -0,0 +1,3 @@
+ALTER TABLE model_aliases ADD COLUMN display_name TEXT;
+
+UPDATE model_aliases SET display_name = 'Codex Auto Review' WHERE alias = 'codex-auto-review';
diff --git a/packages/gateway/src/control-plane/model-aliases/display.ts b/packages/gateway/src/control-plane/model-aliases/display.ts
new file mode 100644
index 000000000..576c823a8
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/display.ts
@@ -0,0 +1,39 @@
+import type { ModelAliasRules } from './types.ts';
+
+// Render the rule set as a parenthesized, comma-joined string so the
+// `/v1/models` listing can suffix it onto the target model's display name when
+// the operator did not supply an explicit alias `displayName`. Empty rules
+// produce an empty string (no parentheses); the join order is fixed across
+// fields so a given rule set always renders the same way.
+//
+// `anthropicBeta` is sorted at format time so two operators carrying the same
+// token set in different orders see the same label.
+export const formatAliasRulesSummary = (rules: ModelAliasRules): string => {
+  const parts: string[] = [];
+  if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`);
+  if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`);
+  if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning');
+  if (rules.reasoning?.summary !== undefined) parts.push(`${rules.reasoning.summary} summary`);
+  if (rules.verbosity !== undefined) parts.push(`${rules.verbosity} verbosity`);
+  if (rules.serviceTier !== undefined) parts.push(`${rules.serviceTier} tier`);
+  if (rules.anthropicSpeed !== undefined) parts.push(`${rules.anthropicSpeed} speed`);
+  if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
+    parts.push(rules.anthropicBeta.toSorted().join('/'));
+  }
+  return parts.length > 0 ? ` (${parts.join(', ')})` : '';
+};
+
+// Compose the final per-entry display name shown in `/v1/models`. The
+// upstream name always leads so an operator scanning the listing sees which
+// upstream each row belongs to before reading the alias-specific part.
+export const composeAliasDisplayName = (input: {
+  upstreamDisplayName: string;
+  aliasDisplayName?: string;
+  targetDisplayName: string;
+  rules: ModelAliasRules;
+}): string => {
+  if (input.aliasDisplayName !== undefined) {
+    return `${input.upstreamDisplayName}: ${input.aliasDisplayName}`;
+  }
+  return `${input.upstreamDisplayName}: ${input.targetDisplayName}${formatAliasRulesSummary(input.rules)}`;
+};
diff --git a/packages/gateway/src/control-plane/model-aliases/display_test.ts b/packages/gateway/src/control-plane/model-aliases/display_test.ts
new file mode 100644
index 000000000..7ba7700d0
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/display_test.ts
@@ -0,0 +1,75 @@
+import { describe, expect, test } from 'vitest';
+
+import { composeAliasDisplayName, formatAliasRulesSummary } from './display.ts';
+
+describe('formatAliasRulesSummary', () => {
+  test('returns empty string when no rules are set', () => {
+    expect(formatAliasRulesSummary({})).toBe('');
+  });
+
+  test('formats each rule field with its canonical suffix', () => {
+    expect(formatAliasRulesSummary({ reasoning: { effort: 'high' } })).toBe(' (high effort)');
+    expect(formatAliasRulesSummary({ reasoning: { budgetTokens: 4096 } })).toBe(' (4096tk reasoning)');
+    expect(formatAliasRulesSummary({ reasoning: { adaptive: true } })).toBe(' (adaptive reasoning)');
+    expect(formatAliasRulesSummary({ reasoning: { summary: 'detailed' } })).toBe(' (detailed summary)');
+    expect(formatAliasRulesSummary({ verbosity: 'low' })).toBe(' (low verbosity)');
+    expect(formatAliasRulesSummary({ serviceTier: 'priority' })).toBe(' (priority tier)');
+    expect(formatAliasRulesSummary({ anthropicSpeed: 'fast' })).toBe(' (fast speed)');
+  });
+
+  test('sorts anthropicBeta tokens and joins with slashes', () => {
+    expect(formatAliasRulesSummary({ anthropicBeta: ['extended-thinking', 'fast-mode-2026-02-01'] })).toBe(
+      ' (extended-thinking/fast-mode-2026-02-01)',
+    );
+    expect(formatAliasRulesSummary({ anthropicBeta: ['fast-mode-2026-02-01', 'extended-thinking'] })).toBe(
+      ' (extended-thinking/fast-mode-2026-02-01)',
+    );
+  });
+
+  test('drops anthropicBeta when the token list is empty', () => {
+    expect(formatAliasRulesSummary({ anthropicBeta: [] })).toBe('');
+  });
+
+  test('joins multiple fields with comma in deterministic order', () => {
+    expect(
+      formatAliasRulesSummary({
+        reasoning: { effort: 'low', summary: 'concise' },
+        verbosity: 'high',
+        anthropicSpeed: 'fast',
+      }),
+    ).toBe(' (low effort, concise summary, high verbosity, fast speed)');
+  });
+});
+
+describe('composeAliasDisplayName', () => {
+  test('uses alias displayName when set, suppressing the rules summary', () => {
+    expect(
+      composeAliasDisplayName({
+        upstreamDisplayName: 'Azure',
+        aliasDisplayName: 'Codex Auto Review',
+        targetDisplayName: 'GPT-5.4',
+        rules: { reasoning: { effort: 'low' } },
+      }),
+    ).toBe('Azure: Codex Auto Review');
+  });
+
+  test('falls back to target displayName with rules suffix when alias displayName is missing', () => {
+    expect(
+      composeAliasDisplayName({
+        upstreamDisplayName: 'Azure',
+        targetDisplayName: 'GPT-5.4',
+        rules: { reasoning: { effort: 'low' } },
+      }),
+    ).toBe('Azure: GPT-5.4 (low effort)');
+  });
+
+  test('omits the rules suffix when rules are empty', () => {
+    expect(
+      composeAliasDisplayName({
+        upstreamDisplayName: 'Azure',
+        targetDisplayName: 'GPT-5.4',
+        rules: {},
+      }),
+    ).toBe('Azure: GPT-5.4');
+  });
+});
diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts
index a7cfd963f..3718b5fd5 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo.ts
@@ -8,6 +8,7 @@ interface ModelAliasRow {
   rules_json: string;
   visible_in_models_list: number;
   on_conflict: OnConflict;
+  display_name: string | null;
   created_at: number;
 }
 
@@ -17,7 +18,7 @@ interface ModelAliasRow {
 // emit alias entries in a stable, operator-predictable order across runtimes.
 export const loadAllAliases = async (db: SqlDatabase): Promise<readonly ModelAlias[]> => {
   const { results } = await db
-    .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at FROM model_aliases ORDER BY alias')
+    .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at FROM model_aliases ORDER BY alias')
     .all<ModelAliasRow>();
   return results.map(toModelAlias);
 };
@@ -29,6 +30,7 @@ const toModelAlias = (row: ModelAliasRow): ModelAlias => ({
   rules: parseJsonField<ModelAlias['rules']>(row.alias, 'rules_json', row.rules_json),
   visibleInModelsList: row.visible_in_models_list === 1,
   onConflict: row.on_conflict,
+  ...(row.display_name !== null ? { displayName: row.display_name } : {}),
   createdAt: row.created_at,
 });
 
diff --git a/packages/gateway/src/control-plane/model-aliases/repo_test.ts b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
index ff1efa046..5f1e4fa6d 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo_test.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
@@ -22,6 +22,7 @@ test('loadAllAliases reads the seed row from a freshly migrated database', async
     rules: { reasoning: { effort: 'low' } },
     visibleInModelsList: true,
     onConflict: 'real-only',
+    displayName: 'Codex Auto Review',
   });
 });
 
@@ -72,6 +73,28 @@ test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_mo
   });
 });
 
+test('loadAllAliases reads display_name and omits the field when SQL stored NULL', async () => {
+  const db = await createSqliteTestDb();
+  await db.exec('DELETE FROM model_aliases');
+  await db
+    .prepare(
+      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
+    )
+    .bind('with-label', 'gpt-5.4', '[]', '{}', 1, 'real-only', 'Pretty Label', 1_700_000_000)
+    .run();
+  await db
+    .prepare(
+      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
+    )
+    .bind('no-label', 'gpt-5.4', '[]', '{}', 1, 'real-only', null, 1_700_000_001)
+    .run();
+
+  const byAlias = new Map((await loadAllAliases(db)).map(entry => [entry.alias, entry]));
+  assertEquals(byAlias.get('with-label')?.displayName, 'Pretty Label');
+  // SQL NULL becomes undefined on the typed row so callers can branch on `=== undefined`.
+  assertEquals('displayName' in (byAlias.get('no-label') ?? {}), false);
+});
+
 test('loadAllAliases surfaces malformed rules_json as a descriptive error', async () => {
   const db = await createSqliteTestDb();
   await db.exec('DELETE FROM model_aliases');
diff --git a/packages/gateway/src/control-plane/model-aliases/types.ts b/packages/gateway/src/control-plane/model-aliases/types.ts
index 7594ceff6..3170b4b47 100644
--- a/packages/gateway/src/control-plane/model-aliases/types.ts
+++ b/packages/gateway/src/control-plane/model-aliases/types.ts
@@ -23,6 +23,11 @@ export type ModelAlias = {
   readonly rules: ModelAliasRules;
   readonly visibleInModelsList: boolean;
   readonly onConflict: OnConflict;
+  // Operator-set human-readable label shown after the upstream display name in
+  // `/v1/models` entries (e.g. "Azure: Codex Auto Review"). When unset, the
+  // listing falls back to the alias target's display name plus a rules-summary
+  // suffix; see `control-plane/model-aliases/display.ts`.
+  readonly displayName?: string;
   // Unix epoch seconds stamped at row insertion. Surfaced on the
   // `/v1/models` synthesized alias entry so callers see when an alias was
   // declared, matching the `created` semantics of the real entries.
diff --git a/packages/gateway/src/data-plane/chat/responses/http.ts b/packages/gateway/src/data-plane/chat/responses/http.ts
index 9346d5576..671e935ed 100644
--- a/packages/gateway/src/data-plane/chat/responses/http.ts
+++ b/packages/gateway/src/data-plane/chat/responses/http.ts
@@ -3,7 +3,6 @@ import { respondResponses } from './respond.ts';
 import { PreviousResponseNotFoundError } from './serve-prep.ts';
 import { responsesServe } from './serve.ts';
 import type { AuthedContext } from '../../../middleware/auth.ts';
-import { CODEX_AUTO_REVIEW_ALIAS, CODEX_AUTO_REVIEW_TARGET } from '../../codex/auto-review-alias.ts';
 import { inboundHeadersForUpstream } from '../../shared/inbound-headers.ts';
 import { createGatewayCtxFromHono, finalizeGatewayResponse, type GatewayCtx } from '../shared/gateway-ctx.ts';
 import { readRequestBody, type RequestBody } from '../shared/request-body.ts';
@@ -11,38 +10,6 @@ import { providerModelsUnavailableResponse } from '../shared/upstream-models-err
 import type { ResponsesPayload } from '@floway-dev/protocols/responses';
 import { internalErrorResult, toInternalDebugError } from '@floway-dev/provider';
 
-// Codex sends auto-review requests over the Responses wire API as a
-// `codex-auto-review` model id; rewrite at the entry so downstream routing,
-// performance telemetry, and usage accounting all see the real model name
-// (and the `low` reasoning effort the alias implies — generate only;
-// compact carries no `reasoning` field).
-//
-// This shim predates the operator-managed alias table seeded by migration
-// `0046_model_aliases.sql`. The two paths overlap on `/v1/responses` —
-// rewriting at this entry swaps the inbound `model` to `gpt-5.4` BEFORE the
-// alias matcher in `enumerateProviderCandidates` runs, so the alias row
-// never matches for this surface. The carveout is deliberate: the seeded
-// alias is stored with `on_conflict='real-only'`, which means on a Codex
-// upstream that exposes a real `codex-auto-review` model the alias would
-// silently lose to the real id and the `reasoning.effort=low` rule would
-// never apply — breaking parity with Codex CLI's native auto-review
-// behavior. Other inbound surfaces (`/v1/messages`, `/v1/chat/completions`,
-// `/v1beta/…`) carry no entry-level shim and reach the alias matcher
-// unchanged; they observe `real-only` semantics as designed.
-//
-// The shim is a temporary carveout pending a follow-up that either deletes
-// it after a deliberate Codex behavior change (e.g. switching to
-// `both-alias-first`) or migrates the entire surface to the alias table.
-const rewriteResponsesEntryModelAlias = (payload: ResponsesPayload, stampReasoningEffort: boolean): ResponsesPayload => {
-  if (payload.model !== CODEX_AUTO_REVIEW_ALIAS) return payload;
-  if (!stampReasoningEffort) return { ...payload, model: CODEX_AUTO_REVIEW_TARGET };
-  return {
-    ...payload,
-    model: CODEX_AUTO_REVIEW_TARGET,
-    reasoning: { ...(payload.reasoning ?? {}), effort: 'low' },
-  };
-};
-
 // OpenAI's verbatim previous_response_not_found envelope. Codex compares this
 // body byte-for-byte against upstream — see the cross-references on
 // `PreviousResponseNotFoundError` in serve-prep.ts.
@@ -77,15 +44,15 @@ const respondWithInternalError = async (c: AuthedContext, error: unknown, reques
   return finalizeGatewayResponse(effectiveCtx, response);
 };
 
-const parsePayload = (requestBody: RequestBody, stampReasoningEffort: boolean): ResponsesPayload =>
-  rewriteResponsesEntryModelAlias(JSON.parse(new TextDecoder().decode(requestBody.bytes)) as ResponsesPayload, stampReasoningEffort);
+const parsePayload = (requestBody: RequestBody): ResponsesPayload =>
+  JSON.parse(new TextDecoder().decode(requestBody.bytes)) as ResponsesPayload;
 
 export const responsesHttp = {
   generate: async (c: AuthedContext): Promise<Response> => {
     const requestBody = await readRequestBody(c);
     let ctx: GatewayCtx | undefined;
     try {
-      const payload = parsePayload(requestBody, true);
+      const payload = parsePayload(requestBody);
       const wantsStream = payload.stream === true;
       ctx = createGatewayCtxFromHono(c, { wantsStream, requestBody, model: payload.model });
       const store = createResponsesHttpStore(ctx.apiKeyId, payload.store ?? undefined);
@@ -106,7 +73,7 @@ export const responsesHttp = {
     const requestBody = await readRequestBody(c);
     let ctx: GatewayCtx | undefined;
     try {
-      const payload = parsePayload(requestBody, false);
+      const payload = parsePayload(requestBody);
       ctx = createGatewayCtxFromHono(c, { wantsStream: false, requestBody, model: payload.model });
       const store = createResponsesHttpStore(ctx.apiKeyId, payload.store ?? undefined);
       const result = await responsesServe.compact({ payload, ctx, store, headers: inboundHeadersForUpstream(c) });
diff --git a/packages/gateway/src/data-plane/chat/responses/http_test.ts b/packages/gateway/src/data-plane/chat/responses/http_test.ts
index d0e78cdaf..7425b640b 100644
--- a/packages/gateway/src/data-plane/chat/responses/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/http_test.ts
@@ -284,63 +284,3 @@ test('POST /v1/responses renders a routing-unavailable 400 when a forcing item n
   const body = await response.json() as { error: { code: string } };
   assertEquals(body.error.code, 'responses_item_routing_unavailable');
 });
-
-test('POST /v1/responses rewrites the codex-auto-review alias before routing', async () => {
-  installRepo();
-  seenModels.length = 0;
-  const observedBodies: { reasoning?: { effort?: string } }[] = [];
-  const callResponses = vi.fn(async (_model: unknown, body: unknown): Promise<ProviderStreamResult<ResponsesStreamEvent>> => {
-    observedBodies.push(body as { reasoning?: { effort?: string } });
-    return {
-      ok: true,
-      events: makeProviderEvents([completedEvent()]),
-      modelKey: 'test-model-key',
-      headers: new Headers(),
-    };
-  });
-  queueCandidates([makeCandidate({ callResponses })]);
-
-  const response = await makeApp().request('/v1/responses', {
-    method: 'POST',
-    headers: new Headers({ 'content-type': 'application/json' }),
-    body: JSON.stringify({ model: 'codex-auto-review', input: 'hello' }),
-  });
-
-  assertEquals(response.status, 200);
-  assertEquals(seenModels, ['gpt-5.4']);
-  const observed = observedBodies[0];
-  if (observed === undefined) throw new Error('expected callResponses to receive a body');
-  assertEquals(observed.reasoning?.effort, 'low');
-});
-
-test('POST /v1/responses/compact rewrites the codex-auto-review alias to gpt-5.4 with no reasoning field', async () => {
-  installRepo();
-  seenModels.length = 0;
-  const observedBodies: { reasoning?: unknown }[] = [];
-  const compactionItem = { type: 'compaction' as const, id: 'cmp_1', encrypted_content: 'ENC' };
-  const compactionResult: ResponsesResult = {
-    ...makeResponsesResult(),
-    object: 'response.compaction',
-    output: [compactionItem] as unknown as ResponsesResult['output'],
-  };
-  const callResponsesCompact = vi.fn(async (_model: unknown, body: unknown) => {
-    observedBodies.push(body as { reasoning?: unknown });
-    return { ok: true as const, result: compactionResult, modelKey: 'test-model-key' };
-  });
-  queueCandidates([makeCandidate({ callResponsesCompact })]);
-
-  const response = await makeApp().request('/v1/responses/compact', {
-    method: 'POST',
-    headers: new Headers({ 'content-type': 'application/json' }),
-    body: JSON.stringify({
-      model: 'codex-auto-review',
-      input: [{ type: 'message', role: 'user', content: 'kept' }],
-    }),
-  });
-
-  assertEquals(response.status, 200);
-  assertEquals(seenModels, ['gpt-5.4']);
-  const observed = observedBodies[0];
-  if (observed === undefined) throw new Error('expected callResponsesCompact to receive a body');
-  assertEquals(observed.reasoning, undefined);
-});
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
new file mode 100644
index 000000000..e41400111
--- /dev/null
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -0,0 +1,52 @@
+import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
+import type { ModelProviderInstance, UpstreamModel } from '@floway-dev/provider';
+
+// One emission slot for an alias: a (provider, addressable form) pair where
+// the provider's raw catalog carries the alias target id, plus the matched
+// UpstreamModel so the synthesized listing entry can borrow the target's
+// limits, owner, and cost without re-querying.
+export interface AliasListingEmission {
+  provider: ModelProviderInstance;
+  form: 'unprefixed' | 'prefixed';
+  target: UpstreamModel;
+}
+
+// Per-upstream alias enumeration shared by `/v1/models` and the Gemini
+// `/models` listings. An alias with empty `upstreamIds` matches every
+// reachable provider; a non-empty list narrows the candidate set. Per
+// provider, the alias emits one entry per `listed` form when its target sits
+// in the upstream's raw catalog. Upstreams that do not carry the target — or
+// whose operator disabled the target — drop the alias entirely for that row.
+export const aliasListingEmissions = (
+  alias: ModelAlias,
+  providers: readonly ModelProviderInstance[],
+  rawCatalogs: ReadonlyMap<string, readonly UpstreamModel[]>,
+): AliasListingEmission[] => {
+  const out: AliasListingEmission[] = [];
+  const upstreamFilter = alias.upstreamIds.length > 0 ? new Set(alias.upstreamIds) : null;
+  for (const provider of providers) {
+    if (upstreamFilter !== null && !upstreamFilter.has(provider.upstream)) continue;
+    const catalog = rawCatalogs.get(provider.upstream);
+    if (catalog === undefined) continue;
+    const disabled = new Set(provider.disabledPublicModelIds);
+    const target = catalog.find(m => m.id === alias.targetModelId && !disabled.has(m.id));
+    if (target === undefined) continue;
+    const cfg = provider.modelPrefix;
+    if (cfg === null) {
+      out.push({ provider, form: 'unprefixed', target });
+    } else {
+      for (const form of cfg.listed) {
+        out.push({ provider, form, target });
+      }
+    }
+  }
+  return out;
+};
+
+// The public id form an alias emission carries on the wire. Bare alias name
+// for the unprefixed form; provider prefix + alias name for the prefixed
+// form. Mirrors how real models are surfaced in the same listing pass.
+export const aliasPublicId = (alias: ModelAlias, emission: AliasListingEmission): string => {
+  const cfg = emission.provider.modelPrefix;
+  return emission.form === 'prefixed' && cfg !== null ? `${cfg.prefix}${alias.alias}` : alias.alias;
+};
diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts
index 33dbefa40..0d08e4445 100644
--- a/packages/gateway/src/data-plane/models/gemini.ts
+++ b/packages/gateway/src/data-plane/models/gemini.ts
@@ -1,6 +1,8 @@
 import type { Context } from 'hono';
 
+import { aliasListingEmissions, aliasPublicId } from './alias-listing.ts';
 import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts';
+import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts';
 import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
@@ -8,7 +10,7 @@ import { getRepo } from '../../repo/index.ts';
 import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
 import { getCurrentColo } from '../../runtime/runtime-info.ts';
 import { geminiStatusForHttpStatus } from '../chat/gemini/errors.ts';
-import { getInternalModels } from '../providers/registry.ts';
+import { getModelsForListing } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { ModelPricing } from '@floway-dev/protocols/common';
 import { ProviderModelsUnavailableError } from '@floway-dev/provider';
@@ -70,25 +72,32 @@ const loadGeminiModels = async (
   scheduler: BackgroundScheduler,
   aliases: readonly ModelAlias[],
 ): Promise<GeminiModel[]> => {
-  const models = await getInternalModels(upstreamFilter, fetcherForUpstream, scheduler);
+  const { models, providers, rawCatalogs } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler);
   // Only chat models are representable in the Gemini /models shape.
   const realChatEntries = models.filter(model => model.kind === 'chat').map(toGeminiModel);
-  // Visible aliases append in `loadAllAliases` order; the Gemini surface
-  // carries no `aliasedFrom` extension (Gemini's `Model` resource is closed)
-  // so the entry advertises the alias id plus the target's display fields.
-  const byId = new Map<string, InternalModel>(models.map(m => [m.id, m]));
+  // Per-upstream alias enumeration mirrors `/v1/models`. Each emission becomes
+  // one Gemini Model entry whose id and displayName reflect that specific
+  // (provider, addressable form) pair; emissions whose target is not a chat
+  // model are skipped below, matching the chat-only filter on real entries.
   const aliasEntries: GeminiModel[] = [];
   for (const alias of aliases) {
     if (!alias.visibleInModelsList) continue;
-    const target = byId.get(alias.targetModelId);
-    if (target && target.kind !== 'chat') continue;
-    aliasEntries.push(toGeminiModel({
-      ...(target ?? {} as InternalModel),
-      id: alias.alias,
-      display_name: alias.alias,
-      kind: 'chat',
-      limits: target?.limits ?? {},
-    }));
+    for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) {
+      if (emission.target.kind !== 'chat') continue;
+      const targetDisplayName = emission.target.display_name ?? emission.target.id;
+      aliasEntries.push(toGeminiModel({
+        ...emission.target,
+        id: aliasPublicId(alias, emission),
+        display_name: composeAliasDisplayName({
+          upstreamDisplayName: emission.provider.name,
+          aliasDisplayName: alias.displayName,
+          targetDisplayName,
+          rules: alias.rules,
+        }),
+        kind: 'chat',
+        limits: emission.target.limits ?? {},
+      }));
+    }
   }
   return [...realChatEntries, ...aliasEntries];
 };
diff --git a/packages/gateway/src/data-plane/models/gemini_test.ts b/packages/gateway/src/data-plane/models/gemini_test.ts
index 0327bae33..6a2d9a887 100644
--- a/packages/gateway/src/data-plane/models/gemini_test.ts
+++ b/packages/gateway/src/data-plane/models/gemini_test.ts
@@ -449,7 +449,7 @@ test('/v1beta/models appends visible aliases as synthetic Gemini model entries',
       const body = await response.json() as { models: Array<{ name: string; displayName: string; supportedGenerationMethods: string[] }> };
       const aliasEntry = body.models.find(m => m.name === 'models/codex-auto-review');
       if (!aliasEntry) throw new Error('expected codex-auto-review alias entry');
-      assertEquals(aliasEntry.displayName, 'codex-auto-review');
+      assertEquals(aliasEntry.displayName, 'GitHub Copilot (tester): GPT Gemini List (low effort)');
       assertEquals(aliasEntry.supportedGenerationMethods, ['generateContent', 'streamGenerateContent', 'countTokens']);
     },
   );
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index 585b5d638..569a601e2 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -1,5 +1,7 @@
+import { aliasListingEmissions, aliasPublicId, type AliasListingEmission } from './alias-listing.ts';
+import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts';
 import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
-import { getInternalModels } from '../providers/registry.ts';
+import { getModelsForListing } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
 import type { Fetcher, InternalModel } from '@floway-dev/provider';
@@ -22,20 +24,21 @@ export const toPublicModel = (model: InternalModel): PublicModel => {
   return info;
 };
 
-// Synthesize one PublicModel for each visible alias, appended after the real
-// entries. The owner falls back to the alias-target's `owned_by` on whichever
-// real entry resolves it; if the target isn't present on any reachable
-// upstream, the entry still appears (operator-declared; the listing reflects
-// operator intent) with a `floway` owner so the row is unambiguous.
-export const toPublicModelFromAlias = (alias: ModelAlias, byId: ReadonlyMap<string, InternalModel>): PublicModel => {
-  const target = byId.get(alias.targetModelId);
+const publicModelForAliasEmission = (alias: ModelAlias, emission: AliasListingEmission): PublicModel => {
+  const { provider, target } = emission;
+  const targetDisplayName = target.display_name ?? target.id;
   const info: PublicModel = {
-    id: alias.alias,
+    id: aliasPublicId(alias, emission),
     object: 'model',
     type: 'model',
-    display_name: alias.alias,
-    limits: target?.limits ? { ...target.limits } : {},
-    kind: target?.kind ?? 'chat',
+    display_name: composeAliasDisplayName({
+      upstreamDisplayName: provider.name,
+      aliasDisplayName: alias.displayName,
+      targetDisplayName,
+      rules: alias.rules,
+    }),
+    limits: target.limits ? { ...target.limits } : {},
+    kind: target.kind,
     created: alias.createdAt,
     created_at: new Date(alias.createdAt * 1000).toISOString(),
     aliasedFrom: {
@@ -45,7 +48,8 @@ export const toPublicModelFromAlias = (alias: ModelAlias, byId: ReadonlyMap<stri
       onConflict: alias.onConflict,
     },
   };
-  info.owned_by = target?.owned_by ?? alias.upstreamIds[0] ?? 'floway';
+  info.owned_by = target.owned_by ?? provider.upstream;
+  if (target.cost) info.cost = target.cost;
   return info;
 };
 
@@ -55,14 +59,19 @@ export const loadModels = async (
   scheduler: BackgroundScheduler,
   aliases: readonly ModelAlias[],
 ): Promise<PublicModelsResponse> => {
-  const internal = await getInternalModels(upstreamFilter, fetcherForUpstream, scheduler);
-  const realEntries = internal.map(toPublicModel);
-  const byId = new Map<string, InternalModel>(internal.map(m => [m.id, m]));
-  // Visible aliases append in `loadAllAliases` order, after every real entry.
-  // The spec's no-silent-hide policy keeps disabled-target aliases visible —
-  // the user-facing failure on call is the canonical signal, not the
-  // listing.
-  const aliasEntries = aliases.filter(a => a.visibleInModelsList).map(a => toPublicModelFromAlias(a, byId));
+  const { models, providers, rawCatalogs } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler);
+  const realEntries = models.map(toPublicModel);
+  // Per-upstream alias enumeration: for each visible alias, emit one entry per
+  // (provider, addressable form) pair where the provider can resolve the
+  // alias's target. Upstreams that do not carry the target produce no entry —
+  // the alias listing is strictly anchored to "can be served from here".
+  const aliasEntries: PublicModel[] = [];
+  for (const alias of aliases) {
+    if (!alias.visibleInModelsList) continue;
+    for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) {
+      aliasEntries.push(publicModelForAliasEmission(alias, emission));
+    }
+  }
   const data = [...realEntries, ...aliasEntries];
   return {
     object: 'list',
diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index 855eca5fa..17647565b 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -711,7 +711,7 @@ test('/v1/models omits aliases marked visibleInModelsList=false', async () => {
   );
 });
 
-test('/v1/models lists an alias whose target is not present on any upstream (no silent hide)', async () => {
+test('/v1/models omits an alias whose target is not in any reachable upstream catalog', async () => {
   const { repo, apiKey } = await setupAppTest();
 
   (repo.modelAliases as MemoryModelAliasesRepo).setAll([
@@ -755,12 +755,182 @@ test('/v1/models lists an alias whose target is not present on any upstream (no
     },
     async () => {
       const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
-      const body = await response.json() as { data: Array<{ id: string; aliasedFrom?: { targetModelId: string }; owned_by?: string }> };
-      const orphan = body.data.find(m => m.id === 'orphan-alias');
-      if (!orphan) throw new Error('expected orphan-alias entry');
-      assertEquals(orphan.aliasedFrom?.targetModelId, 'never-resolves');
-      // No matching real entry → owner falls back to the alias's primary upstream id.
-      assertEquals(orphan.owned_by, 'up_oai');
+      const body = await response.json() as { data: Array<{ id: string }> };
+      // Per-upstream alias enumeration: an alias whose target cannot be served
+      // by any reachable upstream produces zero entries — there is no surface
+      // form to attach the alias to. A request for `orphan-alias` still
+      // returns the canonical user-facing model-missing error.
+      assertEquals(body.data.map(m => m.id).includes('orphan-alias'), false);
+    },
+  );
+});
+
+test('/v1/models emits the alias on each reachable upstream + listed form, with display_name composed from the upstream label', async () => {
+  const { repo, apiKey } = await setupAppTest();
+
+  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+    {
+      alias: 'codex-auto-review',
+      targetModelId: 'gpt-5.4',
+      upstreamIds: [],
+      rules: { reasoning: { effort: 'low' } },
+      visibleInModelsList: true,
+      onConflict: 'real-only',
+      displayName: 'Codex Auto Review',
+      createdAt: 1_700_000_000,
+    },
+  ]);
+
+  await repo.upstreams.save(buildCustomUpstreamRecord({
+    id: 'up_azure',
+    name: 'Azure',
+    sortOrder: 100,
+    config: {
+      baseUrl: 'https://azure.example.com',
+      authStyle: 'bearer',
+      apiKey: 'sk-azure',
+      endpoints: { chatCompletions: {} },
+    },
+    modelPrefix: { prefix: 'azure/', addressable: ['unprefixed', 'prefixed'], listed: ['unprefixed', 'prefixed'] },
+  }));
+
+  await withMockedFetch(
+    request => {
+      const url = new URL(request.url);
+      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (url.pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+      }
+      if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([]));
+      if (url.pathname === '/v1/models' && url.hostname === 'azure.example.com') {
+        return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4', display_name: 'GPT-5.4' }] });
+      }
+      throw new Error(`Unhandled fetch ${request.url}`);
+    },
+    async () => {
+      const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
+      const body = await response.json() as { data: Array<{ id: string; display_name: string; aliasedFrom?: unknown }> };
+      // Both addressable forms appear because the upstream listed both.
+      const bare = body.data.find(m => m.id === 'codex-auto-review');
+      const prefixed = body.data.find(m => m.id === 'azure/codex-auto-review');
+      if (!bare || !prefixed) throw new Error('expected both bare and prefixed alias entries');
+      assertEquals(bare.display_name, 'Azure: Codex Auto Review');
+      assertEquals(prefixed.display_name, 'Azure: Codex Auto Review');
+    },
+  );
+});
+
+test('/v1/models falls back to target display_name + rules summary when the alias has no displayName', async () => {
+  const { repo, apiKey } = await setupAppTest();
+
+  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+    {
+      alias: 'codex-auto-review',
+      targetModelId: 'gpt-5.4',
+      upstreamIds: [],
+      rules: { reasoning: { effort: 'low' } },
+      visibleInModelsList: true,
+      onConflict: 'real-only',
+      createdAt: 1_700_000_000,
+    },
+  ]);
+
+  await repo.upstreams.save(buildCustomUpstreamRecord({
+    id: 'up_azure',
+    name: 'Azure',
+    sortOrder: 100,
+    config: {
+      baseUrl: 'https://azure.example.com',
+      authStyle: 'bearer',
+      apiKey: 'sk-azure',
+      endpoints: { chatCompletions: {} },
+    },
+  }));
+
+  await withMockedFetch(
+    request => {
+      const url = new URL(request.url);
+      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (url.pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+      }
+      if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([]));
+      if (url.pathname === '/v1/models' && url.hostname === 'azure.example.com') {
+        return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4', display_name: 'GPT-5.4' }] });
+      }
+      throw new Error(`Unhandled fetch ${request.url}`);
+    },
+    async () => {
+      const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
+      const body = await response.json() as { data: Array<{ id: string; display_name: string }> };
+      const entry = body.data.find(m => m.id === 'codex-auto-review');
+      if (!entry) throw new Error('expected codex-auto-review alias entry');
+      assertEquals(entry.display_name, 'Azure: GPT-5.4 (low effort)');
+    },
+  );
+});
+
+test('/v1/models honours alias upstreamIds — only emits on the named upstream', async () => {
+  const { repo, apiKey } = await setupAppTest();
+
+  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+    {
+      alias: 'codex-auto-review',
+      targetModelId: 'gpt-5.4',
+      upstreamIds: ['up_azure'],
+      rules: {},
+      visibleInModelsList: true,
+      onConflict: 'real-only',
+      createdAt: 1_700_000_000,
+    },
+  ]);
+
+  await repo.upstreams.save(buildCustomUpstreamRecord({
+    id: 'up_azure',
+    name: 'Azure',
+    sortOrder: 100,
+    config: {
+      baseUrl: 'https://azure.example.com',
+      authStyle: 'bearer',
+      apiKey: 'sk-azure',
+      endpoints: { chatCompletions: {} },
+    },
+  }));
+  await repo.upstreams.save(buildCustomUpstreamRecord({
+    id: 'up_other',
+    name: 'Other',
+    sortOrder: 200,
+    config: {
+      baseUrl: 'https://other.example.com',
+      authStyle: 'bearer',
+      apiKey: 'sk-other',
+      endpoints: { chatCompletions: {} },
+    },
+  }));
+
+  await withMockedFetch(
+    request => {
+      const url = new URL(request.url);
+      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (url.pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+      }
+      if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([]));
+      // Both upstreams expose gpt-5.4 — but the alias is restricted to up_azure.
+      if (url.pathname === '/v1/models' && url.hostname === 'azure.example.com') {
+        return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
+      }
+      if (url.pathname === '/v1/models' && url.hostname === 'other.example.com') {
+        return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
+      }
+      throw new Error(`Unhandled fetch ${request.url}`);
+    },
+    async () => {
+      const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
+      const body = await response.json() as { data: Array<{ id: string; display_name: string }> };
+      const aliasRows = body.data.filter(m => m.id === 'codex-auto-review');
+      assertEquals(aliasRows.length, 1);
+      assertEquals(aliasRows[0].display_name, 'Azure: gpt-5.4');
     },
   );
 });
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index e29df1c6a..c8556ca45 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -20,6 +20,10 @@ interface ProviderModelsResult {
   // order as the input `providers` list so the model-missing renderer can
   // surface a stable, dashboard-aligned list.
   failedUpstreams: string[];
+  // Raw per-upstream catalogs collected during the fan-out. Aliases consume
+  // this to enumerate per-upstream entries by addressable form without paying
+  // a second round-trip.
+  rawCatalogs: Map<string, readonly UpstreamModel[]>;
 }
 
 const NO_UPSTREAM_CONFIGURED_MESSAGE = 'No upstream provider configured — connect GitHub Copilot or add a Custom/Azure upstream in the dashboard';
@@ -143,6 +147,7 @@ const collectProviderModels = async (
   scheduler: BackgroundScheduler,
 ): Promise<ProviderModelsResult> => {
   const byId = new Map<string, ResolvedModel>();
+  const rawCatalogs = new Map<string, readonly UpstreamModel[]>();
   let sawSuccess = false;
   let lastError: unknown = null;
   const failedUpstreams: string[] = [];
@@ -174,6 +179,7 @@ const collectProviderModels = async (
     }
     sawSuccess = true;
     const { instance, models: providedModels } = result.value;
+    rawCatalogs.set(instance.upstream, providedModels);
     // Operator-disabled public model ids vanish entirely for this upstream:
     // dropped before they reach the catalog map, so they appear in no /models
     // listing and resolve to nothing for routing. The disable is per-upstream,
@@ -208,7 +214,7 @@ const collectProviderModels = async (
     }
   }
 
-  return { models: [...byId.values()], sawSuccess, lastError, failedUpstreams };
+  return { models: [...byId.values()], sawSuccess, lastError, failedUpstreams, rawCatalogs };
 };
 
 // Public-facing model-id ordering, applied in getModels() to every list that
@@ -264,6 +270,35 @@ export const getModels = async (
   return [];
 };
 
+// Returns the merged public model list AND the per-upstream raw catalogs and
+// provider instances. Listing surfaces (`/v1/models`, Gemini `/models`) use the
+// extra channels to synthesize alias entries that reflect which upstreams can
+// actually serve each alias's target and in which addressable form. Computing
+// both off the same `collectProviderModels` pass keeps catalog fetches to one
+// round per upstream regardless of how many alias rows reference each target.
+export interface PublicModelsListing {
+  models: ResolvedModel[];
+  providers: readonly ModelProviderInstance[];
+  rawCatalogs: ReadonlyMap<string, readonly UpstreamModel[]>;
+}
+
+export const getModelsForListing = async (
+  upstreamFilter: readonly string[] | null,
+  fetcherForUpstream: (upstreamId: string) => Fetcher,
+  scheduler: BackgroundScheduler,
+): Promise<PublicModelsListing> => {
+  const providers = await listModelProviders(upstreamFilter);
+  if (providers.length === 0) {
+    throw new Error(NO_UPSTREAM_CONFIGURED_MESSAGE);
+  }
+
+  const { models, sawSuccess, lastError, rawCatalogs } = await collectProviderModels(providers, fetcherForUpstream, scheduler);
+
+  if (sawSuccess) return { models: models.sort((a, b) => compareModelIds(a.id, b.id)), providers, rawCatalogs };
+  if (lastError) throw lastError;
+  return { models: [], providers, rawCatalogs };
+};
+
 export const getInternalModels = async (
   upstreamFilter: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,

From fcb360afcb1cfc23982484dcae1d61c6f705fd84 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 01:51:45 +0800
Subject: [PATCH 010/170] revert(translate): restore pre-extension native field
 translation on remaining pairs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Task 3 (`e1891e1d`) also reshaped NATIVE-field translation on the three
remaining pairs that the first revert wave (`17a7877c`) did not cover.
The alias work should only have added emission of the new Floway
extension fields; native-to-native handling on these pairs had been
decided in the prior contract and is restored here.

Revert the native-field paths in:

- gemini-via-responses: restore the pre-Task-3 `reasoning` block shape
  where `includeThoughts: true` paired with a non-`none` effort produces
  `summary: 'detailed'`; drop the `false → 'omitted'` synthesis Task 3
  added. Keep `verbosity` and `serviceTier` extension carries
  (Floway-only fields on Gemini IR).
- messages-via-responses: drop `thinking.display` → `reasoning.summary`
  synthesis and the `service_tier` → `service_tier` native-to-native
  propagation. Keep the `verbosity` extension carry under `text`. The
  unused `mapAnthropicDisplayToSummary` helper is deleted.
- messages-via-chat-completions: drop the `service_tier` →
  `service_tier` native-to-native propagation. Keep the `verbosity`
  extension carry.

Tests that asserted the new native-field behavior are removed; the
extension-field tests stay untouched.
---
 .../src/gemini-via-responses/request.ts       | 18 ++++------
 .../src/gemini-via-responses/request_test.ts  | 20 -----------
 .../messages-via-chat-completions/request.ts  |  1 -
 .../request_test.ts                           | 11 ------
 .../src/messages-via-responses/request.ts     | 16 ++-------
 .../messages-via-responses/request_test.ts    | 36 -------------------
 .../shared/messages-via/reasoning-summary.ts  | 21 -----------
 7 files changed, 9 insertions(+), 114 deletions(-)
 delete mode 100644 packages/translate/src/shared/messages-via/reasoning-summary.ts

diff --git a/packages/translate/src/gemini-via-responses/request.ts b/packages/translate/src/gemini-via-responses/request.ts
index df85e4729..36ceb37f0 100644
--- a/packages/translate/src/gemini-via-responses/request.ts
+++ b/packages/translate/src/gemini-via-responses/request.ts
@@ -152,18 +152,12 @@ const applyGenerationConfig = (request: ResponsesPayload, generationConfig?: Gem
   if (generationConfig.serviceTier != null) request.service_tier = generationConfig.serviceTier;
 
   const effort = geminiReasoningEffort(generationConfig.thinkingConfig);
-  const summary =
-    generationConfig.thinkingConfig?.includeThoughts === true
-      ? ('detailed' as const)
-      : generationConfig.thinkingConfig?.includeThoughts === false
-        ? ('omitted' as const)
-        : undefined;
-  if (effort || summary !== undefined) {
-    request.reasoning = {
-      ...(effort ? { effort } : {}),
-      ...(summary !== undefined && effort !== 'none' ? { summary } : {}),
-    };
-  }
+  if (!effort) return;
+
+  request.reasoning = {
+    effort,
+    ...(effort !== 'none' && generationConfig.thinkingConfig?.includeThoughts === true ? { summary: 'detailed' as const } : {}),
+  };
 };
 
 const buildTools = (payload: GeminiPayload): ResponsesTool[] | undefined => {
diff --git a/packages/translate/src/gemini-via-responses/request_test.ts b/packages/translate/src/gemini-via-responses/request_test.ts
index 66476f0d8..9d568605f 100644
--- a/packages/translate/src/gemini-via-responses/request_test.ts
+++ b/packages/translate/src/gemini-via-responses/request_test.ts
@@ -432,26 +432,6 @@ test('buildTargetRequest emits generationConfig.serviceTier onto Responses servi
   assertEquals(result.service_tier, 'priority');
 });
 
-test('buildTargetRequest maps includeThoughts onto reasoning.summary (true → detailed, false → omitted)', () => {
-  const withSummary = buildTargetRequest(
-    {
-      contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
-      generationConfig: { thinkingConfig: { thinkingLevel: 'high', includeThoughts: true } },
-    },
-    'gpt-test',
-  );
-  const withoutSummary = buildTargetRequest(
-    {
-      contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
-      generationConfig: { thinkingConfig: { thinkingLevel: 'high', includeThoughts: false } },
-    },
-    'gpt-test',
-  );
-
-  assertEquals(withSummary.reasoning, { effort: 'high', summary: 'detailed' });
-  assertEquals(withoutSummary.reasoning, { effort: 'high', summary: 'omitted' });
-});
-
 test('buildTargetRequest drops top-level Anthropic extensions on Responses', () => {
   const result = buildTargetRequest(
     {
diff --git a/packages/translate/src/messages-via-chat-completions/request.ts b/packages/translate/src/messages-via-chat-completions/request.ts
index bd08e1b26..6ae2d8a95 100644
--- a/packages/translate/src/messages-via-chat-completions/request.ts
+++ b/packages/translate/src/messages-via-chat-completions/request.ts
@@ -291,7 +291,6 @@ export const translateMessagesToChatCompletions = (payload: MessagesPayload): Ch
     tool_choice: translateMessagesToolChoice(payload.tool_choice, clientTools),
     ...(responseFormat ? { response_format: responseFormat } : {}),
     ...(payload.verbosity != null ? { verbosity: payload.verbosity } : {}),
-    ...(payload.service_tier != null ? { service_tier: payload.service_tier } : {}),
   };
 };
 
diff --git a/packages/translate/src/messages-via-chat-completions/request_test.ts b/packages/translate/src/messages-via-chat-completions/request_test.ts
index 50f326860..861962bf8 100644
--- a/packages/translate/src/messages-via-chat-completions/request_test.ts
+++ b/packages/translate/src/messages-via-chat-completions/request_test.ts
@@ -494,17 +494,6 @@ test('translateMessagesToChatCompletions emits verbosity extension verbatim', ()
   assertEquals(result.verbosity, 'low');
 });
 
-test('translateMessagesToChatCompletions forwards service_tier verbatim', () => {
-  const result = translateMessagesToChatCompletions({
-    model: 'gpt-test',
-    max_tokens: 256,
-    messages: [{ role: 'user', content: 'hi' }],
-    service_tier: 'priority',
-  });
-
-  assertEquals(result.service_tier, 'priority');
-});
-
 test('translateMessagesToChatCompletions drops Anthropic-only knobs that have no Chat-completions slot', () => {
   const result = translateMessagesToChatCompletions({
     model: 'gpt-test',
diff --git a/packages/translate/src/messages-via-responses/request.ts b/packages/translate/src/messages-via-responses/request.ts
index b1c593443..1adfcaeda 100644
--- a/packages/translate/src/messages-via-responses/request.ts
+++ b/packages/translate/src/messages-via-responses/request.ts
@@ -1,7 +1,6 @@
 import { openAiJsonSchemaCoreFromMessagesFormat } from '../shared/messages/structured-output.ts';
 import { messagesReasoningBlockToResponsesReasoning } from '../shared/messages-and-responses/reasoning.ts';
 import { resolveMessagesReasoningEffort } from '../shared/messages-via/reasoning-effort.ts';
-import { mapAnthropicDisplayToSummary } from '../shared/messages-via/reasoning-summary.ts';
 import { normalizeMessagesToolInputSchema } from '../shared/messages-via/tool-schema.ts';
 import {
   type MessagesAssistantMessage,
@@ -208,15 +207,7 @@ export const translateMessagesToResponses = (payload: MessagesPayload): Response
   // Responses upstream may reject it. Translation stays pairwise and leaves
   // target-side validation to the selected upstream endpoint.
   const effort = resolveMessagesReasoningEffort(payload);
-  const display = payload.thinking?.display;
-  const summary = display !== undefined ? mapAnthropicDisplayToSummary(display) : undefined;
-  const reasoning =
-    effort !== undefined || summary !== undefined
-      ? {
-          ...(effort !== undefined ? { effort } : {}),
-          ...(summary !== undefined ? { summary } : {}),
-        }
-      : undefined;
+  const reasoning = effort ? { effort } : undefined;
   const clientTools = getClientTools(payload.tools);
   const instructions = translateSystemPrompt(payload.system);
   const jsonSchema = openAiJsonSchemaCoreFromMessagesFormat(payload.output_config?.format);
@@ -225,8 +216,8 @@ export const translateMessagesToResponses = (payload: MessagesPayload): Response
   const text = formatPart || verbosityPart ? { ...formatPart, ...verbosityPart } : undefined;
 
   // Keep fallback semantics strict: do not synthesize `temperature: 1`,
-  // `store: false`, or `parallel_tool_calls: true` when the Messages source
-  // did not express those knobs.
+  // `store: false`, `parallel_tool_calls: true`, or `reasoning.summary` when the
+  // Messages source did not express those knobs.
   return {
     model: payload.model,
     input: translateMessagesInput(payload.messages),
@@ -240,7 +231,6 @@ export const translateMessagesToResponses = (payload: MessagesPayload): Response
     stream: true,
     ...(reasoning ? { reasoning } : {}),
     ...(text ? { text } : {}),
-    ...(payload.service_tier != null ? { service_tier: payload.service_tier } : {}),
   };
 };
 
diff --git a/packages/translate/src/messages-via-responses/request_test.ts b/packages/translate/src/messages-via-responses/request_test.ts
index 2846f1d39..a773f72ef 100644
--- a/packages/translate/src/messages-via-responses/request_test.ts
+++ b/packages/translate/src/messages-via-responses/request_test.ts
@@ -529,42 +529,6 @@ test('translateMessagesToResponses co-emits verbosity with json_schema format un
   assertEquals(result.text?.format?.type, 'json_schema');
 });
 
-test('translateMessagesToResponses maps thinking.display onto reasoning.summary (summarized → concise, omitted → omitted, full → detailed)', () => {
-  const summarized = translateMessagesToResponses({
-    model: 'gpt-test',
-    max_tokens: 256,
-    messages: [{ role: 'user', content: 'hi' }],
-    thinking: { type: 'enabled', display: 'summarized' },
-  });
-  const omitted = translateMessagesToResponses({
-    model: 'gpt-test',
-    max_tokens: 256,
-    messages: [{ role: 'user', content: 'hi' }],
-    thinking: { type: 'enabled', display: 'omitted' },
-  });
-  const full = translateMessagesToResponses({
-    model: 'gpt-test',
-    max_tokens: 256,
-    messages: [{ role: 'user', content: 'hi' }],
-    thinking: { type: 'enabled', display: 'full' },
-  });
-
-  assertEquals(summarized.reasoning?.summary, 'concise');
-  assertEquals(omitted.reasoning?.summary, 'omitted');
-  assertEquals(full.reasoning?.summary, 'detailed');
-});
-
-test('translateMessagesToResponses forwards service_tier verbatim', () => {
-  const result = translateMessagesToResponses({
-    model: 'gpt-test',
-    max_tokens: 256,
-    messages: [{ role: 'user', content: 'hi' }],
-    service_tier: 'priority',
-  });
-
-  assertEquals(result.service_tier, 'priority');
-});
-
 test('translateMessagesToResponses drops Anthropic-only mode knobs the Responses wire cannot express', () => {
   const result = translateMessagesToResponses({
     model: 'gpt-test',
diff --git a/packages/translate/src/shared/messages-via/reasoning-summary.ts b/packages/translate/src/shared/messages-via/reasoning-summary.ts
deleted file mode 100644
index 6d12bab9b..000000000
--- a/packages/translate/src/shared/messages-via/reasoning-summary.ts
+++ /dev/null
@@ -1,21 +0,0 @@
-import type { MessagesThinkingDisplay } from '@floway-dev/protocols/messages';
-
-// Reverse of via-messages/anthropic-extensions.ts mapSummaryToAnthropicDisplay.
-// Anthropic's `summarized` collapsed both `concise` and `detailed`; we pick
-// `concise` as the canonical reverse since it is Responses' more compact
-// summary mode and round-tripping through the gateway should not silently
-// inflate verbosity. Unknown operator-typed values pass through verbatim so
-// the Responses upstream gets the original spelling and decides for itself
-// whether to accept it.
-export const mapAnthropicDisplayToSummary = (display: MessagesThinkingDisplay | string): string | undefined => {
-  switch (display) {
-  case 'summarized':
-    return 'concise';
-  case 'omitted':
-    return 'omitted';
-  case 'full':
-    return 'detailed';
-  default:
-    return display;
-  }
-};

From 96afa8376b3d5d08d9a69f243a70ca381c6dfd35 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 02:04:58 +0800
Subject: [PATCH 011/170] refactor(aliases): split upstream-label prefix from
 composeAliasDisplayName

The alias-local display name (operator-set displayName, or synthesized
target + rules summary) is independent of which addressable form the
entry surfaces under. The upstream-label prefix (`${upstream.name}: `)
belongs at the caller, mirroring the real-model path in
`registry.ts` where the synthesized prefix is added only on the
`prefixed` listing form.

Result: a bare alias listing (`codex-auto-review` on a no-prefix or
unprefixed-listed upstream) reads as `"Codex Auto Review"` or
`"GPT-5.4 (low effort)"` without an upstream label, matching how a
bare real model renders. The prefixed form (`azure/codex-auto-review`)
keeps the `"Azure: Codex Auto Review"` shape unchanged.
---
 .../src/control-plane/model-aliases/display.ts    | 15 +++++++--------
 .../control-plane/model-aliases/display_test.ts   |  9 +++------
 packages/gateway/src/data-plane/models/gemini.ts  | 13 ++++++-------
 .../gateway/src/data-plane/models/gemini_test.ts  |  2 +-
 packages/gateway/src/data-plane/models/load.ts    | 15 +++++++--------
 .../gateway/src/data-plane/models/serve_test.ts   |  8 ++++----
 6 files changed, 28 insertions(+), 34 deletions(-)

diff --git a/packages/gateway/src/control-plane/model-aliases/display.ts b/packages/gateway/src/control-plane/model-aliases/display.ts
index 576c823a8..cb82bc75f 100644
--- a/packages/gateway/src/control-plane/model-aliases/display.ts
+++ b/packages/gateway/src/control-plane/model-aliases/display.ts
@@ -23,17 +23,16 @@ export const formatAliasRulesSummary = (rules: ModelAliasRules): string => {
   return parts.length > 0 ? ` (${parts.join(', ')})` : '';
 };
 
-// Compose the final per-entry display name shown in `/v1/models`. The
-// upstream name always leads so an operator scanning the listing sees which
-// upstream each row belongs to before reading the alias-specific part.
+// Compose the alias-local display name — what the operator named the alias
+// (when set) or a synthesized target + rules summary. Independent of which
+// upstream is surfacing the alias; the prefixed listing form prepends the
+// upstream display name at the call site, mirroring the real-model path in
+// `registry.ts`.
 export const composeAliasDisplayName = (input: {
-  upstreamDisplayName: string;
   aliasDisplayName?: string;
   targetDisplayName: string;
   rules: ModelAliasRules;
 }): string => {
-  if (input.aliasDisplayName !== undefined) {
-    return `${input.upstreamDisplayName}: ${input.aliasDisplayName}`;
-  }
-  return `${input.upstreamDisplayName}: ${input.targetDisplayName}${formatAliasRulesSummary(input.rules)}`;
+  if (input.aliasDisplayName !== undefined) return input.aliasDisplayName;
+  return `${input.targetDisplayName}${formatAliasRulesSummary(input.rules)}`;
 };
diff --git a/packages/gateway/src/control-plane/model-aliases/display_test.ts b/packages/gateway/src/control-plane/model-aliases/display_test.ts
index 7ba7700d0..d45a1b339 100644
--- a/packages/gateway/src/control-plane/model-aliases/display_test.ts
+++ b/packages/gateway/src/control-plane/model-aliases/display_test.ts
@@ -45,31 +45,28 @@ describe('composeAliasDisplayName', () => {
   test('uses alias displayName when set, suppressing the rules summary', () => {
     expect(
       composeAliasDisplayName({
-        upstreamDisplayName: 'Azure',
         aliasDisplayName: 'Codex Auto Review',
         targetDisplayName: 'GPT-5.4',
         rules: { reasoning: { effort: 'low' } },
       }),
-    ).toBe('Azure: Codex Auto Review');
+    ).toBe('Codex Auto Review');
   });
 
   test('falls back to target displayName with rules suffix when alias displayName is missing', () => {
     expect(
       composeAliasDisplayName({
-        upstreamDisplayName: 'Azure',
         targetDisplayName: 'GPT-5.4',
         rules: { reasoning: { effort: 'low' } },
       }),
-    ).toBe('Azure: GPT-5.4 (low effort)');
+    ).toBe('GPT-5.4 (low effort)');
   });
 
   test('omits the rules suffix when rules are empty', () => {
     expect(
       composeAliasDisplayName({
-        upstreamDisplayName: 'Azure',
         targetDisplayName: 'GPT-5.4',
         rules: {},
       }),
-    ).toBe('Azure: GPT-5.4');
+    ).toBe('GPT-5.4');
   });
 });
diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts
index 0d08e4445..3d3b5ab2c 100644
--- a/packages/gateway/src/data-plane/models/gemini.ts
+++ b/packages/gateway/src/data-plane/models/gemini.ts
@@ -84,16 +84,15 @@ const loadGeminiModels = async (
     if (!alias.visibleInModelsList) continue;
     for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) {
       if (emission.target.kind !== 'chat') continue;
-      const targetDisplayName = emission.target.display_name ?? emission.target.id;
+      const aliasLocalName = composeAliasDisplayName({
+        aliasDisplayName: alias.displayName,
+        targetDisplayName: emission.target.display_name ?? emission.target.id,
+        rules: alias.rules,
+      });
       aliasEntries.push(toGeminiModel({
         ...emission.target,
         id: aliasPublicId(alias, emission),
-        display_name: composeAliasDisplayName({
-          upstreamDisplayName: emission.provider.name,
-          aliasDisplayName: alias.displayName,
-          targetDisplayName,
-          rules: alias.rules,
-        }),
+        display_name: emission.form === 'prefixed' ? `${emission.provider.name}: ${aliasLocalName}` : aliasLocalName,
         kind: 'chat',
         limits: emission.target.limits ?? {},
       }));
diff --git a/packages/gateway/src/data-plane/models/gemini_test.ts b/packages/gateway/src/data-plane/models/gemini_test.ts
index 6a2d9a887..584054328 100644
--- a/packages/gateway/src/data-plane/models/gemini_test.ts
+++ b/packages/gateway/src/data-plane/models/gemini_test.ts
@@ -449,7 +449,7 @@ test('/v1beta/models appends visible aliases as synthetic Gemini model entries',
       const body = await response.json() as { models: Array<{ name: string; displayName: string; supportedGenerationMethods: string[] }> };
       const aliasEntry = body.models.find(m => m.name === 'models/codex-auto-review');
       if (!aliasEntry) throw new Error('expected codex-auto-review alias entry');
-      assertEquals(aliasEntry.displayName, 'GitHub Copilot (tester): GPT Gemini List (low effort)');
+      assertEquals(aliasEntry.displayName, 'GPT Gemini List (low effort)');
       assertEquals(aliasEntry.supportedGenerationMethods, ['generateContent', 'streamGenerateContent', 'countTokens']);
     },
   );
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index 569a601e2..2d054dc88 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -25,18 +25,17 @@ export const toPublicModel = (model: InternalModel): PublicModel => {
 };
 
 const publicModelForAliasEmission = (alias: ModelAlias, emission: AliasListingEmission): PublicModel => {
-  const { provider, target } = emission;
-  const targetDisplayName = target.display_name ?? target.id;
+  const { provider, target, form } = emission;
+  const aliasLocalName = composeAliasDisplayName({
+    aliasDisplayName: alias.displayName,
+    targetDisplayName: target.display_name ?? target.id,
+    rules: alias.rules,
+  });
   const info: PublicModel = {
     id: aliasPublicId(alias, emission),
     object: 'model',
     type: 'model',
-    display_name: composeAliasDisplayName({
-      upstreamDisplayName: provider.name,
-      aliasDisplayName: alias.displayName,
-      targetDisplayName,
-      rules: alias.rules,
-    }),
+    display_name: form === 'prefixed' ? `${provider.name}: ${aliasLocalName}` : aliasLocalName,
     limits: target.limits ? { ...target.limits } : {},
     kind: target.kind,
     created: alias.createdAt,
diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index 17647565b..f3c45e313 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -765,7 +765,7 @@ test('/v1/models omits an alias whose target is not in any reachable upstream ca
   );
 });
 
-test('/v1/models emits the alias on each reachable upstream + listed form, with display_name composed from the upstream label', async () => {
+test('/v1/models emits the alias on each reachable upstream + listed form; prefixed entries carry the upstream label, unprefixed entries do not', async () => {
   const { repo, apiKey } = await setupAppTest();
 
   (repo.modelAliases as MemoryModelAliasesRepo).setAll([
@@ -814,7 +814,7 @@ test('/v1/models emits the alias on each reachable upstream + listed form, with
       const bare = body.data.find(m => m.id === 'codex-auto-review');
       const prefixed = body.data.find(m => m.id === 'azure/codex-auto-review');
       if (!bare || !prefixed) throw new Error('expected both bare and prefixed alias entries');
-      assertEquals(bare.display_name, 'Azure: Codex Auto Review');
+      assertEquals(bare.display_name, 'Codex Auto Review');
       assertEquals(prefixed.display_name, 'Azure: Codex Auto Review');
     },
   );
@@ -865,7 +865,7 @@ test('/v1/models falls back to target display_name + rules summary when the alia
       const body = await response.json() as { data: Array<{ id: string; display_name: string }> };
       const entry = body.data.find(m => m.id === 'codex-auto-review');
       if (!entry) throw new Error('expected codex-auto-review alias entry');
-      assertEquals(entry.display_name, 'Azure: GPT-5.4 (low effort)');
+      assertEquals(entry.display_name, 'GPT-5.4 (low effort)');
     },
   );
 });
@@ -930,7 +930,7 @@ test('/v1/models honours alias upstreamIds — only emits on the named upstream'
       const body = await response.json() as { data: Array<{ id: string; display_name: string }> };
       const aliasRows = body.data.filter(m => m.id === 'codex-auto-review');
       assertEquals(aliasRows.length, 1);
-      assertEquals(aliasRows[0].display_name, 'Azure: gpt-5.4');
+      assertEquals(aliasRows[0].display_name, 'gpt-5.4');
     },
   );
 });

From 6e3a6d7a554d0ca806d5ebd31c79950454794bfd Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 02:44:34 +0800
Subject: [PATCH 012/170] refactor(aliases): synthesize alias listing entries
 inside getModelsForListing

The three listing endpoints (/v1/models data plane, /api/models control
plane, /v1beta/models Gemini) each independently looped over aliases and
re-built the per-emission entry. Move the fan-out to a single
synthesizeListedAliases() called once inside getModelsForListing(); the
function returns ListedModel[] (ResolvedModel + optional aliasedFrom)
that every surface mapper consumes uniformly.

Side effect: the control-plane /api/models was previously alias-blind,
because the dashboard hit getModels() instead of getModelsForListing().
Now it goes through the shared path and the dashboard Models page
surfaces alias rows with their aliasedFrom provenance.
---
 .../src/control-plane/models/routes.ts        | 22 +++--
 .../src/control-plane/models/routes_test.ts   | 41 +++++++++
 .../src/data-plane/models/alias-listing.ts    | 85 +++++++++++++++++--
 .../gateway/src/data-plane/models/gemini.ts   | 38 ++-------
 .../gateway/src/data-plane/models/load.ts     | 54 ++----------
 .../src/data-plane/providers/registry.ts      | 21 +++--
 6 files changed, 168 insertions(+), 93 deletions(-)

diff --git a/packages/gateway/src/control-plane/models/routes.ts b/packages/gateway/src/control-plane/models/routes.ts
index 3146a3edc..9db27e7f4 100644
--- a/packages/gateway/src/control-plane/models/routes.ts
+++ b/packages/gateway/src/control-plane/models/routes.ts
@@ -1,21 +1,25 @@
 import type { Context } from 'hono';
 
+import type { ListedModel } from '../../data-plane/models/alias-listing.ts';
 import { toPublicModel } from '../../data-plane/models/load.ts';
 import { MODEL_LISTING_FAILURE_MESSAGE } from '../../data-plane/models/shared.ts';
-import { getModels } from '../../data-plane/providers/registry.ts';
+import { getModelsForListing } from '../../data-plane/providers/registry.ts';
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
+import { getRepo } from '../../repo/index.ts';
 import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
 import { getCurrentColo } from '../../runtime/runtime-info.ts';
 import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
 import { ProviderModelsUnavailableError } from '@floway-dev/provider';
-import type { ResolvedModel, UpstreamProviderKind } from '@floway-dev/provider';
+import type { UpstreamProviderKind } from '@floway-dev/provider';
 
 // Same DTO as the public /models endpoint, plus one dashboard-only field:
 // `upstreams` lists every provider binding for this model as { kind, id, name }
 // triples. A single model id can be served by mixed provider kinds (e.g. one
 // azure deployment + one custom upstream both expose `gpt-5.5`), so a flat
-// `provider`/`upstream_ids` split would misrepresent that.
+// `provider`/`upstream_ids` split would misrepresent that. Alias entries
+// carry a single binding (the upstream that resolves their target) and the
+// `aliasedFrom` provenance flows through `toPublicModel`.
 interface ControlPlaneModel extends PublicModel {
   upstreams: { kind: UpstreamProviderKind; id: string; name: string }[];
 }
@@ -24,7 +28,7 @@ interface ControlPlaneModelsResponse extends Omit<PublicModelsResponse, 'data'>
   data: ControlPlaneModel[];
 }
 
-const toControlPlaneModel = (model: ResolvedModel): ControlPlaneModel => ({
+const toControlPlaneModel = (model: ListedModel): ControlPlaneModel => ({
   ...toPublicModel(model),
   upstreams: model.providers.map(binding => ({ kind: binding.providerKind, id: binding.upstream, name: binding.upstreamName })),
 });
@@ -35,8 +39,16 @@ export const controlPlaneModels = async (c: Context) => {
     // like the data-plane /models endpoint. On a session request there is no
     // API key, so this resolves to the user's per-user upstream cap: a user who
     // has had an upstream removed must not see its models in the Models tab.
+    // Aliases come from the same repo singleton the data plane uses, so the
+    // dashboard sees exactly the alias rows the runtime would honour.
     const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
-    const models = await getModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c));
+    const aliases = await getRepo().modelAliases.loadAll();
+    const { models } = await getModelsForListing(
+      effectiveUpstreamIdsFromContext(c),
+      fetcherForUpstream,
+      backgroundSchedulerFromContext(c),
+      aliases,
+    );
     const data = models.map(toControlPlaneModel);
     const response: ControlPlaneModelsResponse = {
       object: 'list',
diff --git a/packages/gateway/src/control-plane/models/routes_test.ts b/packages/gateway/src/control-plane/models/routes_test.ts
index 7c611c31a..0be710c36 100644
--- a/packages/gateway/src/control-plane/models/routes_test.ts
+++ b/packages/gateway/src/control-plane/models/routes_test.ts
@@ -1,5 +1,6 @@
 import { test } from 'vitest';
 
+import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
 import { buildCustomUpstreamRecord, copilotModels, requestApp, setupAppTest } from '../../test-helpers.ts';
 import type { UpstreamRecord } from '@floway-dev/provider';
 import { assertEquals, jsonResponse, withMockedFetch } from '@floway-dev/test-utils';
@@ -113,3 +114,43 @@ test('/api/models is scoped to the caller\'s effective upstreams — a removed u
     assertEquals(ids.includes('azure-public'), false);
   });
 });
+
+test('/api/models appends visible alias entries with aliasedFrom alongside real catalog rows', async () => {
+  const { apiKey, repo } = await setupAppTest();
+  await repo.upstreams.save(buildCustomUpstreamRecord({ id: 'up_custom_models', sortOrder: 100 }));
+
+  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+    {
+      alias: 'codex-auto-review',
+      displayName: 'Codex Auto Review',
+      targetModelId: 'custom-model',
+      upstreamIds: [],
+      rules: { reasoning: { effort: 'low' } },
+      visibleInModelsList: true,
+      onConflict: 'real-only',
+      createdAt: 1_700_000_000,
+    },
+    {
+      alias: 'hidden-alias',
+      targetModelId: 'custom-model',
+      upstreamIds: [],
+      rules: {},
+      visibleInModelsList: false,
+      onConflict: 'real-only',
+      createdAt: 1_700_000_001,
+    },
+  ]);
+
+  await withMockedFetch(modelsFetchHandler, async () => {
+    const response = await requestApp('/api/models', { headers: { 'x-api-key': apiKey.key } });
+    assertEquals(response.status, 200);
+    const body = (await response.json()) as { data: Array<{ id: string; display_name: string; upstreams: Array<{ kind: string; id: string; name: string }>; aliasedFrom?: { targetModelId: string; rules: Record<string, unknown> } }> };
+    const aliasEntry = body.data.find(model => model.id === 'codex-auto-review');
+    if (!aliasEntry) throw new Error('expected codex-auto-review alias entry on /api/models');
+    assertEquals(aliasEntry.display_name, 'Codex Auto Review');
+    assertEquals(aliasEntry.upstreams, [{ kind: 'custom', id: 'up_custom_models', name: 'Custom Provider' }]);
+    assertEquals(aliasEntry.aliasedFrom?.targetModelId, 'custom-model');
+    assertEquals(aliasEntry.aliasedFrom?.rules, { reasoning: { effort: 'low' } });
+    assertEquals(body.data.some(model => model.id === 'hidden-alias'), false);
+  });
+});
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index e41400111..2f1880fe5 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -1,23 +1,33 @@
+import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts';
 import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
-import type { ModelProviderInstance, UpstreamModel } from '@floway-dev/provider';
+import type { PublicModel } from '@floway-dev/protocols/common';
+import type { ModelProviderInstance, ProviderModelRecord, ResolvedModel, UpstreamModel } from '@floway-dev/provider';
 
 // One emission slot for an alias: a (provider, addressable form) pair where
 // the provider's raw catalog carries the alias target id, plus the matched
 // UpstreamModel so the synthesized listing entry can borrow the target's
 // limits, owner, and cost without re-querying.
-export interface AliasListingEmission {
+interface AliasListingEmission {
   provider: ModelProviderInstance;
   form: 'unprefixed' | 'prefixed';
   target: UpstreamModel;
 }
 
-// Per-upstream alias enumeration shared by `/v1/models` and the Gemini
-// `/models` listings. An alias with empty `upstreamIds` matches every
-// reachable provider; a non-empty list narrows the candidate set. Per
+// A `ResolvedModel` that may carry an `aliasedFrom` provenance — what
+// `getModelsForListing` returns when alias entries have been interleaved into
+// the catalog. Each listing endpoint's mapper (`toPublicModel`,
+// `toControlPlaneModel`, `toGeminiModel`) reads the same shape, so the alias
+// fan-out happens exactly once instead of being re-implemented per surface.
+export type ListedModel = ResolvedModel & {
+  readonly aliasedFrom?: NonNullable<PublicModel['aliasedFrom']>;
+};
+
+// Per-upstream alias enumeration. An alias with empty `upstreamIds` matches
+// every reachable provider; a non-empty list narrows the candidate set. Per
 // provider, the alias emits one entry per `listed` form when its target sits
 // in the upstream's raw catalog. Upstreams that do not carry the target — or
 // whose operator disabled the target — drop the alias entirely for that row.
-export const aliasListingEmissions = (
+const aliasListingEmissions = (
   alias: ModelAlias,
   providers: readonly ModelProviderInstance[],
   rawCatalogs: ReadonlyMap<string, readonly UpstreamModel[]>,
@@ -46,7 +56,68 @@ export const aliasListingEmissions = (
 // The public id form an alias emission carries on the wire. Bare alias name
 // for the unprefixed form; provider prefix + alias name for the prefixed
 // form. Mirrors how real models are surfaced in the same listing pass.
-export const aliasPublicId = (alias: ModelAlias, emission: AliasListingEmission): string => {
+const aliasPublicId = (alias: ModelAlias, emission: AliasListingEmission): string => {
   const cfg = emission.provider.modelPrefix;
   return emission.form === 'prefixed' && cfg !== null ? `${cfg.prefix}${alias.alias}` : alias.alias;
 };
+
+// Turn an alias emission into a `ListedModel` that walks the same listing
+// pipeline as real catalog entries. The synthesized `providers` array carries
+// a single binding pointing at the alias's target on this upstream, so the
+// dashboard's per-binding view renders correctly without alias-specific
+// branching. `aliasedFrom` rides out as the public protocol extension.
+//
+// Display name: the alias-local part (operator displayName, or
+// `${target.display_name} (rules summary)`) lives by itself for the
+// `unprefixed` listing form; the `prefixed` form mirrors the real-model path
+// in `registry.ts` and prepends `${provider.name}: ` so the upstream is
+// visible at a glance.
+const aliasEmissionToListedModel = (alias: ModelAlias, emission: AliasListingEmission): ListedModel => {
+  const { provider, target, form } = emission;
+  const aliasLocalName = composeAliasDisplayName({
+    aliasDisplayName: alias.displayName,
+    targetDisplayName: target.display_name ?? target.id,
+    rules: alias.rules,
+  });
+  const record: ProviderModelRecord = {
+    upstream: provider.upstream,
+    upstreamName: provider.name,
+    providerKind: provider.providerKind,
+    provider: provider.provider,
+    upstreamModel: target,
+    enabledFlags: target.enabledFlags,
+    supportsResponsesItemReference: provider.supportsResponsesItemReference,
+  };
+  const { providerData: _providerData, endpoints, id: _targetId, display_name: _targetDisplay, created: _targetCreated, ...rest } = target;
+  return {
+    ...rest,
+    id: aliasPublicId(alias, emission),
+    display_name: form === 'prefixed' ? `${provider.name}: ${aliasLocalName}` : aliasLocalName,
+    created: alias.createdAt,
+    endpoints: { ...endpoints },
+    providers: [record],
+    aliasedFrom: {
+      targetModelId: alias.targetModelId,
+      upstreamIds: alias.upstreamIds,
+      rules: alias.rules,
+      onConflict: alias.onConflict,
+    },
+  };
+};
+
+// Single-pass alias fan-out used by every listing surface. Visibility filter
+// honoured here; per-surface callers just map ListedModel → their own DTO.
+export const synthesizeListedAliases = (
+  aliases: readonly ModelAlias[],
+  providers: readonly ModelProviderInstance[],
+  rawCatalogs: ReadonlyMap<string, readonly UpstreamModel[]>,
+): ListedModel[] => {
+  const out: ListedModel[] = [];
+  for (const alias of aliases) {
+    if (!alias.visibleInModelsList) continue;
+    for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) {
+      out.push(aliasEmissionToListedModel(alias, emission));
+    }
+  }
+  return out;
+};
diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts
index 3d3b5ab2c..f01579f9a 100644
--- a/packages/gateway/src/data-plane/models/gemini.ts
+++ b/packages/gateway/src/data-plane/models/gemini.ts
@@ -1,8 +1,6 @@
 import type { Context } from 'hono';
 
-import { aliasListingEmissions, aliasPublicId } from './alias-listing.ts';
 import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts';
-import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts';
 import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
@@ -34,6 +32,10 @@ interface GeminiModel {
   cost?: ModelPricing;
 }
 
+// Gemini's Model resource is closed (no `aliasedFrom` extension), so an alias
+// arrives here through `getModelsForListing` looking like any other chat
+// model — `id`, `display_name`, `limits`, `cost` already finalized by
+// `synthesizeListedAliases` — and the mapper has no alias-specific branch.
 const toGeminiModel = (model: InternalModel): GeminiModel => {
   const limits = model.limits;
   const inputTokenLimit = limits.max_prompt_tokens ?? limits.max_context_window_tokens;
@@ -72,33 +74,11 @@ const loadGeminiModels = async (
   scheduler: BackgroundScheduler,
   aliases: readonly ModelAlias[],
 ): Promise<GeminiModel[]> => {
-  const { models, providers, rawCatalogs } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler);
-  // Only chat models are representable in the Gemini /models shape.
-  const realChatEntries = models.filter(model => model.kind === 'chat').map(toGeminiModel);
-  // Per-upstream alias enumeration mirrors `/v1/models`. Each emission becomes
-  // one Gemini Model entry whose id and displayName reflect that specific
-  // (provider, addressable form) pair; targets of the wrong kind never reach
-  // here because they were already filtered out of the catalog walk.
-  const aliasEntries: GeminiModel[] = [];
-  for (const alias of aliases) {
-    if (!alias.visibleInModelsList) continue;
-    for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) {
-      if (emission.target.kind !== 'chat') continue;
-      const aliasLocalName = composeAliasDisplayName({
-        aliasDisplayName: alias.displayName,
-        targetDisplayName: emission.target.display_name ?? emission.target.id,
-        rules: alias.rules,
-      });
-      aliasEntries.push(toGeminiModel({
-        ...emission.target,
-        id: aliasPublicId(alias, emission),
-        display_name: emission.form === 'prefixed' ? `${emission.provider.name}: ${aliasLocalName}` : aliasLocalName,
-        kind: 'chat',
-        limits: emission.target.limits ?? {},
-      }));
-    }
-  }
-  return [...realChatEntries, ...aliasEntries];
+  const { models } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler, aliases);
+  // Only chat models are representable in the Gemini /models shape — alias
+  // entries whose target is non-chat fall out of this filter just like real
+  // non-chat catalog entries do.
+  return models.filter(model => model.kind === 'chat').map(toGeminiModel);
 };
 
 export const serveGeminiModels = async (c: Context): Promise<Response> => {
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index 2d054dc88..5f2a32d64 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -1,12 +1,15 @@
-import { aliasListingEmissions, aliasPublicId, type AliasListingEmission } from './alias-listing.ts';
-import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts';
+import type { ListedModel } from './alias-listing.ts';
 import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
 import { getModelsForListing } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
 import type { Fetcher, InternalModel } from '@floway-dev/provider';
 
-export const toPublicModel = (model: InternalModel): PublicModel => {
+// Maps a single listed catalog entry (real or alias) to the wire DTO. Alias
+// entries arrive with `aliasedFrom` pre-populated by
+// `synthesizeListedAliases`; this mapper just rides it through so every
+// listing surface sees the same provenance field.
+export const toPublicModel = (model: InternalModel & { aliasedFrom?: ListedModel['aliasedFrom'] }): PublicModel => {
   const info: PublicModel = {
     id: model.id,
     object: 'model',
@@ -21,34 +24,7 @@ export const toPublicModel = (model: InternalModel): PublicModel => {
     info.created_at = new Date(model.created * 1000).toISOString();
   }
   if (model.cost) info.cost = model.cost;
-  return info;
-};
-
-const publicModelForAliasEmission = (alias: ModelAlias, emission: AliasListingEmission): PublicModel => {
-  const { provider, target, form } = emission;
-  const aliasLocalName = composeAliasDisplayName({
-    aliasDisplayName: alias.displayName,
-    targetDisplayName: target.display_name ?? target.id,
-    rules: alias.rules,
-  });
-  const info: PublicModel = {
-    id: aliasPublicId(alias, emission),
-    object: 'model',
-    type: 'model',
-    display_name: form === 'prefixed' ? `${provider.name}: ${aliasLocalName}` : aliasLocalName,
-    limits: target.limits ? { ...target.limits } : {},
-    kind: target.kind,
-    created: alias.createdAt,
-    created_at: new Date(alias.createdAt * 1000).toISOString(),
-    aliasedFrom: {
-      targetModelId: alias.targetModelId,
-      upstreamIds: alias.upstreamIds,
-      rules: alias.rules,
-      onConflict: alias.onConflict,
-    },
-  };
-  info.owned_by = target.owned_by ?? provider.upstream;
-  if (target.cost) info.cost = target.cost;
+  if (model.aliasedFrom) info.aliasedFrom = model.aliasedFrom;
   return info;
 };
 
@@ -58,20 +34,8 @@ export const loadModels = async (
   scheduler: BackgroundScheduler,
   aliases: readonly ModelAlias[],
 ): Promise<PublicModelsResponse> => {
-  const { models, providers, rawCatalogs } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler);
-  const realEntries = models.map(toPublicModel);
-  // Per-upstream alias enumeration: for each visible alias, emit one entry per
-  // (provider, addressable form) pair where the provider can resolve the
-  // alias's target. Upstreams that do not carry the target produce no entry —
-  // the alias listing is strictly anchored to "can be served from here".
-  const aliasEntries: PublicModel[] = [];
-  for (const alias of aliases) {
-    if (!alias.visibleInModelsList) continue;
-    for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) {
-      aliasEntries.push(publicModelForAliasEmission(alias, emission));
-    }
-  }
-  const data = [...realEntries, ...aliasEntries];
+  const { models } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler, aliases);
+  const data = models.map(toPublicModel);
   return {
     object: 'list',
     has_more: false,
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index c8556ca45..e6dcb214f 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -2,6 +2,7 @@ import { fetchUpstreamModelsCached } from './models-cache.ts';
 import type { ModelAlias, ModelAliasRules } from '../../control-plane/model-aliases/types.ts';
 import { getRepo } from '../../repo/index.ts';
 import { matchAlias } from '../model-aliases/match.ts';
+import { synthesizeListedAliases, type ListedModel } from '../models/alias-listing.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import { type ModelEndpointKey, type ModelEndpoints, kindForEndpoints } from '@floway-dev/protocols/common';
 import type { InternalModel, ModelProviderInstance, ProviderModelRecord, ResolvedModel, Fetcher, UpstreamModel, UpstreamProviderKind, UpstreamRecord } from '@floway-dev/provider';
@@ -271,13 +272,14 @@ export const getModels = async (
 };
 
 // Returns the merged public model list AND the per-upstream raw catalogs and
-// provider instances. Listing surfaces (`/v1/models`, Gemini `/models`) use the
-// extra channels to synthesize alias entries that reflect which upstreams can
-// actually serve each alias's target and in which addressable form. Computing
-// both off the same `collectProviderModels` pass keeps catalog fetches to one
-// round per upstream regardless of how many alias rows reference each target.
+// provider instances. Listing surfaces (`/v1/models`, `/api/models`, Gemini
+// `/models`) use the same call so alias entries — synthesized once via
+// `synthesizeListedAliases` against the same `(providers, rawCatalogs)` pair —
+// are interleaved into the catalog before it returns. Per-surface mappers
+// then walk one uniform `ListedModel[]` instead of re-implementing alias
+// fan-out three times.
 export interface PublicModelsListing {
-  models: ResolvedModel[];
+  models: ListedModel[];
   providers: readonly ModelProviderInstance[];
   rawCatalogs: ReadonlyMap<string, readonly UpstreamModel[]>;
 }
@@ -286,6 +288,7 @@ export const getModelsForListing = async (
   upstreamFilter: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
+  aliases: readonly ModelAlias[],
 ): Promise<PublicModelsListing> => {
   const providers = await listModelProviders(upstreamFilter);
   if (providers.length === 0) {
@@ -294,7 +297,11 @@ export const getModelsForListing = async (
 
   const { models, sawSuccess, lastError, rawCatalogs } = await collectProviderModels(providers, fetcherForUpstream, scheduler);
 
-  if (sawSuccess) return { models: models.sort((a, b) => compareModelIds(a.id, b.id)), providers, rawCatalogs };
+  if (sawSuccess) {
+    const real = models.sort((a, b) => compareModelIds(a.id, b.id));
+    const aliasEntries = synthesizeListedAliases(aliases, providers, rawCatalogs);
+    return { models: [...real, ...aliasEntries], providers, rawCatalogs };
+  }
   if (lastError) throw lastError;
   return { models: [], providers, rawCatalogs };
 };

From 06c789b29b6180d16fc79721873064b352d85566 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 02:56:30 +0800
Subject: [PATCH 013/170] fix(aliases): dedupe alias listing emissions whose
 public id collides
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two no-prefix upstreams both serving the alias target produced two
identical `codex-auto-review` rows in /v1/models and /api/models —
visible in the dashboard Models list as duplicate cards.

mergeIntoCatalog dedupes real models the same way; alias entries now go
through the equivalent union (endpoints OR-ed, kind re-derived, provider
bindings concatenated) so a single alias surfaces as one row whose
`upstreams` field carries every backing binding.
---
 .../src/data-plane/models/alias-listing.ts    | 28 ++++++--
 .../src/data-plane/models/serve_test.ts       | 65 +++++++++++++++++++
 .../src/data-plane/providers/registry.ts      |  2 +-
 3 files changed, 89 insertions(+), 6 deletions(-)

diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index 2f1880fe5..d1de04b09 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -1,6 +1,7 @@
 import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts';
 import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
-import type { PublicModel } from '@floway-dev/protocols/common';
+import { unionEndpoints } from '../providers/registry.ts';
+import { kindForEndpoints, type PublicModel } from '@floway-dev/protocols/common';
 import type { ModelProviderInstance, ProviderModelRecord, ResolvedModel, UpstreamModel } from '@floway-dev/provider';
 
 // One emission slot for an alias: a (provider, addressable form) pair where
@@ -106,18 +107,35 @@ const aliasEmissionToListedModel = (alias: ModelAlias, emission: AliasListingEmi
 };
 
 // Single-pass alias fan-out used by every listing surface. Visibility filter
-// honoured here; per-surface callers just map ListedModel → their own DTO.
+// honoured here. Emissions whose synthesized public id collides — two
+// no-prefix upstreams both serving the alias target, or two prefix-aliased
+// upstreams sharing a prefix — merge into one row with the bindings
+// appended, mirroring how `mergeIntoCatalog` collapses duplicate real-model
+// ids; the dashboard then renders a single alias row whose `upstreams` lists
+// every backing binding instead of N identical rows.
 export const synthesizeListedAliases = (
   aliases: readonly ModelAlias[],
   providers: readonly ModelProviderInstance[],
   rawCatalogs: ReadonlyMap<string, readonly UpstreamModel[]>,
 ): ListedModel[] => {
-  const out: ListedModel[] = [];
+  const byId = new Map<string, ListedModel>();
   for (const alias of aliases) {
     if (!alias.visibleInModelsList) continue;
     for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) {
-      out.push(aliasEmissionToListedModel(alias, emission));
+      const next = aliasEmissionToListedModel(alias, emission);
+      const existing = byId.get(next.id);
+      if (existing === undefined) {
+        byId.set(next.id, next);
+        continue;
+      }
+      const endpoints = unionEndpoints(existing.endpoints, next.endpoints);
+      byId.set(next.id, {
+        ...existing,
+        endpoints,
+        kind: kindForEndpoints(endpoints),
+        providers: [...existing.providers, ...next.providers],
+      });
     }
   }
-  return out;
+  return [...byId.values()];
 };
diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index f3c45e313..cc5227967 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -934,3 +934,68 @@ test('/v1/models honours alias upstreamIds — only emits on the named upstream'
     },
   );
 });
+
+test('/v1/models merges alias emissions whose synthesized public id collides — one row, multiple backing upstreams', async () => {
+  const { repo, apiKey } = await setupAppTest();
+
+  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+    {
+      alias: 'codex-auto-review',
+      displayName: 'Codex Auto Review',
+      targetModelId: 'gpt-5.4',
+      upstreamIds: [],
+      rules: { reasoning: { effort: 'low' } },
+      visibleInModelsList: true,
+      onConflict: 'real-only',
+      createdAt: 1_700_000_000,
+    },
+  ]);
+
+  // Two no-prefix upstreams both serve gpt-5.4 — without dedupe, the alias
+  // would emit two `codex-auto-review` rows. With dedupe, the dashboard sees
+  // one row whose `upstreams` field lists both bindings, exactly like real
+  // models that exist on multiple upstreams.
+  await repo.upstreams.save(buildCustomUpstreamRecord({
+    id: 'up_alpha',
+    name: 'Alpha',
+    sortOrder: 100,
+    config: {
+      baseUrl: 'https://alpha.example.com',
+      authStyle: 'bearer',
+      apiKey: 'sk-alpha',
+      endpoints: { chatCompletions: {} },
+    },
+  }));
+  await repo.upstreams.save(buildCustomUpstreamRecord({
+    id: 'up_beta',
+    name: 'Beta',
+    sortOrder: 200,
+    config: {
+      baseUrl: 'https://beta.example.com',
+      authStyle: 'bearer',
+      apiKey: 'sk-beta',
+      endpoints: { chatCompletions: {} },
+    },
+  }));
+
+  await withMockedFetch(
+    request => {
+      const url = new URL(request.url);
+      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (url.pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+      }
+      if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([]));
+      if (url.pathname === '/v1/models' && (url.hostname === 'alpha.example.com' || url.hostname === 'beta.example.com')) {
+        return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
+      }
+      throw new Error(`Unhandled fetch ${request.url}`);
+    },
+    async () => {
+      const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
+      const body = await response.json() as { data: Array<{ id: string }> };
+      const rows = body.data.filter(m => m.id === 'codex-auto-review');
+      assertEquals(rows.length, 1);
+    },
+  );
+});
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index e6dcb214f..53619a613 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -89,7 +89,7 @@ export const listModelProviders = async (
 // Merge two capability maps: a key present in either side is present in the
 // result, and its sub-capability flags are OR-ed so a sub-cap advertised by
 // either provider survives.
-const unionEndpoints = (a: ModelEndpoints, b: ModelEndpoints): ModelEndpoints => {
+export const unionEndpoints = (a: ModelEndpoints, b: ModelEndpoints): ModelEndpoints => {
   const result: ModelEndpoints = { ...a };
   for (const key of Object.keys(b) as ModelEndpointKey[]) {
     const merged = { ...result[key], ...b[key] };

From e118cd1b0f056b3145abaea627d10b7da2be6735 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 03:07:47 +0800
Subject: [PATCH 014/170] feat(web): render alias rule badges on dashboard
 model rows

Each rule field on an alias entry's aliasedFrom now appears as its own
badge appended after the existing context/prompt/output badges, so the
seed codex-auto-review shows "low effort" next to its upstream pills.

Per-field labels move into a shared formatAliasRuleBadges helper in
@floway-dev/protocols/common; the gateway's formatAliasRulesSummary
derives from it (same wording, joined with commas, wrapped in parens
when used as the synthesized display-name suffix). Dashboard and
gateway therefore stay in lockstep on rule labels without parallel
formatters drifting.
---
 apps/web/src/api/types.ts                     |  4 ++-
 .../src/components/models/ModelInfoBar.vue    |  7 +++++
 .../control-plane/model-aliases/display.ts    | 26 +++++--------------
 packages/protocols/src/common/models.ts       | 22 ++++++++++++++++
 4 files changed, 39 insertions(+), 20 deletions(-)

diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts
index 1dd0c2c03..4ff2f417a 100644
--- a/apps/web/src/api/types.ts
+++ b/apps/web/src/api/types.ts
@@ -6,10 +6,11 @@ import type {
   ModelEndpoints,
   ModelKind,
   ModelPricing,
+  PublicModelAliasedFrom,
 } from '@floway-dev/protocols/common';
 import type { AddressableForm, ModelPrefixConfig } from '@floway-dev/provider/model-prefix';
 
-export type { BillingDimension, ModelEndpointKey, ModelEndpoints, ModelKind, ModelPricing };
+export type { BillingDimension, ModelEndpointKey, ModelEndpoints, ModelKind, ModelPricing, PublicModelAliasedFrom };
 export type { AddressableForm, ModelPrefixConfig };
 
 export type UpstreamProviderKind = 'custom' | 'azure' | 'copilot' | 'codex' | 'claude-code' | 'ollama';
@@ -326,6 +327,7 @@ export interface PublicModel {
   endpoints?: Record<string, ModelEndpointInfo>;
   cost?: ModelPricing;
   kind?: ModelKind;
+  aliasedFrom?: PublicModelAliasedFrom;
 }
 
 export interface ControlPlaneModel extends PublicModel {
diff --git a/apps/web/src/components/models/ModelInfoBar.vue b/apps/web/src/components/models/ModelInfoBar.vue
index f8bf98b6e..b66b53a70 100644
--- a/apps/web/src/components/models/ModelInfoBar.vue
+++ b/apps/web/src/components/models/ModelInfoBar.vue
@@ -1,6 +1,7 @@
 <script setup lang="ts">
 import type { ControlPlaneModel } from '../../api/types.ts';
 import { providerBadgeClass, providerMeta } from '../upstreams/provider-meta.ts';
+import { formatAliasRuleBadges } from '@floway-dev/protocols/common';
 
 defineProps<{
   model: ControlPlaneModel;
@@ -43,6 +44,12 @@ const formatTokenLimit = (n: number) => {
           <span v-if="model.limits?.max_output_tokens" class="text-[10px] font-mono px-2 py-0.5 rounded-full bg-surface-600 text-gray-400">
             output: {{ formatTokenLimit(model.limits.max_output_tokens) }}
           </span>
+          <span
+            v-for="badge in (model.aliasedFrom ? formatAliasRuleBadges(model.aliasedFrom.rules) : [])"
+            :key="badge"
+            class="text-[10px] font-mono px-2 py-0.5 rounded-full bg-surface-600 text-gray-400"
+            :title="`alias rule from ${model.aliasedFrom?.targetModelId}`"
+          >{{ badge }}</span>
         </div>
       </div>
       <button class="btn-ghost text-[11px] flex shrink-0 items-center gap-1" @click="$emit('clear')">
diff --git a/packages/gateway/src/control-plane/model-aliases/display.ts b/packages/gateway/src/control-plane/model-aliases/display.ts
index cb82bc75f..1fc9e6181 100644
--- a/packages/gateway/src/control-plane/model-aliases/display.ts
+++ b/packages/gateway/src/control-plane/model-aliases/display.ts
@@ -1,25 +1,13 @@
 import type { ModelAliasRules } from './types.ts';
+import { formatAliasRuleBadges } from '@floway-dev/protocols/common';
 
-// Render the rule set as a parenthesized, comma-joined string so the
-// `/v1/models` listing can suffix it onto the target model's display name when
-// the operator did not supply an explicit alias `displayName`. Empty rules
-// produce an empty string (no parentheses); the join order is fixed across
-// fields so a given rule set always renders the same way.
-//
-// `anthropicBeta` is sorted at format time so two operators carrying the same
-// token set in different orders see the same label.
+// Render the closed rule set as a parenthesized suffix the gateway appends to
+// the target model's display name when the operator did not supply an
+// explicit alias `displayName`. The per-rule labels come from the protocol's
+// shared `formatAliasRuleBadges` so the dashboard's per-badge view and this
+// inline suffix always agree on wording and order.
 export const formatAliasRulesSummary = (rules: ModelAliasRules): string => {
-  const parts: string[] = [];
-  if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`);
-  if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`);
-  if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning');
-  if (rules.reasoning?.summary !== undefined) parts.push(`${rules.reasoning.summary} summary`);
-  if (rules.verbosity !== undefined) parts.push(`${rules.verbosity} verbosity`);
-  if (rules.serviceTier !== undefined) parts.push(`${rules.serviceTier} tier`);
-  if (rules.anthropicSpeed !== undefined) parts.push(`${rules.anthropicSpeed} speed`);
-  if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
-    parts.push(rules.anthropicBeta.toSorted().join('/'));
-  }
+  const parts = formatAliasRuleBadges(rules);
   return parts.length > 0 ? ` (${parts.join(', ')})` : '';
 };
 
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index 383e0ffe0..c05904787 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -136,6 +136,28 @@ export interface PublicModelAliasedFrom {
   onConflict: 'alias-only' | 'real-only' | 'both-real-first' | 'both-alias-first';
 }
 
+// Per-rule short labels for the closed knob set an alias may lock. Returned
+// in the deterministic order the dashboard and the synthesized display name
+// both render, so the order an operator sees stays stable across surfaces
+// regardless of how the JSON key order arrived. Each entry is meant to render
+// as its own badge in the dashboard `/models` row and is joined with `, ` to
+// build the parenthesized rules summary the gateway appends when an alias has
+// no explicit `displayName`.
+export const formatAliasRuleBadges = (rules: PublicModelAliasedFrom['rules']): string[] => {
+  const parts: string[] = [];
+  if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`);
+  if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`);
+  if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning');
+  if (rules.reasoning?.summary !== undefined) parts.push(`${rules.reasoning.summary} summary`);
+  if (rules.verbosity !== undefined) parts.push(`${rules.verbosity} verbosity`);
+  if (rules.serviceTier !== undefined) parts.push(`${rules.serviceTier} tier`);
+  if (rules.anthropicSpeed !== undefined) parts.push(`${rules.anthropicSpeed} speed`);
+  if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
+    parts.push([...rules.anthropicBeta].sort().join('/'));
+  }
+  return parts;
+};
+
 export interface PublicModelsResponse {
   // OpenAI container
   object: 'list';

From f98aa6c995f52be106ba1f790fa158a8f2ecd7f3 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 03:19:06 +0800
Subject: [PATCH 015/170] feat(web): alias badges read as key: value with
 outline style
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Each alias entry's row now leads with `alias of: <target>`, followed by
one badge per rule in `label: value` form (or label-only for boolean
toggles like `adaptive reasoning`). The badges use an outline border
with no fill and low contrast — distinct from the highlighted upstream
pills, and lighter than the filled context/prompt/output limits.

The shared helper now returns structured `{ label, value? }` items
instead of preformatted strings, so each surface can format them as it
likes. The gateway's parenthesized display-name suffix independently
keeps its compact `value label` form.
---
 .../src/components/models/ModelInfoBar.vue    | 22 ++++++----
 .../control-plane/model-aliases/display.ts    | 23 +++++++---
 packages/protocols/src/common/models.ts       | 42 +++++++++++--------
 3 files changed, 57 insertions(+), 30 deletions(-)

diff --git a/apps/web/src/components/models/ModelInfoBar.vue b/apps/web/src/components/models/ModelInfoBar.vue
index b66b53a70..a70fa7f93 100644
--- a/apps/web/src/components/models/ModelInfoBar.vue
+++ b/apps/web/src/components/models/ModelInfoBar.vue
@@ -1,7 +1,7 @@
 <script setup lang="ts">
-import type { ControlPlaneModel } from '../../api/types.ts';
+import type { ControlPlaneModel, PublicModelAliasedFrom } from '../../api/types.ts';
 import { providerBadgeClass, providerMeta } from '../upstreams/provider-meta.ts';
-import { formatAliasRuleBadges } from '@floway-dev/protocols/common';
+import { formatAliasRuleBadges, type AliasRuleBadge } from '@floway-dev/protocols/common';
 
 defineProps<{
   model: ControlPlaneModel;
@@ -14,6 +14,15 @@ const formatTokenLimit = (n: number) => {
   if (n >= 1_000) return `${(n / 1_000).toFixed(n % 1_000 === 0 ? 0 : 1)}k`;
   return n.toString();
 };
+
+// `alias of: <target>` always leads the alias badge sequence so the operator
+// reading the row sees what the alias resolves to before scanning the rule
+// pills. The rule badges follow in the order `formatAliasRuleBadges`
+// returns, keeping dashboard and any future alias-aware tooling in lockstep.
+const aliasBadges = (aliasedFrom: PublicModelAliasedFrom): AliasRuleBadge[] => [
+  { label: 'alias of', value: aliasedFrom.targetModelId },
+  ...formatAliasRuleBadges(aliasedFrom.rules),
+];
 </script>
 
 <template>
@@ -45,11 +54,10 @@ const formatTokenLimit = (n: number) => {
             output: {{ formatTokenLimit(model.limits.max_output_tokens) }}
           </span>
           <span
-            v-for="badge in (model.aliasedFrom ? formatAliasRuleBadges(model.aliasedFrom.rules) : [])"
-            :key="badge"
-            class="text-[10px] font-mono px-2 py-0.5 rounded-full bg-surface-600 text-gray-400"
-            :title="`alias rule from ${model.aliasedFrom?.targetModelId}`"
-          >{{ badge }}</span>
+            v-for="badge in (model.aliasedFrom ? aliasBadges(model.aliasedFrom) : [])"
+            :key="`${badge.label}:${badge.value ?? ''}`"
+            class="text-[10px] font-mono px-2 py-0.5 rounded-full border border-white/[0.08] text-gray-400"
+          >{{ badge.label }}{{ badge.value !== undefined ? `: ${badge.value}` : '' }}</span>
         </div>
       </div>
       <button class="btn-ghost text-[11px] flex shrink-0 items-center gap-1" @click="$emit('clear')">
diff --git a/packages/gateway/src/control-plane/model-aliases/display.ts b/packages/gateway/src/control-plane/model-aliases/display.ts
index 1fc9e6181..80b567488 100644
--- a/packages/gateway/src/control-plane/model-aliases/display.ts
+++ b/packages/gateway/src/control-plane/model-aliases/display.ts
@@ -1,13 +1,26 @@
 import type { ModelAliasRules } from './types.ts';
-import { formatAliasRuleBadges } from '@floway-dev/protocols/common';
 
 // Render the closed rule set as a parenthesized suffix the gateway appends to
 // the target model's display name when the operator did not supply an
-// explicit alias `displayName`. The per-rule labels come from the protocol's
-// shared `formatAliasRuleBadges` so the dashboard's per-badge view and this
-// inline suffix always agree on wording and order.
+// explicit alias `displayName`. The wording stays compact (`value label`,
+// joined with commas) because the suffix has to fit alongside the target
+// name in narrow listings — the dashboard's per-badge view uses
+// `formatAliasRuleBadges` for the self-describing `label: value` form.
+//
+// `anthropicBeta` is sorted at format time so two operators carrying the same
+// token set in different orders see the same label.
 export const formatAliasRulesSummary = (rules: ModelAliasRules): string => {
-  const parts = formatAliasRuleBadges(rules);
+  const parts: string[] = [];
+  if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`);
+  if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`);
+  if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning');
+  if (rules.reasoning?.summary !== undefined) parts.push(`${rules.reasoning.summary} summary`);
+  if (rules.verbosity !== undefined) parts.push(`${rules.verbosity} verbosity`);
+  if (rules.serviceTier !== undefined) parts.push(`${rules.serviceTier} tier`);
+  if (rules.anthropicSpeed !== undefined) parts.push(`${rules.anthropicSpeed} speed`);
+  if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
+    parts.push([...rules.anthropicBeta].sort().join('/'));
+  }
   return parts.length > 0 ? ` (${parts.join(', ')})` : '';
 };
 
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index c05904787..7b058edde 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -136,26 +136,32 @@ export interface PublicModelAliasedFrom {
   onConflict: 'alias-only' | 'real-only' | 'both-real-first' | 'both-alias-first';
 }
 
-// Per-rule short labels for the closed knob set an alias may lock. Returned
-// in the deterministic order the dashboard and the synthesized display name
-// both render, so the order an operator sees stays stable across surfaces
-// regardless of how the JSON key order arrived. Each entry is meant to render
-// as its own badge in the dashboard `/models` row and is joined with `, ` to
-// build the parenthesized rules summary the gateway appends when an alias has
-// no explicit `displayName`.
-export const formatAliasRuleBadges = (rules: PublicModelAliasedFrom['rules']): string[] => {
-  const parts: string[] = [];
-  if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`);
-  if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`);
-  if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning');
-  if (rules.reasoning?.summary !== undefined) parts.push(`${rules.reasoning.summary} summary`);
-  if (rules.verbosity !== undefined) parts.push(`${rules.verbosity} verbosity`);
-  if (rules.serviceTier !== undefined) parts.push(`${rules.serviceTier} tier`);
-  if (rules.anthropicSpeed !== undefined) parts.push(`${rules.anthropicSpeed} speed`);
+// One badge per rule field on an alias, in a `${label}` / `${label}: ${value}`
+// shape the dashboard renders inline next to the model row. Returned in a
+// deterministic order so the badge sequence stays stable across surfaces and
+// across JSON key arrivals. Boolean toggles render label-only (no colon);
+// every other field renders as `${label}: ${value}`. The gateway's
+// `formatAliasRulesSummary` uses its own labels for the parenthesized
+// display-name suffix — the two surfaces deliberately diverge so the suffix
+// stays compact while the badge view stays self-describing.
+export interface AliasRuleBadge {
+  label: string;
+  value?: string;
+}
+
+export const formatAliasRuleBadges = (rules: PublicModelAliasedFrom['rules']): AliasRuleBadge[] => {
+  const out: AliasRuleBadge[] = [];
+  if (rules.reasoning?.effort !== undefined) out.push({ label: 'effort', value: rules.reasoning.effort });
+  if (rules.reasoning?.budgetTokens !== undefined) out.push({ label: 'reasoning budget', value: `${rules.reasoning.budgetTokens}tk` });
+  if (rules.reasoning?.adaptive === true) out.push({ label: 'adaptive reasoning' });
+  if (rules.reasoning?.summary !== undefined) out.push({ label: 'reasoning summary', value: rules.reasoning.summary });
+  if (rules.verbosity !== undefined) out.push({ label: 'verbosity', value: rules.verbosity });
+  if (rules.serviceTier !== undefined) out.push({ label: 'service tier', value: rules.serviceTier });
+  if (rules.anthropicSpeed !== undefined) out.push({ label: 'speed', value: rules.anthropicSpeed });
   if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
-    parts.push([...rules.anthropicBeta].sort().join('/'));
+    out.push({ label: 'anthropic beta', value: [...rules.anthropicBeta].sort().join('/') });
   }
-  return parts;
+  return out;
 };
 
 export interface PublicModelsResponse {

From ca6fac5e9b559c09bc484b1c25084eaf04bd240d Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 03:29:05 +0800
Subject: [PATCH 016/170] chore(aliases): strip uncommitted-spec references
 from comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Comments must not reference in-progress design docs that live under
docs/superpowers/ (gitignored). Strip the "See docs/..." tails from the
JSDoc on the protocol-extension fields, the apply.ts header, and the
translate-layer comments; the preceding sentences already document the
translation contract.
---
 packages/gateway/src/data-plane/model-aliases/apply.ts |  3 +--
 packages/protocols/src/chat-completions/index.ts       | 10 +++++-----
 packages/protocols/src/common/models.ts                |  1 -
 packages/protocols/src/extensions/index.ts             |  3 +--
 packages/protocols/src/gemini/index.ts                 |  8 ++++----
 packages/protocols/src/messages/index.ts               |  2 +-
 packages/protocols/src/responses/index.ts              |  8 ++++----
 .../src/chat-completions-via-messages/request.ts       |  2 +-
 packages/translate/src/shared/gemini-via/gemini.ts     |  2 --
 9 files changed, 17 insertions(+), 22 deletions(-)

diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts
index 9a2bb5950..bd7503141 100644
--- a/packages/gateway/src/data-plane/model-aliases/apply.ts
+++ b/packages/gateway/src/data-plane/model-aliases/apply.ts
@@ -8,8 +8,7 @@ import { mapSummaryToAnthropicDisplay } from '@floway-dev/translate/via-messages
 // Each function writes the alias rules into the inbound IR's slot best suited
 // to the host protocol: native when the protocol can express the concept,
 // extension otherwise. Writes overwrite any user-supplied value — aliases are
-// operator-locked per Goal 3. Mapping table is the single source of truth in
-// docs/superpowers/specs/2026-06-25-model-aliases-design.md.
+// operator-locked.
 
 export const applyAliasRulesToChatCompletions = (payload: ChatCompletionsPayload, rules: ModelAliasRules): void => {
   // reasoning.effort is native; budget/adaptive/summary ride on extension slots
diff --git a/packages/protocols/src/chat-completions/index.ts b/packages/protocols/src/chat-completions/index.ts
index 8804fd449..072348261 100644
--- a/packages/protocols/src/chat-completions/index.ts
+++ b/packages/protocols/src/chat-completions/index.ts
@@ -30,15 +30,15 @@ export interface ChatCompletionsPayload {
   tool_choice?: 'none' | 'auto' | 'required' | { type: 'function'; function: { name: string } } | null;
   /** Request usage stats in streaming responses */
   stream_options?: { include_usage: boolean } | null;
-  /** Floway protocol extension. Translated to Anthropic `thinking.budget_tokens` / Gemini `thinkingConfig.thinkingBudget` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  /** Floway protocol extension. Translated to Anthropic `thinking.budget_tokens` / Gemini `thinkingConfig.thinkingBudget` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. */
   thinking_budget?: number;
-  /** Floway protocol extension. Translated to Anthropic `thinking.type: "adaptive"` / Gemini `thinkingConfig.thinkingBudget: -1` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  /** Floway protocol extension. Translated to Anthropic `thinking.type: "adaptive"` / Gemini `thinkingConfig.thinkingBudget: -1` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. */
   adaptive_thinking?: boolean;
-  /** Floway protocol extension. Translated to OpenAI Responses `reasoning.summary` / Anthropic `thinking.display` / Gemini `thinkingConfig.includeThoughts` when routed to those upstreams; dropped on OpenAI Chat targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  /** Floway protocol extension. Translated to OpenAI Responses `reasoning.summary` / Anthropic `thinking.display` / Gemini `thinkingConfig.includeThoughts` when routed to those upstreams; dropped on OpenAI Chat targets. */
   reasoning_summary?: string;
-  /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. */
   anthropic_speed?: string;
-  /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. */
   anthropic_beta?: readonly string[];
 }
 
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index 7b058edde..278a6254b 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -114,7 +114,6 @@ export interface PublicModel {
   // gateway appends to the listing. Clients that do not know about the
   // field ignore it; alias-aware clients (dashboard, CLI shims) render the
   // alias's target id and rules from this payload directly.
-  // See docs/superpowers/specs/2026-06-25-model-aliases-design.md.
   aliasedFrom?: PublicModelAliasedFrom;
 }
 
diff --git a/packages/protocols/src/extensions/index.ts b/packages/protocols/src/extensions/index.ts
index b6579ce2b..3f2c67750 100644
--- a/packages/protocols/src/extensions/index.ts
+++ b/packages/protocols/src/extensions/index.ts
@@ -2,8 +2,7 @@
  * Closed enumeration of Floway protocol extension fields that the gateway
  * adds to each inbound IR on top of the host protocol's own schema. The
  * per-upstream sanitizer in the gateway reads this manifest to strip any
- * extension residue before the upstream HTTP call. See
- * docs/superpowers/specs/2026-06-25-model-aliases-design.md.
+ * extension residue before the upstream HTTP call.
  */
 export const FLOWAY_EXTENSION_FIELDS = {
   chatCompletions: ['thinking_budget', 'adaptive_thinking', 'reasoning_summary', 'anthropic_speed', 'anthropic_beta'] as const,
diff --git a/packages/protocols/src/gemini/index.ts b/packages/protocols/src/gemini/index.ts
index c3e7e646a..d3786aad5 100644
--- a/packages/protocols/src/gemini/index.ts
+++ b/packages/protocols/src/gemini/index.ts
@@ -6,9 +6,9 @@ export interface GeminiPayload {
   generationConfig?: GeminiGenerationConfig;
   safetySettings?: GeminiSafetySetting[];
   cachedContent?: string;
-  /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. */
   anthropicSpeed?: string;
-  /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. */
   anthropicBeta?: readonly string[];
 }
 
@@ -42,9 +42,9 @@ export interface GeminiGenerationConfig {
   responseMimeType?: string;
   responseSchema?: unknown;
   thinkingConfig?: GeminiThinkingConfig;
-  /** Floway protocol extension. Translated to OpenAI Chat `verbosity` / Responses `text.verbosity` when routed to those upstreams; dropped on Anthropic Messages and Gemini targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  /** Floway protocol extension. Translated to OpenAI Chat `verbosity` / Responses `text.verbosity` when routed to those upstreams; dropped on Anthropic Messages and Gemini targets. */
   verbosity?: string;
-  /** Floway protocol extension. Translated to OpenAI Chat `service_tier` / Responses `service_tier` / Anthropic `service_tier` when routed to those upstreams; dropped on Gemini targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  /** Floway protocol extension. Translated to OpenAI Chat `service_tier` / Responses `service_tier` / Anthropic `service_tier` when routed to those upstreams; dropped on Gemini targets. */
   serviceTier?: string;
 }
 
diff --git a/packages/protocols/src/messages/index.ts b/packages/protocols/src/messages/index.ts
index 663dcef24..43ed05197 100644
--- a/packages/protocols/src/messages/index.ts
+++ b/packages/protocols/src/messages/index.ts
@@ -56,7 +56,7 @@ export interface MessagesPayload {
   // protocol layer because the gateway treats `speed: 'fast'` as the canonical
   // client signal regardless of which upstream serves it.
   speed?: 'standard' | 'fast' | (string & {});
-  /** Floway protocol extension. Translated to OpenAI Chat `verbosity` / Responses `text.verbosity` when routed to those upstreams; dropped on Anthropic Messages and Gemini targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  /** Floway protocol extension. Translated to OpenAI Chat `verbosity` / Responses `text.verbosity` when routed to those upstreams; dropped on Anthropic Messages and Gemini targets. */
   verbosity?: string;
 }
 
diff --git a/packages/protocols/src/responses/index.ts b/packages/protocols/src/responses/index.ts
index 39af5e148..2669fe41d 100644
--- a/packages/protocols/src/responses/index.ts
+++ b/packages/protocols/src/responses/index.ts
@@ -37,13 +37,13 @@ export interface ResponsesPayload {
   prompt_cache_key?: string | null;
   safety_identifier?: string | null;
   service_tier?: 'default' | 'auto' | 'flex' | 'priority' | 'scale' | (string & {}) | null;
-  /** Floway protocol extension. Translated to Anthropic `thinking.budget_tokens` / Gemini `thinkingConfig.thinkingBudget` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  /** Floway protocol extension. Translated to Anthropic `thinking.budget_tokens` / Gemini `thinkingConfig.thinkingBudget` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. */
   thinking_budget?: number;
-  /** Floway protocol extension. Translated to Anthropic `thinking.type: "adaptive"` / Gemini `thinkingConfig.thinkingBudget: -1` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  /** Floway protocol extension. Translated to Anthropic `thinking.type: "adaptive"` / Gemini `thinkingConfig.thinkingBudget: -1` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. */
   adaptive_thinking?: boolean;
-  /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. */
   anthropic_speed?: string;
-  /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. See docs/superpowers/specs/2026-06-25-model-aliases-design.md. */
+  /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. */
   anthropic_beta?: readonly string[];
 }
 
diff --git a/packages/translate/src/chat-completions-via-messages/request.ts b/packages/translate/src/chat-completions-via-messages/request.ts
index 5e83a230b..f4ff25285 100644
--- a/packages/translate/src/chat-completions-via-messages/request.ts
+++ b/packages/translate/src/chat-completions-via-messages/request.ts
@@ -193,7 +193,7 @@ export const translateChatCompletionsToMessages = async (payload: ChatCompletion
   // slots. `anthropic_beta` is body-side residue that the per-upstream
   // sanitizer strips after translation; the gateway-side rule-apply pass owns
   // moving its value onto the outbound `anthropic-beta` header before the
-  // upstream call. See docs/superpowers/specs/2026-06-25-model-aliases-design.md.
+  // upstream call.
   const thinking = buildMessagesThinkingFromExtensions({
     thinkingBudget: payload.thinking_budget,
     adaptiveThinking: payload.adaptive_thinking,
diff --git a/packages/translate/src/shared/gemini-via/gemini.ts b/packages/translate/src/shared/gemini-via/gemini.ts
index 99d8b4872..0c98c09e3 100644
--- a/packages/translate/src/shared/gemini-via/gemini.ts
+++ b/packages/translate/src/shared/gemini-via/gemini.ts
@@ -120,8 +120,6 @@ export const geminiFunctionResponsePart = (part: GeminiPart, ids: GeminiToolCall
 // Reasoning effort is freeform on the inbound IRs (per Goal 2: never gate
 // operator-typed values), but the gateway publishes a canonical closed set so
 // translate-side mappers can normalize without rewriting unknown values.
-// References:
-// - docs/superpowers/specs/2026-06-25-model-aliases-design.md (Translate Layer)
 export type ReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' | 'max';
 
 export const geminiThinkingLevelEffort = (thinkingConfig?: GeminiThinkingConfig): ReasoningEffort | undefined => {

From 322e5abdc47aab3da735ae951589f9e7ebda1d71 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 03:29:06 +0800
Subject: [PATCH 017/170] fix(translate): propagate native service_tier on
 responses-via-messages

Both Responses and Messages carry a native service_tier, but the
translator silently dropped it, so an alias serviceTier rule applied to
a Responses request that was routed to a Messages upstream vanished.
Spread it onto the target payload alongside the other native fields.
---
 packages/translate/src/responses-via-messages/request.ts | 8 +++++---
 1 file changed, 5 insertions(+), 3 deletions(-)

diff --git a/packages/translate/src/responses-via-messages/request.ts b/packages/translate/src/responses-via-messages/request.ts
index f16acb936..9bfd8bc51 100644
--- a/packages/translate/src/responses-via-messages/request.ts
+++ b/packages/translate/src/responses-via-messages/request.ts
@@ -335,9 +335,10 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op
   // Extension-driven thinking (`thinking_budget`, `adaptive_thinking`) wins
   // over the native `effort === 'none'` disable, so the alias write-side
   // facets that target the structured thinking slot survive the legacy
-  // disable shortcut. Native `reasoning.summary` and `service_tier` do not
-  // surface onto Messages — the Responses-native vocabulary keeps its
-  // pre-existing translation contract and rides the upstream sanitizer.
+  // disable shortcut. Native `reasoning.summary` does not surface onto
+  // Messages — the Responses-native vocabulary keeps its pre-existing
+  // translation contract and rides the upstream sanitizer. `service_tier`
+  // is native on both protocols and propagates verbatim.
   const extensionThinking = buildMessagesThinkingFromExtensions({
     thinkingBudget: payload.thinking_budget,
     adaptiveThinking: payload.adaptive_thinking,
@@ -360,6 +361,7 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op
     tool_choice: translateToolChoice(payload.tool_choice),
     ...(thinking ? { thinking } : {}),
     ...(hasOutputConfig ? { output_config: outputConfig } : {}),
+    ...(payload.service_tier != null ? { service_tier: payload.service_tier } : {}),
     ...(payload.anthropic_speed != null ? { speed: payload.anthropic_speed } : {}),
   };
 

From 9f38cef6263e9980d5dfc2ee887cdaef24773846 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 03:35:51 +0800
Subject: [PATCH 018/170] refactor(aliases): inline AliasMatchResult
 single-field wrapper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

matchAlias returns the alias directly; the sole caller (pushInterpretation
in registry.ts) was already destructuring the wrapper away. Both the
review and cleanup passes converged on this — remove the indirection.
---
 packages/gateway/src/data-plane/model-aliases/match.ts    | 8 ++------
 .../gateway/src/data-plane/model-aliases/match_test.ts    | 4 ++--
 packages/gateway/src/data-plane/providers/registry.ts     | 5 ++---
 3 files changed, 6 insertions(+), 11 deletions(-)

diff --git a/packages/gateway/src/data-plane/model-aliases/match.ts b/packages/gateway/src/data-plane/model-aliases/match.ts
index f297d1a50..edb31d071 100644
--- a/packages/gateway/src/data-plane/model-aliases/match.ts
+++ b/packages/gateway/src/data-plane/model-aliases/match.ts
@@ -1,9 +1,5 @@
 import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
 
-export interface AliasMatchResult {
-  readonly alias: ModelAlias;
-}
-
 // Lookup an alias for the (post-prefix-strip) lookupId against the upstream's
 // id. An empty `upstreamIds` filter on the alias means "match any upstream";
 // a non-empty filter must include the upstream's id.
@@ -11,9 +7,9 @@ export const matchAlias = (
   lookupId: string,
   upstreamId: string,
   aliases: readonly ModelAlias[],
-): AliasMatchResult | undefined => {
+): ModelAlias | undefined => {
   const hit = aliases.find(a => a.alias === lookupId);
   if (!hit) return undefined;
   if (hit.upstreamIds.length > 0 && !hit.upstreamIds.includes(upstreamId)) return undefined;
-  return { alias: hit };
+  return hit;
 };
diff --git a/packages/gateway/src/data-plane/model-aliases/match_test.ts b/packages/gateway/src/data-plane/model-aliases/match_test.ts
index 7252078c9..b3fbf5596 100644
--- a/packages/gateway/src/data-plane/model-aliases/match_test.ts
+++ b/packages/gateway/src/data-plane/model-aliases/match_test.ts
@@ -17,7 +17,7 @@ const make = (overrides: Partial<ModelAlias>): ModelAlias => ({
 describe('matchAlias', () => {
   test('matches by exact lookupId when alias has no upstream filter', () => {
     const aliases = [make({ alias: 'codex-auto-review', targetModelId: 'gpt-5.4' })];
-    expect(matchAlias('codex-auto-review', 'up-1', aliases)?.alias.alias).toBe('codex-auto-review');
+    expect(matchAlias('codex-auto-review', 'up-1', aliases)?.alias).toBe('codex-auto-review');
   });
 
   test('does not match when lookupId differs', () => {
@@ -46,7 +46,7 @@ describe('matchAlias', () => {
       make({ alias: 'a', targetModelId: 'first', rules: { reasoning: { effort: 'low' } } }),
       make({ alias: 'a', targetModelId: 'second' }),
     ];
-    expect(matchAlias('a', 'up-x', aliases)?.alias).toEqual(aliases[0]);
+    expect(matchAlias('a', 'up-x', aliases)).toEqual(aliases[0]);
   });
 
   test('returns undefined for an empty alias list', () => {
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index 53619a613..3645a041b 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -393,12 +393,11 @@ const pushInterpretation = (
   lookupId: string,
   aliases: readonly ModelAlias[],
 ): void => {
-  const hit = matchAlias(lookupId, provider.upstream, aliases);
-  if (!hit) {
+  const alias = matchAlias(lookupId, provider.upstream, aliases);
+  if (!alias) {
     out.push({ provider, lookupId });
     return;
   }
-  const { alias } = hit;
   const aliasInterp: ModelInterpretation = {
     provider,
     lookupId: alias.targetModelId,

From 1fd7bba13523bdeb8761436dd4cc6a1aef8c6917 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 03:56:20 +0800
Subject: [PATCH 019/170] refactor(aliases): inline single-use display/listing
 helpers

formatAliasRulesSummary was only consumed by composeAliasDisplayName in
the same file; the standalone export existed only so the test could
import it directly, which bends production code around its test.
aliasPublicId was a 2-line ternary used exactly once inside
aliasEmissionToListedModel. Both now live at their call site; tests
target the surviving public entry point.
---
 .../control-plane/model-aliases/display.ts    | 44 +++++----
 .../model-aliases/display_test.ts             | 90 +++++++++----------
 .../src/data-plane/models/alias-listing.ts    | 16 ++--
 3 files changed, 72 insertions(+), 78 deletions(-)

diff --git a/packages/gateway/src/control-plane/model-aliases/display.ts b/packages/gateway/src/control-plane/model-aliases/display.ts
index 80b567488..57ecd0eec 100644
--- a/packages/gateway/src/control-plane/model-aliases/display.ts
+++ b/packages/gateway/src/control-plane/model-aliases/display.ts
@@ -1,16 +1,25 @@
 import type { ModelAliasRules } from './types.ts';
 
-// Render the closed rule set as a parenthesized suffix the gateway appends to
-// the target model's display name when the operator did not supply an
-// explicit alias `displayName`. The wording stays compact (`value label`,
-// joined with commas) because the suffix has to fit alongside the target
-// name in narrow listings — the dashboard's per-badge view uses
-// `formatAliasRuleBadges` for the self-describing `label: value` form.
+// Compose the alias-local display name — what the operator named the alias
+// (when set) or a synthesized target + rules summary. Independent of which
+// upstream is surfacing the alias; the prefixed listing form prepends the
+// upstream display name at the call site, mirroring the real-model path in
+// `registry.ts`.
 //
-// `anthropicBeta` is sorted at format time so two operators carrying the same
-// token set in different orders see the same label.
-export const formatAliasRulesSummary = (rules: ModelAliasRules): string => {
+// The synthesized form's parenthesized rules suffix uses the compact
+// `value label` wording so it fits alongside the target name in narrow
+// listings — the dashboard's per-badge view uses `formatAliasRuleBadges`
+// for the self-describing `label: value` form. `anthropicBeta` tokens are
+// sorted so two operators carrying the same set in different orders see
+// the same label.
+export const composeAliasDisplayName = (input: {
+  aliasDisplayName?: string;
+  targetDisplayName: string;
+  rules: ModelAliasRules;
+}): string => {
+  if (input.aliasDisplayName !== undefined) return input.aliasDisplayName;
   const parts: string[] = [];
+  const { rules } = input;
   if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`);
   if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`);
   if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning');
@@ -21,19 +30,6 @@ export const formatAliasRulesSummary = (rules: ModelAliasRules): string => {
   if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
     parts.push([...rules.anthropicBeta].sort().join('/'));
   }
-  return parts.length > 0 ? ` (${parts.join(', ')})` : '';
-};
-
-// Compose the alias-local display name — what the operator named the alias
-// (when set) or a synthesized target + rules summary. Independent of which
-// upstream is surfacing the alias; the prefixed listing form prepends the
-// upstream display name at the call site, mirroring the real-model path in
-// `registry.ts`.
-export const composeAliasDisplayName = (input: {
-  aliasDisplayName?: string;
-  targetDisplayName: string;
-  rules: ModelAliasRules;
-}): string => {
-  if (input.aliasDisplayName !== undefined) return input.aliasDisplayName;
-  return `${input.targetDisplayName}${formatAliasRulesSummary(input.rules)}`;
+  const suffix = parts.length > 0 ? ` (${parts.join(', ')})` : '';
+  return `${input.targetDisplayName}${suffix}`;
 };
diff --git a/packages/gateway/src/control-plane/model-aliases/display_test.ts b/packages/gateway/src/control-plane/model-aliases/display_test.ts
index d45a1b339..a9eec7070 100644
--- a/packages/gateway/src/control-plane/model-aliases/display_test.ts
+++ b/packages/gateway/src/control-plane/model-aliases/display_test.ts
@@ -1,72 +1,72 @@
 import { describe, expect, test } from 'vitest';
 
-import { composeAliasDisplayName, formatAliasRulesSummary } from './display.ts';
+import { composeAliasDisplayName } from './display.ts';
 
-describe('formatAliasRulesSummary', () => {
-  test('returns empty string when no rules are set', () => {
-    expect(formatAliasRulesSummary({})).toBe('');
-  });
-
-  test('formats each rule field with its canonical suffix', () => {
-    expect(formatAliasRulesSummary({ reasoning: { effort: 'high' } })).toBe(' (high effort)');
-    expect(formatAliasRulesSummary({ reasoning: { budgetTokens: 4096 } })).toBe(' (4096tk reasoning)');
-    expect(formatAliasRulesSummary({ reasoning: { adaptive: true } })).toBe(' (adaptive reasoning)');
-    expect(formatAliasRulesSummary({ reasoning: { summary: 'detailed' } })).toBe(' (detailed summary)');
-    expect(formatAliasRulesSummary({ verbosity: 'low' })).toBe(' (low verbosity)');
-    expect(formatAliasRulesSummary({ serviceTier: 'priority' })).toBe(' (priority tier)');
-    expect(formatAliasRulesSummary({ anthropicSpeed: 'fast' })).toBe(' (fast speed)');
+describe('composeAliasDisplayName', () => {
+  test('uses alias displayName when set, suppressing the rules summary', () => {
+    expect(
+      composeAliasDisplayName({
+        aliasDisplayName: 'Codex Auto Review',
+        targetDisplayName: 'GPT-5.4',
+        rules: { reasoning: { effort: 'low' } },
+      }),
+    ).toBe('Codex Auto Review');
   });
 
-  test('sorts anthropicBeta tokens and joins with slashes', () => {
-    expect(formatAliasRulesSummary({ anthropicBeta: ['extended-thinking', 'fast-mode-2026-02-01'] })).toBe(
-      ' (extended-thinking/fast-mode-2026-02-01)',
-    );
-    expect(formatAliasRulesSummary({ anthropicBeta: ['fast-mode-2026-02-01', 'extended-thinking'] })).toBe(
-      ' (extended-thinking/fast-mode-2026-02-01)',
-    );
+  test('omits the rules suffix when rules are empty', () => {
+    expect(
+      composeAliasDisplayName({
+        targetDisplayName: 'GPT-5.4',
+        rules: {},
+      }),
+    ).toBe('GPT-5.4');
   });
 
-  test('drops anthropicBeta when the token list is empty', () => {
-    expect(formatAliasRulesSummary({ anthropicBeta: [] })).toBe('');
+  test('formats each rule field with its canonical suffix when alias displayName is missing', () => {
+    const target = 'GPT-5.4';
+    expect(composeAliasDisplayName({ targetDisplayName: target, rules: { reasoning: { effort: 'high' } } })).toBe('GPT-5.4 (high effort)');
+    expect(composeAliasDisplayName({ targetDisplayName: target, rules: { reasoning: { budgetTokens: 4096 } } })).toBe('GPT-5.4 (4096tk reasoning)');
+    expect(composeAliasDisplayName({ targetDisplayName: target, rules: { reasoning: { adaptive: true } } })).toBe('GPT-5.4 (adaptive reasoning)');
+    expect(composeAliasDisplayName({ targetDisplayName: target, rules: { reasoning: { summary: 'detailed' } } })).toBe('GPT-5.4 (detailed summary)');
+    expect(composeAliasDisplayName({ targetDisplayName: target, rules: { verbosity: 'low' } })).toBe('GPT-5.4 (low verbosity)');
+    expect(composeAliasDisplayName({ targetDisplayName: target, rules: { serviceTier: 'priority' } })).toBe('GPT-5.4 (priority tier)');
+    expect(composeAliasDisplayName({ targetDisplayName: target, rules: { anthropicSpeed: 'fast' } })).toBe('GPT-5.4 (fast speed)');
   });
 
-  test('joins multiple fields with comma in deterministic order', () => {
+  test('sorts anthropicBeta tokens and joins with slashes', () => {
     expect(
-      formatAliasRulesSummary({
-        reasoning: { effort: 'low', summary: 'concise' },
-        verbosity: 'high',
-        anthropicSpeed: 'fast',
+      composeAliasDisplayName({
+        targetDisplayName: 'Claude',
+        rules: { anthropicBeta: ['extended-thinking', 'fast-mode-2026-02-01'] },
       }),
-    ).toBe(' (low effort, concise summary, high verbosity, fast speed)');
-  });
-});
-
-describe('composeAliasDisplayName', () => {
-  test('uses alias displayName when set, suppressing the rules summary', () => {
+    ).toBe('Claude (extended-thinking/fast-mode-2026-02-01)');
     expect(
       composeAliasDisplayName({
-        aliasDisplayName: 'Codex Auto Review',
-        targetDisplayName: 'GPT-5.4',
-        rules: { reasoning: { effort: 'low' } },
+        targetDisplayName: 'Claude',
+        rules: { anthropicBeta: ['fast-mode-2026-02-01', 'extended-thinking'] },
       }),
-    ).toBe('Codex Auto Review');
+    ).toBe('Claude (extended-thinking/fast-mode-2026-02-01)');
   });
 
-  test('falls back to target displayName with rules suffix when alias displayName is missing', () => {
+  test('drops anthropicBeta when the token list is empty', () => {
     expect(
       composeAliasDisplayName({
-        targetDisplayName: 'GPT-5.4',
-        rules: { reasoning: { effort: 'low' } },
+        targetDisplayName: 'Claude',
+        rules: { anthropicBeta: [] },
       }),
-    ).toBe('GPT-5.4 (low effort)');
+    ).toBe('Claude');
   });
 
-  test('omits the rules suffix when rules are empty', () => {
+  test('joins multiple fields with comma in deterministic order', () => {
     expect(
       composeAliasDisplayName({
         targetDisplayName: 'GPT-5.4',
-        rules: {},
+        rules: {
+          reasoning: { effort: 'low', summary: 'concise' },
+          verbosity: 'high',
+          anthropicSpeed: 'fast',
+        },
       }),
-    ).toBe('GPT-5.4');
+    ).toBe('GPT-5.4 (low effort, concise summary, high verbosity, fast speed)');
   });
 });
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index d1de04b09..e9762ed35 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -54,14 +54,6 @@ const aliasListingEmissions = (
   return out;
 };
 
-// The public id form an alias emission carries on the wire. Bare alias name
-// for the unprefixed form; provider prefix + alias name for the prefixed
-// form. Mirrors how real models are surfaced in the same listing pass.
-const aliasPublicId = (alias: ModelAlias, emission: AliasListingEmission): string => {
-  const cfg = emission.provider.modelPrefix;
-  return emission.form === 'prefixed' && cfg !== null ? `${cfg.prefix}${alias.alias}` : alias.alias;
-};
-
 // Turn an alias emission into a `ListedModel` that walks the same listing
 // pipeline as real catalog entries. The synthesized `providers` array carries
 // a single binding pointing at the alias's target on this upstream, so the
@@ -73,6 +65,10 @@ const aliasPublicId = (alias: ModelAlias, emission: AliasListingEmission): strin
 // `unprefixed` listing form; the `prefixed` form mirrors the real-model path
 // in `registry.ts` and prepends `${provider.name}: ` so the upstream is
 // visible at a glance.
+//
+// Public id: bare alias name for the unprefixed form; provider prefix + alias
+// name for the prefixed form. Mirrors how real models are surfaced in the
+// same listing pass.
 const aliasEmissionToListedModel = (alias: ModelAlias, emission: AliasListingEmission): ListedModel => {
   const { provider, target, form } = emission;
   const aliasLocalName = composeAliasDisplayName({
@@ -80,6 +76,8 @@ const aliasEmissionToListedModel = (alias: ModelAlias, emission: AliasListingEmi
     targetDisplayName: target.display_name ?? target.id,
     rules: alias.rules,
   });
+  const cfg = provider.modelPrefix;
+  const publicId = form === 'prefixed' && cfg !== null ? `${cfg.prefix}${alias.alias}` : alias.alias;
   const record: ProviderModelRecord = {
     upstream: provider.upstream,
     upstreamName: provider.name,
@@ -92,7 +90,7 @@ const aliasEmissionToListedModel = (alias: ModelAlias, emission: AliasListingEmi
   const { providerData: _providerData, endpoints, id: _targetId, display_name: _targetDisplay, created: _targetCreated, ...rest } = target;
   return {
     ...rest,
-    id: aliasPublicId(alias, emission),
+    id: publicId,
     display_name: form === 'prefixed' ? `${provider.name}: ${aliasLocalName}` : aliasLocalName,
     created: alias.createdAt,
     endpoints: { ...endpoints },

From 7d4a3e34da80afb2cf03fdecb26bddf533774285 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 03:56:20 +0800
Subject: [PATCH 020/170] refactor(aliases): collapse passthrough drop-trace
 into the shared sanitize helper

The passthrough serve was re-emitting the floway.alias.drop log shape
that chat/shared/sanitize.ts already owns, and re-finding the matched
alias by name to walk its rules. ModelAliasRules now rides through
resolveModelForRequest alongside aliasName, so passthrough has the
rules in hand; the rules walker moves into sanitize.ts as
traceAllRulesDropped and reuses createSanitizeTraceCtx so both
surfaces emit identical trace lines.
---
 .../src/data-plane/chat/shared/sanitize.ts    | 23 ++++++++++++++
 .../src/data-plane/providers/registry.ts      |  6 +++-
 .../data-plane/shared/passthrough-serve.ts    | 30 +++----------------
 3 files changed, 32 insertions(+), 27 deletions(-)

diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize.ts b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
index 832f8f41d..8f8b878e1 100644
--- a/packages/gateway/src/data-plane/chat/shared/sanitize.ts
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
@@ -1,3 +1,4 @@
+import type { ModelAliasRules } from '../../../control-plane/model-aliases/types.ts';
 import { FLOWAY_EXTENSION_FIELDS } from '@floway-dev/protocols/extensions';
 
 export interface SanitizeTraceCtx {
@@ -49,3 +50,25 @@ export const sanitizeForGeminiUpstream = (body: Record<string, unknown>, trace?:
     stripKeys(generationConfig as Record<string, unknown>, FLOWAY_EXTENSION_FIELDS.gemini.generationConfig, 'gemini', trace, 'generationConfig.');
   }
 };
+
+// Walks the alias rules object and emits one trace line per non-empty rule
+// field. Used by inbound surfaces that have no protocol-extension slots for
+// the rules in the first place (embeddings, images, /v1/completions) — the
+// rules are structurally dropped before the upstream call, and this helper
+// gives the operator the same `floway.alias.drop` signal the chat
+// sanitizers produce when they strip extension residue.
+export const traceAllRulesDropped = (
+  rules: ModelAliasRules,
+  targetProtocol: string,
+  trace: SanitizeTraceCtx,
+): void => {
+  if (rules.reasoning) {
+    for (const key of Object.keys(rules.reasoning)) {
+      trace.emit({ alias: trace.aliasName, field: `reasoning.${key}`, targetProtocol });
+    }
+  }
+  if (rules.verbosity !== undefined) trace.emit({ alias: trace.aliasName, field: 'verbosity', targetProtocol });
+  if (rules.serviceTier !== undefined) trace.emit({ alias: trace.aliasName, field: 'serviceTier', targetProtocol });
+  if (rules.anthropicSpeed !== undefined) trace.emit({ alias: trace.aliasName, field: 'anthropicSpeed', targetProtocol });
+  if (rules.anthropicBeta?.length) trace.emit({ alias: trace.aliasName, field: 'anthropicBeta', targetProtocol });
+};
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index 3645a041b..1264eed0d 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -332,6 +332,10 @@ export interface ProviderModelResolution {
   // this onto the `x-floway-alias` response header so alias-served calls are
   // observable without enabling any extra mode.
   aliasName?: string;
+  // Operator-locked rules carried alongside `aliasName`. Set in lockstep so
+  // passthrough callers can trace the dropped rule fields without re-finding
+  // the matched alias by name.
+  aliasRules?: ModelAliasRules;
 }
 
 export interface ModelInterpretation {
@@ -525,7 +529,7 @@ export const resolveModelForRequest = async (
   // `x-floway-alias` header without re-deriving the match.
   const matches: ProviderModelResolution[] = resolutions.map(r =>
     r.interpretation.aliasName !== undefined
-      ? { ...r.resolved, aliasName: r.interpretation.aliasName }
+      ? { ...r.resolved, aliasName: r.interpretation.aliasName, aliasRules: r.interpretation.aliasRules }
       : r.resolved);
   return { matches, failedUpstreams };
 };
diff --git a/packages/gateway/src/data-plane/shared/passthrough-serve.ts b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
index b566b582d..31b162e9a 100644
--- a/packages/gateway/src/data-plane/shared/passthrough-serve.ts
+++ b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
@@ -23,6 +23,7 @@ import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import type { AuthedContext } from '../../middleware/auth.ts';
 import { getRepo } from '../../repo/index.ts';
 import type { TokenUsage } from '../../repo/types.ts';
+import { createSanitizeTraceCtx, traceAllRulesDropped } from '../chat/shared/sanitize.ts';
 import type { GatewayCtx } from '../chat/shared/gateway-ctx.ts';
 import { stageGatewayResponseHeader } from '../chat/shared/gateway-ctx.ts';
 import { type StreamCompletion, writeSSEFrames } from '../chat/shared/stream/sse.ts';
@@ -119,31 +120,6 @@ interface PassthroughServeContext {
 export const passthroughApiError = (c: Context, message: string, status: ContentfulStatusCode): Response =>
   c.json({ error: { message, type: 'api_error' } }, status);
 
-// Emit one trace line per rule field present on the matched alias when the
-// inbound endpoint has no slot for the rule. The passthrough endpoints
-// (embeddings, images, /v1/completions) carry no Floway-extension fields
-// so a non-empty `rules` object is structurally dropped before the upstream
-// call; emitting one trace line per knob gives an operator the same signal
-// the chat sanitizers do.
-const traceDroppedAliasRulesForPassthrough = (
-  aliasName: string,
-  aliases: readonly { alias: string; rules: Record<string, unknown> }[],
-  sourceApi: PassthroughServeApiName,
-): void => {
-  const matched = aliases.find(a => a.alias === aliasName);
-  if (!matched) return;
-  const rules = matched.rules as { reasoning?: Record<string, unknown>; verbosity?: unknown; serviceTier?: unknown; anthropicSpeed?: unknown; anthropicBeta?: readonly unknown[] };
-  const fields: string[] = [];
-  if (rules.reasoning) for (const key of Object.keys(rules.reasoning)) fields.push(`reasoning.${key}`);
-  if (rules.verbosity !== undefined) fields.push('verbosity');
-  if (rules.serviceTier !== undefined) fields.push('serviceTier');
-  if (rules.anthropicSpeed !== undefined) fields.push('anthropicSpeed');
-  if (rules.anthropicBeta?.length) fields.push('anthropicBeta');
-  for (const field of fields) {
-    console.warn('floway.alias.drop', JSON.stringify({ alias: aliasName, field, targetProtocol: sourceApi }));
-  }
-};
-
 export const passthroughServe = async (input: PassthroughServeContext): Promise<Response> => {
   const { c, ctx, sourceApi, model, bindingServesEndpoint, call, response: responseHandling } = input;
   const requestStartedAt = performance.now();
@@ -176,7 +152,9 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
       if (!bindingServesEndpoint(match.binding)) continue;
       if (match.aliasName !== undefined) {
         stageGatewayResponseHeader(ctx, 'x-floway-alias', match.aliasName);
-        traceDroppedAliasRulesForPassthrough(match.aliasName, aliases, sourceApi);
+        if (match.aliasRules) {
+          traceAllRulesDropped(match.aliasRules, sourceApi, createSanitizeTraceCtx(match.aliasName));
+        }
       }
 
       const recorder = createUpstreamLatencyRecorder();

From e79d27ba7523800afed921da82bfb3dfb7a8ddd2 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 03:56:20 +0800
Subject: [PATCH 021/170] docs(aliases): note summary='auto' is an explicit
 no-op on Messages target
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

mapSummaryToAnthropicDisplay('auto') returns undefined, so the apply
step has always left a user-supplied thinking.display untouched in
that case. The comment now spells out that this is intentional —
'auto' means "defer to upstream default", and operator-locked
overwrite applies to every other summary value.
---
 packages/gateway/src/data-plane/model-aliases/apply.ts | 5 +++++
 1 file changed, 5 insertions(+)

diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts
index bd7503141..283845982 100644
--- a/packages/gateway/src/data-plane/model-aliases/apply.ts
+++ b/packages/gateway/src/data-plane/model-aliases/apply.ts
@@ -56,6 +56,11 @@ export const applyAliasRulesToMessages = (payload: MessagesPayload, rules: Model
   }
   if (rules.reasoning?.summary !== undefined) {
     const display = mapSummaryToAnthropicDisplay(rules.reasoning.summary);
+    // summary='auto' maps to undefined and is an explicit no-op on the
+    // Messages path — the operator chose "let upstream default decide", so
+    // we neither synthesize a thinking block nor overwrite a user-supplied
+    // thinking.display. Every other summary value enforces operator-locked
+    // overwrite.
     if (display !== undefined) {
       // When no prior thinking branch ran (no effort/budget/adaptive in this
       // rule), synthesize `thinking: {type:'enabled', display}` so the

From 7122dc7bc435b9f4c144880d7827cc983fc1edf0 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 04:04:18 +0800
Subject: [PATCH 022/170] feat(aliases): drop redundant anthropicSpeed alias
 knob
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The cross-protocol service_tier↔speed:'fast' bridge that #114 added to the
*-via-messages and messages-via-* translators makes the alias-extension
knob anthropicSpeed redundant — operators who want speed: 'fast' on a
Messages upstream can set serviceTier: 'fast' on the alias and the bridge
handles the wire-level conversion in both directions.

Removed before any external client relies on it (the alias schema is not
yet public — PR is still open):

- ModelAliasRules.anthropicSpeed plus the matching PublicModelAliasedFrom
  field on /v1/models.
- The anthropic_speed Chat / Responses extension fields, the top-level
  anthropicSpeed Gemini field, and their entries in
  FLOWAY_EXTENSION_FIELDS.
- The four applyAliasRules* branches that wrote the knob into each
  inbound IR's natural slot, plus the matching emit branches in
  chat-completions-via-messages, responses-via-messages, and
  gemini-via-messages translators.
- The trace-helper and display/badge formatters that surfaced the field.
- All tests asserting either side of the now-removed contract.

anthropicBeta is unrelated (Anthropic beta header tokens) and is kept
intact. The native Messages `speed` field is also untouched — callers
hitting the Messages inbound directly still control it.
---
 .../control-plane/model-aliases/display.ts    |  1 -
 .../model-aliases/display_test.ts             |  5 ++---
 .../src/control-plane/model-aliases/types.ts  |  1 -
 .../src/data-plane/chat/shared/sanitize.ts    |  1 -
 .../data-plane/chat/shared/sanitize_test.ts   |  8 +++----
 .../src/data-plane/model-aliases/apply.ts     | 16 ++++++--------
 .../data-plane/model-aliases/apply_test.ts    | 21 ++++++-------------
 .../protocols/src/chat-completions/index.ts   |  2 --
 packages/protocols/src/common/models.ts       |  2 --
 packages/protocols/src/extensions/index.ts    |  6 +++---
 packages/protocols/src/gemini/index.ts        |  2 --
 packages/protocols/src/responses/index.ts     |  2 --
 .../chat-completions-via-messages/request.ts  |  5 +----
 .../request_test.ts                           | 11 ----------
 .../request_test.ts                           |  2 --
 .../request_test.ts                           |  6 +-----
 .../src/gemini-via-messages/request.ts        |  5 -----
 .../src/gemini-via-messages/request_test.ts   | 10 ---------
 .../src/gemini-via-responses/request_test.ts  |  3 ---
 .../request_test.ts                           |  1 -
 .../messages-via-responses/request_test.ts    |  1 -
 .../request_test.ts                           |  2 --
 .../src/responses-via-messages/request.ts     |  5 +----
 .../responses-via-messages/request_test.ts    |  5 -----
 24 files changed, 24 insertions(+), 99 deletions(-)

diff --git a/packages/gateway/src/control-plane/model-aliases/display.ts b/packages/gateway/src/control-plane/model-aliases/display.ts
index 57ecd0eec..f831273d8 100644
--- a/packages/gateway/src/control-plane/model-aliases/display.ts
+++ b/packages/gateway/src/control-plane/model-aliases/display.ts
@@ -26,7 +26,6 @@ export const composeAliasDisplayName = (input: {
   if (rules.reasoning?.summary !== undefined) parts.push(`${rules.reasoning.summary} summary`);
   if (rules.verbosity !== undefined) parts.push(`${rules.verbosity} verbosity`);
   if (rules.serviceTier !== undefined) parts.push(`${rules.serviceTier} tier`);
-  if (rules.anthropicSpeed !== undefined) parts.push(`${rules.anthropicSpeed} speed`);
   if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
     parts.push([...rules.anthropicBeta].sort().join('/'));
   }
diff --git a/packages/gateway/src/control-plane/model-aliases/display_test.ts b/packages/gateway/src/control-plane/model-aliases/display_test.ts
index a9eec7070..40dbd2fec 100644
--- a/packages/gateway/src/control-plane/model-aliases/display_test.ts
+++ b/packages/gateway/src/control-plane/model-aliases/display_test.ts
@@ -30,7 +30,6 @@ describe('composeAliasDisplayName', () => {
     expect(composeAliasDisplayName({ targetDisplayName: target, rules: { reasoning: { summary: 'detailed' } } })).toBe('GPT-5.4 (detailed summary)');
     expect(composeAliasDisplayName({ targetDisplayName: target, rules: { verbosity: 'low' } })).toBe('GPT-5.4 (low verbosity)');
     expect(composeAliasDisplayName({ targetDisplayName: target, rules: { serviceTier: 'priority' } })).toBe('GPT-5.4 (priority tier)');
-    expect(composeAliasDisplayName({ targetDisplayName: target, rules: { anthropicSpeed: 'fast' } })).toBe('GPT-5.4 (fast speed)');
   });
 
   test('sorts anthropicBeta tokens and joins with slashes', () => {
@@ -64,9 +63,9 @@ describe('composeAliasDisplayName', () => {
         rules: {
           reasoning: { effort: 'low', summary: 'concise' },
           verbosity: 'high',
-          anthropicSpeed: 'fast',
+          serviceTier: 'flex',
         },
       }),
-    ).toBe('GPT-5.4 (low effort, concise summary, high verbosity, fast speed)');
+    ).toBe('GPT-5.4 (low effort, concise summary, high verbosity, flex tier)');
   });
 });
diff --git a/packages/gateway/src/control-plane/model-aliases/types.ts b/packages/gateway/src/control-plane/model-aliases/types.ts
index 3170b4b47..e7d9f64bd 100644
--- a/packages/gateway/src/control-plane/model-aliases/types.ts
+++ b/packages/gateway/src/control-plane/model-aliases/types.ts
@@ -10,7 +10,6 @@ export type ModelAliasRules = {
   };
   readonly verbosity?: string;
   readonly serviceTier?: string;
-  readonly anthropicSpeed?: string;
   readonly anthropicBeta?: readonly string[];
 };
 
diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize.ts b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
index 8f8b878e1..57a6959dc 100644
--- a/packages/gateway/src/data-plane/chat/shared/sanitize.ts
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
@@ -69,6 +69,5 @@ export const traceAllRulesDropped = (
   }
   if (rules.verbosity !== undefined) trace.emit({ alias: trace.aliasName, field: 'verbosity', targetProtocol });
   if (rules.serviceTier !== undefined) trace.emit({ alias: trace.aliasName, field: 'serviceTier', targetProtocol });
-  if (rules.anthropicSpeed !== undefined) trace.emit({ alias: trace.aliasName, field: 'anthropicSpeed', targetProtocol });
   if (rules.anthropicBeta?.length) trace.emit({ alias: trace.aliasName, field: 'anthropicBeta', targetProtocol });
 };
diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
index eebcd5d06..da2e72d0d 100644
--- a/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
@@ -30,7 +30,7 @@ test('sanitizeForMessagesUpstream strips verbosity and emits one trace line', ()
 test('sanitizeForChatCompletionsUpstream strips Floway extensions and leaves native fields', () => {
   const body: Record<string, unknown> = {
     thinking_budget: 4096,
-    anthropic_speed: 'fast',
+    anthropic_beta: ['ctx-1m'],
     reasoning_effort: 'high',
     model: 'x',
   };
@@ -40,7 +40,7 @@ test('sanitizeForChatCompletionsUpstream strips Floway extensions and leaves nat
   assertEquals(lines.length, 2);
   assertEquals(lines.every(l => l.alias === 'alias-1' && l.targetProtocol === 'chat-completions'), true);
   const droppedFields = lines.map(l => l.field).sort();
-  assertEquals(droppedFields, ['anthropic_speed', 'thinking_budget']);
+  assertEquals(droppedFields, ['anthropic_beta', 'thinking_budget']);
 });
 
 test('sanitizeForResponsesUpstream strips extensions without a trace context', () => {
@@ -52,14 +52,14 @@ test('sanitizeForResponsesUpstream strips extensions without a trace context', (
 test('sanitizeForGeminiUpstream walks top-level and generationConfig', () => {
   const body: Record<string, unknown> = {
     generationConfig: { verbosity: 'low', thinkingConfig: { thinkingBudget: 100 } },
-    anthropicSpeed: 'fast',
+    anthropicBeta: ['ctx-1m'],
   };
   const { ctx, lines } = makeTrace('alias-g');
   sanitizeForGeminiUpstream(body, ctx);
   assertEquals(body, { generationConfig: { thinkingConfig: { thinkingBudget: 100 } } });
   assertEquals(lines.length, 2);
   const droppedFields = lines.map(l => l.field).sort();
-  assertEquals(droppedFields, ['anthropicSpeed', 'generationConfig.verbosity']);
+  assertEquals(droppedFields, ['anthropicBeta', 'generationConfig.verbosity']);
   assertEquals(lines.every(l => l.alias === 'alias-g' && l.targetProtocol === 'gemini'), true);
 });
 
diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts
index 283845982..e90e2fbed 100644
--- a/packages/gateway/src/data-plane/model-aliases/apply.ts
+++ b/packages/gateway/src/data-plane/model-aliases/apply.ts
@@ -19,26 +19,24 @@ export const applyAliasRulesToChatCompletions = (payload: ChatCompletionsPayload
   if (rules.reasoning?.summary !== undefined) payload.reasoning_summary = rules.reasoning.summary;
   if (rules.verbosity !== undefined) payload.verbosity = rules.verbosity;
   if (rules.serviceTier !== undefined) payload.service_tier = rules.serviceTier;
-  if (rules.anthropicSpeed !== undefined) payload.anthropic_speed = rules.anthropicSpeed;
   if (rules.anthropicBeta?.length) payload.anthropic_beta = [...rules.anthropicBeta];
 };
 
 export const applyAliasRulesToResponses = (payload: ResponsesPayload, rules: ModelAliasRules): void => {
   // reasoning.{effort, summary} and text.verbosity / service_tier are native;
-  // budget/adaptive ride on extension slots; the two anthropic_* knobs only
-  // matter when this Responses inbound lands on a Messages upstream.
+  // budget/adaptive ride on extension slots; anthropic_beta only matters when
+  // this Responses inbound lands on a Messages upstream.
   if (rules.reasoning?.effort !== undefined) payload.reasoning = { ...payload.reasoning, effort: rules.reasoning.effort };
   if (rules.reasoning?.summary !== undefined) payload.reasoning = { ...payload.reasoning, summary: rules.reasoning.summary };
   if (rules.reasoning?.budgetTokens !== undefined) payload.thinking_budget = rules.reasoning.budgetTokens;
   if (rules.reasoning?.adaptive === true) payload.adaptive_thinking = true;
   if (rules.verbosity !== undefined) payload.text = { ...payload.text, verbosity: rules.verbosity };
   if (rules.serviceTier !== undefined) payload.service_tier = rules.serviceTier;
-  if (rules.anthropicSpeed !== undefined) payload.anthropic_speed = rules.anthropicSpeed;
   if (rules.anthropicBeta?.length) payload.anthropic_beta = [...rules.anthropicBeta];
 };
 
 export const applyAliasRulesToMessages = (payload: MessagesPayload, rules: ModelAliasRules): void => {
-  // Anthropic has natives for effort, thinking, speed, and service_tier; only
+  // Anthropic has natives for effort, thinking, and service_tier; only
   // verbosity is a Floway extension on this inbound. anthropic_beta is the
   // wire header — the attempt layer reads `candidate.aliasRules.anthropicBeta`
   // and merges via mergeAnthropicBetaTokens, so we do not stamp the body here.
@@ -72,14 +70,13 @@ export const applyAliasRulesToMessages = (payload: MessagesPayload, rules: Model
   }
   if (rules.verbosity !== undefined) payload.verbosity = rules.verbosity;
   if (rules.serviceTier !== undefined) payload.service_tier = rules.serviceTier;
-  if (rules.anthropicSpeed !== undefined) payload.speed = rules.anthropicSpeed;
 };
 
 export const applyAliasRulesToGemini = (payload: GeminiPayload, rules: ModelAliasRules): void => {
   // All four reasoning knobs ride on the native thinkingConfig; verbosity and
-  // serviceTier ride on extension slots under generationConfig; the
-  // anthropic_* knobs ride on top-level extension slots so the existing
-  // gemini-via-messages translator picks them up there.
+  // serviceTier ride on extension slots under generationConfig; anthropicBeta
+  // rides on a top-level extension slot so the existing gemini-via-messages
+  // translator picks it up there.
   const hasThinking = rules.reasoning?.effort !== undefined
     || rules.reasoning?.budgetTokens !== undefined
     || rules.reasoning?.adaptive === true
@@ -104,6 +101,5 @@ export const applyAliasRulesToGemini = (payload: GeminiPayload, rules: ModelAlia
     if (rules.serviceTier !== undefined) generationConfig.serviceTier = rules.serviceTier;
     payload.generationConfig = generationConfig;
   }
-  if (rules.anthropicSpeed !== undefined) payload.anthropicSpeed = rules.anthropicSpeed;
   if (rules.anthropicBeta?.length) payload.anthropicBeta = [...rules.anthropicBeta];
 };
diff --git a/packages/gateway/src/data-plane/model-aliases/apply_test.ts b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
index e05f40c91..3dfba9d45 100644
--- a/packages/gateway/src/data-plane/model-aliases/apply_test.ts
+++ b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
@@ -43,14 +43,13 @@ describe('applyAliasRulesToChatCompletions', () => {
     expect(payload.reasoning_summary).toBe('detailed');
   });
 
-  test('writes verbosity, serviceTier, anthropicSpeed, anthropicBeta to their slots', () => {
+  test('writes verbosity, serviceTier, anthropicBeta to their slots', () => {
     const payload = cc();
     applyAliasRulesToChatCompletions(payload, {
-      verbosity: 'low', serviceTier: 'flex', anthropicSpeed: 'fast', anthropicBeta: ['ctx-1m'],
+      verbosity: 'low', serviceTier: 'flex', anthropicBeta: ['ctx-1m'],
     });
     expect(payload.verbosity).toBe('low');
     expect(payload.service_tier).toBe('flex');
-    expect(payload.anthropic_speed).toBe('fast');
     expect(payload.anthropic_beta).toEqual(['ctx-1m']);
   });
 
@@ -100,10 +99,9 @@ describe('applyAliasRulesToResponses', () => {
     expect(payload.service_tier).toBe('flex');
   });
 
-  test('writes anthropicSpeed / anthropicBeta to extension slots', () => {
+  test('writes anthropicBeta to extension slot', () => {
     const payload = resp();
-    applyAliasRulesToResponses(payload, { anthropicSpeed: 'fast', anthropicBeta: ['ctx-1m'] });
-    expect(payload.anthropic_speed).toBe('fast');
+    applyAliasRulesToResponses(payload, { anthropicBeta: ['ctx-1m'] });
     expect(payload.anthropic_beta).toEqual(['ctx-1m']);
   });
 });
@@ -133,12 +131,6 @@ describe('applyAliasRulesToMessages', () => {
     expect(payload.thinking).toEqual({ type: 'enabled', budget_tokens: 1024, display: 'summarized' });
   });
 
-  test('writes anthropicSpeed to native speed', () => {
-    const payload = msg();
-    applyAliasRulesToMessages(payload, { anthropicSpeed: 'fast' });
-    expect(payload.speed).toBe('fast');
-  });
-
   test('writes serviceTier to native service_tier', () => {
     const payload = msg();
     applyAliasRulesToMessages(payload, { serviceTier: 'priority' });
@@ -203,10 +195,9 @@ describe('applyAliasRulesToGemini', () => {
     expect(payload.generationConfig?.serviceTier).toBe('flex');
   });
 
-  test('writes anthropicSpeed / anthropicBeta to top-level extension slots', () => {
+  test('writes anthropicBeta to top-level extension slot', () => {
     const payload = gem();
-    applyAliasRulesToGemini(payload, { anthropicSpeed: 'fast', anthropicBeta: ['ctx-1m'] });
-    expect(payload.anthropicSpeed).toBe('fast');
+    applyAliasRulesToGemini(payload, { anthropicBeta: ['ctx-1m'] });
     expect(payload.anthropicBeta).toEqual(['ctx-1m']);
   });
 
diff --git a/packages/protocols/src/chat-completions/index.ts b/packages/protocols/src/chat-completions/index.ts
index 072348261..fc8a76d7f 100644
--- a/packages/protocols/src/chat-completions/index.ts
+++ b/packages/protocols/src/chat-completions/index.ts
@@ -36,8 +36,6 @@ export interface ChatCompletionsPayload {
   adaptive_thinking?: boolean;
   /** Floway protocol extension. Translated to OpenAI Responses `reasoning.summary` / Anthropic `thinking.display` / Gemini `thinkingConfig.includeThoughts` when routed to those upstreams; dropped on OpenAI Chat targets. */
   reasoning_summary?: string;
-  /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. */
-  anthropic_speed?: string;
   /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. */
   anthropic_beta?: readonly string[];
 }
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index 278a6254b..97c9283a2 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -129,7 +129,6 @@ export interface PublicModelAliasedFrom {
     };
     verbosity?: string;
     serviceTier?: string;
-    anthropicSpeed?: string;
     anthropicBeta?: readonly string[];
   };
   onConflict: 'alias-only' | 'real-only' | 'both-real-first' | 'both-alias-first';
@@ -156,7 +155,6 @@ export const formatAliasRuleBadges = (rules: PublicModelAliasedFrom['rules']): A
   if (rules.reasoning?.summary !== undefined) out.push({ label: 'reasoning summary', value: rules.reasoning.summary });
   if (rules.verbosity !== undefined) out.push({ label: 'verbosity', value: rules.verbosity });
   if (rules.serviceTier !== undefined) out.push({ label: 'service tier', value: rules.serviceTier });
-  if (rules.anthropicSpeed !== undefined) out.push({ label: 'speed', value: rules.anthropicSpeed });
   if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
     out.push({ label: 'anthropic beta', value: [...rules.anthropicBeta].sort().join('/') });
   }
diff --git a/packages/protocols/src/extensions/index.ts b/packages/protocols/src/extensions/index.ts
index 3f2c67750..00edf50f1 100644
--- a/packages/protocols/src/extensions/index.ts
+++ b/packages/protocols/src/extensions/index.ts
@@ -5,11 +5,11 @@
  * extension residue before the upstream HTTP call.
  */
 export const FLOWAY_EXTENSION_FIELDS = {
-  chatCompletions: ['thinking_budget', 'adaptive_thinking', 'reasoning_summary', 'anthropic_speed', 'anthropic_beta'] as const,
-  responses: ['thinking_budget', 'adaptive_thinking', 'anthropic_speed', 'anthropic_beta'] as const,
+  chatCompletions: ['thinking_budget', 'adaptive_thinking', 'reasoning_summary', 'anthropic_beta'] as const,
+  responses: ['thinking_budget', 'adaptive_thinking', 'anthropic_beta'] as const,
   messages: ['verbosity'] as const,
   gemini: {
-    topLevel: ['anthropicSpeed', 'anthropicBeta'] as const,
+    topLevel: ['anthropicBeta'] as const,
     generationConfig: ['verbosity', 'serviceTier'] as const,
   },
 } as const;
diff --git a/packages/protocols/src/gemini/index.ts b/packages/protocols/src/gemini/index.ts
index d3786aad5..f8878eda2 100644
--- a/packages/protocols/src/gemini/index.ts
+++ b/packages/protocols/src/gemini/index.ts
@@ -6,8 +6,6 @@ export interface GeminiPayload {
   generationConfig?: GeminiGenerationConfig;
   safetySettings?: GeminiSafetySetting[];
   cachedContent?: string;
-  /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. */
-  anthropicSpeed?: string;
   /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. */
   anthropicBeta?: readonly string[];
 }
diff --git a/packages/protocols/src/responses/index.ts b/packages/protocols/src/responses/index.ts
index 2669fe41d..53cf084af 100644
--- a/packages/protocols/src/responses/index.ts
+++ b/packages/protocols/src/responses/index.ts
@@ -41,8 +41,6 @@ export interface ResponsesPayload {
   thinking_budget?: number;
   /** Floway protocol extension. Translated to Anthropic `thinking.type: "adaptive"` / Gemini `thinkingConfig.thinkingBudget: -1` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. */
   adaptive_thinking?: boolean;
-  /** Floway protocol extension. Translated to Anthropic `speed` when routed to a Messages upstream; dropped elsewhere. */
-  anthropic_speed?: string;
   /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. */
   anthropic_beta?: readonly string[];
 }
diff --git a/packages/translate/src/chat-completions-via-messages/request.ts b/packages/translate/src/chat-completions-via-messages/request.ts
index eb6647861..d85c8e301 100644
--- a/packages/translate/src/chat-completions-via-messages/request.ts
+++ b/packages/translate/src/chat-completions-via-messages/request.ts
@@ -202,9 +202,7 @@ export const translateChatCompletionsToMessages = async (payload: ChatCompletion
 
   // `service_tier: 'fast'` from the Chat Completions caller maps to
   // Anthropic's `speed: 'fast'`; all other defined service_tier values
-  // pass through as `service_tier` on the Messages wire. An explicit
-  // `anthropic_speed` from the alias-extension layer rides through
-  // independently and may co-set `speed`.
+  // pass through as `service_tier` on the Messages wire.
   const serviceTierFields: Partial<MessagesPayload> =
     payload.service_tier === 'fast'
       ? { speed: 'fast' }
@@ -230,7 +228,6 @@ export const translateChatCompletionsToMessages = async (payload: ChatCompletion
     ...(payload.tool_choice != null ? { tool_choice: translateChatCompletionsToolChoice(payload.tool_choice) } : {}),
     ...(hasOutputConfig ? { output_config: outputConfig } : {}),
     ...(thinking ? { thinking } : {}),
-    ...(payload.anthropic_speed != null ? { speed: payload.anthropic_speed } : {}),
     ...serviceTierFields,
   };
 };
diff --git a/packages/translate/src/chat-completions-via-messages/request_test.ts b/packages/translate/src/chat-completions-via-messages/request_test.ts
index 01bfc0788..9dede4c21 100644
--- a/packages/translate/src/chat-completions-via-messages/request_test.ts
+++ b/packages/translate/src/chat-completions-via-messages/request_test.ts
@@ -1261,17 +1261,6 @@ test('translateChatCompletionsToMessages merges reasoning_summary onto budget-dr
   assertEquals(result.thinking, { type: 'enabled', budget_tokens: 2048, display: 'summarized' });
 });
 
-test('translateChatCompletionsToMessages emits anthropic_speed onto Messages speed', async () => {
-  const result = await translateChatCompletionsToMessages(
-    mkPayload({
-      messages: [{ role: 'user', content: 'hi' }],
-      anthropic_speed: 'fast',
-    }),
-  );
-
-  assertEquals(result.speed, 'fast');
-});
-
 test('translateChatCompletionsToMessages forwards service_tier verbatim', async () => {
   const result = await translateChatCompletionsToMessages(
     mkPayload({
diff --git a/packages/translate/src/chat-completions-via-responses/request_test.ts b/packages/translate/src/chat-completions-via-responses/request_test.ts
index 137df562f..70af2527d 100644
--- a/packages/translate/src/chat-completions-via-responses/request_test.ts
+++ b/packages/translate/src/chat-completions-via-responses/request_test.ts
@@ -461,7 +461,6 @@ test('translateChatCompletionsToResponses leaves Messages-only extensions as inb
     messages: [{ role: 'user', content: 'hi' }],
     thinking_budget: 4096,
     adaptive_thinking: true,
-    anthropic_speed: 'fast',
     anthropic_beta: ['fast-mode-2026-02-01'],
   });
 
@@ -469,7 +468,6 @@ test('translateChatCompletionsToResponses leaves Messages-only extensions as inb
   // residue. Translate must not invent a target field.
   assertEquals('thinking_budget' in result, false);
   assertEquals('adaptive_thinking' in result, false);
-  assertEquals('anthropic_speed' in result, false);
   assertEquals('anthropic_beta' in result, false);
 });
 
diff --git a/packages/translate/src/gemini-via-chat-completions/request_test.ts b/packages/translate/src/gemini-via-chat-completions/request_test.ts
index 45f98c146..debd6b707 100644
--- a/packages/translate/src/gemini-via-chat-completions/request_test.ts
+++ b/packages/translate/src/gemini-via-chat-completions/request_test.ts
@@ -498,19 +498,15 @@ test('buildTargetRequest emits generationConfig.serviceTier onto Chat service_ti
   assertEquals(result.service_tier, 'priority');
 });
 
-test('buildTargetRequest drops top-level Anthropic extensions (anthropicSpeed, anthropicBeta) on Chat', () => {
+test('buildTargetRequest drops top-level Anthropic extensions (anthropicBeta) on Chat', () => {
   const result = buildTargetRequest(
     {
       contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
-      anthropicSpeed: 'fast',
       anthropicBeta: ['fast-mode-2026-02-01'],
     },
     'gpt-test',
   );
 
-  assertEquals('anthropicSpeed' in result, false);
-  assertEquals('anthropic_speed' in result, false);
-  assertEquals('speed' in result, false);
   assertEquals('anthropicBeta' in result, false);
   assertEquals('anthropic_beta' in result, false);
 });
diff --git a/packages/translate/src/gemini-via-messages/request.ts b/packages/translate/src/gemini-via-messages/request.ts
index c109da54d..f8a0fa0c1 100644
--- a/packages/translate/src/gemini-via-messages/request.ts
+++ b/packages/translate/src/gemini-via-messages/request.ts
@@ -265,11 +265,6 @@ export const buildTargetRequest = (
 
   applyGenerationConfig(request, payload.generationConfig, fallbackMaxOutputTokens);
 
-  // Top-level Gemini Floway extensions: `anthropicSpeed` is the only one
-  // with a Messages-natural slot. `anthropicBeta` is header-bound at the
-  // gateway boundary (Task 5) since translate functions do not own headers.
-  if (payload.anthropicSpeed != null) request.speed = payload.anthropicSpeed;
-
   const tools = buildTools(payload);
   if (tools) request.tools = tools;
   applyLastToolCacheBreakpoint(request.tools);
diff --git a/packages/translate/src/gemini-via-messages/request_test.ts b/packages/translate/src/gemini-via-messages/request_test.ts
index 555c12aa4..870d3f388 100644
--- a/packages/translate/src/gemini-via-messages/request_test.ts
+++ b/packages/translate/src/gemini-via-messages/request_test.ts
@@ -408,16 +408,6 @@ test('buildTargetRequest rejects a part with no recognized content field', () =>
 
 // ── Floway extension emission ──
 
-test('buildTargetRequest emits top-level anthropicSpeed onto Messages speed', () => {
-  const result = buildTargetRequest(
-    { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], anthropicSpeed: 'fast' },
-    'claude-test',
-    noOptions,
-  );
-
-  assertEquals(result.speed, 'fast');
-});
-
 test('buildTargetRequest emits generationConfig.serviceTier onto Messages service_tier', () => {
   const result = buildTargetRequest(
     { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { serviceTier: 'priority' } },
diff --git a/packages/translate/src/gemini-via-responses/request_test.ts b/packages/translate/src/gemini-via-responses/request_test.ts
index 9d568605f..747bfed02 100644
--- a/packages/translate/src/gemini-via-responses/request_test.ts
+++ b/packages/translate/src/gemini-via-responses/request_test.ts
@@ -436,13 +436,10 @@ test('buildTargetRequest drops top-level Anthropic extensions on Responses', ()
   const result = buildTargetRequest(
     {
       contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
-      anthropicSpeed: 'fast',
       anthropicBeta: ['fast-mode-2026-02-01'],
     },
     'gpt-test',
   );
 
-  assertEquals('anthropicSpeed' in result, false);
-  assertEquals('anthropic_speed' in result, false);
   assertEquals('anthropicBeta' in result, false);
 });
diff --git a/packages/translate/src/messages-via-chat-completions/request_test.ts b/packages/translate/src/messages-via-chat-completions/request_test.ts
index 9692cb1ea..4cc6f81bb 100644
--- a/packages/translate/src/messages-via-chat-completions/request_test.ts
+++ b/packages/translate/src/messages-via-chat-completions/request_test.ts
@@ -509,7 +509,6 @@ test('translateMessagesToChatCompletions drops Anthropic-only knobs that have no
   assertEquals(result.reasoning_effort, 'medium');
   assertEquals('thinking_budget' in result, false);
   assertEquals('reasoning_summary' in result, false);
-  assertEquals('anthropic_speed' in result, false);
 });
 
 test('translateMessagesToChatCompletions does not emit verbosity when the extension is unset', () => {
diff --git a/packages/translate/src/messages-via-responses/request_test.ts b/packages/translate/src/messages-via-responses/request_test.ts
index 3ee95b4e6..4c32d1b44 100644
--- a/packages/translate/src/messages-via-responses/request_test.ts
+++ b/packages/translate/src/messages-via-responses/request_test.ts
@@ -542,7 +542,6 @@ test('translateMessagesToResponses drops Anthropic-only mode knobs the Responses
   // bridge test below and is intentionally excluded here.
   assertEquals('thinking_budget' in result, false);
   assertEquals('adaptive_thinking' in result, false);
-  assertEquals('anthropic_speed' in result, false);
   assertEquals('anthropic_beta' in result, false);
 });
 
diff --git a/packages/translate/src/responses-via-chat-completions/request_test.ts b/packages/translate/src/responses-via-chat-completions/request_test.ts
index 448222f45..7ebfb6e15 100644
--- a/packages/translate/src/responses-via-chat-completions/request_test.ts
+++ b/packages/translate/src/responses-via-chat-completions/request_test.ts
@@ -1486,13 +1486,11 @@ test('translateResponsesToChatCompletions leaves Messages-only extensions as inb
     input: [{ type: 'message', role: 'user', content: 'hi' }],
     thinking_budget: 4096,
     adaptive_thinking: true,
-    anthropic_speed: 'fast',
     anthropic_beta: ['fast-mode-2026-02-01'],
   });
 
   assertEquals('thinking_budget' in result.target, false);
   assertEquals('adaptive_thinking' in result.target, false);
-  assertEquals('anthropic_speed' in result.target, false);
   assertEquals('anthropic_beta' in result.target, false);
 });
 
diff --git a/packages/translate/src/responses-via-messages/request.ts b/packages/translate/src/responses-via-messages/request.ts
index fa608d971..a1896a1f3 100644
--- a/packages/translate/src/responses-via-messages/request.ts
+++ b/packages/translate/src/responses-via-messages/request.ts
@@ -348,9 +348,7 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op
   // `service_tier: 'fast'` from the Responses caller maps to Anthropic's
   // `speed: 'fast'`; all other defined service_tier values pass through as
   // `service_tier` on the Messages wire (Anthropic accepts 'auto',
-  // 'standard_only', and future literals). An explicit `anthropic_speed`
-  // from the alias-extension layer rides through independently and may
-  // co-set `speed`.
+  // 'standard_only', and future literals).
   const serviceTierFields: Partial<MessagesPayload> =
     payload.service_tier === 'fast'
       ? { speed: 'fast' }
@@ -373,7 +371,6 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op
     tool_choice: translateToolChoice(payload.tool_choice),
     ...(thinking ? { thinking } : {}),
     ...(hasOutputConfig ? { output_config: outputConfig } : {}),
-    ...(payload.anthropic_speed != null ? { speed: payload.anthropic_speed } : {}),
     ...serviceTierFields,
   };
 
diff --git a/packages/translate/src/responses-via-messages/request_test.ts b/packages/translate/src/responses-via-messages/request_test.ts
index c8dbd8bcb..291bddad5 100644
--- a/packages/translate/src/responses-via-messages/request_test.ts
+++ b/packages/translate/src/responses-via-messages/request_test.ts
@@ -709,11 +709,6 @@ test('translateResponsesToMessages emits adaptive_thinking onto thinking.{adapti
   assertEquals(result.target.thinking, { type: 'adaptive' });
 });
 
-test('translateResponsesToMessages emits anthropic_speed onto speed', async () => {
-  const result = await translateResponsesToMessages(minimalResponsesPayload({ anthropic_speed: 'fast' }));
-  assertEquals(result.target.speed, 'fast');
-});
-
 test('translateResponsesToMessages leaves anthropic_beta as inbound residue for the gateway header pass', async () => {
   const result = await translateResponsesToMessages(minimalResponsesPayload({ anthropic_beta: ['fast-mode-2026-02-01'] }));
   assertEquals('anthropic_beta' in result.target, false);

From 8824e820d23ff0e493114b4ed31538250d8d9a4a Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 04:13:35 +0800
Subject: [PATCH 023/170] feat(aliases): extend ModelAliasesRepo with
 save/create/delete

Mirror the proxies repo's CRUD shape: `create` rejects PK collisions with
a typed `{ reason: 'duplicate' }` so the route layer can map to 409 without
driver-specific error parsing, `save` upserts in place (preserving the
existing row's createdAt on conflict), `delete` returns whether the row
was removed. `getByAlias` is the targeted lookup the PATCH handler uses to
merge a partial body against the persisted row.

In-memory impl now sorts loadAll by alias to match the SQL `ORDER BY alias`
contract; the Map keyed by alias keeps PK semantics 1:1 with SQLite.
---
 .../src/control-plane/model-aliases/repo.ts   | 66 ++++++++++++++++++-
 packages/gateway/src/repo/memory.ts           | 36 ++++++++--
 packages/gateway/src/repo/sql.ts              | 18 ++++-
 packages/gateway/src/repo/types.ts            | 13 +++-
 4 files changed, 124 insertions(+), 9 deletions(-)

diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts
index 3718b5fd5..1eaae7d16 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo.ts
@@ -12,17 +12,81 @@ interface ModelAliasRow {
   created_at: number;
 }
 
+const ALIAS_COLUMNS = 'alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at';
+
 // The model_aliases table is operator-managed and small (dozens of rows at
 // most), so the data plane reads the full table per request — no cache layer.
 // `ORDER BY alias` makes the read deterministic so `/v1/models` and friends
 // emit alias entries in a stable, operator-predictable order across runtimes.
 export const loadAllAliases = async (db: SqlDatabase): Promise<readonly ModelAlias[]> => {
   const { results } = await db
-    .prepare('SELECT alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at FROM model_aliases ORDER BY alias')
+    .prepare(`SELECT ${ALIAS_COLUMNS} FROM model_aliases ORDER BY alias`)
     .all<ModelAliasRow>();
   return results.map(toModelAlias);
 };
 
+export const getAliasByName = async (db: SqlDatabase, alias: string): Promise<ModelAlias | null> => {
+  const row = await db
+    .prepare(`SELECT ${ALIAS_COLUMNS} FROM model_aliases WHERE alias = ?`)
+    .bind(alias)
+    .first<ModelAliasRow>();
+  return row ? toModelAlias(row) : null;
+};
+
+// INSERT-only create. A PK collision is reported through the `duplicate`
+// return so the route layer can map it to 409 without parsing
+// driver-specific error strings (the error shape differs between
+// node:sqlite and D1). `ON CONFLICT (alias) DO NOTHING` makes the statement
+// a no-op on collision instead of throwing, and the affected-row count
+// tells the two outcomes apart. Doing this in one statement keeps the
+// duplicate check atomic: a separate SELECT-then-INSERT would let two
+// concurrent creates both pass the check, with the loser surfacing as an
+// unhandled constraint error rather than `duplicate`.
+export const insertAlias = async (db: SqlDatabase, alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }> => {
+  const result = await db
+    .prepare(`INSERT INTO model_aliases (${ALIAS_COLUMNS}) VALUES (?, ?, ?, ?, ?, ?, ?, ?) ON CONFLICT (alias) DO NOTHING`)
+    .bind(...bindValues(alias))
+    .run();
+  if ((result.meta.changes ?? 0) === 0) return { ok: false, reason: 'duplicate' };
+  return { ok: true };
+};
+
+// UPSERT — on conflict the row is overwritten in place, but `created_at`
+// is preserved (the row's first INSERT wins, matching how `proxies.save`
+// keeps the original creation timestamp on a re-save).
+export const saveAlias = async (db: SqlDatabase, alias: ModelAlias): Promise<void> => {
+  await db
+    .prepare(
+      `INSERT INTO model_aliases (${ALIAS_COLUMNS}) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
+       ON CONFLICT (alias) DO UPDATE SET
+         target_model_id = excluded.target_model_id,
+         upstream_ids_json = excluded.upstream_ids_json,
+         rules_json = excluded.rules_json,
+         visible_in_models_list = excluded.visible_in_models_list,
+         on_conflict = excluded.on_conflict,
+         display_name = excluded.display_name,
+         updated_at = unixepoch()`,
+    )
+    .bind(...bindValues(alias))
+    .run();
+};
+
+export const deleteAlias = async (db: SqlDatabase, alias: string): Promise<{ deleted: boolean }> => {
+  const result = await db.prepare('DELETE FROM model_aliases WHERE alias = ?').bind(alias).run();
+  return { deleted: (result.meta.changes ?? 0) > 0 };
+};
+
+const bindValues = (alias: ModelAlias): unknown[] => [
+  alias.alias,
+  alias.targetModelId,
+  JSON.stringify(alias.upstreamIds),
+  JSON.stringify(alias.rules),
+  alias.visibleInModelsList ? 1 : 0,
+  alias.onConflict,
+  alias.displayName ?? null,
+  alias.createdAt,
+];
+
 const toModelAlias = (row: ModelAliasRow): ModelAlias => ({
   alias: row.alias,
   targetModelId: row.target_model_id,
diff --git a/packages/gateway/src/repo/memory.ts b/packages/gateway/src/repo/memory.ts
index 85f01b621..c424426ee 100644
--- a/packages/gateway/src/repo/memory.ts
+++ b/packages/gateway/src/repo/memory.ts
@@ -918,17 +918,41 @@ export class InMemoryRepo implements Repo {
   }
 }
 
-// Test-only in-memory backing for the alias table. The list starts empty
-// and can be reseeded via `setAll` so tests exercising alias-resolution
-// behavior do not depend on a live SQL database.
+// Test-only in-memory backing for the alias table. Mirrors SqlModelAliasesRepo:
+// `loadAll` returns rows sorted by alias, `create` rejects PK collisions,
+// `save` upserts in place. `setAll` is the test seam: tests that pre-populate
+// the table for read-only data-plane assertions reach for it directly.
 export class MemoryModelAliasesRepo implements ModelAliasesRepo {
-  private rows: readonly ModelAlias[] = [];
+  private rows = new Map<string, ModelAlias>();
 
   loadAll(): Promise<readonly ModelAlias[]> {
-    return Promise.resolve(this.rows);
+    return Promise.resolve([...this.rows.values()].sort((a, b) => (a.alias < b.alias ? -1 : a.alias > b.alias ? 1 : 0))); // code-unit order, not locale order, to mirror SQL `ORDER BY alias`
+  }
+
+  getByAlias(alias: string): Promise<ModelAlias | null> {
+    return Promise.resolve(this.rows.get(alias) ?? null);
+  }
+
+  create(alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }> {
+    if (this.rows.has(alias.alias)) return Promise.resolve({ ok: false, reason: 'duplicate' });
+    this.rows.set(alias.alias, alias);
+    return Promise.resolve({ ok: true });
+  }
+
+  save(alias: ModelAlias): Promise<void> {
+    // Preserve the original row's createdAt on an upsert so re-saves do not
+    // overwrite the local deployment's first-seen timestamp.
+    const existing = this.rows.get(alias.alias);
+    const preserved = existing ? { ...alias, createdAt: existing.createdAt } : alias;
+    this.rows.set(preserved.alias, preserved);
+    return Promise.resolve();
+  }
+
+  delete(alias: string): Promise<{ deleted: boolean }> {
+    return Promise.resolve({ deleted: this.rows.delete(alias) });
   }
 
   setAll(rows: readonly ModelAlias[]): void {
-    this.rows = rows;
+    this.rows = new Map(rows.map(row => [row.alias, row]));
   }
 }
diff --git a/packages/gateway/src/repo/sql.ts b/packages/gateway/src/repo/sql.ts
index 109b35024..044f79aae 100644
--- a/packages/gateway/src/repo/sql.ts
+++ b/packages/gateway/src/repo/sql.ts
@@ -35,7 +35,7 @@ import type {
   UsersRepo,
 } from './types.ts';
 import { serializeStoredConfig, serializeStoredState } from './upstream-json.ts';
-import { loadAllAliases } from '../control-plane/model-aliases/repo.ts';
+import { deleteAlias, getAliasByName, insertAlias, loadAllAliases, saveAlias } from '../control-plane/model-aliases/repo.ts';
 import type { ModelAlias } from '../control-plane/model-aliases/types.ts';
 import { latencyBucketForMs } from '../shared/performance-histogram.ts';
 import { generateSessionToken } from '../shared/session-tokens.ts';
@@ -1628,4 +1628,20 @@ class SqlModelAliasesRepo implements ModelAliasesRepo {
   loadAll(): Promise<readonly ModelAlias[]> {
     return loadAllAliases(this.db);
   }
+
+  getByAlias(alias: string): Promise<ModelAlias | null> {
+    return getAliasByName(this.db, alias);
+  }
+
+  create(alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }> {
+    return insertAlias(this.db, alias);
+  }
+
+  save(alias: ModelAlias): Promise<void> {
+    return saveAlias(this.db, alias);
+  }
+
+  delete(alias: string): Promise<{ deleted: boolean }> {
+    return deleteAlias(this.db, alias);
+  }
 }
diff --git a/packages/gateway/src/repo/types.ts b/packages/gateway/src/repo/types.ts
index d282aaa98..8ed620849 100644
--- a/packages/gateway/src/repo/types.ts
+++ b/packages/gateway/src/repo/types.ts
@@ -337,7 +337,18 @@ export interface Repo {
 }
 
 // Operator-managed alias table; small (dozens of rows at most) and read
-// per request, so the repo deliberately exposes only a full-table fetch.
+// per request, so the repo deliberately exposes only a full-table fetch
+// plus the targeted mutations the control-plane CRUD needs.
 export interface ModelAliasesRepo {
   loadAll(): Promise<readonly ModelAlias[]>;
+  getByAlias(alias: string): Promise<ModelAlias | null>;
+  // INSERT-only — fails with `duplicate` on PK conflict so the route layer
+  // surfaces 409 to the dashboard instead of silently overwriting an
+  // existing row.
+  create(alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }>;
+  // UPSERT semantics — used by import/restore flows that need to land a row
+  // regardless of whether it already exists.
+  save(alias: ModelAlias): Promise<void>;
+  // Returns whether a row was actually removed; routes treat false as 404.
+  delete(alias: string): Promise<{ deleted: boolean }>;
 }

From 9abf5c28f5708c90460873cbf0d49f1dcc82b291 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 04:15:34 +0800
Subject: [PATCH 024/170] feat(aliases): add REST CRUD endpoints under
 /api/aliases
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Operator-managed alias rows previously had no admin surface — the only
write path was a hand-edited migration. Wire admin-only CRUD next to the
existing model-aliases code:

  GET    /api/aliases             list, sorted by alias
  POST   /api/aliases             create; 409 on PK conflict
  PATCH  /api/aliases/:alias      partial update; 404 when missing
  DELETE /api/aliases/:alias      idempotent-shaped 204/404

The Zod schemas mirror the closed rule knob set (reasoning effort /
budgetTokens / adaptive / summary, verbosity, serviceTier,
anthropicBeta[]) under `.strict()` so an unknown rule key is a 400 — but
each value stays freeform: a newly-introduced upstream-side enum ships
through without a gateway code change (Goal 2). Alias names are bounded
by the same `[A-Za-z0-9_.:/-]+` grammar the real model ids already use.

PATCH propagates the absent/null distinction for `displayName` so the
operator can clear an operator-set label back to the synthesized
fallback without dropping into a separate "reset" route.
---
 .../src/control-plane/model-aliases/routes.ts |  80 ++++++
 .../model-aliases/routes_test.ts              | 267 ++++++++++++++++++
 .../control-plane/model-aliases/serialize.ts  |  31 ++
 packages/gateway/src/control-plane/routes.ts  |   8 +-
 packages/gateway/src/control-plane/schemas.ts |  70 +++++
 5 files changed, 455 insertions(+), 1 deletion(-)
 create mode 100644 packages/gateway/src/control-plane/model-aliases/routes.ts
 create mode 100644 packages/gateway/src/control-plane/model-aliases/routes_test.ts
 create mode 100644 packages/gateway/src/control-plane/model-aliases/serialize.ts

diff --git a/packages/gateway/src/control-plane/model-aliases/routes.ts b/packages/gateway/src/control-plane/model-aliases/routes.ts
new file mode 100644
index 000000000..961a0da73
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/routes.ts
@@ -0,0 +1,80 @@
+import type { Context } from 'hono';
+
+import { aliasToJson } from './serialize.ts';
+import type { ModelAlias, ModelAliasRules } from './types.ts';
+import { type CtxWithJson } from '../../middleware/zod-validator.ts';
+import { getRepo } from '../../repo/index.ts';
+import type { createAliasBody, updateAliasBody } from '../schemas.ts';
+
+export const listAliases = async (c: Context) => {
+  const aliases = await getRepo().modelAliases.loadAll();
+  return c.json(aliases.map(aliasToJson));
+};
+
+export const createAlias = async (c: CtxWithJson<typeof createAliasBody>) => {
+  const body = c.req.valid('json');
+  const record: ModelAlias = {
+    alias: body.alias,
+    targetModelId: body.targetModelId,
+    upstreamIds: body.upstreamIds,
+    rules: body.rules,
+    visibleInModelsList: body.visibleInModelsList,
+    // `real-only` is the safe default: an alias whose target id collides with
+    // a real model id stays hidden until the operator opts the alias into one
+    // of the surfacing modes. Matches the migration's column default.
+    onConflict: body.onConflict ?? 'real-only',
+    ...(body.displayName !== undefined ? { displayName: body.displayName } : {}),
+    createdAt: Math.floor(Date.now() / 1000),
+  };
+
+  const result = await getRepo().modelAliases.create(record);
+  if (!result.ok) {
+    return c.json({ error: { type: 'conflict', message: `Alias "${body.alias}" already exists` } }, 409);
+  }
+
+  return c.json(aliasToJson(record), 201);
+};
+
+export const updateAlias = async (c: CtxWithJson<typeof updateAliasBody>) => {
+  const aliasName = c.req.param('alias') ?? '';
+  const body = c.req.valid('json');
+
+  const repo = getRepo();
+  const existing = await repo.modelAliases.getByAlias(aliasName);
+  if (!existing) return c.json({ error: 'Alias not found' }, 404);
+
+  // Field-by-field merge so an absent field preserves the existing value.
+  // `displayName` accepts an explicit null to clear the operator-set label
+  // back to the synthesized fallback; `nextDisplayName` branches on
+  // undefined vs. null to keep the absent / null distinction `??` would collapse.
+  const merged: ModelAlias = {
+    alias: existing.alias,
+    targetModelId: body.targetModelId ?? existing.targetModelId,
+    upstreamIds: body.upstreamIds ?? existing.upstreamIds,
+    rules: body.rules ?? existing.rules,
+    visibleInModelsList: body.visibleInModelsList ?? existing.visibleInModelsList,
+    onConflict: body.onConflict ?? existing.onConflict,
+    createdAt: existing.createdAt,
+    ...nextDisplayName(existing, body.displayName),
+  };
+
+  await repo.modelAliases.save(merged);
+  return c.json(aliasToJson(merged));
+};
+
+const nextDisplayName = (existing: ModelAlias, patch: string | null | undefined): { displayName?: string } => {
+  if (patch === undefined) return existing.displayName !== undefined ? { displayName: existing.displayName } : {};
+  if (patch === null) return {};
+  return { displayName: patch };
+};
+
+export const deleteAlias = async (c: Context) => {
+  const aliasName = c.req.param('alias') ?? '';
+  const { deleted } = await getRepo().modelAliases.delete(aliasName);
+  if (!deleted) return c.json({ error: 'Alias not found' }, 404);
+  return c.body(null, 204);
+};
+
+// Re-export so the routes module can wire the type-level `Rules` carrier
+// through the RPC client without consumers having to chase the alias subtree.
+export type { ModelAliasRules };
diff --git a/packages/gateway/src/control-plane/model-aliases/routes_test.ts b/packages/gateway/src/control-plane/model-aliases/routes_test.ts
new file mode 100644
index 000000000..4143ed1cf
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/routes_test.ts
@@ -0,0 +1,267 @@
+import { test } from 'vitest';
+
+import type { SerializedModelAlias } from './serialize.ts';
+import { requestApp, setupAppTest } from '../../test-helpers.ts';
+import { assertEquals } from '@floway-dev/test-utils';
+
+const authedGet = (adminSession: string): RequestInit => ({
+  method: 'GET',
+  headers: { 'x-floway-session': adminSession },
+});
+
+const authedJson = (adminSession: string, method: 'POST' | 'PATCH' | 'DELETE', body?: unknown): RequestInit => ({
+  method,
+  headers: {
+    'content-type': 'application/json',
+    'x-floway-session': adminSession,
+  },
+  ...(body === undefined ? {} : { body: JSON.stringify(body) }),
+});
+
+const baseCreate = (overrides: Record<string, unknown> = {}) => ({
+  alias: 'opus-xhigh',
+  targetModelId: 'claude-opus-4-6',
+  upstreamIds: [],
+  rules: { reasoning: { effort: 'xhigh' } },
+  visibleInModelsList: true,
+  onConflict: 'real-only',
+  ...overrides,
+});
+
+test('GET /api/aliases returns rows sorted by alias', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.save({
+    alias: 'zzz-late',
+    targetModelId: 'gpt-5.4',
+    upstreamIds: [],
+    rules: {},
+    visibleInModelsList: true,
+    onConflict: 'real-only',
+    createdAt: 1_700_000_001,
+  });
+  await repo.modelAliases.save({
+    alias: 'aaa-early',
+    targetModelId: 'gpt-5.4',
+    upstreamIds: [],
+    rules: {},
+    visibleInModelsList: true,
+    onConflict: 'real-only',
+    createdAt: 1_700_000_000,
+  });
+
+  const resp = await requestApp('/api/aliases', authedGet(adminSession));
+  assertEquals(resp.status, 200);
+  const list = (await resp.json()) as SerializedModelAlias[];
+  assertEquals(list.map(a => a.alias), ['aaa-early', 'zzz-late']);
+});
+
+test('POST /api/aliases creates a row and echoes the serialized shape', async () => {
+  const { repo, adminSession } = await setupAppTest();
+
+  const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({
+    displayName: 'Opus Extra-High',
+    upstreamIds: ['up_a', 'up_b'],
+    rules: { reasoning: { effort: 'xhigh' }, anthropicBeta: ['fine-grained-tool-streaming'] },
+  })));
+  assertEquals(resp.status, 201);
+  const created = (await resp.json()) as SerializedModelAlias;
+  assertEquals(created.alias, 'opus-xhigh');
+  assertEquals(created.target_model_id, 'claude-opus-4-6');
+  assertEquals(created.upstream_ids, ['up_a', 'up_b']);
+  assertEquals(created.rules, { reasoning: { effort: 'xhigh' }, anthropicBeta: ['fine-grained-tool-streaming'] });
+  assertEquals(created.visible_in_models_list, true);
+  assertEquals(created.on_conflict, 'real-only');
+  assertEquals(created.display_name, 'Opus Extra-High');
+  assertEquals(typeof created.created_at, 'number');
+
+  const stored = await repo.modelAliases.getByAlias('opus-xhigh');
+  assertEquals(stored?.targetModelId, 'claude-opus-4-6');
+  assertEquals(stored?.displayName, 'Opus Extra-High');
+});
+
+test('POST /api/aliases defaults onConflict to real-only when omitted', async () => {
+  const { adminSession } = await setupAppTest();
+
+  const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', {
+    alias: 'no-onconflict',
+    targetModelId: 'gpt-5.4',
+    upstreamIds: [],
+    rules: {},
+    visibleInModelsList: true,
+  }));
+  assertEquals(resp.status, 201);
+  const created = (await resp.json()) as SerializedModelAlias;
+  assertEquals(created.on_conflict, 'real-only');
+});
+
+test('POST /api/aliases returns 409 on duplicate alias', async () => {
+  const { adminSession } = await setupAppTest();
+
+  const first = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate()));
+  assertEquals(first.status, 201);
+
+  const dup = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate()));
+  assertEquals(dup.status, 409);
+  const body = (await dup.json()) as { error: { type: string; message: string } };
+  assertEquals(body.error.type, 'conflict');
+});
+
+test('POST /api/aliases rejects an empty alias name with 400', async () => {
+  const { adminSession } = await setupAppTest();
+  const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({ alias: '' })));
+  assertEquals(resp.status, 400);
+});
+
+test('POST /api/aliases rejects an alias containing whitespace with 400', async () => {
+  const { adminSession } = await setupAppTest();
+  const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({ alias: 'has space' })));
+  assertEquals(resp.status, 400);
+});
+
+test('POST /api/aliases rejects an unknown rule key with 400', async () => {
+  const { adminSession } = await setupAppTest();
+  const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({
+    rules: { reasoning: { effort: 'high' }, mysteryKnob: true } as unknown as Record<string, unknown>,
+  })));
+  assertEquals(resp.status, 400);
+});
+
+test('POST /api/aliases rejects an invalid onConflict value with 400', async () => {
+  const { adminSession } = await setupAppTest();
+  const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({ onConflict: 'mystery-mode' })));
+  assertEquals(resp.status, 400);
+});
+
+test('POST /api/aliases requires admin auth (non-admin api key returns 403)', async () => {
+  const { adminSession, apiKey } = await setupAppTest();
+
+  // Sanity: the admin call succeeds so the failure below pins the auth gate,
+  // not a request-shape mistake shared by both calls.
+  const adminResp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate()));
+  assertEquals(adminResp.status, 201);
+
+  const userResp = await requestApp('/api/aliases', {
+    method: 'POST',
+    headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
+    body: JSON.stringify(baseCreate({ alias: 'other' })),
+  });
+  assertEquals(userResp.status, 403);
+});
+
+test('PATCH /api/aliases/:alias merges a partial body and preserves untouched fields', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.save({
+    alias: 'opus-xhigh',
+    targetModelId: 'claude-opus-4-6',
+    upstreamIds: ['up_a'],
+    rules: { reasoning: { effort: 'xhigh' } },
+    visibleInModelsList: true,
+    onConflict: 'real-only',
+    displayName: 'Existing Label',
+    createdAt: 1_700_000_000,
+  });
+
+  const resp = await requestApp('/api/aliases/opus-xhigh', authedJson(adminSession, 'PATCH', {
+    rules: { reasoning: { effort: 'medium' }, serviceTier: 'priority' },
+  }));
+  assertEquals(resp.status, 200);
+  const updated = (await resp.json()) as SerializedModelAlias;
+  // Patched fields took effect.
+  assertEquals(updated.rules, { reasoning: { effort: 'medium' }, serviceTier: 'priority' });
+  // Untouched fields preserved verbatim.
+  assertEquals(updated.target_model_id, 'claude-opus-4-6');
+  assertEquals(updated.upstream_ids, ['up_a']);
+  assertEquals(updated.visible_in_models_list, true);
+  assertEquals(updated.display_name, 'Existing Label');
+  assertEquals(updated.created_at, 1_700_000_000);
+});
+
+test('PATCH /api/aliases/:alias accepts displayName=null to clear the label', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.save({
+    alias: 'opus-xhigh',
+    targetModelId: 'claude-opus-4-6',
+    upstreamIds: [],
+    rules: {},
+    visibleInModelsList: true,
+    onConflict: 'real-only',
+    displayName: 'Existing Label',
+    createdAt: 1_700_000_000,
+  });
+
+  const resp = await requestApp('/api/aliases/opus-xhigh', authedJson(adminSession, 'PATCH', { displayName: null }));
+  assertEquals(resp.status, 200);
+  const updated = (await resp.json()) as SerializedModelAlias;
+  assertEquals(updated.display_name, null);
+
+  const stored = await repo.modelAliases.getByAlias('opus-xhigh');
+  assertEquals(stored?.displayName, undefined);
+});
+
+test('PATCH /api/aliases/:alias returns 404 when the alias does not exist', async () => {
+  const { adminSession } = await setupAppTest();
+  const resp = await requestApp('/api/aliases/nope', authedJson(adminSession, 'PATCH', { visibleInModelsList: false }));
+  assertEquals(resp.status, 404);
+});
+
+test('PATCH /api/aliases/:alias requires admin auth', async () => {
+  const { repo, adminSession: _adminSession, apiKey } = await setupAppTest();
+  await repo.modelAliases.save({
+    alias: 'opus-xhigh',
+    targetModelId: 'claude-opus-4-6',
+    upstreamIds: [],
+    rules: {},
+    visibleInModelsList: true,
+    onConflict: 'real-only',
+    createdAt: 1_700_000_000,
+  });
+
+  const userResp = await requestApp('/api/aliases/opus-xhigh', {
+    method: 'PATCH',
+    headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
+    body: JSON.stringify({ visibleInModelsList: false }),
+  });
+  assertEquals(userResp.status, 403);
+});
+
+test('DELETE /api/aliases/:alias returns 204 on success and removes the row', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.save({
+    alias: 'doomed',
+    targetModelId: 'gpt-5.4',
+    upstreamIds: [],
+    rules: {},
+    visibleInModelsList: true,
+    onConflict: 'real-only',
+    createdAt: 1_700_000_000,
+  });
+
+  const resp = await requestApp('/api/aliases/doomed', authedJson(adminSession, 'DELETE'));
+  assertEquals(resp.status, 204);
+  assertEquals(await repo.modelAliases.getByAlias('doomed'), null);
+});
+
+test('DELETE /api/aliases/:alias returns 404 when the alias does not exist', async () => {
+  const { adminSession } = await setupAppTest();
+  const resp = await requestApp('/api/aliases/nope', authedJson(adminSession, 'DELETE'));
+  assertEquals(resp.status, 404);
+});
+
+test('DELETE /api/aliases/:alias requires admin auth', async () => {
+  const { repo, apiKey } = await setupAppTest();
+  await repo.modelAliases.save({
+    alias: 'doomed',
+    targetModelId: 'gpt-5.4',
+    upstreamIds: [],
+    rules: {},
+    visibleInModelsList: true,
+    onConflict: 'real-only',
+    createdAt: 1_700_000_000,
+  });
+
+  const resp = await requestApp('/api/aliases/doomed', {
+    method: 'DELETE',
+    headers: { 'x-api-key': apiKey.key },
+  });
+  assertEquals(resp.status, 403);
+});
diff --git a/packages/gateway/src/control-plane/model-aliases/serialize.ts b/packages/gateway/src/control-plane/model-aliases/serialize.ts
new file mode 100644
index 000000000..5889997cc
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/serialize.ts
@@ -0,0 +1,31 @@
+// Wire-format projection for the operator-managed model_aliases rows. The
+// dashboard reads the same shape it sends back for create/update; the few
+// snake_cased fields (`visible_in_models_list`, `on_conflict`, `created_at`,
+// `display_name`) follow the rest of the control-plane HTTP surface.
+
+import type { ModelAlias, ModelAliasRules, OnConflict } from './types.ts';
+
+export interface SerializedModelAlias {
+  alias: string;
+  target_model_id: string;
+  upstream_ids: string[];
+  rules: ModelAliasRules;
+  visible_in_models_list: boolean;
+  on_conflict: OnConflict;
+  display_name: string | null; // null when the alias has no displayName set
+  created_at: number;
+}
+
+export const aliasToJson = (alias: ModelAlias): SerializedModelAlias => ({
+  alias: alias.alias,
+  target_model_id: alias.targetModelId,
+  // Defensive copy: spread into a fresh array so the serialized object never
+  // shares its `upstream_ids` backing array with the ModelAlias it came from.
+  // Note that `rules` below is passed through by reference, not copied.
+  upstream_ids: [...alias.upstreamIds],
+  rules: alias.rules,
+  visible_in_models_list: alias.visibleInModelsList,
+  on_conflict: alias.onConflict,
+  display_name: alias.displayName ?? null,
+  created_at: alias.createdAt,
+});
diff --git a/packages/gateway/src/control-plane/routes.ts b/packages/gateway/src/control-plane/routes.ts
index 94b5f06ff..35ceb71de 100644
--- a/packages/gateway/src/control-plane/routes.ts
+++ b/packages/gateway/src/control-plane/routes.ts
@@ -5,10 +5,11 @@ import { authLogin, authLogout, authMe } from './auth/routes.ts';
 import { copilotQuota } from './copilot-quota/routes.ts';
 import { exportData, importData } from './data-transfer/routes.ts';
 import { dumpRoutes } from './dump.ts';
+import { createAlias, deleteAlias, listAliases, updateAlias } from './model-aliases/routes.ts';
 import { controlPlaneModels } from './models/routes.ts';
 import { performanceOverview, performanceTelemetry } from './performance/routes.ts';
 import { createProxy, deleteProxy, listAllBackoffs, listProxies, listProxyBackoffs, resetProxyBackoffs, testProxy, updateProxy } from './proxies/routes.ts';
-import { authLoginBody, changeOwnPasswordBody, claudeCodeAuthorizeUrlBody, claudeCodeImportBody, claudeCodeProbeQuotaBody, claudeCodeRefreshNowBody, claudeCodeReimportBody, claudeCodeSetupTokenImportBody, claudeCodeSetupTokenReimportBody, codexAuthorizeUrlBody, codexImportBody, codexRefreshNowBody, codexReimportBody, copilotAuthPollBody, createKeyBody, createProxyBody, createUpstreamBody, createUserBody, exportQuery, fetchModelsBody, importBody, performanceQuery, resetBackoffBody, searchConfigSchema, searchUsageQuery, testProxyBody, tokenUsageQuery, updateKeyBody, updateProxyBody, updateUpstreamBody, updateUserBody } from './schemas.ts';
+import { authLoginBody, changeOwnPasswordBody, claudeCodeAuthorizeUrlBody, claudeCodeImportBody, claudeCodeProbeQuotaBody, claudeCodeRefreshNowBody, claudeCodeReimportBody, claudeCodeSetupTokenImportBody, claudeCodeSetupTokenReimportBody, codexAuthorizeUrlBody, codexImportBody, codexRefreshNowBody, codexReimportBody, copilotAuthPollBody, createAliasBody, createKeyBody, createProxyBody, createUpstreamBody, createUserBody, exportQuery, fetchModelsBody, importBody, performanceQuery, resetBackoffBody, searchConfigSchema, searchUsageQuery, testProxyBody, tokenUsageQuery, updateAliasBody, updateKeyBody, updateProxyBody, updateUpstreamBody, updateUserBody } from './schemas.ts';
 import { getSearchConfigRoute, putSearchConfigRoute, testSearchConfigRoute } from './search-config/routes.ts';
 import { searchUsage } from './search-usage/routes.ts';
 import { tokenUsage } from './token-usage/routes.ts';
@@ -100,6 +101,11 @@ export const controlPlaneRoutes = new Hono<{ Variables: AuthVars }>()
     .get('/search-config', getSearchConfigRoute)
     .put('/search-config', zValidator('json', searchConfigSchema), putSearchConfigRoute)
     .post('/search-config/test', zValidator('json', searchConfigSchema), testSearchConfigRoute)
+    // Model aliases.
+    .get('/aliases', listAliases)
+    .post('/aliases', zValidator('json', createAliasBody), createAlias)
+    .patch('/aliases/:alias', zValidator('json', updateAliasBody), updateAlias)
+    .delete('/aliases/:alias', deleteAlias)
     .get('/export', zValidator('query', exportQuery), exportData)
     .post('/import', zValidator('json', importBody), importData));
 
diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
index f718539ee..60f5df604 100644
--- a/packages/gateway/src/control-plane/schemas.ts
+++ b/packages/gateway/src/control-plane/schemas.ts
@@ -635,3 +635,73 @@ export const performanceQuery = z.object({
   bucket: z.enum(['hour', '4h', '8h', 'day', 'all']).optional(),
   timezone_offset_minutes: z.string().optional(),
 });
+
+// --- model aliases ---
+//
+// Operator-managed alias rows wired through `/api/aliases`. The schemas
+// describe the request bodies the dashboard sends; deeper invariants
+// (the alias's target model exists in the catalog, the upstream ids
+// resolve, etc.) are intentionally NOT enforced here — an alias is allowed
+// to point at a target that is currently absent so an operator can pre-
+// stage the row before the upstream is registered, mirroring how the
+// catalog tolerates pending references.
+
+// Mirror the public model-id grammar: letters, digits, and `_ . : - /`. Matches
+// the surface ids the dashboard already accepts in the models picker and the
+// `/v1/models` listing, so an alias name is interchangeable with a real id at
+// the request boundary.
+export const MODEL_ALIAS_PATTERN = /^[A-Za-z0-9_.:\-/]+$/;
+
+const aliasNameSchema = z.string().min(1).regex(MODEL_ALIAS_PATTERN, 'alias must be 1+ chars of [A-Za-z0-9_.:-/]');
+
+// Rule field values pass through to the upstream verbatim — the gateway
+// deliberately does not enum-gate operator input here. The Goal-2 contract
+// is that a freshly added enum upstream-side ships through without a
+// gateway code change, so we validate shape (non-empty string, in-range
+// number) but never set membership.
+const aliasReasoningSchema = z.object({
+  effort: z.string().min(1).optional(),
+  budgetTokens: z.number().int().nonnegative().optional(),
+  adaptive: z.boolean().optional(),
+  summary: z.string().min(1).optional(),
+}).strict().refine(
+  r => r.effort !== undefined || r.budgetTokens !== undefined || r.adaptive !== undefined || r.summary !== undefined,
+  { message: 'reasoning must declare at least one of effort, budgetTokens, adaptive, summary' },
+);
+
+const aliasRulesSchema = z.object({
+  reasoning: aliasReasoningSchema.optional(),
+  verbosity: z.string().min(1).optional(),
+  serviceTier: z.string().min(1).optional(),
+  // Each beta header token is a non-empty string. Empty arrays are accepted
+  // (the dashboard sends `[]` when the operator clears every tag) and are
+  // semantically equivalent to omitting the field.
+  anthropicBeta: z.array(z.string().min(1)).optional(),
+}).strict();
+
+const onConflictSchema = z.enum(['alias-only', 'real-only', 'both-real-first', 'both-alias-first']);
+const upstreamIdsSchema = z.array(z.string().min(1));
+
+export const createAliasBody = z.object({
+  alias: aliasNameSchema,
+  targetModelId: z.string().min(1),
+  upstreamIds: upstreamIdsSchema,
+  rules: aliasRulesSchema,
+  visibleInModelsList: z.boolean(),
+  // Defaults to `'real-only'` server-side when omitted so the dashboard's
+  // "create" form does not have to ship a default — the route layer fills it.
+  onConflict: onConflictSchema.optional(),
+  displayName: z.string().min(1).optional(),
+});
+
+// PATCH accepts a partial shape. `displayName` is nullable so the operator
+// can clear an existing label back to the synthesized fallback; absent vs.
+// null is meaningful: the handler preserves on undefined and clears on null.
+export const updateAliasBody = z.object({
+  targetModelId: z.string().min(1).optional(),
+  upstreamIds: upstreamIdsSchema.optional(),
+  rules: aliasRulesSchema.optional(),
+  visibleInModelsList: z.boolean().optional(),
+  onConflict: onConflictSchema.optional(),
+  displayName: z.string().min(1).nullable().optional(),
+});

From 26d3dd9b987365f57d10f863e2fc40803c6a9640 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 04:23:39 +0800
Subject: [PATCH 025/170] feat(web): alias-list settings card + edit dialog
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adds the dashboard surface for the new /api/aliases CRUD endpoints:

  - useModelAliases composable mirrors the proxies-store pattern (module-
    scoped cache, error / loading refs, load()).
  - AliasesSettingsCard slots into the Settings page directly under
    ProxiesSettingsCard, sharing the glass-card styling and animate-in
    delay ordering.
  - AliasRow surfaces the alias id, optional display name, target model,
    rule badges (sourced from formatAliasRuleBadges so the badge order
    matches the rest of the dashboard), and an `on_conflict` chip.
  - AliasEditDialog is a single modal for both create and edit. Reasoning
    is rendered as a None / Effort / Budget / Adaptive radio + a separate
    summary input so the mutually-exclusive wire shape is visible at a
    glance. Suggestion hints come from the target model's chat.reasoning
    metadata when it matches a real catalog entry, but every value field
    stays freeform — Goal 2.

Co-located component-level smoke tests use @vue/test-utils (newly added
as a devDep) plus happy-dom. The dialog tests stub the api client, the
two stores, and reka-ui's portaling Dialog so the form mounts inline
where assertions can reach the inputs and read the posted JSON.

The gateway package's exports map gains two new type-only subpaths
(`./control-plane/model-aliases/serialize`, `./control-plane/model-aliases/types`)
so apps/web can pull `SerializedModelAlias` and `ModelAliasRules` as the
source-of-truth types without crossing the existing deep-import ban.
---
 apps/web/package.json                         |   1 +
 apps/web/src/api/types.ts                     |   2 +
 .../components/alias-edit/AliasEditDialog.vue | 349 ++++++++++++++++++
 .../alias-edit/AliasEditDialog_test.ts        | 167 +++++++++
 apps/web/src/components/settings/AliasRow.vue |  90 +++++
 .../src/components/settings/AliasRow_test.ts  | 128 +++++++
 .../settings/AliasesSettingsCard.vue          |  65 ++++
 apps/web/src/composables/useModelAliases.ts   |  29 ++
 apps/web/src/pages/dashboard/settings.vue     |  31 +-
 apps/web/vitest.config.ts                     |   5 +
 packages/gateway/package.json                 |   2 +
 pnpm-lock.yaml                                | 234 ++++++++++++
 12 files changed, 1100 insertions(+), 3 deletions(-)
 create mode 100644 apps/web/src/components/alias-edit/AliasEditDialog.vue
 create mode 100644 apps/web/src/components/alias-edit/AliasEditDialog_test.ts
 create mode 100644 apps/web/src/components/settings/AliasRow.vue
 create mode 100644 apps/web/src/components/settings/AliasRow_test.ts
 create mode 100644 apps/web/src/components/settings/AliasesSettingsCard.vue
 create mode 100644 apps/web/src/composables/useModelAliases.ts

diff --git a/apps/web/package.json b/apps/web/package.json
index 2bab9e262..75a4ca003 100644
--- a/apps/web/package.json
+++ b/apps/web/package.json
@@ -42,6 +42,7 @@
     "@unocss/preset-wind3": "^66.5.10",
     "@unocss/reset": "^66.5.10",
     "@vitejs/plugin-vue": "^6.0.1",
+    "@vue/test-utils": "^2.4.11",
     "happy-dom": "^20.0.0",
     "unocss": "^66.5.10",
     "unocss-preset-animations": "^1.3.0",
diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts
index 8f11835e2..66216b812 100644
--- a/apps/web/src/api/types.ts
+++ b/apps/web/src/api/types.ts
@@ -305,6 +305,8 @@ export interface FlagDef {
 // than redeclaring the shape) makes any future field rename a compile error
 // here instead of a runtime mismatch the next time someone refreshes the page.
 export type { SerializedProxyRecord as ProxyRecord, SerializedBackoffRow as BackoffRow } from '@floway-dev/gateway/control-plane/proxies/serialize';
+export type { SerializedModelAlias as ModelAlias } from '@floway-dev/gateway/control-plane/model-aliases/serialize';
+export type { ModelAliasRules, OnConflict as ModelAliasOnConflict } from '@floway-dev/gateway/control-plane/model-aliases/types';
 
 // 409 body returned by DELETE /api/proxies/:id when the row is referenced
 // by an upstream's fallback list.
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
new file mode 100644
index 000000000..e247137b0
--- /dev/null
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -0,0 +1,349 @@
+<script setup lang="ts">
+// Operator editor for one alias. The form is intentionally Goal-2-friendly:
+// every "enum" field below is rendered as a plain text input with hints
+// pulled from the target model's chat metadata (when available) and from
+// well-known wire values. The dashboard never gates the value set so a new
+// upstream-side level (e.g. an "xhigh" effort that shipped this morning)
+// can flow through without a frontend release.
+
+import { computed, ref } from 'vue';
+
+import { callApi, useApi } from '../../api/client.ts';
+import type { ModelAlias, ModelAliasOnConflict } from '../../api/types.ts';
+import { useModelsStore } from '../../composables/useModels.ts';
+import { useUpstreamsStore } from '../../composables/useUpstreams.ts';
+import { Button, Checkbox, Dialog, Input, Select, TagCombobox } from '@floway-dev/ui';
+
+// Mutable mirror of @floway-dev/protocols ModelAliasRules — the wire shape
+// is `readonly` at the contract boundary, but the form mutates it in place
+// while the operator is editing. The Hono RPC client expects the mutable
+// version too.
+interface MutableRules {
+  reasoning?: {
+    effort?: string;
+    budgetTokens?: number;
+    adaptive?: boolean;
+    summary?: string;
+  };
+  verbosity?: string;
+  serviceTier?: string;
+  anthropicBeta?: string[];
+}
+
+const open = defineModel<boolean>('open', { required: true });
+
+const props = defineProps<{
+  /** null = create; non-null = edit (alias is the PK, so editing it is disabled). */
+  record: ModelAlias | null;
+}>();
+
+const emit = defineEmits<{
+  saved: [];
+}>();
+
+const api = useApi();
+const modelsStore = useModelsStore();
+const upstreamsStore = useUpstreamsStore();
+
+const mode = computed<'create' | 'edit'>(() => (props.record ? 'edit' : 'create'));
+
+// --- form state ---
+
+const aliasName = ref(props.record?.alias ?? '');
+const displayName = ref(props.record?.display_name ?? '');
+const targetModelId = ref(props.record?.target_model_id ?? '');
+const upstreamIds = ref<string[]>([...(props.record?.upstream_ids ?? [])]);
+const visibleInModelsList = ref(props.record?.visible_in_models_list ?? true);
+const onConflict = ref<ModelAliasOnConflict>(props.record?.on_conflict ?? 'real-only');
+
+// Reasoning is modeled as a tagged radio + a separate summary input so the
+// three approaches (effort preset / token budget / adaptive) are mutually
+// exclusive in the wire shape but visible to the operator at a glance.
+type ReasoningMode = 'none' | 'effort' | 'budget' | 'adaptive';
+
+const initialReasoning = props.record?.rules.reasoning;
+const initialReasoningMode: ReasoningMode = initialReasoning?.effort !== undefined
+  ? 'effort'
+  : initialReasoning?.budgetTokens !== undefined
+    ? 'budget'
+    : initialReasoning?.adaptive === true
+      ? 'adaptive'
+      : 'none';
+
+const reasoningMode = ref<ReasoningMode>(initialReasoningMode);
+const reasoningEffort = ref(initialReasoning?.effort ?? '');
+const reasoningBudgetTokens = ref<string>(initialReasoning?.budgetTokens === undefined ? '' : String(initialReasoning.budgetTokens));
+const reasoningSummary = ref(initialReasoning?.summary ?? '');
+
+const verbosity = ref(props.record?.rules.verbosity ?? '');
+const serviceTier = ref(props.record?.rules.serviceTier ?? '');
+const anthropicBeta = ref<string[]>([...(props.record?.rules.anthropicBeta ?? [])]);
+
+// --- suggestion sources ---
+//
+// Models list seeds the target-model combobox and feeds the reasoning hint
+// lookup. `chat.reasoning` lives on per-model metadata the operator wired
+// at upstream-config time; surface its supported effort list / budget range
+// as combobox hints once a target id matches a real entry.
+
+const modelOptions = computed(() => (modelsStore.models.value ?? []).map(m => ({
+  value: m.id,
+  label: m.display_name ?? m.id,
+})));
+
+const upstreamItems = computed(() => (upstreamsStore.upstreams.value ?? []).map(u => ({
+  value: u.id,
+  label: u.name,
+  detail: u.id,
+})));
+
+const targetChat = computed(() => {
+  const match = modelsStore.models.value?.find(m => m.id === targetModelId.value);
+  return match && 'chat' in match ? (match as { chat?: { reasoning?: { effort?: { supported: string[] }; budget_tokens?: { min?: number; max?: number }; adaptive?: boolean } } }).chat : undefined;
+});
+
+const effortSuggestions = computed(() => targetChat.value?.reasoning?.effort?.supported ?? []);
+const budgetMin = computed(() => targetChat.value?.reasoning?.budget_tokens?.min);
+const budgetMax = computed(() => targetChat.value?.reasoning?.budget_tokens?.max);
+const adaptiveSupported = computed(() => targetChat.value?.reasoning?.adaptive === true);
+
+const SUMMARY_HINTS = ['auto', 'concise', 'detailed', 'omitted'];
+const VERBOSITY_HINTS = ['low', 'medium', 'high'];
+const SERVICE_TIER_HINTS = ['auto', 'default', 'flex', 'scale', 'priority', 'standard_only'];
+
+const onConflictOptions: { value: ModelAliasOnConflict; label: string }[] = [
+  { value: 'real-only', label: 'real-only — alias hidden when target id collides' },
+  { value: 'alias-only', label: 'alias-only — alias replaces a colliding real id' },
+  { value: 'both-real-first', label: 'both — real first' },
+  { value: 'both-alias-first', label: 'both — alias first' },
+];
+
+// --- save ---
+
+const saving = ref(false);
+const saveError = ref<string | null>(null);
+
+const trimOrUndef = (s: string): string | undefined => {
+  const t = s.trim();
+  return t === '' ? undefined : t;
+};
+
+const buildRules = (): MutableRules | { error: string } => {
+  const rules: MutableRules = {};
+
+  if (reasoningMode.value === 'effort') {
+    const v = trimOrUndef(reasoningEffort.value);
+    if (v === undefined) return { error: 'Reasoning effort cannot be empty' };
+    rules.reasoning = { effort: v };
+  } else if (reasoningMode.value === 'budget') {
+    const raw = reasoningBudgetTokens.value.trim();
+    if (raw === '' || !/^\d+$/.test(raw)) return { error: 'Reasoning budget tokens must be a non-negative integer' };
+    rules.reasoning = { budgetTokens: Number(raw) };
+  } else if (reasoningMode.value === 'adaptive') {
+    rules.reasoning = { adaptive: true };
+  }
+
+  const summary = trimOrUndef(reasoningSummary.value);
+  if (summary !== undefined) {
+    rules.reasoning = { ...(rules.reasoning ?? {}), summary };
+  }
+
+  const verb = trimOrUndef(verbosity.value);
+  if (verb !== undefined) rules.verbosity = verb;
+  const tier = trimOrUndef(serviceTier.value);
+  if (tier !== undefined) rules.serviceTier = tier;
+  const betas = anthropicBeta.value.map(s => s.trim()).filter(s => s !== '');
+  if (betas.length > 0) rules.anthropicBeta = betas;
+
+  return rules;
+};
+
+// Validate the form, then POST (create) or PATCH (edit) the alias. Failures
+// surface in `saveError` and keep the dialog open; success emits `saved`,
+// refreshes the models store, and closes the dialog.
+const save = async () => {
+  saveError.value = null;
+  const trimmedAlias = aliasName.value.trim();
+  const trimmedTarget = targetModelId.value.trim();
+  if (mode.value === 'create' && trimmedAlias === '') { saveError.value = 'Alias name is required'; return; }
+  if (trimmedTarget === '') { saveError.value = 'Target model id is required'; return; }
+
+  const rulesOrErr = buildRules();
+  if ('error' in rulesOrErr) { saveError.value = rulesOrErr.error; return; }
+
+  const displayNameValue = trimOrUndef(displayName.value);
+  // Snapshot the prop so the PATCH closure is narrowed without a `!` assertion.
+  const record = props.record;
+  // Fields that the create and edit payloads have in common.
+  const sharedFields = {
+    targetModelId: trimmedTarget,
+    upstreamIds: [...upstreamIds.value],
+    rules: rulesOrErr,
+    visibleInModelsList: visibleInModelsList.value,
+    onConflict: onConflict.value,
+  };
+
+  saving.value = true;
+  try {
+    if (mode.value === 'create') {
+      const { error } = await callApi(() => api.api.aliases.$post({
+        json: {
+          alias: trimmedAlias,
+          ...sharedFields,
+          ...(displayNameValue !== undefined ? { displayName: displayNameValue } : {}),
+        },
+      }));
+      if (error) { saveError.value = error.message; return; }
+    } else if (record) {
+      const { error } = await callApi(() => api.api.aliases[':alias'].$patch({
+        param: { alias: record.alias },
+        // Explicit null when the operator cleared the label, so the backend wipes
+        // display_name instead of preserving it through the absent-field merge.
+        json: { ...sharedFields, displayName: displayNameValue ?? null },
+      }));
+      if (error) { saveError.value = error.message; return; }
+    }
+    emit('saved');
+    // Also refresh the dashboard's /api/models cache so the new alias shows up
+    // in the catalog; the parent's reloadAll does too, but calling it here keeps
+    // the modal-close semantics independent of the parent's reload wiring.
+    await modelsStore.load();
+    open.value = false;
+  } finally {
+    saving.value = false;
+  }
+};
+
+const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Alias: ${props.record?.alias ?? ''}`);
+// Choices for the reasoning-mode radio group, rendered in this order.
+const reasoningModeOptions: { value: ReasoningMode; label: string }[] = [
+  { value: 'none', label: 'None' },
+  { value: 'effort', label: 'Effort preset' },
+  { value: 'budget', label: 'Token budget' },
+  { value: 'adaptive', label: 'Adaptive' },
+];
+</script>
+
+<template>
+  <Dialog v-model:open="open" :title="title" size="xl">
+    <div class="space-y-5">
+      <p v-if="saveError" class="rounded-md border border-accent-rose/40 bg-accent-rose/10 px-3 py-2 text-sm text-accent-rose">
+        {{ saveError }}
+      </p>
+
+      <div class="grid grid-cols-1 gap-4 md:grid-cols-2">
+        <div class="space-y-1.5">
+          <label class="block text-xs font-medium text-gray-500">Alias name</label>
+          <Input v-model="aliasName" placeholder="codex-auto-review" :disabled="mode === 'edit'" class="font-mono" />
+          <p v-if="mode === 'edit'" class="text-xs text-gray-600">Alias names are the primary key and cannot be changed; delete and recreate to rename.</p>
+        </div>
+
+        <div class="space-y-1.5">
+          <label class="block text-xs font-medium text-gray-500">Display name <span class="text-gray-600">(optional)</span></label>
+          <Input v-model="displayName" placeholder="Codex Auto Review" />
+        </div>
+      </div>
+
+      <div class="space-y-1.5">
+        <label class="block text-xs font-medium text-gray-500">Target model id</label>
+        <Input v-model="targetModelId" placeholder="gpt-5.4" class="font-mono" list="alias-model-options" />
+        <datalist id="alias-model-options">
+          <option v-for="opt in modelOptions" :key="opt.value" :value="opt.value">{{ opt.label }}</option>
+        </datalist>
+      </div>
+
+      <div class="space-y-1.5">
+        <label class="block text-xs font-medium text-gray-500">Upstreams <span class="text-gray-600">(leave empty to allow any upstream that serves the target)</span></label>
+        <TagCombobox v-model="upstreamIds" :items="upstreamItems" placeholder="Pick an upstream..." empty-text="No upstreams match" />
+      </div>
+
+      <div class="grid grid-cols-1 gap-4 md:grid-cols-2">
+        <div class="space-y-1.5">
+          <label class="block text-xs font-medium text-gray-500">On conflict</label>
+          <Select v-model="onConflict" :options="onConflictOptions" />
+        </div>
+
+        <div class="flex items-center gap-2 pt-6">
+          <Checkbox v-model="visibleInModelsList" />
+          <label class="text-sm text-gray-300">Visible in <code class="rounded bg-white/[0.04] px-1 font-mono text-xs">/v1/models</code></label>
+        </div>
+      </div>
+
+      <section class="space-y-3 rounded-md border border-white/[0.06] bg-surface-800/50 p-4">
+        <h4 class="text-xs font-semibold uppercase tracking-wide text-gray-400">Reasoning</h4>
+        <div class="flex flex-wrap gap-3">
+          <label v-for="opt in reasoningModeOptions" :key="opt.value" class="inline-flex items-center gap-2 text-sm text-gray-300">
+            <input
+              type="radio"
+              :value="opt.value"
+              :checked="reasoningMode === opt.value"
+              :disabled="opt.value === 'adaptive' && !adaptiveSupported && reasoningMode !== 'adaptive'"
+              @change="reasoningMode = opt.value"
+            >
+            {{ opt.label }}
+          </label>
+        </div>
+
+        <div v-if="reasoningMode === 'effort'" class="space-y-1.5">
+          <label class="block text-xs font-medium text-gray-500">Effort</label>
+          <Input v-model="reasoningEffort" placeholder="high" list="alias-effort-options" />
+          <datalist id="alias-effort-options">
+            <option v-for="v in effortSuggestions" :key="v" :value="v" />
+          </datalist>
+          <p v-if="effortSuggestions.length > 0" class="text-xs text-gray-600">Target supports: {{ effortSuggestions.join(', ') }}</p>
+        </div>
+
+        <div v-if="reasoningMode === 'budget'" class="space-y-1.5">
+          <label class="block text-xs font-medium text-gray-500">Budget tokens</label>
+          <Input v-model="reasoningBudgetTokens" placeholder="4096" inputmode="numeric" class="font-mono" />
+          <p v-if="budgetMin !== undefined || budgetMax !== undefined" class="text-xs text-gray-600">
+            Target range:
+            <template v-if="budgetMin !== undefined">min {{ budgetMin }}</template>
+            <template v-if="budgetMin !== undefined && budgetMax !== undefined">, </template>
+            <template v-if="budgetMax !== undefined">max {{ budgetMax }}</template>
+          </p>
+        </div>
+
+        <div v-if="reasoningMode === 'adaptive' && !adaptiveSupported" class="rounded-md border border-amber-500/30 bg-amber-500/10 px-2 py-1 text-xs text-amber-300">
+          Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.
+        </div>
+
+        <div class="space-y-1.5">
+          <label class="block text-xs font-medium text-gray-500">Reasoning summary <span class="text-gray-600">(optional)</span></label>
+          <Input v-model="reasoningSummary" placeholder="auto" list="alias-summary-options" />
+          <datalist id="alias-summary-options">
+            <option v-for="v in SUMMARY_HINTS" :key="v" :value="v" />
+          </datalist>
+        </div>
+      </section>
+
+      <div class="grid grid-cols-1 gap-4 md:grid-cols-2">
+        <div class="space-y-1.5">
+          <label class="block text-xs font-medium text-gray-500">Verbosity</label>
+          <Input v-model="verbosity" placeholder="medium" list="alias-verbosity-options" />
+          <datalist id="alias-verbosity-options">
+            <option v-for="v in VERBOSITY_HINTS" :key="v" :value="v" />
+          </datalist>
+        </div>
+
+        <div class="space-y-1.5">
+          <label class="block text-xs font-medium text-gray-500">Service tier</label>
+          <Input v-model="serviceTier" placeholder="auto" list="alias-tier-options" />
+          <datalist id="alias-tier-options">
+            <option v-for="v in SERVICE_TIER_HINTS" :key="v" :value="v" />
+          </datalist>
+        </div>
+      </div>
+
+      <div class="space-y-1.5">
+        <label class="block text-xs font-medium text-gray-500">Anthropic beta headers <span class="text-gray-600">(comma- or Enter-separated tokens)</span></label>
+        <TagCombobox v-model="anthropicBeta" :items="[]" placeholder="extended-cache-ttl-2025-04-11" empty-text="Type a header token and press Enter" />
+      </div>
+
+      <div class="flex flex-wrap items-center gap-2 border-t border-white/[0.06] pt-5">
+        <Button :loading="saving" @click="save">Save</Button>
+        <Button variant="secondary" :disabled="saving" @click="open = false">Cancel</Button>
+      </div>
+    </div>
+  </Dialog>
+</template>
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
new file mode 100644
index 000000000..976a24a18
--- /dev/null
+++ b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
@@ -0,0 +1,167 @@
+// @vitest-environment happy-dom
+
+import { mount } from '@vue/test-utils';
+import { afterEach, beforeEach, expect, test, vi } from 'vitest';
+import { defineComponent, h, ref } from 'vue';
+
+import type { ModelAlias } from '../../api/types.ts';
+
+// Module-level mocks for the api client + every store the dialog imports.
+// The dialog stays as-is; we substitute the dependencies so the component
+// renders and submits without any real HTTP. callApi is exposed as a spy so
+// tests can read what was posted.
+const createAliasMock = vi.fn(async (_args: { json: unknown }) => new Response(JSON.stringify({}), { status: 201, headers: { 'content-type': 'application/json' } }));
+const patchAliasMock = vi.fn(async (_args: { param: { alias: string }; json: unknown }) => new Response(JSON.stringify({}), { status: 200, headers: { 'content-type': 'application/json' } }));
+
+vi.mock('../../api/client.ts', async () => {
+  const { callApi: realCallApi } = await vi.importActual<typeof import('../../api/client.ts')>('../../api/client.ts');
+  return {
+    useApi: () => ({
+      api: {
+        aliases: Object.assign(
+          { $post: (args: { json: unknown }) => createAliasMock(args) },
+          { ':alias': { $patch: (args: { param: { alias: string }; json: unknown }) => patchAliasMock(args) } },
+        ),
+      },
+    }),
+    callApi: realCallApi,
+  };
+});
+
+vi.mock('../../composables/useModels.ts', () => ({
+  useModelsStore: () => ({
+    models: {
+      value: [
+        { id: 'gpt-5.4', display_name: 'GPT-5.4', object: 'model', type: 'model', limits: {}, kind: 'chat', chat: { reasoning: { effort: { supported: ['low', 'medium', 'high'], default: 'medium' }, budget_tokens: { min: 1024, max: 8192 }, adaptive: true } } },
+        { id: 'claude-opus-4-6', display_name: 'Claude Opus 4.6', object: 'model', type: 'model', limits: {}, kind: 'chat' },
+      ],
+    },
+    loading: { value: false },
+    error: { value: null },
+    load: vi.fn(async () => undefined),
+  }),
+}));
+
+vi.mock('../../composables/useUpstreams.ts', () => ({
+  useUpstreamsStore: () => ({
+    upstreams: {
+      value: [
+        { id: 'up_oai', name: 'OpenAI' },
+        { id: 'up_anth', name: 'Anthropic' },
+      ],
+    },
+    loading: { value: false },
+    load: vi.fn(async () => undefined),
+  }),
+}));
+
+// reka-ui's Dialog mounts via Teleport into document.body and renders a
+// portal — we stub it down to a passthrough so happy-dom mounts the slot
+// content inline where assertions can reach it.
+vi.mock('@floway-dev/ui', async () => {
+  const real = await vi.importActual<typeof import('@floway-dev/ui')>('@floway-dev/ui');
+  const Passthrough = defineComponent({ name: 'Passthrough', setup(_props, { slots }) { return () => h('div', slots.default?.()); } });
+  return { ...real, Dialog: Passthrough };
+});
+
+beforeEach(() => {
+  createAliasMock.mockClear();
+  patchAliasMock.mockClear();
+});
+
+afterEach(() => {
+  vi.clearAllMocks();
+});
+
+test('AliasEditDialog (create mode) posts a payload matching the form state', async () => {
+  const { default: AliasEditDialog } = await import('./AliasEditDialog.vue');
+  const open = ref(true);
+
+  const wrapper = mount(defineComponent({
+    components: { AliasEditDialog },
+    setup() { return { open }; },
+    template: '<AliasEditDialog v-model:open="open" :record="null" />',
+  }));
+
+  // Fill the form: alias name + target id are the only required fields for
+  // the create-mode happy path. Everything else uses its default.
+  const aliasInput = wrapper.find('input[placeholder="codex-auto-review"]');
+  expect(aliasInput.exists()).toBe(true);
+  await aliasInput.setValue('opus-fast');
+
+  const targetInput = wrapper.find('input[placeholder="gpt-5.4"]');
+  expect(targetInput.exists()).toBe(true);
+  await targetInput.setValue('claude-opus-4-6');
+
+  // Click Save.
+  const saveBtn = wrapper.findAll('button').find(b => b.text() === 'Save');
+  expect(saveBtn).toBeDefined();
+  await saveBtn!.trigger('click');
+  // Flush microtasks so the async save completes.
+  await new Promise(r => setTimeout(r, 0));
+
+  expect(createAliasMock).toHaveBeenCalledTimes(1);
+  const args = createAliasMock.mock.calls[0]![0];
+  expect(args.json).toMatchObject({
+    alias: 'opus-fast',
+    targetModelId: 'claude-opus-4-6',
+    upstreamIds: [],
+    rules: {},
+    visibleInModelsList: true,
+    onConflict: 'real-only',
+  });
+});
+
+test('AliasEditDialog (edit mode) pre-fills the form and PATCHes every editable field', async () => {
+  const { default: AliasEditDialog } = await import('./AliasEditDialog.vue');
+  const open = ref(true);
+  const record: ModelAlias = {
+    alias: 'opus-xhigh',
+    target_model_id: 'claude-opus-4-6',
+    upstream_ids: ['up_anth'],
+    rules: { reasoning: { effort: 'xhigh' } },
+    visible_in_models_list: true,
+    on_conflict: 'real-only',
+    display_name: 'Opus XHigh',
+    created_at: 1_700_000_000,
+  };
+
+  const wrapper = mount(defineComponent({
+    components: { AliasEditDialog },
+    setup() { return { open, record }; },
+    template: '<AliasEditDialog v-model:open="open" :record="record" />',
+  }));
+
+  // Alias name input should be disabled in edit mode (PK is immutable).
+  const aliasInput = wrapper.find('input[placeholder="codex-auto-review"]');
+  expect(aliasInput.exists()).toBe(true);
+  expect((aliasInput.element as HTMLInputElement).disabled).toBe(true);
+  expect((aliasInput.element as HTMLInputElement).value).toBe('opus-xhigh');
+
+  // Display name pre-filled.
+  const displayInput = wrapper.find('input[placeholder="Codex Auto Review"]');
+  expect((displayInput.element as HTMLInputElement).value).toBe('Opus XHigh');
+
+  // Target id pre-filled.
+  const targetInput = wrapper.find('input[placeholder="gpt-5.4"]');
+  expect((targetInput.element as HTMLInputElement).value).toBe('claude-opus-4-6');
+
+  // Change one field and submit. The PATCH body carries every editable field
+  // (changed or not), so the untouched values must round-trip unchanged.
+  await targetInput.setValue('gpt-5.4');
+  const saveBtn = wrapper.findAll('button').find(b => b.text() === 'Save');
+  await saveBtn!.trigger('click');
+  await new Promise(r => setTimeout(r, 0));
+
+  expect(patchAliasMock).toHaveBeenCalledTimes(1);
+  const args = patchAliasMock.mock.calls[0]![0];
+  expect(args.param.alias).toBe('opus-xhigh');
+  expect(args.json).toMatchObject({
+    targetModelId: 'gpt-5.4',
+    upstreamIds: ['up_anth'],
+    rules: { reasoning: { effort: 'xhigh' } },
+    visibleInModelsList: true,
+    onConflict: 'real-only',
+    displayName: 'Opus XHigh',
+  });
+});
diff --git a/apps/web/src/components/settings/AliasRow.vue b/apps/web/src/components/settings/AliasRow.vue
new file mode 100644
index 000000000..1f8a684d1
--- /dev/null
+++ b/apps/web/src/components/settings/AliasRow.vue
@@ -0,0 +1,90 @@
+<script setup lang="ts">
+import { computed } from 'vue';
+
+import type { ModelAlias } from '../../api/types.ts';
+import { formatAliasRuleBadges } from '@floway-dev/protocols/common';
+
+const props = defineProps<{
+  alias: ModelAlias;
+}>();
+
+defineEmits<{
+  edit: [];
+  delete: [];
+}>();
+
+// Effective label: operator-set display name when present, otherwise fall
+// back to the alias id itself. The "→ target" annotation is rendered
+// alongside the label rather than substituted in so an operator who picks a
+// long display name still sees what the alias points at.
+const labelText = computed(() => props.alias.display_name ?? props.alias.alias);
+// Badge descriptors (label plus optional value) derived from the alias rules.
+const badges = computed(() => formatAliasRuleBadges(props.alias.rules));
+
+// Colour classes for the conflict-policy chip; both "both-*" policies share cyan.
+const onConflictBadgeClass = computed(() => {
+  const policy = props.alias.on_conflict;
+  if (policy === 'alias-only') return 'border-accent-violet/30 bg-accent-violet/10 text-accent-violet';
+  if (policy === 'real-only') return 'border-white/10 bg-white/5 text-gray-400';
+  if (policy === 'both-real-first' || policy === 'both-alias-first') return 'border-accent-cyan/30 bg-accent-cyan/10 text-accent-cyan';
+  return undefined;
+});
+</script>
+
+<template>
+  <div class="flex items-center gap-3 rounded-lg border border-white/5 bg-surface-800/80 px-3 py-2">
+    <span
+      class="shrink-0 rounded border px-2 py-0.5 text-xs font-semibold uppercase tracking-wide"
+      :class="onConflictBadgeClass"
+    >{{ alias.on_conflict }}</span>
+
+    <div class="min-w-0 flex-1 truncate">
+      <span class="text-sm font-semibold text-white">{{ labelText }}</span>
+      <span class="ml-2 font-mono text-xs text-gray-500">{{ alias.alias }}</span>
+      <span class="ml-2 text-xs text-gray-500">&rarr; {{ alias.target_model_id }}</span>
+    </div>
+
+    <div v-if="badges.length > 0" class="hidden shrink-0 items-center gap-1 sm:flex">
+      <span
+        v-for="badge in badges"
+        :key="badge.label"
+        class="rounded border border-white/10 bg-white/[0.02] px-1.5 py-0.5 text-[10px] uppercase tracking-wide text-gray-400"
+      >
+        {{ badge.label }}<template v-if="badge.value !== undefined">: <span class="text-gray-300 normal-case">{{ badge.value }}</span></template>
+      </span>
+    </div>
+
+    <span
+      v-if="!alias.visible_in_models_list"
+      class="hidden shrink-0 rounded border border-amber-500/30 bg-amber-500/10 px-1.5 py-0.5 text-[10px] uppercase tracking-wide text-amber-300 sm:inline"
+      title="Hidden from /v1/models"
+    >hidden</span>
+
+    <div class="flex shrink-0 items-center gap-1">
+      <button
+        type="button"
+        class="inline-flex h-8 w-8 items-center justify-center rounded-md p-1 text-gray-600 transition-colors hover:bg-white/[0.04] hover:text-accent-cyan"
+        aria-label="Edit alias"
+        title="Edit"
+        @click="$emit('edit')"
+      >
+        <svg class="h-4 w-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
+          <path d="M17 3a2.83 2.83 0 1 1 4 4L7.5 20.5 2 22l1.5-5.5Z" />
+          <path d="m15 5 4 4" />
+        </svg>
+      </button>
+      <button
+        type="button"
+        class="inline-flex h-8 w-8 items-center justify-center rounded-md p-1 text-gray-600 transition-colors hover:bg-white/[0.04] hover:text-accent-rose"
+        aria-label="Delete alias"
+        title="Delete"
+        @click="$emit('delete')"
+      >
+        <svg class="h-4 w-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
+          <polyline points="3 6 5 6 21 6" />
+          <path d="M19 6v14a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6m3 0V4a2 2 0 0 1 2-2h4a2 2 0 0 1 2 2v2" />
+        </svg>
+      </button>
+    </div>
+  </div>
+</template>
diff --git a/apps/web/src/components/settings/AliasRow_test.ts b/apps/web/src/components/settings/AliasRow_test.ts
new file mode 100644
index 000000000..cef3a9639
--- /dev/null
+++ b/apps/web/src/components/settings/AliasRow_test.ts
@@ -0,0 +1,128 @@
+// @vitest-environment happy-dom
+
+import { mount } from '@vue/test-utils';
+import { beforeEach, describe, expect, test, vi } from 'vitest';
+import { defineComponent } from 'vue';
+
+import AliasRow from './AliasRow.vue';
+import type { ModelAlias } from '../../api/types.ts';
+
+const baseAlias: ModelAlias = {
+  alias: 'opus-xhigh',
+  target_model_id: 'claude-opus-4-6',
+  upstream_ids: [],
+  rules: { reasoning: { effort: 'xhigh' } },
+  visible_in_models_list: true,
+  on_conflict: 'real-only',
+  display_name: 'Opus XHigh',
+  created_at: 1_700_000_000,
+};
+
+describe('AliasRow', () => {
+  beforeEach(() => {
+    vi.restoreAllMocks();
+  });
+
+  test('renders the display name, alias id, and target', () => {
+    const wrapper = mount(AliasRow, { props: { alias: baseAlias } });
+    expect(wrapper.text()).toContain('Opus XHigh');
+    expect(wrapper.text()).toContain('opus-xhigh');
+    expect(wrapper.text()).toContain('claude-opus-4-6');
+  });
+
+  test('falls back to alias name when display_name is null', () => {
+    const wrapper = mount(AliasRow, { props: { alias: { ...baseAlias, display_name: null } } });
+    // alias id appears twice (label fallback + the small font-mono id), but the
+    // important assertion is that the label slot is non-empty.
+    expect(wrapper.text()).toContain('opus-xhigh');
+    expect(wrapper.text()).not.toContain('Opus XHigh');
+  });
+
+  test('emits edit and delete on the matching button clicks', async () => {
+    const wrapper = mount(AliasRow, { props: { alias: baseAlias } });
+    await wrapper.find('[aria-label="Edit alias"]').trigger('click');
+    await wrapper.find('[aria-label="Delete alias"]').trigger('click');
+    expect(wrapper.emitted('edit')).toHaveLength(1);
+    expect(wrapper.emitted('delete')).toHaveLength(1);
+  });
+
+  test('shows a "hidden" badge when visible_in_models_list is false', () => {
+    const wrapper = mount(AliasRow, { props: { alias: { ...baseAlias, visible_in_models_list: false } } });
+    expect(wrapper.text()).toContain('hidden');
+  });
+
+  test('renders one rule badge per active rule field', () => {
+    const wrapper = mount(AliasRow, {
+      props: {
+        alias: {
+          ...baseAlias,
+          rules: { reasoning: { effort: 'high' }, verbosity: 'low', serviceTier: 'priority' },
+        },
+      },
+    });
+    // formatAliasRuleBadges drives the order: effort, verbosity, service tier.
+    const text = wrapper.text();
+    expect(text).toContain('effort: high');
+    expect(text).toContain('verbosity: low');
+    expect(text).toContain('service tier: priority');
+  });
+});
+
+// Bare-component smoke test for the card. We mock the composable so the
+// card renders deterministically without an HTTP round-trip; the stub
+// substitutes the same shape useModelAliases exposes.
+describe('AliasesSettingsCard', () => {
+  test('renders empty state when the store has no aliases', async () => {
+    vi.resetModules();
+    vi.doMock('../../composables/useModelAliases.ts', () => ({
+      useModelAliases: () => ({
+        aliases: { value: [] },
+        loading: { value: false },
+        error: { value: null },
+        load: vi.fn(),
+      }),
+    }));
+    vi.doMock('../../api/client.ts', () => ({
+      useApi: () => ({ api: { aliases: { ':alias': { $delete: vi.fn() } } } }),
+      callApi: vi.fn(),
+    }));
+    const { default: AliasesSettingsCard } = await import('./AliasesSettingsCard.vue');
+    const wrapper = mount(AliasesSettingsCard);
+    expect(wrapper.text()).toContain('No aliases configured');
+  });
+
+  test('renders one AliasRow per alias the store holds', async () => {
+    vi.resetModules();
+    const rows: ModelAlias[] = [
+      { ...baseAlias, alias: 'a-one' },
+      { ...baseAlias, alias: 'b-two', display_name: null },
+    ];
+    vi.doMock('../../composables/useModelAliases.ts', () => ({
+      useModelAliases: () => ({
+        aliases: { value: rows },
+        loading: { value: false },
+        error: { value: null },
+        load: vi.fn(),
+      }),
+    }));
+    vi.doMock('../../api/client.ts', () => ({
+      useApi: () => ({ api: { aliases: { ':alias': { $delete: vi.fn() } } } }),
+      callApi: vi.fn(),
+    }));
+    const { default: AliasesSettingsCard } = await import('./AliasesSettingsCard.vue');
+    const wrapper = mount(AliasesSettingsCard);
+    // Each row exposes its delete button by aria-label, so the count is a
+    // reliable proxy for "one AliasRow rendered per alias".
+    expect(wrapper.findAll('[aria-label="Delete alias"]').length).toBe(rows.length);
+    expect(wrapper.text()).toContain('a-one');
+    expect(wrapper.text()).toContain('b-two');
+  });
+});
+
+// Sanity: mounting a trivial inline-template component proves the harness
+// itself (happy-dom + @vue/test-utils + runtime template compilation) works,
+// so a failure in the suites above points at the components, not the setup.
+test('the test harness can mount a trivial component', () => {
+  const wrapper = mount(defineComponent({ template: '<span>ok</span>' }));
+  expect(wrapper.text()).toBe('ok');
+});
diff --git a/apps/web/src/components/settings/AliasesSettingsCard.vue b/apps/web/src/components/settings/AliasesSettingsCard.vue
new file mode 100644
index 000000000..da71b374f
--- /dev/null
+++ b/apps/web/src/components/settings/AliasesSettingsCard.vue
@@ -0,0 +1,65 @@
+<script setup lang="ts">
+import { computed } from 'vue';
+
+import AliasRow from './AliasRow.vue';
+import { callApi, useApi } from '../../api/client.ts';
+import type { ModelAlias } from '../../api/types.ts';
+import { useModelAliases } from '../../composables/useModelAliases.ts';
+import { Spinner } from '@floway-dev/ui';
+
+const emit = defineEmits<{
+  'add': [];
+  'edit': [record: ModelAlias];
+  'changed': [];
+}>();
+
+const api = useApi();
+const aliasesStore = useModelAliases();
+
+const aliases = computed<ModelAlias[]>(() => aliasesStore.aliases.value ?? []);
+
+// Asks the operator to confirm, issues the DELETE, and emits `changed` on
+// success; a failed request is reported through window.alert instead.
+const deleteAlias = async (record: ModelAlias) => {
+  const confirmed = window.confirm(`Delete alias "${record.alias}"?`);
+  if (!confirmed) return;
+  const outcome = await callApi(() => api.api.aliases[':alias'].$delete({ param: { alias: record.alias } }));
+  if (outcome.error) { window.alert(`Delete failed: ${outcome.error.message}`); return; }
+  emit('changed');
+};
+</script>
+
+<template>
+  <div class="glass-card p-5 sm:p-6 animate-in delay-2">
+    <div class="mb-4 flex flex-col gap-3 sm:flex-row sm:items-start sm:justify-between">
+      <div class="min-w-0">
+        <h3 class="text-white font-semibold mb-1">Aliases</h3>
+        <p class="text-sm text-gray-400">
+          Synthesized model ids that pin a target model plus a request-time rule overlay.
+          Surfaced in <code class="rounded bg-white/[0.04] px-1">/v1/models</code> per the conflict policy.
+        </p>
+      </div>
+      <button class="btn-primary !py-2.5 !px-3 text-xs whitespace-nowrap" @click="emit('add')">Add Alias</button>
+    </div>
+
+    <p v-if="aliasesStore.error.value" class="mb-3 rounded-md border border-accent-rose/40 bg-accent-rose/10 px-3 py-2 text-sm text-accent-rose">
+      Failed to load aliases: {{ aliasesStore.error.value }}
+    </p>
+
+    <p v-if="!aliasesStore.error.value && aliases.length === 0" class="text-sm text-gray-500">
+      No aliases configured. Add one to expose a model id with locked reasoning, service tier, or other rule overrides.
+    </p>
+
+    <div v-else-if="aliases.length > 0" class="space-y-2">
+      <AliasRow
+        v-for="alias in aliases"
+        :key="alias.alias"
+        :alias="alias"
+        @edit="emit('edit', alias)"
+        @delete="deleteAlias(alias)"
+      />
+    </div>
+
+    <Spinner v-if="aliasesStore.loading.value && aliases.length > 0" class="mt-3 h-4 w-4 text-gray-500" />
+  </div>
+</template>
diff --git a/apps/web/src/composables/useModelAliases.ts b/apps/web/src/composables/useModelAliases.ts
new file mode 100644
index 000000000..880c771f9
--- /dev/null
+++ b/apps/web/src/composables/useModelAliases.ts
@@ -0,0 +1,29 @@
+import { ref, shallowRef } from 'vue';
+
+import { callApi, useApi } from '../api/client.ts';
+import type { ModelAlias } from '../api/types.ts';
+
+// Module-scoped state so every caller shares the same list/loading/error refs
+// (mirrors the proxies store pattern). Note that load() does not de-duplicate
+// in-flight requests: each call issues its own fetch and the last to finish wins.
+const aliases = shallowRef<ModelAlias[] | null>(null);
+const loading = ref(false);
+const error = ref<string | null>(null);
+
+export const useModelAliases = () => {
+  const api = useApi();
+  // Refreshes the shared alias list; a failed fetch only sets `error`, and
+  // `loading` is cleared in `finally` so a throw cannot strand the spinner.
+  const load = async () => {
+    loading.value = true;
+    error.value = null;
+    try {
+      const { data, error: err } = await callApi<ModelAlias[]>(() => api.api.aliases.$get());
+      if (err) error.value = err.message;
+      else aliases.value = data;
+    } finally {
+      loading.value = false;
+    }
+  };
+  return { aliases, loading, error, load };
+};
diff --git a/apps/web/src/pages/dashboard/settings.vue b/apps/web/src/pages/dashboard/settings.vue
index 83fedc993..636754fee 100644
--- a/apps/web/src/pages/dashboard/settings.vue
+++ b/apps/web/src/pages/dashboard/settings.vue
@@ -4,14 +4,17 @@ import { ref, watch } from 'vue';
 import { useRouter } from 'vue-router';
 
 import { callApi, useApi } from '../../api/client.ts';
-import type { ProxyRecord, SearchConfig, UpstreamProviderKind, UpstreamRecord } from '../../api/types.ts';
+import type { ModelAlias, ProxyRecord, SearchConfig, UpstreamProviderKind, UpstreamRecord } from '../../api/types.ts';
+import AliasEditDialog from '../../components/alias-edit/AliasEditDialog.vue';
 import ProxyEditDialog from '../../components/proxy-edit/ProxyEditDialog.vue';
+import AliasesSettingsCard from '../../components/settings/AliasesSettingsCard.vue';
 import ApiEndpointsSection from '../../components/settings/ApiEndpointsSection.vue';
 import ExportSection from '../../components/settings/ExportSection.vue';
 import ImportSection from '../../components/settings/ImportSection.vue';
 import ProxiesSettingsCard from '../../components/settings/ProxiesSettingsCard.vue';
 import SearchConfigSection from '../../components/settings/SearchConfigSection.vue';
 import UpstreamsSettingsCard from '../../components/settings/UpstreamsSettingsCard.vue';
+import { useModelAliases } from '../../composables/useModelAliases.ts';
 import { useModelsStore } from '../../composables/useModels.ts';
 import { useProxiesStore } from '../../composables/useProxies.ts';
 import { useRuntimeInfo } from '../../composables/useRuntimeInfo.ts';
@@ -31,6 +34,7 @@ export const useSettingsPageData = defineBasicLoader(async () => {
     useUpstreamsStore().load(),
     useModelsStore().load(),
     useProxiesStore().load(),
+    useModelAliases().load(),
     useRuntimeInfo().load(),
   ]);
   return {
@@ -49,6 +53,8 @@ const { upstreams, loading: storeLoading, load } = useUpstreamsStore();
 const modelsStore = useModelsStore();
 const proxiesStore = useProxiesStore();
 const { load: loadProxies } = proxiesStore;
+const aliasesStore = useModelAliases();
+const { load: loadAliases } = aliasesStore;
 const settingsData = useSettingsPageData();
 
 // Local working copy the child reorders via v-model:ordered; reloadAll
@@ -59,10 +65,10 @@ watch(upstreams, list => {
 }, { immediate: true });
 
 const reloadAll = async () => {
-  await Promise.all([load(), modelsStore.load(), loadProxies()]);
+  await Promise.all([load(), modelsStore.load(), loadProxies(), loadAliases()]);
 };
 
-// Proxy editor is hosted as a modal — v-if drives the unmount on close
+// Proxy + alias editors are hosted as modals — v-if drives the unmount on close
 // so the next open boots from a fresh script setup (no manual reset).
 const proxyDialogOpen = ref(false);
 const proxyDialogRecord = ref<ProxyRecord | null>(null);
@@ -70,6 +76,13 @@ const openProxyDialog = (record: ProxyRecord | null): void => {
   proxyDialogRecord.value = record;
   proxyDialogOpen.value = true;
 };
+
+const aliasDialogOpen = ref(false);
+const aliasDialogRecord = ref<ModelAlias | null>(null);
+const openAliasDialog = (record: ModelAlias | null): void => {
+  aliasDialogRecord.value = record;
+  aliasDialogOpen.value = true;
+};
 </script>
 
 <template>
@@ -89,6 +102,11 @@ const openProxyDialog = (record: ProxyRecord | null): void => {
           @edit="(record: ProxyRecord) => openProxyDialog(record)"
           @changed="reloadAll"
         />
+        <AliasesSettingsCard
+          @add="() => openAliasDialog(null)"
+          @edit="(record: ModelAlias) => openAliasDialog(record)"
+          @changed="reloadAll"
+        />
         <SearchConfigSection
           :initial-config="settingsData.data.value.searchConfig"
           :initial-error="settingsData.data.value.searchConfigError"
@@ -111,5 +129,12 @@ const openProxyDialog = (record: ProxyRecord | null): void => {
       :record="proxyDialogRecord"
       @saved="reloadAll"
     />
+
+    <AliasEditDialog
+      v-if="aliasDialogOpen"
+      v-model:open="aliasDialogOpen"
+      :record="aliasDialogRecord"
+      @saved="reloadAll"
+    />
   </div>
 </template>
diff --git a/apps/web/vitest.config.ts b/apps/web/vitest.config.ts
index 4b522bfd9..20fd5dbbc 100644
--- a/apps/web/vitest.config.ts
+++ b/apps/web/vitest.config.ts
@@ -1,6 +1,11 @@
+import Vue from '@vitejs/plugin-vue';
 import { defineConfig } from 'vitest/config';
 
 export default defineConfig({
+  // The Vue plugin is required for any test that mounts an SFC; logic-only
+  // tests don't need it, but adding it here is cheap and lets component
+  // tests (AliasRow, AliasEditDialog, ...) live next to the rest.
+  plugins: [Vue()],
   test: {
     // happy-dom provides DOM + EventSource for the dump-subscription
     // composable's tests. Node-env worked while the composable accepted
diff --git a/packages/gateway/package.json b/packages/gateway/package.json
index 0e763374d..99ce27a73 100644
--- a/packages/gateway/package.json
+++ b/packages/gateway/package.json
@@ -11,6 +11,8 @@
       "types": "./src/runtime/channel-broker-contract.ts"
     },
     "./control-plane/proxies/serialize": { "types": "./src/control-plane/proxies/serialize.ts" },
+    "./control-plane/model-aliases/serialize": { "types": "./src/control-plane/model-aliases/serialize.ts" },
+    "./control-plane/model-aliases/types": { "types": "./src/control-plane/model-aliases/types.ts" },
     "./control-plane/pricing/types": { "types": "./src/control-plane/pricing/types.ts" },
     "./data-plane/tools/web-search/types": {
       "import": "./src/data-plane/tools/web-search/types.ts",
diff --git a/pnpm-lock.yaml b/pnpm-lock.yaml
index bf3ef7f58..e1273e234 100644
--- a/pnpm-lock.yaml
+++ b/pnpm-lock.yaml
@@ -223,6 +223,9 @@ importers:
       '@vitejs/plugin-vue':
         specifier: ^6.0.1
         version: 6.0.7(vite@7.3.1(@types/node@22.19.19)(jiti@2.6.1)(tsx@4.22.4)(yaml@2.9.0))(vue@3.5.34(typescript@5.9.3))
+      '@vue/test-utils':
+        specifier: ^2.4.11
+        version: 2.4.11(@vue/compiler-dom@3.5.34)(@vue/server-renderer@3.5.34(vue@3.5.34(typescript@5.9.3)))(vue@3.5.34(typescript@5.9.3))
       happy-dom:
         specifier: ^20.0.0
         version: 20.10.6
@@ -1278,6 +1281,10 @@ packages:
   '@internationalized/number@3.6.6':
     resolution: {integrity: sha512-iFgmQaXHE0vytNfpLZWOC2mEJCBRzcUxt53Xf/yCXG93lRvqas237i3r7X4RKMwO3txiyZD4mQjKAByFv6UGSQ==}
 
+  '@isaacs/cliui@8.0.2':
+    resolution: {integrity: sha512-O8jcjabXaleOG9DQ0+ARXWZBTfnP4WNAqzuiJK7ll44AmxGKv/J2M4TPjxjY3znBCfvBXFzucm1twdyFybFqEA==}
+    engines: {node: '>=12'}
+
   '@jridgewell/gen-mapping@0.3.13':
     resolution: {integrity: sha512-2kkt/7niJ6MgEPxF0bYdQ6etZaA+fQvDcLKckhy1yIQOzaoKjBBjSj63/aLVjYE3qhRt5dvM+uUyfCg6UKCBbA==}
 
@@ -1330,6 +1337,9 @@ packages:
     resolution: {integrity: sha512-IYqDGiTXab6FniAgnSdZwgWbomxpy9FtYvLKs7wCUs2a8RkITG+DFGO1DM9cr+E3/RgADRpFjrKVaJ1z6sjtEg==}
     engines: {node: '>= 20.19.0'}
 
+  '@one-ini/wasm@0.1.1':
+    resolution: {integrity: sha512-XuySG1E38YScSJoMlqovLru4KTUNSjgVTIjyh7qMX6aNN5HY5Ct5LhRJdxO79JtTzKfzV/bnWpz+zquYrISsvw==}
+
   '@oxc-parser/binding-android-arm-eabi@0.131.0':
     resolution: {integrity: sha512-t2xicr9pfzkSRYx5aPqZqlLaayIwJTqgQ81Jor31Xep2nGyL2Aq3d0K5wOfeR7VevaSdxaS9dzSQP9xDwn8fDg==}
     engines: {node: ^20.19.0 || >=22.12.0}
@@ -1489,6 +1499,10 @@ packages:
     resolution: {integrity: sha512-C2Xj8FZ0uHWeCXXqX5B4/gVFQmtSkiuOolzAgutjTfseNOHT3pUjljDZsTSxXFGgio54bCzVFqmEOUrIVk8RDA==}
     engines: {node: '>=20.0.0'}
 
+  '@pkgjs/parseargs@0.11.0':
+    resolution: {integrity: sha512-+1VkjdD0QBLPodGrJUeqarH8VAIvQODIbwh9XpP5Syisf7YoQgsJKPNFoqqLQlu+VQ/tVSshMR6loPMn8U+dPg==}
+    engines: {node: '>=14'}
+
   '@polka/url@1.0.0-next.29':
     resolution: {integrity: sha512-wwQAWhWSuHaag8c4q/KN/vCoeOJYshAIvMQwD4GpSb3OiZklFfvAgmj0VCBBImRpuF/aFgIRzllXlVX93Jevww==}
 
@@ -2064,6 +2078,16 @@ packages:
   '@vue/shared@3.5.34':
     resolution: {integrity: sha512-24uqU4OIiX29ryC3MeWid/Xf2fa2EFRUVLb77nRhk+UrTVrh/XiGtFAFmJBAtBRbjwNdsPRP+jj/OL27Eg1NDA==}
 
+  '@vue/test-utils@2.4.11':
+    resolution: {integrity: sha512-GDqaqZsA6m2E5vNzej0aYiIb6BX8xV9pNSbbbXKOfEYwg7ZNblVX8suyqmUBThq8VIrgAJNxn+z72hVtUeiWHA==}
+    peerDependencies:
+      '@vue/compiler-dom': 3.x
+      '@vue/server-renderer': 3.x
+      vue: 3.x
+    peerDependenciesMeta:
+      '@vue/server-renderer':
+        optional: true
+
   '@vueuse/core@14.3.0':
     resolution: {integrity: sha512-aHfz47g0ZhMtTVHmIzMVpJy8ePhhOy68GY5bv110+5DVtZ+W7BsOx+m61UNQqfrWyPztIHIanWa3E2tib3NFIw==}
     peerDependencies:
@@ -2119,6 +2143,10 @@ packages:
     peerDependencies:
       vue: ^3.5.0
 
+  abbrev@2.0.0:
+    resolution: {integrity: sha512-6/mh1E2u2YgEsCHdY0Yx5oW+61gZU+1vXaoiHHrpKeuRNNgFvS+/jrwHiQhB5apAf5oB7UB7E19ol2R2LKH8hQ==}
+    engines: {node: ^14.17.0 || ^16.13.0 || >=18.0.0}
+
   acorn-jsx@5.3.2:
     resolution: {integrity: sha512-rq9s+JNhf0IChjtDXxllJ7g41oZk5SlXtp0LHwyA5cejwn7vKmKp4pPri6YEePv2PU65sAsegbXtIinmDFDXgQ==}
     peerDependencies:
@@ -2142,10 +2170,18 @@ packages:
     resolution: {integrity: sha512-quJQXlTSUGL2LH9SUXo8VwsY4soanhgo6LNSm84E1LBcE8s3O0wpdiRzyR9z/ZZJMlMWv37qOOb9pdJlMUEKFQ==}
     engines: {node: '>=8'}
 
+  ansi-regex@6.2.2:
+    resolution: {integrity: sha512-Bq3SmSpyFHaWjPk8If9yc6svM8c56dB5BAtW4Qbw5jHTwwXXcTLoRMkpDJp6VL0XzlWaCHTXrkFURMYmD0sLqg==}
+    engines: {node: '>=12'}
+
   ansi-styles@4.3.0:
     resolution: {integrity: sha512-zbB9rCJAT1rbjiVDb2hqKFHNYLxgtk8NURxZ3IZwD3F6NtxbXZQCnnSi1Lkx+IDohdPlFp222wVALIheZJQSEg==}
     engines: {node: '>=8'}
 
+  ansi-styles@6.2.3:
+    resolution: {integrity: sha512-4Dj6M28JB+oAH8kFkTLUo+a2jwOFkuqb3yucU0CANcRRUbxS0cP0nZYCGjcc3BNXwRIsUVmDGgzawme7zvJHvg==}
+    engines: {node: '>=12'}
+
   argparse@2.0.1:
     resolution: {integrity: sha512-8+9WqebbFzpX9OR+Wa6O29asIogeRMzcGtAINdpMHHyAg10f05aSFVBbcEqGf/PXw1EjAZ+q2/bEBg3DvurK3Q==}
 
@@ -2295,6 +2331,10 @@ packages:
   colorette@2.0.20:
     resolution: {integrity: sha512-IfEDxwoWIjkeXL1eXcDiow4UbKjhLdq6/EuSVR9GMN7KVH3r9gQ83e73hsz1Nd1T3ijd5xv1wcWRYO+D6kCI2w==}
 
+  commander@10.0.1:
+    resolution: {integrity: sha512-y4Mg2tXshplEbSGzx7amzPwKKOCGuoSRP/CjEdwwk0FOGlUbq6lKuoyDZTNZkmxHdJtp54hdfY/JUrdL7Xfdug==}
+    engines: {node: '>=14'}
+
   concat-map@0.0.1:
     resolution: {integrity: sha512-/Srv4dswyQNBfohGpz9o6Yb3Gz3SrUDqBH5rTuhGR7ahtlbYKnVxw2bCFMRljaA7EXHaXZ8wsHdodFvbkhKmqg==}
 
@@ -2309,6 +2349,9 @@ packages:
   confbox@0.2.4:
     resolution: {integrity: sha512-ysOGlgTFbN2/Y6Cg3Iye8YKulHw+R2fNXHrgSmXISQdMnomY6eNDprVdW9R5xBguEqI954+S6709UyiO7B+6OQ==}
 
+  config-chain@1.1.13:
+    resolution: {integrity: sha512-qj+f8APARXHrM0hraqXYb2/bOVSV4PvJQlNZ/DVj0QrmNM2q2euizkeuVckQ57J+W0mRH6Hvi+k50M4Jul2VRQ==}
+
   consola@3.4.2:
     resolution: {integrity: sha512-5IKcdX0nnYavi6G7TtOhwkYzyjfJlatbjMjuLSfE2kYT5pMDOilZ4OvMhi637CcDICTmz3wARPoyhqyX1Y+XvA==}
     engines: {node: ^14.18.0 || >=16.10.0}
@@ -2414,9 +2457,20 @@ packages:
   duplexer@0.1.2:
     resolution: {integrity: sha512-jtD6YG370ZCIi/9GTaJKQxWTZD045+4R4hTk/x1UyoqadyJ9x9CgSi1RlVDQF8U2sxLLSnFkCaMihqljHIWgMg==}
 
+  eastasianwidth@0.2.0:
+    resolution: {integrity: sha512-I88TYZWc9XiYHRQ4/3c5rjjfgkjhLyW2luGIheGERbNQ6OY7yTybanSpDXZa8y7VUP9YmDcYa+eyq4ca7iLqWA==}
+
+  editorconfig@1.0.7:
+    resolution: {integrity: sha512-e0GOtq/aTQhVdNyDU9e02+wz9oDDM+SIOQxWME2QRjzRX5yyLAuHDE+0aE8vHb9XRC8XD37eO2u57+F09JqFhw==}
+    engines: {node: '>=14'}
+    hasBin: true
+
   emoji-regex@8.0.0:
     resolution: {integrity: sha512-MSjYzcWNOA0ewAHpz0MxpYFvwg6yjy1NG3xteoqz644VCo/RPgnr1/GGt+ic3iJTzQ8Eu3TdM14SawnVUmGE6A==}
 
+  emoji-regex@9.2.2:
+    resolution: {integrity: sha512-L18DaJsXSUk2+42pv8mLs5jJT2hqFkFE4j21wOmgbUqsZ2hL72NsUU785g9RXgo3s0ZNgVl42TiHp3ZtOv/Vyg==}
+
   entities@7.0.1:
     resolution: {integrity: sha512-TWrgLOFUQTH994YUyl1yT4uyavY5nNB5muff+RtWaqNVCAK408b5ZnnbNAUEWLTCpum9w6arT70i1XdQ4UeOPA==}
     engines: {node: '>=0.12'}
@@ -2635,6 +2689,10 @@ packages:
     resolution: {integrity: sha512-dKx12eRCVIzqCxFGplyFKJMPvLEWgmNtUrpTiJIR5u97zEhRG8ySrtboPHZXx7daLxQVrl643cTzbab2tkQjxg==}
     engines: {node: '>= 0.4'}
 
+  foreground-child@3.3.1:
+    resolution: {integrity: sha512-gIXjKqtFuWEgzFRJA9WCQeSJLZDjgJUOMCMzxtvFq/37KojM1BFGufqsCy0r4qSQmYLsZYMeyRqzIWOMup03sw==}
+    engines: {node: '>=14'}
+
   fsevents@2.3.3:
     resolution: {integrity: sha512-5xoDfX+fL7faATnagmWPpbFtwh/R77WmMMqqHGS65C3vvB0YHrgF+B1YmZ3441tMj5n63k0212XNoJwzlhffQw==}
     engines: {node: ^8.16.0 || ^10.6.0 || >=11.0.0}
@@ -2677,6 +2735,11 @@ packages:
     resolution: {integrity: sha512-XxwI8EOhVQgWp6iDL+3b0r86f4d6AX6zSU55HfB4ydCEuXLXc5FcYeOu+nnGftS4TEju/11rt4KJPTMgbfmv4A==}
     engines: {node: '>=10.13.0'}
 
+  glob@10.5.0:
+    resolution: {integrity: sha512-DfXN8DfhJ7NH3Oe7cFmu3NCu1wKbkReJ8TorzSAFbSKrlNaQSKfIzqYqVY8zlbs2NLBbWpRiU52GX2PbaBVNkg==}
+    deprecated: Old versions of glob are not supported, and contain widely publicized security vulnerabilities, which have been fixed in the current version. Please update. Support for old versions may be purchased (at exorbitant rates) by contacting i@izs.me
+    hasBin: true
+
   globals@14.0.0:
     resolution: {integrity: sha512-oahGvuMGQlPw/ivIYBjVSrWAfWLBeku5tpPE2fOPLi+WHffIWbuh2tCjhyQhTBPMf5E9jDEH4FOmTYgYwbKwtQ==}
     engines: {node: '>=18'}
@@ -2767,6 +2830,9 @@ packages:
     resolution: {integrity: sha512-JmXMZ6wuvDmLiHEml9ykzqO6lwFbof0GG4IkcGaENdCRDDmMVnny7s5HsIgHCbaq0w2MyPhDqkhTUgS2LU2PHA==}
     engines: {node: '>=0.8.19'}
 
+  ini@1.3.8:
+    resolution: {integrity: sha512-JV/yugV2uzW5iMRSiZAyDtQd+nxtUnjeLt0acNdw98kKLrvuRVyB80tsREOE7yvGVgalhZ6RNXCmEHkUKBKxew==}
+
   internal-slot@1.1.0:
     resolution: {integrity: sha512-4gd7VpWNQNB4UKKCFFVcp1AVv+FMOgs9NKzjHKusc8jTMhd5eL1NqQqOpE0KzMds804/yHlglp3uxgluOqAPLw==}
     engines: {node: '>= 0.4'}
@@ -2887,10 +2953,21 @@ packages:
   isexe@2.0.0:
     resolution: {integrity: sha512-RHxMLp9lnKHGHRng9QFhRCMbYAcVpn69smSGcq3f36xjgVVWThj4qqLbTLlq7Ssj8B+fIQ1EuCEGI2lKsyQeIw==}
 
+  jackspeak@3.4.3:
+    resolution: {integrity: sha512-OGlZQpz2yfahA/Rd1Y8Cd9SIEsqvXkLVoSw/cgwhnhFMDbsQFeZYoJJ7bIZBS9BcamUW96asq/npPWugM+RQBw==}
+
   jiti@2.6.1:
     resolution: {integrity: sha512-ekilCSN1jwRvIbgeg/57YFh8qQDNbwDb9xT/qu2DAHbFFZUicIl4ygVaAvzveMhMVr3LnpSKTNnwt8PoOfmKhQ==}
     hasBin: true
 
+  js-beautify@1.15.4:
+    resolution: {integrity: sha512-9/KXeZUKKJwqCXUdBxFJ3vPh467OCckSBmYDwSK/EtV090K+iMJ7zx2S3HLVDIWFQdqMIsZWbnaGiba18aWhaA==}
+    engines: {node: '>=14'}
+    hasBin: true
+
+  js-cookie@3.0.8:
+    resolution: {integrity: sha512-yeJd4aNAdYZQjaon2bpD/Gb0B/omw7HQOsynXXcOiWVCacbBcPlgn8S/d1X6blFSaHao7ozqtW7NZW19xpCtIw==}
+
   js-yaml@4.1.1:
     resolution: {integrity: sha512-qQKT4zQxXl8lLwBtHMWwaTcGfFOZviOJet3Oy/xmGk2gZH677CJM9EvtfdSkgWcATZhj/55JZ0rmy3myCT5lsA==}
     hasBin: true
@@ -2947,6 +3024,9 @@ packages:
   lodash.merge@4.6.2:
     resolution: {integrity: sha512-0KpjqXRVvrYyCsX1swR/XTK0va6VQkQM6MNo7PqW77ByjAhoARA8EfrP1N4+KlKj8YS0ZUCtRT/YUuhyYDujIQ==}
 
+  lru-cache@10.4.3:
+    resolution: {integrity: sha512-JNAzZcXrCt42VGLuYz0zfAzDfAvJWW6AfYlDBQyDV5DClI2m5sAmK+OIO7s59XfsRsWHp02jAJrRadPRGTt6SQ==}
+
   magic-regexp@0.10.0:
     resolution: {integrity: sha512-Uly1Bu4lO1hwHUW0CQeSWuRtzCMNO00CmXtS8N6fyvB3B979GOEEeAkiTUDsmbYLAbvpUS/Kt5c4ibosAzVyVg==}
 
@@ -2982,6 +3062,10 @@ packages:
   minimist@1.2.8:
     resolution: {integrity: sha512-2yyAR8qBkN3YuheJanUpWC5U3bb5osDywNB8RzDVlDwDHbocAJveqqj1u8+SVD7jkWT4yvsHCpWqqWqAxb0zCA==}
 
+  minipass@7.1.3:
+    resolution: {integrity: sha512-tEBHqDnIoM/1rXME1zgka9g6Q2lcoCkxHLuc7ODJ5BxbP5d4c2Z5cGgtXAku59200Cx7diuHTOYfSBD8n6mm8A==}
+    engines: {node: '>=16 || 14 >=14.17'}
+
   mitt@3.0.1:
     resolution: {integrity: sha512-vKivATfr97l2/QBCYAkXYDbrIWPM2IIKEl7YPhjCvKlG3kE2gm+uBo6nEXK3M5/Ffh/FLpKExzOQ3JJoJGFKBw==}
 
@@ -3018,6 +3102,11 @@ packages:
   node-fetch-native@1.6.7:
     resolution: {integrity: sha512-g9yhqoedzIUm0nTnTqAQvueMPVOuIY16bqgAJJC8XOOubYFNwz6IER9qs0Gq2Xd0+CecCKFjtdDTMA4u4xG06Q==}
 
+  nopt@7.2.1:
+    resolution: {integrity: sha512-taM24ViiimT/XntxbPyJQzCG+p4EKOpgD3mxFwW38mGjVUrfERQOeY4EDHjdnptttfHuHQXFx+lTP08Q+mLa/w==}
+    engines: {node: ^14.17.0 || ^16.13.0 || >=18.0.0}
+    hasBin: true
+
   nprogress@0.2.0:
     resolution: {integrity: sha512-I19aIingLgR1fmhftnbWWO3dXc0hSxqHQHQb3H8m+K3TnEn/iSeTZZOyvKXWqQESMwuUVnatlCnZdLBZZt2VSA==}
 
@@ -3089,6 +3178,9 @@ packages:
     resolution: {integrity: sha512-LaNjtRWUBY++zB5nE/NwcaoMylSPk+S+ZHNB1TzdbMJMny6dynpAGt7X/tl/QYq3TIeE6nxHppbo2LGymrG5Pw==}
     engines: {node: '>=10'}
 
+  package-json-from-dist@1.0.1:
+    resolution: {integrity: sha512-UEZIS3/by4OC8vL3P2dTXRETpebLI2NiI5vIrjaD/5UtrkFX/tNbwjTSRAGC/+7CAo2pIcBaRgWmcBBHcsaCIw==}
+
   package-manager-detector@1.6.0:
     resolution: {integrity: sha512-61A5ThoTiDG/C8s8UMZwSorAGwMJ0ERVGj2OjoW5pAalsNOg15+iQiPzrLJ4jhZ1HJzmC2PIHT2oEiH3R5fzNA==}
 
@@ -3110,6 +3202,10 @@ packages:
   path-parse@1.0.7:
     resolution: {integrity: sha512-LDJzPVEEEPR+y48z93A0Ed0yXb8pAByGWo/k5YYdYgpY2/2EsOsksJrq7lOHxryrVOn1ejG6oAp8ahvOIQD8sw==}
 
+  path-scurry@1.11.1:
+    resolution: {integrity: sha512-Xa4Nw17FS9ApQFJ9umLiJS4orGjm7ZzwUrwamcGQuHSzDyth9boKDaycYdDcZDuqYATXw4HFXgaqWTctW/v1HA==}
+    engines: {node: '>=16 || 14 >=14.18'}
+
   path-to-regexp@6.3.0:
     resolution: {integrity: sha512-Yhpw4T9C6hPpgPeA28us07OJeqZ5EzQTkbfwuhsUg0c237RomFoETJgmp2sa3F/41gfLE6G5cqcYwznmeEeOlQ==}
 
@@ -3164,6 +3260,9 @@ packages:
     resolution: {integrity: sha512-DEvV2ZF2r2/63V+tK8hQvrR2ZGn10srHbXviTlcv7Kpzw8jWiNTqbVgjO3IY8RxrrOUF8VPMQQFysYYYv0YZxw==}
     engines: {node: '>=6'}
 
+  proto-list@1.2.4:
+    resolution: {integrity: sha512-vtK/94akxsTMhe0/cbfpR+syPuszcuwhqVjJq26CuNDgFGj682oRBXOP5MJpv2r7JtE8MsiepGIqvvOTBwn2vA==}
+
   punycode@2.3.1:
     resolution: {integrity: sha512-vYt7UD1U9Wg6138shLtLOvdAu+8DsC/ilFtEVHcH+wydcSpNE20AfSOduf6MkRFahL5FY7X1oU7nKVZFtfq8Fg==}
     engines: {node: '>=6'}
@@ -3311,6 +3410,10 @@ packages:
   siginfo@2.0.0:
     resolution: {integrity: sha512-ybx0WO1/8bSBLEWXZvEd7gMW3Sn3JFlW3TvX1nREbDLRNQNaeNN8WK0meBwPdAaOI7TtRRRJn/Es1zhrrCHu7g==}
 
+  signal-exit@4.1.0:
+    resolution: {integrity: sha512-bzyZ1e88w9O1iNJbKnOlvYTrWPDl46O1bG0D3XInv+9tkPrxrN8jUUTiFlDkkmKWgn1M6CfIA13SuGqOa9Korw==}
+    engines: {node: '>=14'}
+
   simple-swizzle@0.2.4:
     resolution: {integrity: sha512-nAu1WFPQSMNr2Zn9PGSZK9AGn4t/y97lEm+MXTtUDwfP0ksAIX4nO+6ruD9Jwut4C49SB1Ws+fbXsm/yScWOHw==}
 
@@ -3350,6 +3453,10 @@ packages:
     resolution: {integrity: sha512-wKyQRQpjJ0sIp62ErSZdGsjMJWsap5oRNihHhu6G7JVO/9jIB6UyevL+tXuOqrng8j/cxKTWyWUwvSTriiZz/g==}
     engines: {node: '>=8'}
 
+  string-width@5.1.2:
+    resolution: {integrity: sha512-HnLOCR3vjcY8beoNLtcjZ5/nxn2afmME6lhrDrebokqMap+XbeW8n9TXpPDOqdGK5qcI3oT0GKTW6wC7EMiVqA==}
+    engines: {node: '>=12'}
+
   string.prototype.trim@1.2.10:
     resolution: {integrity: sha512-Rs66F0P/1kedk5lyYyH9uBzuiI/kNRmwJAR9quK6VOtIpZ2G+hMZd+HQbbv25MgCA6gEffoMZYxlTod4WcdrKA==}
     engines: {node: '>= 0.4'}
@@ -3366,6 +3473,10 @@ packages:
     resolution: {integrity: sha512-Y38VPSHcqkFrCpFnQ9vuSXmquuv5oXOKpGeT6aGrr3o3Gc9AlVa6JBfUSOCnbxGGZF+/0ooI7KrPuUSztUdU5A==}
     engines: {node: '>=8'}
 
+  strip-ansi@7.2.0:
+    resolution: {integrity: sha512-yDPMNjp4WyfYBkHnjIRLfca1i6KMyGCtsVgoKe/z1+6vukgaENdgGBZt+ZmKPc4gavvEZ5OgHfHdrazhgNyG7w==}
+    engines: {node: '>=12'}
+
   strip-bom@3.0.0:
     resolution: {integrity: sha512-vavAMRXOgBVNF6nyEEmL3DBK19iRpDcoIwW+swQ+CbGiu7lju6t+JklA1MHweoWtadgt4ISVUsXLyDq34ddcwA==}
     engines: {node: '>=4'}
@@ -3623,6 +3734,9 @@ packages:
   vscode-uri@3.1.0:
     resolution: {integrity: sha512-/BpdSx+yCQGnCvecbyXdxHDkuk55/G3xwnC0GqY4gmQ3j+A+g8kzzgB4Nk/SINjqn6+waqw3EgbVF2QKExkRxQ==}
 
+  vue-component-type-helpers@3.3.5:
+    resolution: {integrity: sha512-Fe1jyPJoUGpJOYKOri44jduR7My4yYINOMJISuMAbmrs+L5LbIDUc8NTWZYY3EJLK0yPLuCmcd5zoCsE4k2/KA==}
+
   vue-demi@0.14.10:
     resolution: {integrity: sha512-nMZBOwuzabUO0nLgIcc6rycZEebF6eeUfaiQx9+WSk8e29IbLvPU9feI6tqW4kTo3hvoYAJkMh8n8D0fuISphg==}
     engines: {node: '>=12'}
@@ -3715,6 +3829,10 @@ packages:
     resolution: {integrity: sha512-YVGIj2kamLSTxw6NsZjoBxfSwsn0ycdesmc4p+Q21c5zPuZ1pl+NfxVdxPtdHvmNVOQ6XSYG4AUtyt/Fi7D16Q==}
     engines: {node: '>=10'}
 
+  wrap-ansi@8.1.0:
+    resolution: {integrity: sha512-si7QWI6zUMq56bESFvagtmzMdGOtoxfR+Sez11Mobfc7tm+VkUckk9bW2UeffTGVUbOksxmSw0AA2gs8g71NCQ==}
+    engines: {node: '>=12'}
+
   ws@8.18.0:
     resolution: {integrity: sha512-8VbfWfHLbbwu3+N6OKsOMpBdT4kXPDDB9cJk2bJ6mh9ucxdlnNvH1e+roYkKmN9Nxw2yjz7VzeO9oOz2zJ04Pw==}
     engines: {node: '>=10.0.0'}
@@ -4295,6 +4413,15 @@ snapshots:
     dependencies:
       '@swc/helpers': 0.5.21
 
+  '@isaacs/cliui@8.0.2':
+    dependencies:
+      string-width: 5.1.2
+      string-width-cjs: string-width@4.2.3
+      strip-ansi: 7.2.0
+      strip-ansi-cjs: strip-ansi@6.0.1
+      wrap-ansi: 8.1.0
+      wrap-ansi-cjs: wrap-ansi@7.0.0
+
   '@jridgewell/gen-mapping@0.3.13':
     dependencies:
       '@jridgewell/sourcemap-codec': 1.5.5
@@ -4342,6 +4469,8 @@ snapshots:
 
   '@noble/hashes@2.2.0': {}
 
+  '@one-ini/wasm@0.1.1': {}
+
   '@oxc-parser/binding-android-arm-eabi@0.131.0':
     optional: true
 
@@ -4502,6 +4631,9 @@ snapshots:
       tslib: 2.8.1
       tsyringe: 4.10.0
 
+  '@pkgjs/parseargs@0.11.0':
+    optional: true
+
   '@polka/url@1.0.0-next.29': {}
 
   '@poppinss/colors@4.1.6':
@@ -5144,6 +5276,15 @@ snapshots:
 
   '@vue/shared@3.5.34': {}
 
+  '@vue/test-utils@2.4.11(@vue/compiler-dom@3.5.34)(@vue/server-renderer@3.5.34(vue@3.5.34(typescript@5.9.3)))(vue@3.5.34(typescript@5.9.3))':
+    dependencies:
+      '@vue/compiler-dom': 3.5.34
+      js-beautify: 1.15.4
+      vue: 3.5.34(typescript@5.9.3)
+      vue-component-type-helpers: 3.3.5
+    optionalDependencies:
+      '@vue/server-renderer': 3.5.34(vue@3.5.34(typescript@5.9.3))
+
   '@vueuse/core@14.3.0(vue@3.5.34(typescript@5.9.3))':
     dependencies:
       '@types/web-bluetooth': 0.0.21
@@ -5166,6 +5307,8 @@ snapshots:
     dependencies:
       vue: 3.5.34(typescript@5.9.3)
 
+  abbrev@2.0.0: {}
+
   acorn-jsx@5.3.2(acorn@8.16.0):
     dependencies:
       acorn: 8.16.0
@@ -5185,10 +5328,14 @@ snapshots:
 
   ansi-regex@5.0.1: {}
 
+  ansi-regex@6.2.2: {}
+
   ansi-styles@4.3.0:
     dependencies:
       color-convert: 2.0.1
 
+  ansi-styles@6.2.3: {}
+
   argparse@2.0.1: {}
 
   aria-hidden@1.2.6:
@@ -5361,6 +5508,8 @@ snapshots:
 
   colorette@2.0.20: {}
 
+  commander@10.0.1: {}
+
   concat-map@0.0.1: {}
 
   concurrently@9.2.1:
@@ -5376,6 +5525,11 @@ snapshots:
 
   confbox@0.2.4: {}
 
+  config-chain@1.1.13:
+    dependencies:
+      ini: 1.3.8
+      proto-list: 1.2.4
+
   consola@3.4.2: {}
 
   cookie@1.1.1: {}
@@ -5468,8 +5622,19 @@ snapshots:
 
   duplexer@0.1.2: {}
 
+  eastasianwidth@0.2.0: {}
+
+  editorconfig@1.0.7:
+    dependencies:
+      '@one-ini/wasm': 0.1.1
+      commander: 10.0.1
+      minimatch: 9.0.9
+      semver: 7.7.4
+
   emoji-regex@8.0.0: {}
 
+  emoji-regex@9.2.2: {}
+
   entities@7.0.1: {}
 
   error-stack-parser-es@1.0.5: {}
@@ -5812,6 +5977,11 @@ snapshots:
     dependencies:
       is-callable: 1.2.7
 
+  foreground-child@3.3.1:
+    dependencies:
+      cross-spawn: 7.0.6
+      signal-exit: 4.1.0
+
   fsevents@2.3.3:
     optional: true
 
@@ -5864,6 +6034,15 @@ snapshots:
     dependencies:
       is-glob: 4.0.3
 
+  glob@10.5.0:
+    dependencies:
+      foreground-child: 3.3.1
+      jackspeak: 3.4.3
+      minimatch: 9.0.9
+      minipass: 7.1.3
+      package-json-from-dist: 1.0.1
+      path-scurry: 1.11.1
+
   globals@14.0.0: {}
 
   globalthis@1.0.4:
@@ -5937,6 +6116,8 @@ snapshots:
 
   imurmurhash@0.1.4: {}
 
+  ini@1.3.8: {}
+
   internal-slot@1.1.0:
     dependencies:
       es-errors: 1.3.0
@@ -6063,8 +6244,24 @@ snapshots:
 
   isexe@2.0.0: {}
 
+  jackspeak@3.4.3:
+    dependencies:
+      '@isaacs/cliui': 8.0.2
+    optionalDependencies:
+      '@pkgjs/parseargs': 0.11.0
+
   jiti@2.6.1: {}
 
+  js-beautify@1.15.4:
+    dependencies:
+      config-chain: 1.1.13
+      editorconfig: 1.0.7
+      glob: 10.5.0
+      js-cookie: 3.0.8
+      nopt: 7.2.1
+
+  js-cookie@3.0.8: {}
+
   js-yaml@4.1.1:
     dependencies:
       argparse: 2.0.1
@@ -6110,6 +6307,8 @@ snapshots:
 
   lodash.merge@4.6.2: {}
 
+  lru-cache@10.4.3: {}
+
   magic-regexp@0.10.0:
     dependencies:
       estree-walker: 3.0.3
@@ -6158,6 +6357,8 @@ snapshots:
 
   minimist@1.2.8: {}
 
+  minipass@7.1.3: {}
+
   mitt@3.0.1: {}
 
   mlly@1.8.2:
@@ -6188,6 +6389,10 @@ snapshots:
 
   node-fetch-native@1.6.7: {}
 
+  nopt@7.2.1:
+    dependencies:
+      abbrev: 2.0.0
+
   nprogress@0.2.0: {}
 
   nth-check@2.1.1:
@@ -6299,6 +6504,8 @@ snapshots:
     dependencies:
       p-limit: 3.1.0
 
+  package-json-from-dist@1.0.1: {}
+
   package-manager-detector@1.6.0: {}
 
   parent-module@1.0.1:
@@ -6313,6 +6520,11 @@ snapshots:
 
   path-parse@1.0.7: {}
 
+  path-scurry@1.11.1:
+    dependencies:
+      lru-cache: 10.4.3
+      minipass: 7.1.3
+
   path-to-regexp@6.3.0: {}
 
   pathe@2.0.3: {}
@@ -6361,6 +6573,8 @@ snapshots:
 
   prismjs@1.30.0: {}
 
+  proto-list@1.2.4: {}
+
   punycode@2.3.1: {}
 
   pvtsutils@1.3.6:
@@ -6611,6 +6825,8 @@ snapshots:
 
   siginfo@2.0.0: {}
 
+  signal-exit@4.1.0: {}
+
   simple-swizzle@0.2.4:
     dependencies:
       is-arrayish: 0.3.4
@@ -6646,6 +6862,12 @@ snapshots:
       is-fullwidth-code-point: 3.0.0
       strip-ansi: 6.0.1
 
+  string-width@5.1.2:
+    dependencies:
+      eastasianwidth: 0.2.0
+      emoji-regex: 9.2.2
+      strip-ansi: 7.2.0
+
   string.prototype.trim@1.2.10:
     dependencies:
       call-bind: 1.0.9
@@ -6673,6 +6895,10 @@ snapshots:
     dependencies:
       ansi-regex: 5.0.1
 
+  strip-ansi@7.2.0:
+    dependencies:
+      ansi-regex: 6.2.2
+
   strip-bom@3.0.0: {}
 
   strip-json-comments@3.1.1: {}
@@ -6959,6 +7185,8 @@ snapshots:
 
   vscode-uri@3.1.0: {}
 
+  vue-component-type-helpers@3.3.5: {}
+
   vue-demi@0.14.10(vue@3.5.34(typescript@5.9.3)):
     dependencies:
       vue: 3.5.34(typescript@5.9.3)
@@ -7083,6 +7311,12 @@ snapshots:
       string-width: 4.2.3
       strip-ansi: 6.0.1
 
+  wrap-ansi@8.1.0:
+    dependencies:
+      ansi-styles: 6.2.3
+      string-width: 5.1.2
+      strip-ansi: 7.2.0
+
   ws@8.18.0: {}
 
   ws@8.21.0: {}

From c4355345fbb7245dbf3514aafb243c65e175d761 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 04:38:59 +0800
Subject: [PATCH 026/170] chore(aliases): drop dead helpers and stale comments
 in control plane
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

routes.ts had a re-export of ModelAliasRules that no file imported; the
frontend pulls the type from packages/gateway/src/control-plane/model-aliases/types.ts
directly. The PATCH/DELETE param fallbacks (?? '') were dead — Hono only
dispatches the :alias routes when the segment is present, matching how
api-keys/users routes use param('id')!. In repo.ts, the verbose explanatory
comment on insertAlias is trimmed down to the one load-bearing fact. The
repo/types.ts comment saying save is 'used by import/restore flows' was
stale — only the PATCH update path calls it.
---
 .../gateway/src/control-plane/model-aliases/repo.ts    |  7 ++-----
 .../gateway/src/control-plane/model-aliases/routes.ts  | 10 +++-------
 packages/gateway/src/repo/types.ts                     |  4 ++--
 3 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts
index 1eaae7d16..69d4d4fd1 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo.ts
@@ -33,11 +33,8 @@ export const getAliasByName = async (db: SqlDatabase, alias: string): Promise<Mo
   return row ? toModelAlias(row) : null;
 };
 
-// Plain INSERT — surfaces a PK collision through the `duplicate` return so
-// the route layer can map it to 409 without parsing driver-specific error
-// strings. SQLite/D1 both raise a constraint failure on conflict; we detect
-// it with a single SELECT round-trip rather than catching the throw because
-// the driver error shape varies between node:sqlite and D1.
+// Detects PK collision with a SELECT round-trip rather than catching the
+// INSERT throw — driver error shape differs between node:sqlite and D1.
 export const insertAlias = async (db: SqlDatabase, alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }> => {
   const existing = await db
     .prepare('SELECT 1 FROM model_aliases WHERE alias = ?')
diff --git a/packages/gateway/src/control-plane/model-aliases/routes.ts b/packages/gateway/src/control-plane/model-aliases/routes.ts
index 961a0da73..b05608ec3 100644
--- a/packages/gateway/src/control-plane/model-aliases/routes.ts
+++ b/packages/gateway/src/control-plane/model-aliases/routes.ts
@@ -1,7 +1,7 @@
 import type { Context } from 'hono';
 
 import { aliasToJson } from './serialize.ts';
-import type { ModelAlias, ModelAliasRules } from './types.ts';
+import type { ModelAlias } from './types.ts';
 import { type CtxWithJson } from '../../middleware/zod-validator.ts';
 import { getRepo } from '../../repo/index.ts';
 import type { createAliasBody, updateAliasBody } from '../schemas.ts';
@@ -36,7 +36,7 @@ export const createAlias = async (c: CtxWithJson<typeof createAliasBody>) => {
 };
 
 export const updateAlias = async (c: CtxWithJson<typeof updateAliasBody>) => {
-  const aliasName = c.req.param('alias') ?? '';
+  const aliasName = c.req.param('alias')!;
   const body = c.req.valid('json');
 
   const repo = getRepo();
@@ -69,12 +69,8 @@ const nextDisplayName = (existing: ModelAlias, patch: string | null | undefined)
 };
 
 export const deleteAlias = async (c: Context) => {
-  const aliasName = c.req.param('alias') ?? '';
+  const aliasName = c.req.param('alias')!;
   const { deleted } = await getRepo().modelAliases.delete(aliasName);
   if (!deleted) return c.json({ error: 'Alias not found' }, 404);
   return c.body(null, 204);
 };
-
-// Re-export so the routes module can wire the type-level `Rules` carrier
-// through the RPC client without consumers having to chase the alias subtree.
-export type { ModelAliasRules };
diff --git a/packages/gateway/src/repo/types.ts b/packages/gateway/src/repo/types.ts
index 8ed620849..41190352a 100644
--- a/packages/gateway/src/repo/types.ts
+++ b/packages/gateway/src/repo/types.ts
@@ -346,8 +346,8 @@ export interface ModelAliasesRepo {
   // surfaces 409 to the dashboard instead of silently overwriting an
   // existing row.
   create(alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }>;
-  // UPSERT semantics — used by import/restore flows that need to land a row
-  // regardless of whether it already exists.
+  // UPSERT — used by the PATCH update path; preserves created_at on re-save
+  // and bumps updated_at.
   save(alias: ModelAlias): Promise<void>;
   // Returns whether a row was actually removed; routes treat false as 404.
   delete(alias: string): Promise<{ deleted: boolean }>;

From 26f08e1b5438355f2459bf9c1b20b5b3f915d4b3 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 04:38:59 +0800
Subject: [PATCH 027/170] fix(aliases): make 0047 display_name seed idempotent

Guard the UPDATE with `AND display_name IS NULL` so a re-run against an
environment where the operator already renamed the seed doesn't wipe
their value. Migrations are tracked as one-shot, but defense in depth keeps
the local-dev replay path safe.
---
 .../gateway/migrations/0047_model_aliases_display_name.sql     | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/packages/gateway/migrations/0047_model_aliases_display_name.sql b/packages/gateway/migrations/0047_model_aliases_display_name.sql
index 9d21ed9a1..5fd9b2591 100644
--- a/packages/gateway/migrations/0047_model_aliases_display_name.sql
+++ b/packages/gateway/migrations/0047_model_aliases_display_name.sql
@@ -1,3 +1,4 @@
 ALTER TABLE model_aliases ADD COLUMN display_name TEXT;
 
-UPDATE model_aliases SET display_name = 'Codex Auto Review' WHERE alias = 'codex-auto-review';
+UPDATE model_aliases SET display_name = 'Codex Auto Review'
+  WHERE alias = 'codex-auto-review' AND display_name IS NULL;

From e6a6862c722e5d8a262a6d487fc6920bb24969ec Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 04:38:59 +0800
Subject: [PATCH 028/170] fix(aliases): drop dead anthropicBeta body write on
 Gemini apply path
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

applyAliasRulesToGemini was writing payload.anthropicBeta but nothing
read it — gemini-via-messages doesn't reference the field, and the
Messages attempt reads candidate.aliasRules.anthropicBeta directly for
the outbound anthropic-beta header. The sanitizer would strip the body
field on its way to upstream regardless. Removed the write and the
matching test; the header path is unchanged.

Also corrected the Messages apply doc that claimed "the write-side
validator forbids" adaptive + budgetTokens — the schema accepts both
today; the dashboard's tagged radio is what enforces exclusivity, and
the apply step picks adaptive when both arrive raw.
---
 .../gateway/src/data-plane/model-aliases/apply.ts | 15 +++++++--------
 .../src/data-plane/model-aliases/apply_test.ts    |  6 ------
 2 files changed, 7 insertions(+), 14 deletions(-)

diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts
index e90e2fbed..0791c5f7b 100644
--- a/packages/gateway/src/data-plane/model-aliases/apply.ts
+++ b/packages/gateway/src/data-plane/model-aliases/apply.ts
@@ -43,10 +43,9 @@ export const applyAliasRulesToMessages = (payload: MessagesPayload, rules: Model
   if (rules.reasoning?.effort !== undefined) {
     payload.output_config = { ...payload.output_config, effort: rules.reasoning.effort };
   }
-  // Adaptive wins over budgetTokens when both arrive — the write-side
-  // validator forbids the combination, but the apply step has to make a
-  // choice if both slip through and the translate-layer policy is
-  // adaptive-first.
+  // The dashboard's tagged radio enforces mutual exclusivity between
+  // adaptive and budgetTokens; if both arrive through the raw API the apply
+  // step picks adaptive (matches the translate-layer adaptive-first policy).
   if (rules.reasoning?.adaptive === true) {
     payload.thinking = { type: 'adaptive' };
   } else if (rules.reasoning?.budgetTokens !== undefined) {
@@ -74,9 +73,10 @@ export const applyAliasRulesToMessages = (payload: MessagesPayload, rules: Model
 
 export const applyAliasRulesToGemini = (payload: GeminiPayload, rules: ModelAliasRules): void => {
   // All four reasoning knobs ride on the native thinkingConfig; verbosity and
-  // serviceTier ride on extension slots under generationConfig; anthropicBeta
-  // rides on a top-level extension slot so the existing gemini-via-messages
-  // translator picks it up there.
+  // serviceTier ride on extension slots under generationConfig. anthropicBeta
+  // doesn't surface on Gemini-inbound bodies — the gemini-via-messages
+  // translator doesn't read it, and the Messages attempt reads it off the
+  // candidate's aliasRules directly when stamping the outbound header.
   const hasThinking = rules.reasoning?.effort !== undefined
     || rules.reasoning?.budgetTokens !== undefined
     || rules.reasoning?.adaptive === true
@@ -101,5 +101,4 @@ export const applyAliasRulesToGemini = (payload: GeminiPayload, rules: ModelAlia
     if (rules.serviceTier !== undefined) generationConfig.serviceTier = rules.serviceTier;
     payload.generationConfig = generationConfig;
   }
-  if (rules.anthropicBeta?.length) payload.anthropicBeta = [...rules.anthropicBeta];
 };
diff --git a/packages/gateway/src/data-plane/model-aliases/apply_test.ts b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
index 3dfba9d45..c62bac7f7 100644
--- a/packages/gateway/src/data-plane/model-aliases/apply_test.ts
+++ b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
@@ -195,12 +195,6 @@ describe('applyAliasRulesToGemini', () => {
     expect(payload.generationConfig?.serviceTier).toBe('flex');
   });
 
-  test('writes anthropicBeta to top-level extension slot', () => {
-    const payload = gem();
-    applyAliasRulesToGemini(payload, { anthropicBeta: ['ctx-1m'] });
-    expect(payload.anthropicBeta).toEqual(['ctx-1m']);
-  });
-
   test('preserves existing thinkingConfig entries when adding a new one', () => {
     const payload = gem({ generationConfig: { thinkingConfig: { thinkingBudget: 1024 } } });
     applyAliasRulesToGemini(payload, { reasoning: { summary: 'detailed' } });

From 784e19e3e07d4111a13893ee23e7cf00a8faeb6b Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 04:38:59 +0800
Subject: [PATCH 029/170] perf(web): drop duplicate /api/models reload on alias
 save
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

AliasEditDialog awaited modelsStore.load() after emitting saved, but
settings.vue's reloadAll handler (wired to @saved) also loads models —
so the dashboard fired the same GET /api/models twice per alias write.
The emit alone is the contract; drop the duplicate.
---
 apps/web/src/components/alias-edit/AliasEditDialog.vue | 5 -----
 1 file changed, 5 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index e247137b0..ce254521c 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -203,11 +203,6 @@ const save = async () => {
       if (error) { saveError.value = error.message; return; }
     }
     emit('saved');
-    // Also refresh the dashboard's /api/models cache so the new alias appears
-    // in the catalog. The settings.vue reloadAll handler does this too, but a
-    // direct call here keeps the modal-close semantics independent of the
-    // parent's reload wiring.
-    await modelsStore.load();
     open.value = false;
   } finally {
     saving.value = false;

From 1a54f3963ff3973c00282fcf67e060abe877bfae Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 12:23:14 +0800
Subject: [PATCH 030/170] feat(aliases): allow PATCH to rename alias by
 changing the PK

PR feedback: the alias name was treated as immutable because it is the
table PK. SQLite (and D1) do permit UPDATEing a PRIMARY KEY column, so
operators can now rename in place from a single PATCH instead of the
delete-and-recreate workaround.

Wire change: updateAliasBody gains an optional `alias` field. When the
body name differs from the path param, the handler runs a rename
codepath that 409s on collision and 404s on a missing source row. The
existing merged-save then proceeds against the new PK.

Repo change: ModelAliasesRepo grows a `rename(old, new)` method. The
SQL impl uses a pre-flight SELECT for the collision check (driver
error shape differs between node:sqlite and D1) plus
`UPDATE model_aliases SET alias = ? WHERE alias = ?`; `meta.changes`
distinguishes notFound from success. The Memory impl mirrors the
semantics for the test path. body.alias === path.alias collapses to a
no-op so the dashboard can always send the merged shape without
branching on whether a rename was requested.
---
 .../src/control-plane/model-aliases/repo.ts   | 20 +++++
 .../control-plane/model-aliases/repo_test.ts  | 52 ++++++++++-
 .../src/control-plane/model-aliases/routes.ts | 13 ++-
 .../model-aliases/routes_test.ts              | 87 +++++++++++++++++++
 packages/gateway/src/control-plane/schemas.ts |  9 +-
 packages/gateway/src/repo/memory.ts           | 10 +++
 packages/gateway/src/repo/sql.ts              |  6 +-
 packages/gateway/src/repo/types.ts            |  5 ++
 8 files changed, 196 insertions(+), 6 deletions(-)

diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts
index 69d4d4fd1..f49c1e639 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo.ts
@@ -73,6 +73,26 @@ export const deleteAlias = async (db: SqlDatabase, alias: string): Promise<{ del
   return { deleted: (result.meta.changes ?? 0) > 0 };
 };
 
+// Updates the PK column in place. A pre-flight SELECT detects the destination
+// collision so the caller gets a structured `duplicate` reason instead of a
+// driver-specific SQLITE_CONSTRAINT thrown error (shape differs between
+// node:sqlite and D1). `meta.changes === 0` after the UPDATE means the source
+// row was gone — propagated as `notFound` for the 404 mapping.
+export const renameAlias = async (db: SqlDatabase, oldAlias: string, newAlias: string): Promise<{ ok: true } | { ok: false; reason: 'duplicate' | 'notFound' }> => {
+  if (oldAlias === newAlias) return { ok: true };
+  const conflict = await db
+    .prepare('SELECT 1 FROM model_aliases WHERE alias = ?')
+    .bind(newAlias)
+    .first<{ 1: number }>();
+  if (conflict) return { ok: false, reason: 'duplicate' };
+  const result = await db
+    .prepare('UPDATE model_aliases SET alias = ?, updated_at = unixepoch() WHERE alias = ?')
+    .bind(newAlias, oldAlias)
+    .run();
+  if ((result.meta.changes ?? 0) === 0) return { ok: false, reason: 'notFound' };
+  return { ok: true };
+};
+
 const bindValues = (alias: ModelAlias): unknown[] => [
   alias.alias,
   alias.targetModelId,
diff --git a/packages/gateway/src/control-plane/model-aliases/repo_test.ts b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
index 5f1e4fa6d..32ba3aea0 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo_test.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
@@ -1,6 +1,6 @@
 import { test } from 'vitest';
 
-import { loadAllAliases } from './repo.ts';
+import { loadAllAliases, renameAlias } from './repo.ts';
 import { createSqliteTestDb } from '../../repo/test-sqlite.ts';
 import { assertEquals, assertRejects } from '@floway-dev/test-utils';
 
@@ -120,3 +120,53 @@ test('loadAllAliases surfaces malformed upstream_ids_json as a descriptive error
 
   await assertRejects(() => loadAllAliases(db), Error, 'Malformed model_aliases upstream_ids_json for bad-upstreams');
 });
+
+test('renameAlias updates the PRIMARY KEY in place', async () => {
+  const db = await createSqliteTestDb();
+  await db.exec('DELETE FROM model_aliases');
+  await db
+    .prepare(
+      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
+    )
+    .bind('source', 'gpt-5.4', '[]', '{}', 1, 'real-only', 'Source Label', 1_700_000_000)
+    .run();
+
+  const result = await renameAlias(db, 'source', 'renamed');
+  assertEquals(result, { ok: true });
+
+  const remaining = await loadAllAliases(db);
+  assertEquals(remaining.map(a => a.alias), ['renamed']);
+  // Preserved row payload — only the PK changed; createdAt and displayName intact.
+  assertEquals(remaining[0]!.displayName, 'Source Label');
+  assertEquals(remaining[0]!.createdAt, 1_700_000_000);
+});
+
+test('renameAlias returns notFound when the source row is missing', async () => {
+  const db = await createSqliteTestDb();
+  await db.exec('DELETE FROM model_aliases');
+  const result = await renameAlias(db, 'ghost', 'new-name');
+  assertEquals(result, { ok: false, reason: 'notFound' });
+});
+
+test('renameAlias returns duplicate when the destination row already exists', async () => {
+  const db = await createSqliteTestDb();
+  await db.exec('DELETE FROM model_aliases');
+  await db
+    .prepare(
+      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)',
+    )
+    .bind('source', 'gpt-5.4', '[]', '{}', 1, 'real-only', 1_700_000_000)
+    .run();
+  await db
+    .prepare(
+      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)',
+    )
+    .bind('taken', 'gpt-5.4', '[]', '{}', 1, 'real-only', 1_700_000_001)
+    .run();
+
+  const result = await renameAlias(db, 'source', 'taken');
+  assertEquals(result, { ok: false, reason: 'duplicate' });
+  // Both rows still present.
+  const remaining = (await loadAllAliases(db)).map(a => a.alias).sort();
+  assertEquals(remaining, ['source', 'taken']);
+});
diff --git a/packages/gateway/src/control-plane/model-aliases/routes.ts b/packages/gateway/src/control-plane/model-aliases/routes.ts
index b05608ec3..611d8913e 100644
--- a/packages/gateway/src/control-plane/model-aliases/routes.ts
+++ b/packages/gateway/src/control-plane/model-aliases/routes.ts
@@ -43,12 +43,23 @@ export const updateAlias = async (c: CtxWithJson<typeof updateAliasBody>) => {
   const existing = await repo.modelAliases.getByAlias(aliasName);
   if (!existing) return c.json({ error: 'Alias not found' }, 404);
 
+  // Rename runs first so the merged save below targets the row at its new
+  // PK. When body.alias is omitted or equals the current name, the rename
+  // call is skipped entirely.
+  const nextAlias = body.alias ?? existing.alias;
+  if (nextAlias !== existing.alias) {
+    const renamed = await repo.modelAliases.rename(existing.alias, nextAlias);
+    // `notFound` means the source row vanished after the lookup above: 404, not 409.
+    if (!renamed.ok && renamed.reason === 'notFound') return c.json({ error: 'Alias not found' }, 404);
+    if (!renamed.ok) return c.json({ error: { type: 'conflict', message: `Alias "${nextAlias}" already exists` } }, 409);
+  }
+
   // Field-by-field merge so an absent field preserves the existing value.
   // `displayName` accepts an explicit null to clear the operator-set label
   // back to the synthesized fallback; we use Object.hasOwn to keep the
   // absent / null distinction that `??` would collapse.
   const merged: ModelAlias = {
-    alias: existing.alias,
+    alias: nextAlias,
     targetModelId: body.targetModelId ?? existing.targetModelId,
     upstreamIds: body.upstreamIds ?? existing.upstreamIds,
     rules: body.rules ?? existing.rules,
diff --git a/packages/gateway/src/control-plane/model-aliases/routes_test.ts b/packages/gateway/src/control-plane/model-aliases/routes_test.ts
index 4143ed1cf..bf1cd766c 100644
--- a/packages/gateway/src/control-plane/model-aliases/routes_test.ts
+++ b/packages/gateway/src/control-plane/model-aliases/routes_test.ts
@@ -204,6 +204,93 @@ test('PATCH /api/aliases/:alias returns 404 when the alias does not exist', asyn
   assertEquals(resp.status, 404);
 });
 
+test('PATCH /api/aliases/:alias renames the row when body.alias differs from the path', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.save({
+    alias: 'old-name',
+    targetModelId: 'gpt-5.4',
+    upstreamIds: ['up_a'],
+    rules: { reasoning: { effort: 'high' } },
+    visibleInModelsList: true,
+    onConflict: 'real-only',
+    displayName: 'Old Label',
+    createdAt: 1_700_000_000,
+  });
+
+  const resp = await requestApp('/api/aliases/old-name', authedJson(adminSession, 'PATCH', {
+    alias: 'new-name',
+    rules: { reasoning: { effort: 'medium' } },
+  }));
+  assertEquals(resp.status, 200);
+  const updated = (await resp.json()) as SerializedModelAlias;
+  // Response carries the new alias and the patched rules; preserved fields stay intact.
+  assertEquals(updated.alias, 'new-name');
+  assertEquals(updated.target_model_id, 'gpt-5.4');
+  assertEquals(updated.upstream_ids, ['up_a']);
+  assertEquals(updated.rules, { reasoning: { effort: 'medium' } });
+  assertEquals(updated.display_name, 'Old Label');
+  assertEquals(updated.created_at, 1_700_000_000);
+
+  // Repo state: old row gone, new row present.
+  assertEquals(await repo.modelAliases.getByAlias('old-name'), null);
+  const stored = await repo.modelAliases.getByAlias('new-name');
+  assertEquals(stored?.alias, 'new-name');
+  assertEquals(stored?.rules, { reasoning: { effort: 'medium' } });
+});
+
+test('PATCH /api/aliases/:alias returns 409 when body.alias collides with an existing row', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.save({
+    alias: 'source',
+    targetModelId: 'gpt-5.4',
+    upstreamIds: [],
+    rules: {},
+    visibleInModelsList: true,
+    onConflict: 'real-only',
+    createdAt: 1_700_000_000,
+  });
+  await repo.modelAliases.save({
+    alias: 'taken',
+    targetModelId: 'gpt-5.4',
+    upstreamIds: [],
+    rules: {},
+    visibleInModelsList: true,
+    onConflict: 'real-only',
+    createdAt: 1_700_000_001,
+  });
+
+  const resp = await requestApp('/api/aliases/source', authedJson(adminSession, 'PATCH', { alias: 'taken' }));
+  assertEquals(resp.status, 409);
+  const body = (await resp.json()) as { error: { type: string; message: string } };
+  assertEquals(body.error.type, 'conflict');
+
+  // Both rows untouched.
+  assertEquals((await repo.modelAliases.getByAlias('source'))?.alias, 'source');
+  assertEquals((await repo.modelAliases.getByAlias('taken'))?.alias, 'taken');
+});
+
+test('PATCH /api/aliases/:alias treats body.alias === path as a no-op rename', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.save({
+    alias: 'same-name',
+    targetModelId: 'gpt-5.4',
+    upstreamIds: [],
+    rules: {},
+    visibleInModelsList: true,
+    onConflict: 'real-only',
+    createdAt: 1_700_000_000,
+  });
+
+  const resp = await requestApp('/api/aliases/same-name', authedJson(adminSession, 'PATCH', {
+    alias: 'same-name',
+    targetModelId: 'claude-opus-4-6',
+  }));
+  assertEquals(resp.status, 200);
+  const updated = (await resp.json()) as SerializedModelAlias;
+  assertEquals(updated.alias, 'same-name');
+  assertEquals(updated.target_model_id, 'claude-opus-4-6');
+});
+
 test('PATCH /api/aliases/:alias requires admin auth', async () => {
   const { repo, adminSession: _adminSession, apiKey } = await setupAppTest();
   await repo.modelAliases.save({
diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
index 60f5df604..f81c5df42 100644
--- a/packages/gateway/src/control-plane/schemas.ts
+++ b/packages/gateway/src/control-plane/schemas.ts
@@ -694,10 +694,13 @@ export const createAliasBody = z.object({
   displayName: z.string().min(1).optional(),
 });
 
-// PATCH accepts a partial shape. `displayName` is nullable so the operator
-// can clear an existing label back to the synthesized fallback; absent vs.
-// null is meaningful and propagated through to the handler via Object.hasOwn.
+// PATCH accepts a partial shape. `alias` is the row's primary key — when
+// present and different from the path param, the handler renames the row
+// (409 on collision). `displayName` is nullable so the operator can clear
+// an existing label back to the synthesized fallback; absent vs. null is
+// meaningful and propagated through to the handler via Object.hasOwn.
 export const updateAliasBody = z.object({
+  alias: aliasNameSchema.optional(),
   targetModelId: z.string().min(1).optional(),
   upstreamIds: upstreamIdsSchema.optional(),
   rules: aliasRulesSchema.optional(),
diff --git a/packages/gateway/src/repo/memory.ts b/packages/gateway/src/repo/memory.ts
index c424426ee..d7492938f 100644
--- a/packages/gateway/src/repo/memory.ts
+++ b/packages/gateway/src/repo/memory.ts
@@ -948,6 +948,16 @@ export class MemoryModelAliasesRepo implements ModelAliasesRepo {
     return Promise.resolve();
   }
 
+  rename(oldAlias: string, newAlias: string): Promise<{ ok: true } | { ok: false; reason: 'duplicate' | 'notFound' }> {
+    if (oldAlias === newAlias) return Promise.resolve({ ok: true });
+    if (this.rows.has(newAlias)) return Promise.resolve({ ok: false, reason: 'duplicate' });
+    const existing = this.rows.get(oldAlias);
+    if (!existing) return Promise.resolve({ ok: false, reason: 'notFound' });
+    this.rows.delete(oldAlias);
+    this.rows.set(newAlias, { ...existing, alias: newAlias });
+    return Promise.resolve({ ok: true });
+  }
+
   delete(alias: string): Promise<{ deleted: boolean }> {
     return Promise.resolve({ deleted: this.rows.delete(alias) });
   }
diff --git a/packages/gateway/src/repo/sql.ts b/packages/gateway/src/repo/sql.ts
index 044f79aae..75f814178 100644
--- a/packages/gateway/src/repo/sql.ts
+++ b/packages/gateway/src/repo/sql.ts
@@ -35,7 +35,7 @@ import type {
   UsersRepo,
 } from './types.ts';
 import { serializeStoredConfig, serializeStoredState } from './upstream-json.ts';
-import { deleteAlias, getAliasByName, insertAlias, loadAllAliases, saveAlias } from '../control-plane/model-aliases/repo.ts';
+import { deleteAlias, getAliasByName, insertAlias, loadAllAliases, renameAlias, saveAlias } from '../control-plane/model-aliases/repo.ts';
 import type { ModelAlias } from '../control-plane/model-aliases/types.ts';
 import { latencyBucketForMs } from '../shared/performance-histogram.ts';
 import { generateSessionToken } from '../shared/session-tokens.ts';
@@ -1641,6 +1641,10 @@ class SqlModelAliasesRepo implements ModelAliasesRepo {
     return saveAlias(this.db, alias);
   }
 
+  rename(oldAlias: string, newAlias: string): Promise<{ ok: true } | { ok: false; reason: 'duplicate' | 'notFound' }> {
+    return renameAlias(this.db, oldAlias, newAlias);
+  }
+
   delete(alias: string): Promise<{ deleted: boolean }> {
     return deleteAlias(this.db, alias);
   }
diff --git a/packages/gateway/src/repo/types.ts b/packages/gateway/src/repo/types.ts
index 41190352a..7d10f90ca 100644
--- a/packages/gateway/src/repo/types.ts
+++ b/packages/gateway/src/repo/types.ts
@@ -349,6 +349,11 @@ export interface ModelAliasesRepo {
   // UPSERT — used by the PATCH update path; preserves created_at on re-save
   // and bumps updated_at.
   save(alias: ModelAlias): Promise<void>;
+  // Updates the PK in place. Returns `notFound` when the source row is
+  // missing, `duplicate` when the destination name already exists; the
+  // route layer maps those to 404 / 409. SQLite (and D1) permit UPDATEing
+  // a PRIMARY KEY column.
+  rename(oldAlias: string, newAlias: string): Promise<{ ok: true } | { ok: false; reason: 'duplicate' | 'notFound' }>;
   // Returns whether a row was actually removed; routes treat false as 404.
   delete(alias: string): Promise<{ deleted: boolean }>;
 }

From 7763ad9a2941b029de5c7754861fc74dad29635d Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 12:23:24 +0800
Subject: [PATCH 031/170] fix(ui): dark theme combobox component with creatable
 values
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The alias edit dialog previously paired `<Input>` with HTML `<datalist>`
for its enum-ish fields (effort, summary, service tier, anthropic beta).
Browsers render the `<datalist>` popover with a white background and
dark text — jarring inside the dashboard's dark theme, and visually
inconsistent with the rest of the project's reka-ui-backed dropdowns.

New `<Combobox>` wraps reka-ui's `ComboboxRoot` with the same surface
palette as `Select.vue` and `TagCombobox.vue`. A synthesized
"Use '<typed>'" row keeps free-form values selectable via keyboard:
reka's Combobox only registers items present in the DOM, so a typed
value with no matching suggestion needs an explicit committable row to
flow through the arrow-keys + Enter path. The component also watches
the typed query and writes it to the model on every keystroke so a
submit that fires without a prior selection still carries the latest
text — alias rule fields pass through to upstream verbatim and the
gateway intentionally does not enum-gate them.
---
 packages/ui/src/Combobox.vue | 159 +++++++++++++++++++++++++++++++++++
 packages/ui/src/index.ts     |   1 +
 2 files changed, 160 insertions(+)
 create mode 100644 packages/ui/src/Combobox.vue

diff --git a/packages/ui/src/Combobox.vue b/packages/ui/src/Combobox.vue
new file mode 100644
index 000000000..6526888a4
--- /dev/null
+++ b/packages/ui/src/Combobox.vue
@@ -0,0 +1,159 @@
+<script setup lang="ts">
+// Single-select combobox with free-form input. Operator can type a value
+// the suggestion list does not contain and the typed string becomes the
+// model value verbatim — alias rule fields (effort, summary, service
+// tier, ...) pass through to the upstream and the gateway intentionally
+// does not enum-gate them, so unknown values must round-trip.
+//
+// Visual contract matches Select.vue / TagCombobox.vue (dark popover,
+// surface-700 trigger). HTML5 `<input list>` + `<datalist>` would have
+// been one line, but every major browser renders the datalist popover
+// with a white background and dark text, which is jarring inside the
+// dashboard's dark theme.
+import {
+  ComboboxAnchor, ComboboxContent, ComboboxEmpty, ComboboxGroup, ComboboxInput,
+  ComboboxItem, ComboboxItemIndicator, ComboboxPortal, ComboboxRoot, ComboboxTrigger,
+  ComboboxViewport, useFilter,
+} from 'reka-ui';
+import { computed, nextTick, ref, watch } from 'vue';
+
+interface Item {
+  value: string;
+  label?: string;
+}
+
+const value = defineModel<string>({ required: true });
+
+const props = withDefaults(defineProps<{
+  /** Suggestion list. Each item's `value` is what gets committed; `label` is the visible row text. */
+  items: readonly (string | Item)[];
+  placeholder?: string;
+  disabled?: boolean;
+  inputmode?: 'text' | 'numeric' | 'decimal';
+  /** Tailwind classes applied to the trigger input (e.g. `font-mono`). */
+  inputClass?: string;
+  /** Override the default "no matches" copy shown when the typed value already matches nothing. */
+  emptyText?: string;
+}>(), {
+  emptyText: 'No matches',
+});
+
+const { contains } = useFilter({ sensitivity: 'base' });
+
+// Normalize the items list to a single shape so the template only deals
+// with `{ value, label }`. Strings collapse to `{ value: s, label: s }`.
+const normalizedItems = computed<Item[]>(() => props.items.map(it =>
+  typeof it === 'string' ? { value: it, label: it } : { value: it.value, label: it.label ?? it.value }));
+
+// query mirrors value so the input always shows the committed string. Reka's
+// ComboboxInput owns the typed text via its own v-model; we keep them in
+// sync so an outside change to the model (form reset, prefill) updates the
+// visible text too.
+const query = ref(value.value);
+watch(value, v => { if (v !== query.value) query.value = v; });
+
+// Free-form commit: every change to the typed query is written to the
+// model, so the dialog's submit handler always reads the latest text
+// even when the operator never clicks a suggestion row. ComboboxRoot's
+// own onChange path still fires for clicked rows and just re-writes the
+// same value.
+watch(query, q => { value.value = q; });
+
+// Filter suggestions by the typed query against either value or label.
+// When the query exactly matches an item we still show it (the user can
+// re-pick the same row), but we hide a synthesized "create" row in that
+// case — it would be a no-op.
+const filteredItems = computed(() => normalizedItems.value.filter(item =>
+  query.value === '' || contains(item.label ?? item.value, query.value) || contains(item.value, query.value)));
+
+const trimmedQuery = computed(() => query.value.trim());
+const hasExactMatch = computed(() => normalizedItems.value.some(item => item.value === trimmedQuery.value));
+const showCreateOption = computed(() => trimmedQuery.value !== '' && !hasExactMatch.value);
+
+const open = ref(false);
+
+// Reka's Combobox only registers items present in the DOM. When the operator
+// types a brand-new value, surface a synthesized "Use 'foo'" row so the
+// arrow keys + Enter path still commits it. Without this row Enter on a
+// brand-new value falls back to default form behavior.
+const commitTyped = async () => {
+  value.value = trimmedQuery.value;
+  open.value = false;
+  await nextTick();
+};
+</script>
+
+<template>
+  <ComboboxRoot
+    v-model="value"
+    v-model:open="open"
+    :disabled="disabled"
+    :display-value="(v: string) => v"
+  >
+    <ComboboxAnchor as-child>
+      <div class="relative w-full">
+        <ComboboxInput
+          v-model="query"
+          :placeholder="placeholder"
+          :disabled="disabled"
+          :inputmode="inputmode"
+          :class="[
+            'h-9 w-full rounded-[10px] border border-white/[0.14] bg-surface-700 pl-3 pr-9 text-sm text-white',
+            'transition-colors hover:border-white/25',
+            'focus:outline-none focus:border-accent-cyan/50 focus:ring-1 focus:ring-accent-cyan/30',
+            'placeholder:text-gray-600',
+            'disabled:opacity-50 disabled:cursor-not-allowed',
+            inputClass,
+          ]"
+        />
+        <ComboboxTrigger
+          class="absolute inset-y-0 right-0 grid w-9 place-items-center text-gray-400 hover:text-gray-200"
+          tabindex="-1"
+        >
+          <i class="i-lucide-chevrons-up-down size-3.5" />
+        </ComboboxTrigger>
+      </div>
+    </ComboboxAnchor>
+
+    <ComboboxPortal>
+      <ComboboxContent
+        position="popper"
+        :side-offset="4"
+        class="z-50 max-h-72 w-[--reka-combobox-trigger-width] overflow-hidden rounded-[10px] border border-white/[0.06] bg-surface-800 text-white shadow-xl"
+      >
+        <ComboboxViewport class="p-1">
+          <ComboboxEmpty v-if="!showCreateOption" class="px-2 py-1.5 text-xs text-gray-500">
+            {{ emptyText }}
+          </ComboboxEmpty>
+          <ComboboxGroup>
+            <ComboboxItem
+              v-if="showCreateOption"
+              :value="trimmedQuery"
+              class="relative flex cursor-pointer select-none items-center rounded-sm py-1.5 pl-7 pr-2 text-sm text-accent-cyan outline-none data-[highlighted]:bg-accent-cyan/10"
+              @select="commitTyped"
+            >
+              <span class="absolute left-2 flex size-3.5 items-center justify-center">
+                <i class="i-lucide-plus size-3.5" />
+              </span>
+              <span class="truncate">Use "<span class="font-mono">{{ trimmedQuery }}</span>"</span>
+            </ComboboxItem>
+            <ComboboxItem
+              v-for="item in filteredItems"
+              :key="item.value"
+              :value="item.value"
+              class="relative flex cursor-pointer select-none items-center rounded-sm py-1.5 pl-7 pr-2 text-sm text-white outline-none data-[highlighted]:bg-accent-cyan/10 data-[highlighted]:text-accent-cyan"
+            >
+              <span class="absolute left-2 flex size-3.5 items-center justify-center">
+                <ComboboxItemIndicator>
+                  <i class="i-lucide-check size-3.5 text-accent-cyan" />
+                </ComboboxItemIndicator>
+              </span>
+              <span class="truncate">{{ item.label }}</span>
+              <span v-if="item.label !== item.value" class="ml-auto pl-3 font-mono text-xs text-gray-500">{{ item.value }}</span>
+            </ComboboxItem>
+          </ComboboxGroup>
+        </ComboboxViewport>
+      </ComboboxContent>
+    </ComboboxPortal>
+  </ComboboxRoot>
+</template>
diff --git a/packages/ui/src/index.ts b/packages/ui/src/index.ts
index afb922da4..4f9afa695 100644
--- a/packages/ui/src/index.ts
+++ b/packages/ui/src/index.ts
@@ -3,6 +3,7 @@ export { default as Button } from './Button.vue';
 export { default as Card } from './Card.vue';
 export { default as Checkbox } from './Checkbox.vue';
 export { default as Code } from './Code.vue';
+export { default as Combobox } from './Combobox.vue';
 export { default as Dialog } from './Dialog.vue';
 export { default as Input } from './Input.vue';
 export { default as NumberField } from './NumberField.vue';

From a5e729ada5438ca34e0f0b993e15fd8619d9e64b Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 12:23:42 +0800
Subject: [PATCH 032/170] refactor(web): flatten alias edit dialog and trim
 AliasRow
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

PR feedback on the alias editor:

- The alias name was a disabled field with a "delete and recreate to
  rename" hint. Operators wanted in-place rename. The input is now
  editable and the save handler PATCHes the row at its *original* PK
  with `body.alias = <new>`; the backend route maps that to a rename.
- Alias name and Target model id share a two-column first row (mirrors
  the CustomConfigPanel `grid grid-cols-1 gap-3 sm:grid-cols-2`
  pattern) so the header reads at a glance.
- On-conflict is no longer a label-only Select. Each option carries a
  one-line explanation rendered through Select's `description` slot —
  same pattern as Auth Style in CustomConfigPanel. Replaces the cryptic
  `real-only — alias hidden when target id collides` style copy with
  "Real model wins" / "Alias replaces real" plus the operational
  consequence underneath.
- The Reasoning section dropped its inner glass card and its
  None/Effort/Budget/Adaptive facet radio. Every input is now visible
  simultaneously; the wire schema already permits all four facets to
  coexist (apply layer has adaptive-first precedence). Forcing
  mutual-exclusivity at the UI level meant operators had to nuke an
  existing knob before setting another, which fought the actual
  workflow.
- Every enum-ish field switched from HTML `<datalist>` to the new
  `<Combobox>` so the popover finally matches the dark theme.

AliasRow loses its `on_conflict` badge: the row no longer prints
`real-only` / `alias-only` / `both-…` as a coloured chip — operator
feedback was that the inline label was noise and the same setting is
clearly visible inside the edit dialog. Upstream-id pills now render
in the row when the alias whitelists upstreams, replacing the
previously implicit "this alias is scoped" signal.
---
 .../components/alias-edit/AliasEditDialog.vue | 235 +++++++++---------
 .../alias-edit/AliasEditDialog_test.ts        |  42 +++-
 apps/web/src/components/settings/AliasRow.vue |  26 +-
 .../src/components/settings/AliasRow_test.ts  |  19 ++
 4 files changed, 188 insertions(+), 134 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index ce254521c..7b238d007 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -1,10 +1,10 @@
 <script setup lang="ts">
 // Operator editor for one alias. The form is intentionally Goal-2-friendly:
-// every "enum" field below is rendered as a plain text input with hints
+// every "enum" field below is rendered as a combobox with suggestions
 // pulled from the target model's chat metadata (when available) and from
-// well-known wire values. The dashboard never gates the value set so a new
-// upstream-side level (e.g. an "xhigh" effort that shipped this morning)
-// can flow through without a frontend release.
+// well-known wire values, but the operator can type any value verbatim so
+// a new upstream-side level (e.g. an "xhigh" effort that shipped this
+// morning) flows through without a frontend release.
 
 import { computed, ref } from 'vue';
 
@@ -12,7 +12,7 @@ import { callApi, useApi } from '../../api/client.ts';
 import type { ModelAlias, ModelAliasOnConflict } from '../../api/types.ts';
 import { useModelsStore } from '../../composables/useModels.ts';
 import { useUpstreamsStore } from '../../composables/useUpstreams.ts';
-import { Button, Checkbox, Dialog, Input, Select, TagCombobox } from '@floway-dev/ui';
+import { Button, Checkbox, Combobox, Dialog, Input, Select, TagCombobox } from '@floway-dev/ui';
 
 // Mutable mirror of @floway-dev/protocols ModelAliasRules — the wire shape
 // is `readonly` at the contract boundary, but the form mutates it in place
@@ -33,7 +33,7 @@ interface MutableRules {
 const open = defineModel<boolean>('open', { required: true });
 
 const props = defineProps<{
-  /** null = create; non-null = edit (alias is the PK, so editing it is disabled). */
+  /** null = create; non-null = edit. The alias name is editable in both modes. */
   record: ModelAlias | null;
 }>();
 
@@ -56,23 +56,16 @@ const upstreamIds = ref<string[]>([...(props.record?.upstream_ids ?? [])]);
 const visibleInModelsList = ref(props.record?.visible_in_models_list ?? true);
 const onConflict = ref<ModelAliasOnConflict>(props.record?.on_conflict ?? 'real-only');
 
-// Reasoning is modeled as a tagged radio + a separate summary input so the
-// three approaches (effort preset / token budget / adaptive) are mutually
-// exclusive in the wire shape but visible to the operator at a glance.
-type ReasoningMode = 'none' | 'effort' | 'budget' | 'adaptive';
-
+// Reasoning fields are flat: every input is always visible. The wire schema
+// still allows the four facets (effort / budget / adaptive / summary) to
+// coexist; the apply layer's adaptive-first precedence handles the runtime
+// resolution. Forcing mutual exclusivity at the UI level previously meant
+// operators had to clear an existing knob before setting another, which
+// fought their actual workflow.
 const initialReasoning = props.record?.rules.reasoning;
-const initialReasoningMode: ReasoningMode = initialReasoning?.effort !== undefined
-  ? 'effort'
-  : initialReasoning?.budgetTokens !== undefined
-    ? 'budget'
-    : initialReasoning?.adaptive === true
-      ? 'adaptive'
-      : 'none';
-
-const reasoningMode = ref<ReasoningMode>(initialReasoningMode);
 const reasoningEffort = ref(initialReasoning?.effort ?? '');
 const reasoningBudgetTokens = ref<string>(initialReasoning?.budgetTokens === undefined ? '' : String(initialReasoning.budgetTokens));
+const reasoningAdaptive = ref(initialReasoning?.adaptive === true);
 const reasoningSummary = ref(initialReasoning?.summary ?? '');
 
 const verbosity = ref(props.record?.rules.verbosity ?? '');
@@ -111,11 +104,37 @@ const SUMMARY_HINTS = ['auto', 'concise', 'detailed', 'omitted'];
 const VERBOSITY_HINTS = ['low', 'medium', 'high'];
 const SERVICE_TIER_HINTS = ['auto', 'default', 'flex', 'scale', 'priority', 'standard_only'];
 
-const onConflictOptions: { value: ModelAliasOnConflict; label: string }[] = [
-  { value: 'real-only', label: 'real-only — alias hidden when target id collides' },
-  { value: 'alias-only', label: 'alias-only — alias replaces a colliding real id' },
-  { value: 'both-real-first', label: 'both — real first' },
-  { value: 'both-alias-first', label: 'both — alias first' },
+// Each on-conflict option carries a one-line explanation surfaced in the
+// Select popover so an operator picks by what happens at request time, not
+// by guessing what `real-only` / `alias-only` mean. Mirrors the Auth Style
+// pattern in CustomConfigPanel.
+interface OnConflictOption {
+  value: ModelAliasOnConflict;
+  label: string;
+  explanation: string;
+}
+
+const onConflictOptions: OnConflictOption[] = [
+  {
+    value: 'real-only',
+    label: 'Real model wins',
+    explanation: "When an upstream serves a real model with the same id as this alias, the real model is used and the alias's rules don't apply on that upstream.",
+  },
+  {
+    value: 'alias-only',
+    label: 'Alias replaces real',
+    explanation: 'The alias always wins, even when an upstream serves a real model with the same id.',
+  },
+  {
+    value: 'both-real-first',
+    label: 'Both, real first',
+    explanation: 'Both entries appear; routing prefers the real model when present, falling back to the alias.',
+  },
+  {
+    value: 'both-alias-first',
+    label: 'Both, alias first',
+    explanation: 'Both entries appear; routing prefers the alias when present, falling back to the real model.',
+  },
 ];
 
 // --- save ---
@@ -130,23 +149,23 @@ const trimOrUndef = (s: string): string | undefined => {
 
 const buildRules = (): MutableRules | { error: string } => {
   const rules: MutableRules = {};
+  const reasoning: NonNullable<MutableRules['reasoning']> = {};
+
+  const effort = trimOrUndef(reasoningEffort.value);
+  if (effort !== undefined) reasoning.effort = effort;
 
-  if (reasoningMode.value === 'effort') {
-    const v = trimOrUndef(reasoningEffort.value);
-    if (v === undefined) return { error: 'Reasoning effort cannot be empty' };
-    rules.reasoning = { effort: v };
-  } else if (reasoningMode.value === 'budget') {
-    const raw = reasoningBudgetTokens.value.trim();
-    if (raw === '' || !/^\d+$/.test(raw)) return { error: 'Reasoning budget tokens must be a non-negative integer' };
-    rules.reasoning = { budgetTokens: Number(raw) };
-  } else if (reasoningMode.value === 'adaptive') {
-    rules.reasoning = { adaptive: true };
+  const budgetRaw = reasoningBudgetTokens.value.trim();
+  if (budgetRaw !== '') {
+    if (!/^\d+$/.test(budgetRaw)) return { error: 'Reasoning budget tokens must be a non-negative integer' };
+    reasoning.budgetTokens = Number(budgetRaw);
   }
 
+  if (reasoningAdaptive.value) reasoning.adaptive = true;
+
   const summary = trimOrUndef(reasoningSummary.value);
-  if (summary !== undefined) {
-    rules.reasoning = { ...(rules.reasoning ?? {}), summary };
-  }
+  if (summary !== undefined) reasoning.summary = summary;
+
+  if (Object.keys(reasoning).length > 0) rules.reasoning = reasoning;
 
   const verb = trimOrUndef(verbosity.value);
   if (verb !== undefined) rules.verbosity = verb;
@@ -162,7 +181,7 @@ const save = async () => {
   saveError.value = null;
   const trimmedAlias = aliasName.value.trim();
   const trimmedTarget = targetModelId.value.trim();
-  if (mode.value === 'create' && trimmedAlias === '') { saveError.value = 'Alias name is required'; return; }
+  if (trimmedAlias === '') { saveError.value = 'Alias name is required'; return; }
   if (trimmedTarget === '') { saveError.value = 'Target model id is required'; return; }
 
   const rulesOrErr = buildRules();
@@ -186,9 +205,13 @@ const save = async () => {
       }));
       if (error) { saveError.value = error.message; return; }
     } else if (props.record) {
+      // PATCH addresses the row at its *original* PK; `alias` in the body
+      // requests a rename when it differs. The backend route handles the
+      // 409-on-collision path and the safe no-op when nothing changed.
       const { error } = await callApi(() => api.api.aliases[':alias'].$patch({
         param: { alias: props.record!.alias },
         json: {
+          alias: trimmedAlias,
           targetModelId: trimmedTarget,
           upstreamIds: [...upstreamIds.value],
           rules: rulesOrErr,
@@ -210,13 +233,6 @@ const save = async () => {
 };
 
 const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Alias: ${props.record?.alias ?? ''}`);
-
-const reasoningModeOptions: { value: ReasoningMode; label: string }[] = [
-  { value: 'none', label: 'None' },
-  { value: 'effort', label: 'Effort preset' },
-  { value: 'budget', label: 'Token budget' },
-  { value: 'adaptive', label: 'Adaptive' },
-];
 </script>
 
 <template>
@@ -226,36 +242,36 @@ const reasoningModeOptions: { value: ReasoningMode; label: string }[] = [
         {{ saveError }}
       </p>
 
-      <div class="grid grid-cols-1 gap-4 md:grid-cols-2">
-        <div class="space-y-1.5">
-          <label class="block text-xs font-medium text-gray-500">Alias name</label>
-          <Input v-model="aliasName" placeholder="codex-auto-review" :disabled="mode === 'edit'" class="font-mono" />
-          <p v-if="mode === 'edit'" class="text-xs text-gray-600">Alias names are the primary key and cannot be changed; delete and recreate to rename.</p>
+      <div class="grid grid-cols-1 gap-3 sm:grid-cols-2">
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Alias name</label>
+          <Input v-model="aliasName" placeholder="codex-auto-review" class="font-mono" />
         </div>
 
-        <div class="space-y-1.5">
-          <label class="block text-xs font-medium text-gray-500">Display name <span class="text-gray-600">(optional)</span></label>
-          <Input v-model="displayName" placeholder="Codex Auto Review" />
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Target model id</label>
+          <Combobox v-model="targetModelId" :items="modelOptions" placeholder="gpt-5.4" input-class="font-mono" />
         </div>
       </div>
 
-      <div class="space-y-1.5">
-        <label class="block text-xs font-medium text-gray-500">Target model id</label>
-        <Input v-model="targetModelId" placeholder="gpt-5.4" class="font-mono" list="alias-model-options" />
-        <datalist id="alias-model-options">
-          <option v-for="opt in modelOptions" :key="opt.value" :value="opt.value">{{ opt.label }}</option>
-        </datalist>
+      <div>
+        <label class="mb-1.5 block text-xs font-medium text-gray-500">Display name <span class="text-gray-600">(optional)</span></label>
+        <Input v-model="displayName" placeholder="Codex Auto Review" />
       </div>
 
-      <div class="space-y-1.5">
-        <label class="block text-xs font-medium text-gray-500">Upstreams <span class="text-gray-600">(leave empty to allow any upstream that serves the target)</span></label>
+      <div>
+        <label class="mb-1.5 block text-xs font-medium text-gray-500">Upstreams <span class="text-gray-600">(leave empty to allow any upstream that serves the target)</span></label>
         <TagCombobox v-model="upstreamIds" :items="upstreamItems" placeholder="Pick an upstream..." empty-text="No upstreams match" />
       </div>
 
-      <div class="grid grid-cols-1 gap-4 md:grid-cols-2">
-        <div class="space-y-1.5">
-          <label class="block text-xs font-medium text-gray-500">On conflict</label>
-          <Select v-model="onConflict" :options="onConflictOptions" />
+      <div class="grid grid-cols-1 gap-3 sm:grid-cols-2">
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">On conflict</label>
+          <Select v-model="onConflict" :options="onConflictOptions">
+            <template #description="{ option }">
+              <p class="text-[11px] text-gray-500">{{ option.explanation }}</p>
+            </template>
+          </Select>
         </div>
 
         <div class="flex items-center gap-2 pt-6">
@@ -264,34 +280,26 @@ const reasoningModeOptions: { value: ReasoningMode; label: string }[] = [
         </div>
       </div>
 
-      <section class="space-y-3 rounded-md border border-white/[0.06] bg-surface-800/50 p-4">
+      <div class="space-y-4">
         <h4 class="text-xs font-semibold uppercase tracking-wide text-gray-400">Reasoning</h4>
-        <div class="flex flex-wrap gap-3">
-          <label v-for="opt in reasoningModeOptions" :key="opt.value" class="inline-flex items-center gap-2 text-sm text-gray-300">
-            <input
-              type="radio"
-              :value="opt.value"
-              :checked="reasoningMode === opt.value"
-              :disabled="opt.value === 'adaptive' && !adaptiveSupported && reasoningMode !== 'adaptive'"
-              @change="reasoningMode = opt.value"
-            >
-            {{ opt.label }}
-          </label>
-        </div>
 
-        <div v-if="reasoningMode === 'effort'" class="space-y-1.5">
-          <label class="block text-xs font-medium text-gray-500">Effort</label>
-          <Input v-model="reasoningEffort" placeholder="high" list="alias-effort-options" />
-          <datalist id="alias-effort-options">
-            <option v-for="v in effortSuggestions" :key="v" :value="v" />
-          </datalist>
-          <p v-if="effortSuggestions.length > 0" class="text-xs text-gray-600">Target supports: {{ effortSuggestions.join(', ') }}</p>
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Effort</label>
+          <Combobox v-model="reasoningEffort" :items="effortSuggestions" placeholder="high" />
+          <p v-if="effortSuggestions.length > 0" class="mt-1 text-xs text-gray-600">Target supports: {{ effortSuggestions.join(', ') }}</p>
         </div>
 
-        <div v-if="reasoningMode === 'budget'" class="space-y-1.5">
-          <label class="block text-xs font-medium text-gray-500">Budget tokens</label>
-          <Input v-model="reasoningBudgetTokens" placeholder="4096" inputmode="numeric" class="font-mono" />
-          <p v-if="budgetMin !== undefined || budgetMax !== undefined" class="text-xs text-gray-600">
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Budget tokens</label>
+          <Input
+            v-model="reasoningBudgetTokens"
+            placeholder="4096"
+            inputmode="numeric"
+            class="font-mono"
+            :min="budgetMin"
+            :max="budgetMax"
+          />
+          <p v-if="budgetMin !== undefined || budgetMax !== undefined" class="mt-1 text-xs text-gray-600">
             Target range:
             <template v-if="budgetMin !== undefined">min {{ budgetMin }}</template>
             <template v-if="budgetMin !== undefined && budgetMax !== undefined">, </template>
@@ -299,39 +307,36 @@ const reasoningModeOptions: { value: ReasoningMode; label: string }[] = [
           </p>
         </div>
 
-        <div v-if="reasoningMode === 'adaptive' && !adaptiveSupported" class="rounded-md border border-amber-500/30 bg-amber-500/10 px-2 py-1 text-xs text-amber-300">
-          Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Summary</label>
+          <Combobox v-model="reasoningSummary" :items="SUMMARY_HINTS" placeholder="auto" />
         </div>
 
-        <div class="space-y-1.5">
-          <label class="block text-xs font-medium text-gray-500">Reasoning summary <span class="text-gray-600">(optional)</span></label>
-          <Input v-model="reasoningSummary" placeholder="auto" list="alias-summary-options" />
-          <datalist id="alias-summary-options">
-            <option v-for="v in SUMMARY_HINTS" :key="v" :value="v" />
-          </datalist>
+        <div>
+          <label class="inline-flex items-center gap-2">
+            <Checkbox v-model="reasoningAdaptive" />
+            <span class="text-sm text-gray-300">Adaptive reasoning</span>
+          </label>
+          <p v-if="reasoningAdaptive && !adaptiveSupported" class="mt-1 rounded-md border border-amber-500/30 bg-amber-500/10 px-2 py-1 text-xs text-amber-300">
+            Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.
+          </p>
         </div>
-      </section>
-
-      <div class="grid grid-cols-1 gap-4 md:grid-cols-2">
-        <div class="space-y-1.5">
-          <label class="block text-xs font-medium text-gray-500">Verbosity</label>
-          <Input v-model="verbosity" placeholder="medium" list="alias-verbosity-options" />
-          <datalist id="alias-verbosity-options">
-            <option v-for="v in VERBOSITY_HINTS" :key="v" :value="v" />
-          </datalist>
+      </div>
+
+      <div class="grid grid-cols-1 gap-3 sm:grid-cols-2">
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Verbosity</label>
+          <Combobox v-model="verbosity" :items="VERBOSITY_HINTS" placeholder="medium" />
         </div>
 
-        <div class="space-y-1.5">
-          <label class="block text-xs font-medium text-gray-500">Service tier</label>
-          <Input v-model="serviceTier" placeholder="auto" list="alias-tier-options" />
-          <datalist id="alias-tier-options">
-            <option v-for="v in SERVICE_TIER_HINTS" :key="v" :value="v" />
-          </datalist>
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Service tier</label>
+          <Combobox v-model="serviceTier" :items="SERVICE_TIER_HINTS" placeholder="auto" />
         </div>
       </div>
 
-      <div class="space-y-1.5">
-        <label class="block text-xs font-medium text-gray-500">Anthropic beta headers <span class="text-gray-600">(comma- or Enter-separated tokens)</span></label>
+      <div>
+        <label class="mb-1.5 block text-xs font-medium text-gray-500">Anthropic beta headers <span class="text-gray-600">(comma- or Enter-separated tokens)</span></label>
         <TagCombobox v-model="anthropicBeta" :items="[]" placeholder="extended-cache-ttl-2025-04-11" empty-text="Type a header token and press Enter" />
       </div>
 
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
index 976a24a18..bd6870b2d 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
+++ b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
@@ -112,7 +112,7 @@ test('AliasEditDialog (create mode) posts a payload matching the form state', as
   });
 });
 
-test('AliasEditDialog (edit mode) pre-fills the form and PATCHes the diff', async () => {
+test('AliasEditDialog (edit mode) pre-fills the form and PATCHes the merged shape', async () => {
   const { default: AliasEditDialog } = await import('./AliasEditDialog.vue');
   const open = ref(true);
   const record: ModelAlias = {
@@ -132,10 +132,10 @@ test('AliasEditDialog (edit mode) pre-fills the form and PATCHes the diff', asyn
     template: '<AliasEditDialog v-model:open="open" :record="record" />',
   }));
 
-  // Alias name input should be disabled in edit mode (PK is immutable).
+  // Alias name input is editable in edit mode — the PK can now be renamed.
   const aliasInput = wrapper.find('input[placeholder="codex-auto-review"]');
   expect(aliasInput.exists()).toBe(true);
-  expect((aliasInput.element as HTMLInputElement).disabled).toBe(true);
+  expect((aliasInput.element as HTMLInputElement).disabled).toBe(false);
   expect((aliasInput.element as HTMLInputElement).value).toBe('opus-xhigh');
 
   // Display name pre-filled.
@@ -157,6 +157,7 @@ test('AliasEditDialog (edit mode) pre-fills the form and PATCHes the diff', asyn
   const args = patchAliasMock.mock.calls[0]![0];
   expect(args.param.alias).toBe('opus-xhigh');
   expect(args.json).toMatchObject({
+    alias: 'opus-xhigh',
     targetModelId: 'gpt-5.4',
     upstreamIds: ['up_anth'],
     rules: { reasoning: { effort: 'xhigh' } },
@@ -165,3 +166,38 @@ test('AliasEditDialog (edit mode) pre-fills the form and PATCHes the diff', asyn
     displayName: 'Opus XHigh',
   });
 });
+
+test('AliasEditDialog (edit mode) PATCHes the original alias when the operator renames it', async () => {
+  const { default: AliasEditDialog } = await import('./AliasEditDialog.vue');
+  const open = ref(true);
+  const record: ModelAlias = {
+    alias: 'opus-xhigh',
+    target_model_id: 'claude-opus-4-6',
+    upstream_ids: [],
+    rules: {},
+    visible_in_models_list: true,
+    on_conflict: 'real-only',
+    display_name: null,
+    created_at: 1_700_000_000,
+  };
+
+  const wrapper = mount(defineComponent({
+    components: { AliasEditDialog },
+    setup() { return { open, record }; },
+    template: '<AliasEditDialog v-model:open="open" :record="record" />',
+  }));
+
+  const aliasInput = wrapper.find('input[placeholder="codex-auto-review"]');
+  await aliasInput.setValue('opus-renamed');
+
+  const saveBtn = wrapper.findAll('button').find(b => b.text() === 'Save');
+  await saveBtn!.trigger('click');
+  await new Promise(r => setTimeout(r, 0));
+
+  expect(patchAliasMock).toHaveBeenCalledTimes(1);
+  const args = patchAliasMock.mock.calls[0]![0];
+  // The PATCH path stays at the row's *original* PK; the rename is requested
+  // via `body.alias`, which the route handler maps to the rename codepath.
+  expect(args.param.alias).toBe('opus-xhigh');
+  expect(args.json).toMatchObject({ alias: 'opus-renamed' });
+});
diff --git a/apps/web/src/components/settings/AliasRow.vue b/apps/web/src/components/settings/AliasRow.vue
index 1f8a684d1..b00f36348 100644
--- a/apps/web/src/components/settings/AliasRow.vue
+++ b/apps/web/src/components/settings/AliasRow.vue
@@ -20,30 +20,24 @@ defineEmits<{
 const labelText = computed(() => props.alias.display_name ?? props.alias.alias);
 
 const badges = computed(() => formatAliasRuleBadges(props.alias.rules));
-
-const onConflictBadgeClass = computed(() => {
-  switch (props.alias.on_conflict) {
-  case 'alias-only': return 'border-accent-violet/30 bg-accent-violet/10 text-accent-violet';
-  case 'real-only': return 'border-white/10 bg-white/5 text-gray-400';
-  case 'both-real-first':
-  case 'both-alias-first': return 'border-accent-cyan/30 bg-accent-cyan/10 text-accent-cyan';
-  }
-});
 </script>
 
 <template>
   <div class="flex items-center gap-3 rounded-lg border border-white/5 bg-surface-800/80 px-3 py-2">
-    <span
-      class="shrink-0 rounded border px-2 py-0.5 text-xs font-semibold uppercase tracking-wide"
-      :class="onConflictBadgeClass"
-    >{{ alias.on_conflict }}</span>
-
     <div class="min-w-0 flex-1 truncate">
-      <span class="text-sm font-semibold text-white">{{ labelText }}</span>
-      <span class="ml-2 font-mono text-xs text-gray-500">{{ alias.alias }}</span>
+      <span class="font-mono text-xs text-gray-500">{{ alias.alias }}</span>
+      <span class="ml-2 text-sm font-semibold text-white">{{ labelText }}</span>
       <span class="ml-2 text-xs text-gray-500">&rarr; {{ alias.target_model_id }}</span>
     </div>
 
+    <div v-if="alias.upstream_ids.length > 0" class="hidden shrink-0 items-center gap-1 sm:flex">
+      <span
+        v-for="id in alias.upstream_ids"
+        :key="id"
+        class="rounded border border-white/10 bg-white/[0.02] px-1.5 py-0.5 font-mono text-[10px] text-gray-400"
+      >{{ id }}</span>
+    </div>
+
     <div v-if="badges.length > 0" class="hidden shrink-0 items-center gap-1 sm:flex">
       <span
         v-for="badge in badges"
diff --git a/apps/web/src/components/settings/AliasRow_test.ts b/apps/web/src/components/settings/AliasRow_test.ts
index cef3a9639..7e1a6ea60 100644
--- a/apps/web/src/components/settings/AliasRow_test.ts
+++ b/apps/web/src/components/settings/AliasRow_test.ts
@@ -30,6 +30,25 @@ describe('AliasRow', () => {
     expect(wrapper.text()).toContain('claude-opus-4-6');
   });
 
+  test('does not render the on_conflict label as a badge', () => {
+    // The row used to surface `real-only` / `alias-only` as a coloured badge.
+    // Operator feedback was that the inline label was noisy and the same
+    // information lives inside the edit dialog. Asserting absence here guards
+    // against a regression — the words must not slip back into the row template.
+    const wrapper = mount(AliasRow, { props: { alias: baseAlias } });
+    expect(wrapper.text()).not.toContain('real-only');
+    expect(wrapper.text()).not.toContain('alias-only');
+  });
+
+  test('renders upstream-id pills when the alias whitelists upstreams', () => {
+    const wrapper = mount(AliasRow, {
+      props: { alias: { ...baseAlias, upstream_ids: ['up_anth', 'up_oai'] } },
+    });
+    const text = wrapper.text();
+    expect(text).toContain('up_anth');
+    expect(text).toContain('up_oai');
+  });
+
   test('falls back to alias name when display_name is null', () => {
     const wrapper = mount(AliasRow, { props: { alias: { ...baseAlias, display_name: null } } });
     // alias id appears twice (label fallback + the small font-mono id), but the

From ff479e471ae7926d299a362e5b6d92a0c11c0c57 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 12:57:44 +0800
Subject: [PATCH 033/170] feat(ui): combobox auto-opens on input focus
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Operators expected the popover to appear the moment focus lands in the
text input — tab into Effort, suggestions show up — instead of the
chevron-click-only pattern. reka-ui already supports this through
`open-on-focus` on `ComboboxRoot`; flip it on at the wrapper level so
every consumer (alias edit dialog, future settings forms) benefits at
once and the existing chevron-click toggle path stays intact.
---
 packages/ui/src/Combobox.vue | 1 +
 1 file changed, 1 insertion(+)

diff --git a/packages/ui/src/Combobox.vue b/packages/ui/src/Combobox.vue
index 6526888a4..970f16ce9 100644
--- a/packages/ui/src/Combobox.vue
+++ b/packages/ui/src/Combobox.vue
@@ -89,6 +89,7 @@ const commitTyped = async () => {
     v-model:open="open"
     :disabled="disabled"
     :display-value="(v: string) => v"
+    open-on-focus
   >
     <ComboboxAnchor as-child>
       <div class="relative w-full">

From 5a257529c633981756748133e79e8607a9b36d1b Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 12:57:59 +0800
Subject: [PATCH 034/170] feat(protocols): move composeAliasDisplayName to
 protocols/common
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The web dashboard wants to compute the same synthesized alias display
name the gateway uses (for the live placeholder under the Display name
input — show the operator what the fallback label would be). The
function previously lived in
`packages/gateway/src/control-plane/model-aliases/display.ts`, which
the SPA cannot import: per the architecture rules `apps/web` only
depends on `@floway-dev/ui`, `@floway-dev/proxy` (via its narrow subpath
exports), and type-imports from `@floway-dev/gateway/app-type`.

Move the function next to `formatAliasRuleBadges` in
`protocols/src/common/models.ts` — same audience, same rule-shape
input. The type changes from gateway-local `ModelAliasRules` to the
`PublicModelAliasedFrom['rules']` shape already declared in that file;
the two were structurally identical. Gateway call site swaps its
import to `@floway-dev/protocols/common`. The unit suite moves
alongside the function.
---
 .../control-plane/model-aliases/display.ts    | 34 -------------------
 .../src/data-plane/models/alias-listing.ts    |  3 +-
 packages/protocols/src/common/models.ts       | 33 ++++++++++++++++++
 .../src/common/models_alias-display_test.ts}  |  2 +-
 4 files changed, 35 insertions(+), 37 deletions(-)
 delete mode 100644 packages/gateway/src/control-plane/model-aliases/display.ts
 rename packages/{gateway/src/control-plane/model-aliases/display_test.ts => protocols/src/common/models_alias-display_test.ts} (98%)

diff --git a/packages/gateway/src/control-plane/model-aliases/display.ts b/packages/gateway/src/control-plane/model-aliases/display.ts
deleted file mode 100644
index f831273d8..000000000
--- a/packages/gateway/src/control-plane/model-aliases/display.ts
+++ /dev/null
@@ -1,34 +0,0 @@
-import type { ModelAliasRules } from './types.ts';
-
-// Compose the alias-local display name — what the operator named the alias
-// (when set) or a synthesized target + rules summary. Independent of which
-// upstream is surfacing the alias; the prefixed listing form prepends the
-// upstream display name at the call site, mirroring the real-model path in
-// `registry.ts`.
-//
-// The synthesized form's parenthesized rules suffix uses the compact
-// `value label` wording so it fits alongside the target name in narrow
-// listings — the dashboard's per-badge view uses `formatAliasRuleBadges`
-// for the self-describing `label: value` form. `anthropicBeta` tokens are
-// sorted so two operators carrying the same set in different orders see
-// the same label.
-export const composeAliasDisplayName = (input: {
-  aliasDisplayName?: string;
-  targetDisplayName: string;
-  rules: ModelAliasRules;
-}): string => {
-  if (input.aliasDisplayName !== undefined) return input.aliasDisplayName;
-  const parts: string[] = [];
-  const { rules } = input;
-  if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`);
-  if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`);
-  if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning');
-  if (rules.reasoning?.summary !== undefined) parts.push(`${rules.reasoning.summary} summary`);
-  if (rules.verbosity !== undefined) parts.push(`${rules.verbosity} verbosity`);
-  if (rules.serviceTier !== undefined) parts.push(`${rules.serviceTier} tier`);
-  if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
-    parts.push([...rules.anthropicBeta].sort().join('/'));
-  }
-  const suffix = parts.length > 0 ? ` (${parts.join(', ')})` : '';
-  return `${input.targetDisplayName}${suffix}`;
-};
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index e9762ed35..ea3fca570 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -1,7 +1,6 @@
-import { composeAliasDisplayName } from '../../control-plane/model-aliases/display.ts';
 import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
 import { unionEndpoints } from '../providers/registry.ts';
-import { kindForEndpoints, type PublicModel } from '@floway-dev/protocols/common';
+import { composeAliasDisplayName, kindForEndpoints, type PublicModel } from '@floway-dev/protocols/common';
 import type { ModelProviderInstance, ProviderModelRecord, ResolvedModel, UpstreamModel } from '@floway-dev/provider';
 
 // One emission slot for an alias: a (provider, addressable form) pair where
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index 0fb869db9..e0d92c49a 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -186,6 +186,39 @@ export const formatAliasRuleBadges = (rules: PublicModelAliasedFrom['rules']): A
   return out;
 };
 
+// Compose the alias-local display name — what the operator named the alias
+// (when set) or a synthesized target + rules summary. Independent of which
+// upstream is surfacing the alias; the prefixed listing form prepends the
+// upstream display name at the call site, mirroring the real-model path in
+// the gateway's provider registry.
+//
+// The synthesized form's parenthesized rules suffix uses the compact
+// `value label` wording so it fits alongside the target name in narrow
+// listings — the dashboard's per-badge view uses `formatAliasRuleBadges`
+// for the self-describing `label: value` form. `anthropicBeta` tokens are
+// sorted so two operators carrying the same set in different orders see
+// the same label.
+export const composeAliasDisplayName = (input: {
+  aliasDisplayName?: string;
+  targetDisplayName: string;
+  rules: PublicModelAliasedFrom['rules'];
+}): string => {
+  if (input.aliasDisplayName !== undefined) return input.aliasDisplayName;
+  const parts: string[] = [];
+  const { rules } = input;
+  if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`);
+  if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`);
+  if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning');
+  if (rules.reasoning?.summary !== undefined) parts.push(`${rules.reasoning.summary} summary`);
+  if (rules.verbosity !== undefined) parts.push(`${rules.verbosity} verbosity`);
+  if (rules.serviceTier !== undefined) parts.push(`${rules.serviceTier} tier`);
+  if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
+    parts.push([...rules.anthropicBeta].sort().join('/'));
+  }
+  const suffix = parts.length > 0 ? ` (${parts.join(', ')})` : '';
+  return `${input.targetDisplayName}${suffix}`;
+};
+
 export interface PublicModelsResponse {
   // OpenAI container
   object: 'list';
diff --git a/packages/gateway/src/control-plane/model-aliases/display_test.ts b/packages/protocols/src/common/models_alias-display_test.ts
similarity index 98%
rename from packages/gateway/src/control-plane/model-aliases/display_test.ts
rename to packages/protocols/src/common/models_alias-display_test.ts
index 40dbd2fec..21473a3a4 100644
--- a/packages/gateway/src/control-plane/model-aliases/display_test.ts
+++ b/packages/protocols/src/common/models_alias-display_test.ts
@@ -1,6 +1,6 @@
 import { describe, expect, test } from 'vitest';
 
-import { composeAliasDisplayName } from './display.ts';
+import { composeAliasDisplayName } from './models.ts';
 
 describe('composeAliasDisplayName', () => {
   test('uses alias displayName when set, suppressing the rules summary', () => {

From 39ba1873cae7e672480d8190298031f92629f5c4 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 12:58:11 +0800
Subject: [PATCH 035/170] refactor(web): alias edit dialog second-round polish
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Operator-feedback pass on the dialog:

- Visible-in-/v1/models moved from a checkbox sharing the on-conflict
  row to a Switch in the footer, left of Cancel — matches the global
  pattern (CustomConfigPanel fetch toggle) and frees the row above
  for a single-purpose layout.
- On conflict gets its own full-width row plus a one-line helper
  underneath that mirrors the selected option's explanation. Operators
  shouldn't have to re-open the dropdown to remember which mode is
  active; the same `explanation` field that powers the popover
  description now also sits in the trigger's helper line.
- Reasoning section flattened: no `REASONING` heading, no inner card
  wrapper, the four facets sit in two two-column rows
  (Effort / Budget, Adaptive / Summary). Adaptive switches from
  Checkbox to Switch for visual consistency with the footer toggle.
- Display name placeholder now demonstrates the synthesized fallback
  live. When the target id is empty the dialog shows the teaching
  example `GPT-5.5 (xhigh effort, fast speed)` to match the new
  `gpt-5.5-xhigh-fast` / `gpt-5.5` placeholders on the alias and
  target inputs — the trio communicates the operator pattern at a
  glance. The example is a fixed string, not helper output: the live
  helper words a service tier as `<value> tier`, so a real alias with
  these rules would read `fast tier` rather than `fast speed`. Once a
  target is picked we feed the live form state through
  `composeAliasDisplayName` (now in @floway-dev/protocols/common) so
  the placeholder tracks every rule edit.

Test query selectors that previously matched the static placeholders
shift to the new placeholders; the Display name lookup changes from
placeholder match to value match since the placeholder is now dynamic.
---
 .../components/alias-edit/AliasEditDialog.vue | 123 ++++++++++++------
 .../alias-edit/AliasEditDialog_test.ts        |  18 +--
 2 files changed, 96 insertions(+), 45 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index 7b238d007..926ec889d 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -12,7 +12,8 @@ import { callApi, useApi } from '../../api/client.ts';
 import type { ModelAlias, ModelAliasOnConflict } from '../../api/types.ts';
 import { useModelsStore } from '../../composables/useModels.ts';
 import { useUpstreamsStore } from '../../composables/useUpstreams.ts';
-import { Button, Checkbox, Combobox, Dialog, Input, Select, TagCombobox } from '@floway-dev/ui';
+import { composeAliasDisplayName } from '@floway-dev/protocols/common';
+import { Button, Combobox, Dialog, Input, Select, Switch, TagCombobox } from '@floway-dev/ui';
 
 // Mutable mirror of @floway-dev/protocols ModelAliasRules — the wire shape
 // is `readonly` at the contract boundary, but the form mutates it in place
@@ -90,8 +91,10 @@ const upstreamItems = computed(() => (upstreamsStore.upstreams.value ?? []).map(
   detail: u.id,
 })));
 
+const targetMatch = computed(() => modelsStore.models.value?.find(m => m.id === targetModelId.value));
+
 const targetChat = computed(() => {
-  const match = modelsStore.models.value?.find(m => m.id === targetModelId.value);
+  const match = targetMatch.value;
   return match && 'chat' in match ? (match as { chat?: { reasoning?: { effort?: { supported: string[] }; budget_tokens?: { min?: number; max?: number }; adaptive?: boolean } } }).chat : undefined;
 });
 
@@ -104,10 +107,10 @@ const SUMMARY_HINTS = ['auto', 'concise', 'detailed', 'omitted'];
 const VERBOSITY_HINTS = ['low', 'medium', 'high'];
 const SERVICE_TIER_HINTS = ['auto', 'default', 'flex', 'scale', 'priority', 'standard_only'];
 
-// Each on-conflict option carries a one-line explanation surfaced in the
-// Select popover so an operator picks by what happens at request time, not
-// by guessing what `real-only` / `alias-only` mean. Mirrors the Auth Style
-// pattern in CustomConfigPanel.
+// Each on-conflict option carries a one-line explanation surfaced both in
+// the Select popover and in a helper line below the trigger so an operator
+// picks by what happens at request time, not by guessing what `real-only`
+// / `alias-only` mean. Mirrors the Auth Style pattern in CustomConfigPanel.
 interface OnConflictOption {
   value: ModelAliasOnConflict;
   label: string;
@@ -137,6 +140,52 @@ const onConflictOptions: OnConflictOption[] = [
   },
 ];
 
+const selectedOnConflict = computed(() => onConflictOptions.find(o => o.value === onConflict.value));
+
+// --- display name placeholder ---
+//
+// Shows the operator what the synthesized fallback would look like when
+// the Display name field is left blank. Before a target is picked we hold
+// a teaching example so the three placeholders (alias / target / display
+// name) read as a coherent trio; once a target is set we compute the real
+// synthesized label off the current form state so the placeholder tracks
+// every rule edit live.
+const FALLBACK_PLACEHOLDER_EXAMPLE = 'GPT-5.5 (xhigh effort, fast speed)';
+
+// Mirror of `buildRules` without the validation errors — used purely for
+// the live placeholder so a half-typed budget value (e.g. mid-typing) does
+// not bubble validation text into a UI hint. Invalid intermediate states
+// fall back to the empty rules object.
+const buildRulesForPreview = (): MutableRules => {
+  const rules: MutableRules = {};
+  const reasoning: NonNullable<MutableRules['reasoning']> = {};
+  const effort = reasoningEffort.value.trim();
+  if (effort !== '') reasoning.effort = effort;
+  const budgetRaw = reasoningBudgetTokens.value.trim();
+  if (budgetRaw !== '' && /^\d+$/.test(budgetRaw)) reasoning.budgetTokens = Number(budgetRaw);
+  if (reasoningAdaptive.value) reasoning.adaptive = true;
+  const summary = reasoningSummary.value.trim();
+  if (summary !== '') reasoning.summary = summary;
+  if (Object.keys(reasoning).length > 0) rules.reasoning = reasoning;
+  const verb = verbosity.value.trim();
+  if (verb !== '') rules.verbosity = verb;
+  const tier = serviceTier.value.trim();
+  if (tier !== '') rules.serviceTier = tier;
+  const betas = anthropicBeta.value.map(s => s.trim()).filter(s => s !== '');
+  if (betas.length > 0) rules.anthropicBeta = betas;
+  return rules;
+};
+
+const displayNamePlaceholder = computed(() => {
+  const trimmedTarget = targetModelId.value.trim();
+  if (trimmedTarget === '') return FALLBACK_PLACEHOLDER_EXAMPLE;
+  const targetDisplay = targetMatch.value?.display_name ?? trimmedTarget;
+  return composeAliasDisplayName({
+    targetDisplayName: targetDisplay,
+    rules: buildRulesForPreview(),
+  });
+});
+
 // --- save ---
 
 const saving = ref(false);
@@ -245,18 +294,18 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
       <div class="grid grid-cols-1 gap-3 sm:grid-cols-2">
         <div>
           <label class="mb-1.5 block text-xs font-medium text-gray-500">Alias name</label>
-          <Input v-model="aliasName" placeholder="codex-auto-review" class="font-mono" />
+          <Input v-model="aliasName" placeholder="gpt-5.5-xhigh-fast" class="font-mono" />
         </div>
 
         <div>
           <label class="mb-1.5 block text-xs font-medium text-gray-500">Target model id</label>
-          <Combobox v-model="targetModelId" :items="modelOptions" placeholder="gpt-5.4" input-class="font-mono" />
+          <Combobox v-model="targetModelId" :items="modelOptions" placeholder="gpt-5.5" input-class="font-mono" />
         </div>
       </div>
 
       <div>
         <label class="mb-1.5 block text-xs font-medium text-gray-500">Display name <span class="text-gray-600">(optional)</span></label>
-        <Input v-model="displayName" placeholder="Codex Auto Review" />
+        <Input v-model="displayName" :placeholder="displayNamePlaceholder" />
       </div>
 
       <div>
@@ -264,25 +313,17 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
         <TagCombobox v-model="upstreamIds" :items="upstreamItems" placeholder="Pick an upstream..." empty-text="No upstreams match" />
       </div>
 
-      <div class="grid grid-cols-1 gap-3 sm:grid-cols-2">
-        <div>
-          <label class="mb-1.5 block text-xs font-medium text-gray-500">On conflict</label>
-          <Select v-model="onConflict" :options="onConflictOptions">
-            <template #description="{ option }">
-              <p class="text-[11px] text-gray-500">{{ option.explanation }}</p>
-            </template>
-          </Select>
-        </div>
-
-        <div class="flex items-center gap-2 pt-6">
-          <Checkbox v-model="visibleInModelsList" />
-          <label class="text-sm text-gray-300">Visible in <code class="rounded bg-white/[0.04] px-1 font-mono text-xs">/v1/models</code></label>
-        </div>
+      <div>
+        <label class="mb-1.5 block text-xs font-medium text-gray-500">On conflict</label>
+        <Select v-model="onConflict" :options="onConflictOptions">
+          <template #description="{ option }">
+            <p class="text-[11px] text-gray-500">{{ option.explanation }}</p>
+          </template>
+        </Select>
+        <p v-if="selectedOnConflict" class="mt-1.5 text-xs text-gray-500">{{ selectedOnConflict.explanation }}</p>
       </div>
 
-      <div class="space-y-4">
-        <h4 class="text-xs font-semibold uppercase tracking-wide text-gray-400">Reasoning</h4>
-
+      <div class="grid grid-cols-1 gap-3 sm:grid-cols-2">
         <div>
           <label class="mb-1.5 block text-xs font-medium text-gray-500">Effort</label>
           <Combobox v-model="reasoningEffort" :items="effortSuggestions" placeholder="high" />
@@ -306,21 +347,23 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
             <template v-if="budgetMax !== undefined">max {{ budgetMax }}</template>
           </p>
         </div>
+      </div>
 
+      <div class="grid grid-cols-1 gap-3 sm:grid-cols-2">
         <div>
-          <label class="mb-1.5 block text-xs font-medium text-gray-500">Summary</label>
-          <Combobox v-model="reasoningSummary" :items="SUMMARY_HINTS" placeholder="auto" />
-        </div>
-
-        <div>
-          <label class="inline-flex items-center gap-2">
-            <Checkbox v-model="reasoningAdaptive" />
+          <div class="flex h-9 items-center gap-2">
+            <Switch v-model="reasoningAdaptive" />
             <span class="text-sm text-gray-300">Adaptive reasoning</span>
-          </label>
+          </div>
           <p v-if="reasoningAdaptive && !adaptiveSupported" class="mt-1 rounded-md border border-amber-500/30 bg-amber-500/10 px-2 py-1 text-xs text-amber-300">
             Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.
           </p>
         </div>
+
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Summary</label>
+          <Combobox v-model="reasoningSummary" :items="SUMMARY_HINTS" placeholder="auto" />
+        </div>
       </div>
 
       <div class="grid grid-cols-1 gap-3 sm:grid-cols-2">
@@ -340,9 +383,15 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
         <TagCombobox v-model="anthropicBeta" :items="[]" placeholder="extended-cache-ttl-2025-04-11" empty-text="Type a header token and press Enter" />
       </div>
 
-      <div class="flex flex-wrap items-center gap-2 border-t border-white/[0.06] pt-5">
-        <Button :loading="saving" @click="save">Save</Button>
-        <Button variant="secondary" :disabled="saving" @click="open = false">Cancel</Button>
+      <div class="flex flex-wrap items-center justify-between gap-3 border-t border-white/[0.06] pt-5">
+        <label class="flex items-center gap-2">
+          <Switch v-model="visibleInModelsList" />
+          <span class="text-sm text-gray-300">Visible in <code class="rounded bg-white/[0.04] px-1 font-mono text-xs">/v1/models</code></span>
+        </label>
+        <div class="flex items-center gap-2">
+          <Button variant="secondary" :disabled="saving" @click="open = false">Cancel</Button>
+          <Button :loading="saving" @click="save">Save</Button>
+        </div>
       </div>
     </div>
   </Dialog>
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
index bd6870b2d..99574c248 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
+++ b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
@@ -85,11 +85,11 @@ test('AliasEditDialog (create mode) posts a payload matching the form state', as
 
   // Fill the form: alias name + target id are the only required fields for
   // the create-mode happy path. Everything else uses its default.
-  const aliasInput = wrapper.find('input[placeholder="codex-auto-review"]');
+  const aliasInput = wrapper.find('input[placeholder="gpt-5.5-xhigh-fast"]');
   expect(aliasInput.exists()).toBe(true);
   await aliasInput.setValue('opus-fast');
 
-  const targetInput = wrapper.find('input[placeholder="gpt-5.4"]');
+  const targetInput = wrapper.find('input[placeholder="gpt-5.5"]');
   expect(targetInput.exists()).toBe(true);
   await targetInput.setValue('claude-opus-4-6');
 
@@ -133,17 +133,19 @@ test('AliasEditDialog (edit mode) pre-fills the form and PATCHes the merged shap
   }));
 
   // Alias name input is editable in edit mode — the PK can now be renamed.
-  const aliasInput = wrapper.find('input[placeholder="codex-auto-review"]');
+  const aliasInput = wrapper.find('input[placeholder="gpt-5.5-xhigh-fast"]');
   expect(aliasInput.exists()).toBe(true);
   expect((aliasInput.element as HTMLInputElement).disabled).toBe(false);
   expect((aliasInput.element as HTMLInputElement).value).toBe('opus-xhigh');
 
-  // Display name pre-filled.
-  const displayInput = wrapper.find('input[placeholder="Codex Auto Review"]');
-  expect((displayInput.element as HTMLInputElement).value).toBe('Opus XHigh');
+  // Display name pre-filled — its placeholder is dynamic now (mirrors the
+  // synthesized fallback) so we locate it by its current value instead.
+  const allInputs = wrapper.findAll('input');
+  const displayInput = allInputs.find(i => (i.element as HTMLInputElement).value === 'Opus XHigh');
+  expect(displayInput).toBeDefined();
 
   // Target id pre-filled.
-  const targetInput = wrapper.find('input[placeholder="gpt-5.4"]');
+  const targetInput = wrapper.find('input[placeholder="gpt-5.5"]');
   expect((targetInput.element as HTMLInputElement).value).toBe('claude-opus-4-6');
 
   // Change one field and submit; PATCH carries the merged shape (every editable
@@ -187,7 +189,7 @@ test('AliasEditDialog (edit mode) PATCHes the original alias when the operator r
     template: '<AliasEditDialog v-model:open="open" :record="record" />',
   }));
 
-  const aliasInput = wrapper.find('input[placeholder="codex-auto-review"]');
+  const aliasInput = wrapper.find('input[placeholder="gpt-5.5-xhigh-fast"]');
   await aliasInput.setValue('opus-renamed');
 
   const saveBtn = wrapper.findAll('button').find(b => b.text() === 'Save');

From 1aa0a55d5aa36737bebbd60eb0a5980f9e27cdd6 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 13:16:20 +0800
Subject: [PATCH 036/170] feat(web): add 'fast' to Service tier combobox
 suggestions
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

After main's cross-protocol service_tier ↔ speed:'fast' bridge, 'fast' is
a meaningful value on both OpenAI- and Anthropic-bound requests — include
it in the dropdown alongside priority/flex/standard_only so the operator
does not have to type it freeform.
---
 apps/web/src/components/alias-edit/AliasEditDialog.vue | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index 926ec889d..8d0c5844d 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -105,7 +105,7 @@ const adaptiveSupported = computed(() => targetChat.value?.reasoning?.adaptive =
 
 const SUMMARY_HINTS = ['auto', 'concise', 'detailed', 'omitted'];
 const VERBOSITY_HINTS = ['low', 'medium', 'high'];
-const SERVICE_TIER_HINTS = ['auto', 'default', 'flex', 'scale', 'priority', 'standard_only'];
+const SERVICE_TIER_HINTS = ['auto', 'default', 'flex', 'scale', 'priority', 'fast', 'standard_only'];
 
 // Each on-conflict option carries a one-line explanation surfaced both in
 // the Select popover and in a helper line below the trigger so an operator

From db15d10836a7923252377878cff942b1172fcc33 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 13:17:39 +0800
Subject: [PATCH 037/170] refactor(web): Adaptive reasoning gets a section
 label; switch reads "Enable"
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Match the labelled-field rhythm of the surrounding inputs — the bare
switch with "Adaptive reasoning" inline read inconsistently next to the
labelled Effort / Summary / Verbosity rows. Now: caption "Adaptive
reasoning" above, Switch + "Enable" below.
---
 apps/web/src/components/alias-edit/AliasEditDialog.vue | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index 8d0c5844d..335ed789e 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -351,9 +351,10 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
 
       <div class="grid grid-cols-1 gap-3 sm:grid-cols-2">
         <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Adaptive reasoning</label>
           <div class="flex h-9 items-center gap-2">
             <Switch v-model="reasoningAdaptive" />
-            <span class="text-sm text-gray-300">Adaptive reasoning</span>
+            <span class="text-sm text-gray-300">Enable</span>
           </div>
           <p v-if="reasoningAdaptive && !adaptiveSupported" class="mt-1 rounded-md border border-amber-500/30 bg-amber-500/10 px-2 py-1 text-xs text-amber-300">
             Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.

From 66360bfe714038d9993ae04035bf1aaa603e6bc1 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 13:23:02 +0800
Subject: [PATCH 038/170] refactor(web): always-show combobox + drop effort
 hint + footer warnings group
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Combobox now keeps every suggestion visible while ranking matches at the
top — operators see what else is on the menu instead of typing into an
empty dropdown when the partial query doesn't match.

Drop the inline "Target supports: …" effort hint and the inline
"Adaptive reasoning" amber callout. A new `fieldWarnings` computed
collects all target-misalignment notes and renders them as one group
above the dialog footer — one amber card per condition, labelled with
the field. The first entry is the adaptive-not-supported case; the same
shape will house future per-rule warnings without crowding the form
area.
---
 .../components/alias-edit/AliasEditDialog.vue | 27 +++++++++++++++---
 packages/ui/src/Combobox.vue                  | 28 +++++++++++++------
 2 files changed, 43 insertions(+), 12 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index 335ed789e..ac441af0f 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -103,6 +103,19 @@ const budgetMin = computed(() => targetChat.value?.reasoning?.budget_tokens?.min
 const budgetMax = computed(() => targetChat.value?.reasoning?.budget_tokens?.max);
 const adaptiveSupported = computed(() => targetChat.value?.reasoning?.adaptive === true);
 
+// Collected at the bottom of the dialog so a misalignment between the
+// rule and the target model's advertised capability stays visible without
+// crowding the per-field area. Add a new entry per condition; the labels
+// double as the field name an operator should look up to fix it.
+interface FieldWarning { field: string; message: string }
+const fieldWarnings = computed<FieldWarning[]>(() => {
+  const out: FieldWarning[] = [];
+  if (reasoningAdaptive.value && !adaptiveSupported.value) {
+    out.push({ field: 'Adaptive reasoning', message: 'Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.' });
+  }
+  return out;
+});
+
 const SUMMARY_HINTS = ['auto', 'concise', 'detailed', 'omitted'];
 const VERBOSITY_HINTS = ['low', 'medium', 'high'];
 const SERVICE_TIER_HINTS = ['auto', 'default', 'flex', 'scale', 'priority', 'fast', 'standard_only'];
@@ -327,7 +340,6 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
         <div>
           <label class="mb-1.5 block text-xs font-medium text-gray-500">Effort</label>
           <Combobox v-model="reasoningEffort" :items="effortSuggestions" placeholder="high" />
-          <p v-if="effortSuggestions.length > 0" class="mt-1 text-xs text-gray-600">Target supports: {{ effortSuggestions.join(', ') }}</p>
         </div>
 
         <div>
@@ -356,9 +368,6 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
             <Switch v-model="reasoningAdaptive" />
             <span class="text-sm text-gray-300">Enable</span>
           </div>
-          <p v-if="reasoningAdaptive && !adaptiveSupported" class="mt-1 rounded-md border border-amber-500/30 bg-amber-500/10 px-2 py-1 text-xs text-amber-300">
-            Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.
-          </p>
         </div>
 
         <div>
@@ -384,6 +393,16 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
         <TagCombobox v-model="anthropicBeta" :items="[]" placeholder="extended-cache-ttl-2025-04-11" empty-text="Type a header token and press Enter" />
       </div>
 
+      <div v-if="fieldWarnings.length > 0" class="space-y-1.5">
+        <p
+          v-for="warning in fieldWarnings"
+          :key="warning.field"
+          class="rounded-md border border-amber-500/30 bg-amber-500/10 px-3 py-2 text-xs text-amber-300"
+        >
+          <span class="font-medium">{{ warning.field }}:</span> {{ warning.message }}
+        </p>
+      </div>
+
       <div class="flex flex-wrap items-center justify-between gap-3 border-t border-white/[0.06] pt-5">
         <label class="flex items-center gap-2">
           <Switch v-model="visibleInModelsList" />
diff --git a/packages/ui/src/Combobox.vue b/packages/ui/src/Combobox.vue
index 970f16ce9..006d9e4f3 100644
--- a/packages/ui/src/Combobox.vue
+++ b/packages/ui/src/Combobox.vue
@@ -59,12 +59,24 @@ watch(value, v => { if (v !== query.value) query.value = v; });
 // same value.
 watch(query, q => { value.value = q; });
 
-// Filter suggestions by the typed query against either value or label.
-// When the query exactly matches an item we still show it (the user can
-// re-pick the same row), but we hide a synthesized "create" row in that
-// case — it would be a no-op.
-const filteredItems = computed(() => normalizedItems.value.filter(item =>
-  query.value === '' || contains(item.label ?? item.value, query.value) || contains(item.value, query.value)));
+// Always show every suggestion; rank items whose label or value contains the
+// typed query above the rest, preserving the original order within each
+// group. Empty query keeps the configured order untouched. The operator
+// always sees the full set of presets — typing narrows attention to the
+// top of the list without hiding the alternatives.
+const orderedItems = computed<Item[]>(() => {
+  if (query.value === '') return normalizedItems.value;
+  const matches: Item[] = [];
+  const rest: Item[] = [];
+  for (const item of normalizedItems.value) {
+    if (contains(item.label ?? item.value, query.value) || contains(item.value, query.value)) {
+      matches.push(item);
+    } else {
+      rest.push(item);
+    }
+  }
+  return [...matches, ...rest];
+});
 
 const trimmedQuery = computed(() => query.value.trim());
 const hasExactMatch = computed(() => normalizedItems.value.some(item => item.value === trimmedQuery.value));
@@ -123,7 +135,7 @@ const commitTyped = async () => {
         class="z-50 max-h-72 w-[--reka-combobox-trigger-width] overflow-hidden rounded-[10px] border border-white/[0.06] bg-surface-800 text-white shadow-xl"
       >
         <ComboboxViewport class="p-1">
-          <ComboboxEmpty v-if="!showCreateOption" class="px-2 py-1.5 text-xs text-gray-500">
+          <ComboboxEmpty v-if="orderedItems.length === 0 && !showCreateOption" class="px-2 py-1.5 text-xs text-gray-500">
             {{ emptyText }}
           </ComboboxEmpty>
           <ComboboxGroup>
@@ -139,7 +151,7 @@ const commitTyped = async () => {
               <span class="truncate">Use "<span class="font-mono">{{ trimmedQuery }}</span>"</span>
             </ComboboxItem>
             <ComboboxItem
-              v-for="item in filteredItems"
+              v-for="item in orderedItems"
               :key="item.value"
               :value="item.value"
               class="relative flex cursor-pointer select-none items-center rounded-sm py-1.5 pl-7 pr-2 text-sm text-white outline-none data-[highlighted]:bg-accent-cyan/10 data-[highlighted]:text-accent-cyan"

From 00aa2421c0e6774777cc6fc54e511314df7ecd20 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 13:29:07 +0800
Subject: [PATCH 039/170] feat(web): expand alias warnings (effort/budget
 mismatch) + bold field labels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The warning group above the dialog footer now surfaces three
target-mismatch warnings:
- Effort value not in the target's advertised supported list
- Budget tokens below the target's minimum or above its maximum
- Adaptive reasoning enabled but the target doesn't advertise adaptive

Each warning leads with the field name in bold so an operator
scanning the dialog jumps straight to the affected control. Values
still pass through verbatim (Goal 2: pass through, don't enum-gate) —
warnings are informational, not blocking.
---
 .../components/alias-edit/AliasEditDialog.vue | 40 +++++++++++++++++--
 1 file changed, 36 insertions(+), 4 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index ac441af0f..5028ca35f 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -105,14 +105,46 @@ const adaptiveSupported = computed(() => targetChat.value?.reasoning?.adaptive =
 
 // Collected at the bottom of the dialog so a misalignment between the
 // rule and the target model's advertised capability stays visible without
-// crowding the per-field area. Add a new entry per condition; the labels
-// double as the field name an operator should look up to fix it.
+// crowding the per-field area. Each entry tells the operator which field
+// to revisit; the values still flow through to the upstream verbatim
+// (Goal 2: pass through, don't enum-gate) so a warning is informational,
+// not blocking.
 interface FieldWarning { field: string; message: string }
 const fieldWarnings = computed<FieldWarning[]>(() => {
   const out: FieldWarning[] = [];
+  const chatReasoning = targetChat.value?.reasoning;
+
+  if (reasoningEffort.value.trim() !== '' && chatReasoning?.effort?.supported) {
+    const value = reasoningEffort.value.trim();
+    const supported = chatReasoning.effort.supported;
+    if (!supported.includes(value)) {
+      out.push({
+        field: 'Effort',
+        message: `"${value}" is not in the target's supported list (${supported.join(', ')}). The rule will still be sent verbatim.`,
+      });
+    }
+  }
+
+  const budgetRaw = reasoningBudgetTokens.value.trim();
+  if (budgetRaw !== '') {
+    const n = Number(budgetRaw);
+    const range = chatReasoning?.budget_tokens;
+    if (Number.isFinite(n) && range) {
+      if (range.min !== undefined && n < range.min) {
+        out.push({ field: 'Budget tokens', message: `${n} is below the target's minimum (${range.min}). The rule will still be sent verbatim.` });
+      } else if (range.max !== undefined && n > range.max) {
+        out.push({ field: 'Budget tokens', message: `${n} is above the target's maximum (${range.max}). The rule will still be sent verbatim.` });
+      }
+    }
+  }
+
   if (reasoningAdaptive.value && !adaptiveSupported.value) {
-    out.push({ field: 'Adaptive reasoning', message: 'Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.' });
+    out.push({
+      field: 'Adaptive reasoning',
+      message: 'Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.',
+    });
   }
+
   return out;
 });
 
@@ -399,7 +431,7 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
           :key="warning.field"
           class="rounded-md border border-amber-500/30 bg-amber-500/10 px-3 py-2 text-xs text-amber-300"
         >
-          <span class="font-medium">{{ warning.field }}:</span> {{ warning.message }}
+          <span class="font-bold">{{ warning.field }}:</span> {{ warning.message }}
         </p>
       </div>
 

From d8e2b9a41273bf26c3b83a3cd51f7ea4670960b9 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 13:34:28 +0800
Subject: [PATCH 040/170] refactor(web): collapse alias warning cards into one
 amber card with multiple lines
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Replace one card per warning with a single amber card carrying every
warning paragraph inside — same visual identity, less repeated chrome
when more than one warning fires.
---
 apps/web/src/components/alias-edit/AliasEditDialog.vue | 8 ++------
 1 file changed, 2 insertions(+), 6 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index 5028ca35f..db7275abe 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -425,12 +425,8 @@ const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Al
         <TagCombobox v-model="anthropicBeta" :items="[]" placeholder="extended-cache-ttl-2025-04-11" empty-text="Type a header token and press Enter" />
       </div>
 
-      <div v-if="fieldWarnings.length > 0" class="space-y-1.5">
-        <p
-          v-for="warning in fieldWarnings"
-          :key="warning.field"
-          class="rounded-md border border-amber-500/30 bg-amber-500/10 px-3 py-2 text-xs text-amber-300"
-        >
+      <div v-if="fieldWarnings.length > 0" class="rounded-md border border-amber-500/30 bg-amber-500/10 px-3 py-2 text-xs text-amber-300 space-y-1">
+        <p v-for="warning in fieldWarnings" :key="warning.field">
           <span class="font-bold">{{ warning.field }}:</span> {{ warning.message }}
         </p>
       </div>

From e3ecae996696d932cc66f2114907e832a021e9b0 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 13:53:53 +0800
Subject: [PATCH 041/170] feat(protocols): expose aliasedFrom.displayName +
 formatAliasRulesInline helper

Two related extensions so alias-aware UIs can render an alias model row
without re-deriving the operator's intent from scratch:

- Add an optional `displayName` field to `PublicModelAliasedFrom`. The
  gateway's alias-listing serializer carries it onto the wire only when
  the operator explicitly set one; absence means "synthesize from the
  target's name + the rules summary." This lets the dashboard show the
  operator-named heading on its own line, distinct from the synthesized
  fallback that the top-level `display_name` already carries.

- Extract a shared parts builder behind `composeAliasDisplayName` and
  expose `formatAliasRulesInline(rules)` as a sibling export. Both
  surfaces consume the same per-field wording, so the parenthesized
  suffix in the synthesized display name and the standalone summary line
  rendered on alias rows can never drift.

Gateway tests assert the new `displayName` round-trips on /v1/models and
/api/models; the existing absence-path tests stay green because the
serializer omits the key for aliases that never had one.
---
 .../src/control-plane/models/routes_test.ts   |  3 +-
 .../src/data-plane/models/alias-listing.ts    |  1 +
 .../src/data-plane/models/serve_test.ts       | 55 ++++++++++++++++
 packages/protocols/src/common/models.ts       | 62 ++++++++++++-------
 .../src/common/models_alias-display_test.ts   | 31 +++++++++-
 5 files changed, 127 insertions(+), 25 deletions(-)

diff --git a/packages/gateway/src/control-plane/models/routes_test.ts b/packages/gateway/src/control-plane/models/routes_test.ts
index 0be710c36..bab38e182 100644
--- a/packages/gateway/src/control-plane/models/routes_test.ts
+++ b/packages/gateway/src/control-plane/models/routes_test.ts
@@ -144,13 +144,14 @@ test('/api/models appends visible alias entries with aliasedFrom alongside real
   await withMockedFetch(modelsFetchHandler, async () => {
     const response = await requestApp('/api/models', { headers: { 'x-api-key': apiKey.key } });
     assertEquals(response.status, 200);
-    const body = (await response.json()) as { data: Array<{ id: string; display_name: string; upstreams: Array<{ kind: string; id: string; name: string }>; aliasedFrom?: { targetModelId: string; rules: Record<string, unknown> } }> };
+    const body = (await response.json()) as { data: Array<{ id: string; display_name: string; upstreams: Array<{ kind: string; id: string; name: string }>; aliasedFrom?: { targetModelId: string; rules: Record<string, unknown>; displayName?: string } }> };
     const aliasEntry = body.data.find(model => model.id === 'codex-auto-review');
     if (!aliasEntry) throw new Error('expected codex-auto-review alias entry on /api/models');
     assertEquals(aliasEntry.display_name, 'Codex Auto Review');
     assertEquals(aliasEntry.upstreams, [{ kind: 'custom', id: 'up_custom_models', name: 'Custom Provider' }]);
     assertEquals(aliasEntry.aliasedFrom?.targetModelId, 'custom-model');
     assertEquals(aliasEntry.aliasedFrom?.rules, { reasoning: { effort: 'low' } });
+    assertEquals(aliasEntry.aliasedFrom?.displayName, 'Codex Auto Review');
     assertEquals(body.data.some(model => model.id === 'hidden-alias'), false);
   });
 });
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index ea3fca570..72c0fdcf7 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -99,6 +99,7 @@ const aliasEmissionToListedModel = (alias: ModelAlias, emission: AliasListingEmi
       upstreamIds: alias.upstreamIds,
       rules: alias.rules,
       onConflict: alias.onConflict,
+      ...(alias.displayName !== undefined ? { displayName: alias.displayName } : {}),
     },
   };
 };
diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index cc5227967..9af4d7981 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -661,6 +661,61 @@ test('/v1/models appends a visible alias with aliasedFrom after the real entries
   );
 });
 
+// `displayName` propagates verbatim when the operator set it; absence on the
+// wire (the prior test) means "synthesize from target name + rules summary".
+test('/v1/models forwards the operator-set displayName on the aliasedFrom payload', async () => {
+  const { repo, apiKey } = await setupAppTest();
+
+  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
+    {
+      alias: 'codex-auto-review',
+      targetModelId: 'gpt-5.4',
+      upstreamIds: [],
+      rules: { reasoning: { effort: 'low' } },
+      visibleInModelsList: true,
+      onConflict: 'real-only',
+      displayName: 'Codex Auto Review',
+      createdAt: 1_700_000_000,
+    },
+  ]);
+
+  await repo.upstreams.save(buildCustomUpstreamRecord({
+    id: 'up_oai',
+    name: 'Test OpenAI',
+    sortOrder: 100,
+    config: {
+      baseUrl: 'https://oai.example.com',
+      authStyle: 'bearer',
+      apiKey: 'sk-test',
+      endpoints: { chatCompletions: {} },
+    },
+  }));
+
+  await withMockedFetch(
+    request => {
+      const url = new URL(request.url);
+      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (url.pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+      }
+      if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') {
+        return jsonResponse(copilotModels([]));
+      }
+      if (url.pathname === '/v1/models' && url.hostname === 'oai.example.com') {
+        return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
+      }
+      throw new Error(`Unhandled fetch ${request.url}`);
+    },
+    async () => {
+      const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
+      const body = await response.json() as { data: Array<{ id: string; aliasedFrom?: { displayName?: string } }> };
+      const aliasEntry = body.data.find(m => m.id === 'codex-auto-review');
+      if (!aliasEntry) throw new Error('expected codex-auto-review alias entry');
+      assertEquals(aliasEntry.aliasedFrom?.displayName, 'Codex Auto Review');
+    },
+  );
+});
+
 test('/v1/models omits aliases marked visibleInModelsList=false', async () => {
   const { repo, apiKey } = await setupAppTest();
 
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index e0d92c49a..9a73c11d5 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -157,16 +157,20 @@ export interface PublicModelAliasedFrom {
     anthropicBeta?: readonly string[];
   };
   onConflict: 'alias-only' | 'real-only' | 'both-real-first' | 'both-alias-first';
+  // Operator-set display name. Absent (undefined) when the operator left the
+  // field blank — alias-aware UIs then synthesize a label from the target's
+  // display name and the inline rules summary instead.
+  displayName?: string;
 }
 
 // One badge per rule field on an alias, in a `${label}` / `${label}: ${value}`
 // shape the dashboard renders inline next to the model row. Returned in a
 // deterministic order so the badge sequence stays stable across surfaces and
 // across JSON key arrivals. Boolean toggles render label-only (no colon);
-// every other field renders as `${label}: ${value}`. The gateway's
-// `formatAliasRulesSummary` uses its own labels for the parenthesized
-// display-name suffix — the two surfaces deliberately diverge so the suffix
-// stays compact while the badge view stays self-describing.
+// every other field renders as `${label}: ${value}`. The inline-prose form
+// (`composeAliasDisplayName`'s suffix and `formatAliasRulesInline`) uses its
+// own compact wording — the two surfaces deliberately diverge so the inline
+// summary stays compact while the badge view stays self-describing.
 export interface AliasRuleBadge {
   label: string;
   value?: string;
@@ -186,26 +190,15 @@ export const formatAliasRuleBadges = (rules: PublicModelAliasedFrom['rules']): A
   return out;
 };
 
-// Compose the alias-local display name — what the operator named the alias
-// (when set) or a synthesized target + rules summary. Independent of which
-// upstream is surfacing the alias; the prefixed listing form prepends the
-// upstream display name at the call site, mirroring the real-model path in
-// the gateway's provider registry.
-//
-// The synthesized form's parenthesized rules suffix uses the compact
-// `value label` wording so it fits alongside the target name in narrow
-// listings — the dashboard's per-badge view uses `formatAliasRuleBadges`
-// for the self-describing `label: value` form. `anthropicBeta` tokens are
-// sorted so two operators carrying the same set in different orders see
-// the same label.
-export const composeAliasDisplayName = (input: {
-  aliasDisplayName?: string;
-  targetDisplayName: string;
-  rules: PublicModelAliasedFrom['rules'];
-}): string => {
-  if (input.aliasDisplayName !== undefined) return input.aliasDisplayName;
+// Inline-prose parts for an alias's rules, in a deterministic order. Each
+// entry uses the compact `value label` wording (e.g. `low effort`,
+// `4096tk reasoning`) so it fits both alongside the target name in narrow
+// listings and on its own as a standalone summary line. The dashboard's
+// per-badge view uses `formatAliasRuleBadges` for the self-describing
+// `label: value` form. `anthropicBeta` tokens are sorted so two operators
+// carrying the same set in different orders see the same label.
+const aliasRulesInlineParts = (rules: PublicModelAliasedFrom['rules']): string[] => {
   const parts: string[] = [];
-  const { rules } = input;
   if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`);
   if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`);
   if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning');
@@ -215,10 +208,33 @@ export const composeAliasDisplayName = (input: {
   if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
     parts.push([...rules.anthropicBeta].sort().join('/'));
   }
+  return parts;
+};
+
+// Compose the alias-local display name — what the operator named the alias
+// (when set) or a synthesized target + rules summary. Independent of which
+// upstream is surfacing the alias; the prefixed listing form prepends the
+// upstream display name at the call site, mirroring the real-model path in
+// the gateway's provider registry. The parenthesized rules suffix shares
+// its parts with `formatAliasRulesInline` so the two surfaces never drift.
+export const composeAliasDisplayName = (input: {
+  aliasDisplayName?: string;
+  targetDisplayName: string;
+  rules: PublicModelAliasedFrom['rules'];
+}): string => {
+  if (input.aliasDisplayName !== undefined) return input.aliasDisplayName;
+  const parts = aliasRulesInlineParts(input.rules);
   const suffix = parts.length > 0 ? ` (${parts.join(', ')})` : '';
   return `${input.targetDisplayName}${suffix}`;
 };
 
+// Joined rules summary without the parentheses — what the dashboard's alias
+// row renders on its third line. Empty string when no rule applies; callers
+// should drop the line entirely in that case rather than rendering blank.
+export const formatAliasRulesInline = (rules: PublicModelAliasedFrom['rules']): string => {
+  return aliasRulesInlineParts(rules).join(', ');
+};
+
 export interface PublicModelsResponse {
   // OpenAI container
   object: 'list';
diff --git a/packages/protocols/src/common/models_alias-display_test.ts b/packages/protocols/src/common/models_alias-display_test.ts
index 21473a3a4..7c7d4c49c 100644
--- a/packages/protocols/src/common/models_alias-display_test.ts
+++ b/packages/protocols/src/common/models_alias-display_test.ts
@@ -1,6 +1,6 @@
 import { describe, expect, test } from 'vitest';
 
-import { composeAliasDisplayName } from './models.ts';
+import { composeAliasDisplayName, formatAliasRulesInline } from './models.ts';
 
 describe('composeAliasDisplayName', () => {
   test('uses alias displayName when set, suppressing the rules summary', () => {
@@ -69,3 +69,32 @@ describe('composeAliasDisplayName', () => {
     ).toBe('GPT-5.4 (low effort, concise summary, high verbosity, flex tier)');
   });
 });
+
+describe('formatAliasRulesInline', () => {
+  test('returns empty string when no rule applies', () => {
+    expect(formatAliasRulesInline({})).toBe('');
+  });
+
+  test('returns each rule field with the same compact wording as the parenthesized suffix, sans parens', () => {
+    expect(formatAliasRulesInline({ reasoning: { effort: 'low' } })).toBe('low effort');
+    expect(formatAliasRulesInline({ reasoning: { budgetTokens: 4096 } })).toBe('4096tk reasoning');
+    expect(formatAliasRulesInline({ reasoning: { adaptive: true } })).toBe('adaptive reasoning');
+    expect(formatAliasRulesInline({ reasoning: { summary: 'detailed' } })).toBe('detailed summary');
+  });
+
+  test('joins multiple fields with comma in the same order composeAliasDisplayName uses', () => {
+    expect(
+      formatAliasRulesInline({
+        reasoning: { effort: 'low', summary: 'detailed' },
+        verbosity: 'high',
+        serviceTier: 'fast',
+      }),
+    ).toBe('low effort, detailed summary, high verbosity, fast tier');
+  });
+
+  test('sorts anthropicBeta tokens and joins with slashes', () => {
+    expect(
+      formatAliasRulesInline({ anthropicBeta: ['fast-mode-2026-02-01', 'extended-thinking'] }),
+    ).toBe('extended-thinking/fast-mode-2026-02-01');
+  });
+});

From 6045ab8b36d2bb679d31f94c6013044823a29bf3 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 13:54:04 +0800
Subject: [PATCH 042/170] refactor(web): alias model rows render as 3-line text
 block in /dashboard/models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The badge cluster the row used to grow for every alias rule made it look
like a real catalog entry with a long list of capabilities, when the
information was actually the alias's own definition. Switching to a
left-justified text block makes the relationship explicit:

- Line 1 — the operator-set display name (omitted when not set, since
  the second and third lines already convey everything the synthesized
  fallback name would, so repeating it would be redundant).
- Line 2 — the id mapping `<alias-id> → <target-model-id>`, alias id in
  white and target muted so the eye lands on the public name first.
- Line 3 — the inline rules summary from `formatAliasRulesInline`,
  omitted when the alias has no rules.

The upstream pills and context/prompt/output limit badges are dropped on
alias rows on purpose — every value there belongs to the target row, and
duplicating it on the alias muddied which limits are policy vs. inherited.
Real-model rows keep the current heading + id + badges layout verbatim.

The Clear button stays on the right-hand side of the row; the chat
playground below is unchanged.
---
 .../src/components/models/ModelInfoBar.vue    |  85 +++++++-------
 .../components/models/ModelInfoBar_test.ts    | 110 ++++++++++++++++++
 2 files changed, 154 insertions(+), 41 deletions(-)
 create mode 100644 apps/web/src/components/models/ModelInfoBar_test.ts

diff --git a/apps/web/src/components/models/ModelInfoBar.vue b/apps/web/src/components/models/ModelInfoBar.vue
index a70fa7f93..5e4d52262 100644
--- a/apps/web/src/components/models/ModelInfoBar.vue
+++ b/apps/web/src/components/models/ModelInfoBar.vue
@@ -1,9 +1,11 @@
 <script setup lang="ts">
-import type { ControlPlaneModel, PublicModelAliasedFrom } from '../../api/types.ts';
+import { computed } from 'vue';
+
+import type { ControlPlaneModel } from '../../api/types.ts';
 import { providerBadgeClass, providerMeta } from '../upstreams/provider-meta.ts';
-import { formatAliasRuleBadges, type AliasRuleBadge } from '@floway-dev/protocols/common';
+import { formatAliasRulesInline } from '@floway-dev/protocols/common';
 
-defineProps<{
+const props = defineProps<{
   model: ControlPlaneModel;
 }>();
 
@@ -15,50 +17,51 @@ const formatTokenLimit = (n: number) => {
   return n.toString();
 };
 
-// `alias of: <target>` always leads the alias badge sequence so the operator
-// reading the row sees what the alias resolves to before scanning the rule
-// pills. The rule badges follow in the order `formatAliasRuleBadges`
-// returns, keeping dashboard and any future alias-aware tooling in lockstep.
-const aliasBadges = (aliasedFrom: PublicModelAliasedFrom): AliasRuleBadge[] => [
-  { label: 'alias of', value: aliasedFrom.targetModelId },
-  ...formatAliasRuleBadges(aliasedFrom.rules),
-];
+const rulesInline = computed(() => props.model.aliasedFrom ? formatAliasRulesInline(props.model.aliasedFrom.rules) : '');
 </script>
 
 <template>
   <div class="shrink-0 p-4 border-b border-white/[0.06]">
     <div class="flex items-center justify-between gap-4">
       <div class="min-w-0 flex-1">
-        <div class="flex flex-wrap items-center gap-x-2">
-          <h3 class="text-sm font-semibold text-white">{{ model.display_name ?? model.id }}</h3>
-          <span
-            v-if="(model.display_name ?? model.id) !== model.id"
-            class="font-mono text-[11px] text-gray-500 break-all"
-          >{{ model.id }}</span>
-        </div>
-        <div class="flex flex-wrap gap-1.5 mt-2">
-          <span
-            v-for="binding in model.upstreams"
-            :key="binding.id"
-            class="text-[10px] font-semibold px-2 py-0.5 rounded-full border"
-            :class="providerBadgeClass(binding.kind)"
-            :title="providerMeta(binding.kind).label + ' · ' + binding.name"
-          >{{ binding.name }}</span>
-          <span v-if="model.limits?.max_context_window_tokens" class="text-[10px] font-mono px-2 py-0.5 rounded-full bg-surface-600 text-gray-400">
-            context: {{ formatTokenLimit(model.limits.max_context_window_tokens) }}
-          </span>
-          <span v-if="model.limits?.max_prompt_tokens" class="text-[10px] font-mono px-2 py-0.5 rounded-full bg-surface-600 text-gray-400">
-            prompt: {{ formatTokenLimit(model.limits.max_prompt_tokens) }}
-          </span>
-          <span v-if="model.limits?.max_output_tokens" class="text-[10px] font-mono px-2 py-0.5 rounded-full bg-surface-600 text-gray-400">
-            output: {{ formatTokenLimit(model.limits.max_output_tokens) }}
-          </span>
-          <span
-            v-for="badge in (model.aliasedFrom ? aliasBadges(model.aliasedFrom) : [])"
-            :key="`${badge.label}:${badge.value ?? ''}`"
-            class="text-[10px] font-mono px-2 py-0.5 rounded-full border border-white/[0.08] text-gray-400"
-          >{{ badge.label }}{{ badge.value !== undefined ? `: ${badge.value}` : '' }}</span>
-        </div>
+        <template v-if="model.aliasedFrom">
+          <div class="flex flex-col gap-1">
+            <h3 v-if="model.aliasedFrom.displayName" class="text-sm font-semibold text-white">{{ model.aliasedFrom.displayName }}</h3>
+            <p class="font-mono text-sm flex flex-wrap items-center gap-x-2">
+              <span class="text-white break-all">{{ model.id }}</span>
+              <span class="text-gray-600">→</span>
+              <span class="text-gray-500 break-all">{{ model.aliasedFrom.targetModelId }}</span>
+            </p>
+            <p v-if="rulesInline" class="text-xs text-gray-500">{{ rulesInline }}</p>
+          </div>
+        </template>
+        <template v-else>
+          <div class="flex flex-wrap items-center gap-x-2">
+            <h3 class="text-sm font-semibold text-white">{{ model.display_name ?? model.id }}</h3>
+            <span
+              v-if="(model.display_name ?? model.id) !== model.id"
+              class="font-mono text-[11px] text-gray-500 break-all"
+            >{{ model.id }}</span>
+          </div>
+          <div class="flex flex-wrap gap-1.5 mt-2">
+            <span
+              v-for="binding in model.upstreams"
+              :key="binding.id"
+              class="text-[10px] font-semibold px-2 py-0.5 rounded-full border"
+              :class="providerBadgeClass(binding.kind)"
+              :title="providerMeta(binding.kind).label + ' · ' + binding.name"
+            >{{ binding.name }}</span>
+            <span v-if="model.limits?.max_context_window_tokens" class="text-[10px] font-mono px-2 py-0.5 rounded-full bg-surface-600 text-gray-400">
+              context: {{ formatTokenLimit(model.limits.max_context_window_tokens) }}
+            </span>
+            <span v-if="model.limits?.max_prompt_tokens" class="text-[10px] font-mono px-2 py-0.5 rounded-full bg-surface-600 text-gray-400">
+              prompt: {{ formatTokenLimit(model.limits.max_prompt_tokens) }}
+            </span>
+            <span v-if="model.limits?.max_output_tokens" class="text-[10px] font-mono px-2 py-0.5 rounded-full bg-surface-600 text-gray-400">
+              output: {{ formatTokenLimit(model.limits.max_output_tokens) }}
+            </span>
+          </div>
+        </template>
       </div>
       <button class="btn-ghost text-[11px] flex shrink-0 items-center gap-1" @click="$emit('clear')">
         <svg class="w-3 h-3" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
diff --git a/apps/web/src/components/models/ModelInfoBar_test.ts b/apps/web/src/components/models/ModelInfoBar_test.ts
new file mode 100644
index 000000000..ac8d62f5a
--- /dev/null
+++ b/apps/web/src/components/models/ModelInfoBar_test.ts
@@ -0,0 +1,110 @@
+// @vitest-environment happy-dom
+
+import { mount } from '@vue/test-utils';
+import { describe, expect, test } from 'vitest';
+
+import ModelInfoBar from './ModelInfoBar.vue';
+import type { ControlPlaneModel } from '../../api/types.ts';
+
+const realModel: ControlPlaneModel = {
+  id: 'gpt-5.4',
+  display_name: 'GPT-5.4',
+  kind: 'chat',
+  limits: { max_context_window_tokens: 200_000, max_output_tokens: 16_384 },
+  upstreams: [{ id: 'up_oai', kind: 'custom', name: 'OpenAI' }],
+};
+
+const aliasModel: ControlPlaneModel = {
+  id: 'codex-auto-review',
+  display_name: 'Codex Auto Review',
+  kind: 'chat',
+  limits: { max_context_window_tokens: 200_000, max_output_tokens: 16_384 },
+  upstreams: [{ id: 'up_oai', kind: 'custom', name: 'OpenAI' }],
+  aliasedFrom: {
+    targetModelId: 'gpt-5.4',
+    upstreamIds: [],
+    rules: { reasoning: { effort: 'low' } },
+    onConflict: 'real-only',
+    displayName: 'Codex Auto Review',
+  },
+};
+
+describe('ModelInfoBar', () => {
+  describe('real-model row (no aliasedFrom)', () => {
+    test('renders the display-name heading + upstream + limit badges', () => {
+      const wrapper = mount(ModelInfoBar, { props: { model: realModel } });
+      const text = wrapper.text();
+      expect(text).toContain('GPT-5.4');
+      expect(text).toContain('OpenAI');
+      expect(text).toContain('context:');
+      expect(text).toContain('output:');
+    });
+
+    test('does not render the alias-prose summary line', () => {
+      const wrapper = mount(ModelInfoBar, { props: { model: realModel } });
+      // The phrase "low effort" is uniquely produced by the alias path; its
+      // absence on a real-model row guards against the alias branch leaking.
+      expect(wrapper.text()).not.toContain('low effort');
+      expect(wrapper.text()).not.toContain('→');
+    });
+  });
+
+  describe('alias row', () => {
+    test('renders the operator-set displayName as a heading when present', () => {
+      const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
+      const headings = wrapper.findAll('h3');
+      expect(headings).toHaveLength(1);
+      expect(headings[0].text()).toBe('Codex Auto Review');
+    });
+
+    test('omits the heading when displayName is missing', () => {
+      const without: ControlPlaneModel = {
+        ...aliasModel,
+        aliasedFrom: { ...aliasModel.aliasedFrom!, displayName: undefined },
+      };
+      const wrapper = mount(ModelInfoBar, { props: { model: without } });
+      expect(wrapper.findAll('h3')).toHaveLength(0);
+    });
+
+    test('renders the id mapping with the alias id emphasised and target muted', () => {
+      const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
+      const aliasSpan = wrapper.get('.text-white.break-all');
+      const targetSpan = wrapper.get('.text-gray-500.break-all');
+      expect(aliasSpan.text()).toBe('codex-auto-review');
+      expect(targetSpan.text()).toBe('gpt-5.4');
+      // The arrow lives between them.
+      expect(wrapper.text()).toContain('→');
+    });
+
+    test('renders the rules summary on a third line when rules apply', () => {
+      const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
+      const paragraphs = wrapper.findAll('p');
+      expect(paragraphs).toHaveLength(2);
+      expect(paragraphs[1].text()).toBe('low effort');
+      expect(paragraphs[1].classes()).toContain('text-xs');
+      expect(paragraphs[1].classes()).toContain('text-gray-500');
+    });
+
+    test('omits the rules summary line when no rule applies', () => {
+      const empty: ControlPlaneModel = {
+        ...aliasModel,
+        aliasedFrom: { ...aliasModel.aliasedFrom!, rules: {} },
+      };
+      const wrapper = mount(ModelInfoBar, { props: { model: empty } });
+      expect(wrapper.findAll('p')).toHaveLength(1);
+    });
+
+    test('drops the upstream and limit badges that the real-model path renders', () => {
+      const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
+      const text = wrapper.text();
+      expect(text).not.toContain('OpenAI');
+      expect(text).not.toContain('context:');
+      expect(text).not.toContain('output:');
+    });
+
+    test('keeps the Clear button', () => {
+      const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
+      expect(wrapper.text()).toContain('Clear');
+    });
+  });
+});

From 1989a38520e3e7dd8e5c86111061c6caf540721d Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 14:09:02 +0800
Subject: [PATCH 043/170] refactor(web): AliasRow renders as 3-line text block,
 matching the /models row
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mirror the dashboard Models row layout the operator landed on: drop the
right-side effort/upstream/visibility badges; render the row as three
left-justified lines — operator-set display name (skipped when the
alias has no displayName), `<alias-id> → <target-model-id>` mapping
with the alias id emphasised, and `formatAliasRulesInline` summary.
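The hidden-from-/v1/models hint moves from a right-side badge to a small
amber line at the bottom of the text block (below the mapping and, when
present, the rules summary), shown only when the alias is hidden.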
---
 apps/web/src/components/settings/AliasRow.vue | 52 ++++++-------------
 .../src/components/settings/AliasRow_test.ts  | 30 +++++++----
 2 files changed, 36 insertions(+), 46 deletions(-)

diff --git a/apps/web/src/components/settings/AliasRow.vue b/apps/web/src/components/settings/AliasRow.vue
index b00f36348..e4899dc5c 100644
--- a/apps/web/src/components/settings/AliasRow.vue
+++ b/apps/web/src/components/settings/AliasRow.vue
@@ -2,7 +2,7 @@
 import { computed } from 'vue';
 
 import type { ModelAlias } from '../../api/types.ts';
-import { formatAliasRuleBadges } from '@floway-dev/protocols/common';
+import { formatAliasRulesInline } from '@floway-dev/protocols/common';
 
 const props = defineProps<{
   alias: ModelAlias;
@@ -13,47 +13,25 @@ defineEmits<{
   delete: [];
 }>();
 
-// Effective label: operator-set display name when present, otherwise fall
-// back to the alias id itself. The "→ target" annotation is rendered
-// alongside the label rather than substituted in so an operator who picks a
-// long display name still sees what the alias points at.
-const labelText = computed(() => props.alias.display_name ?? props.alias.alias);
-
-const badges = computed(() => formatAliasRuleBadges(props.alias.rules));
+const rulesInline = computed(() => formatAliasRulesInline(props.alias.rules));
 </script>
 
 <template>
-  <div class="flex items-center gap-3 rounded-lg border border-white/5 bg-surface-800/80 px-3 py-2">
-    <div class="min-w-0 flex-1 truncate">
-      <span class="font-mono text-xs text-gray-500">{{ alias.alias }}</span>
-      <span class="ml-2 text-sm font-semibold text-white">{{ labelText }}</span>
-      <span class="ml-2 text-xs text-gray-500">&rarr; {{ alias.target_model_id }}</span>
-    </div>
-
-    <div v-if="alias.upstream_ids.length > 0" class="hidden shrink-0 items-center gap-1 sm:flex">
-      <span
-        v-for="id in alias.upstream_ids"
-        :key="id"
-        class="rounded border border-white/10 bg-white/[0.02] px-1.5 py-0.5 font-mono text-[10px] text-gray-400"
-      >{{ id }}</span>
+  <div class="flex items-start gap-3 rounded-lg border border-white/5 bg-surface-800/80 px-3 py-2.5">
+    <div class="min-w-0 flex-1 flex flex-col gap-1">
+      <h4 v-if="alias.display_name" class="text-sm font-semibold text-white">{{ alias.display_name }}</h4>
+      <p class="font-mono text-sm flex flex-wrap items-center gap-x-2">
+        <span class="text-white break-all">{{ alias.alias }}</span>
+        <span class="text-gray-600">&rarr;</span>
+        <span class="text-gray-500 break-all">{{ alias.target_model_id }}</span>
+      </p>
+      <p v-if="rulesInline" class="text-xs text-gray-500">{{ rulesInline }}</p>
+      <p
+        v-if="!alias.visible_in_models_list"
+        class="text-[10px] uppercase tracking-wide text-amber-300"
+      >hidden from <code class="font-mono normal-case">/v1/models</code></p>
     </div>
 
-    <div v-if="badges.length > 0" class="hidden shrink-0 items-center gap-1 sm:flex">
-      <span
-        v-for="badge in badges"
-        :key="badge.label"
-        class="rounded border border-white/10 bg-white/[0.02] px-1.5 py-0.5 text-[10px] uppercase tracking-wide text-gray-400"
-      >
-        {{ badge.label }}<template v-if="badge.value !== undefined">: <span class="text-gray-300 normal-case">{{ badge.value }}</span></template>
-      </span>
-    </div>
-
-    <span
-      v-if="!alias.visible_in_models_list"
-      class="hidden shrink-0 rounded border border-amber-500/30 bg-amber-500/10 px-1.5 py-0.5 text-[10px] uppercase tracking-wide text-amber-300 sm:inline"
-      title="Hidden from /v1/models"
-    >hidden</span>
-
     <div class="flex shrink-0 items-center gap-1">
       <button
         type="button"
diff --git a/apps/web/src/components/settings/AliasRow_test.ts b/apps/web/src/components/settings/AliasRow_test.ts
index 7e1a6ea60..988065fc6 100644
--- a/apps/web/src/components/settings/AliasRow_test.ts
+++ b/apps/web/src/components/settings/AliasRow_test.ts
@@ -40,13 +40,18 @@ describe('AliasRow', () => {
     expect(wrapper.text()).not.toContain('alias-only');
   });
 
-  test('renders upstream-id pills when the alias whitelists upstreams', () => {
+  test('does not render per-upstream pills (upstream restriction is shown in the edit dialog instead)', () => {
+    // The row used to surface alias.upstream_ids as small font-mono pills.
+    // Operator feedback pushed all alias detail beyond the 3-line text block
+    // (display name, id mapping, rules summary) into the edit dialog so the
+    // listing reads at a glance. Assert the upstream id strings stay out of
+    // the row text — they live in the dialog now.
     const wrapper = mount(AliasRow, {
       props: { alias: { ...baseAlias, upstream_ids: ['up_anth', 'up_oai'] } },
     });
     const text = wrapper.text();
-    expect(text).toContain('up_anth');
-    expect(text).toContain('up_oai');
+    expect(text).not.toContain('up_anth');
+    expect(text).not.toContain('up_oai');
   });
 
   test('falls back to alias name when display_name is null', () => {
@@ -70,7 +75,7 @@ describe('AliasRow', () => {
     expect(wrapper.text()).toContain('hidden');
   });
 
-  test('renders one rule badge per active rule field', () => {
+  test('renders the rules summary inline as one comma-joined line', () => {
     const wrapper = mount(AliasRow, {
       props: {
         alias: {
@@ -79,11 +84,18 @@ describe('AliasRow', () => {
         },
       },
     });
-    // formatAliasRuleBadges drives the order: effort, verbosity, service tier.
-    const text = wrapper.text();
-    expect(text).toContain('effort: high');
-    expect(text).toContain('verbosity: low');
-    expect(text).toContain('service tier: priority');
+    // formatAliasRulesInline produces "value label, ..." in the same order
+    // composeAliasDisplayName uses for its parenthesized suffix.
+    expect(wrapper.text()).toContain('high effort, low verbosity, priority tier');
+  });
+
+  test('omits the rules summary line entirely when no rules are set', () => {
+    const wrapper = mount(AliasRow, {
+      props: { alias: { ...baseAlias, rules: {} } },
+    });
+    expect(wrapper.text()).not.toContain('effort');
+    expect(wrapper.text()).not.toContain('verbosity');
+    expect(wrapper.text()).not.toContain('tier');
   });
 });
 

From 6f616054a095563dd013b010338f4ab173346330 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 14:12:38 +0800
Subject: [PATCH 044/170] style(web): match alias rules-summary font size to
 the id-mapping line above

---
 apps/web/src/components/settings/AliasRow.vue | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apps/web/src/components/settings/AliasRow.vue b/apps/web/src/components/settings/AliasRow.vue
index e4899dc5c..913455ad1 100644
--- a/apps/web/src/components/settings/AliasRow.vue
+++ b/apps/web/src/components/settings/AliasRow.vue
@@ -25,7 +25,7 @@ const rulesInline = computed(() => formatAliasRulesInline(props.alias.rules));
         <span class="text-gray-600">&rarr;</span>
         <span class="text-gray-500 break-all">{{ alias.target_model_id }}</span>
       </p>
-      <p v-if="rulesInline" class="text-xs text-gray-500">{{ rulesInline }}</p>
+      <p v-if="rulesInline" class="text-sm text-gray-500">{{ rulesInline }}</p>
       <p
         v-if="!alias.visible_in_models_list"
         class="text-[10px] uppercase tracking-wide text-amber-300"

From d43f6d826ba585ae648a31482a10f3f9b254469b Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 14:13:53 +0800
Subject: [PATCH 045/170] Revert "refactor(web): alias model rows render as
 3-line text block in /dashboard/models"

This reverts commit 6045ab8b36d2bb679d31f94c6013044823a29bf3.
---
 .../src/components/models/ModelInfoBar.vue    |  85 +++++++-------
 .../components/models/ModelInfoBar_test.ts    | 110 ------------------
 2 files changed, 41 insertions(+), 154 deletions(-)
 delete mode 100644 apps/web/src/components/models/ModelInfoBar_test.ts

diff --git a/apps/web/src/components/models/ModelInfoBar.vue b/apps/web/src/components/models/ModelInfoBar.vue
index 5e4d52262..a70fa7f93 100644
--- a/apps/web/src/components/models/ModelInfoBar.vue
+++ b/apps/web/src/components/models/ModelInfoBar.vue
@@ -1,11 +1,9 @@
 <script setup lang="ts">
-import { computed } from 'vue';
-
-import type { ControlPlaneModel } from '../../api/types.ts';
+import type { ControlPlaneModel, PublicModelAliasedFrom } from '../../api/types.ts';
 import { providerBadgeClass, providerMeta } from '../upstreams/provider-meta.ts';
-import { formatAliasRulesInline } from '@floway-dev/protocols/common';
+import { formatAliasRuleBadges, type AliasRuleBadge } from '@floway-dev/protocols/common';
 
-const props = defineProps<{
+defineProps<{
   model: ControlPlaneModel;
 }>();
 
@@ -17,51 +15,50 @@ const formatTokenLimit = (n: number) => {
   return n.toString();
 };
 
-const rulesInline = computed(() => props.model.aliasedFrom ? formatAliasRulesInline(props.model.aliasedFrom.rules) : '');
+// `alias of: <target>` always leads the alias badge sequence so the operator
+// reading the row sees what the alias resolves to before scanning the rule
+// pills. The rule badges follow in the order `formatAliasRuleBadges`
+// returns, keeping dashboard and any future alias-aware tooling in lockstep.
+const aliasBadges = (aliasedFrom: PublicModelAliasedFrom): AliasRuleBadge[] => [
+  { label: 'alias of', value: aliasedFrom.targetModelId },
+  ...formatAliasRuleBadges(aliasedFrom.rules),
+];
 </script>
 
 <template>
   <div class="shrink-0 p-4 border-b border-white/[0.06]">
     <div class="flex items-center justify-between gap-4">
       <div class="min-w-0 flex-1">
-        <template v-if="model.aliasedFrom">
-          <div class="flex flex-col gap-1">
-            <h3 v-if="model.aliasedFrom.displayName" class="text-sm font-semibold text-white">{{ model.aliasedFrom.displayName }}</h3>
-            <p class="font-mono text-sm flex flex-wrap items-center gap-x-2">
-              <span class="text-white break-all">{{ model.id }}</span>
-              <span class="text-gray-600">→</span>
-              <span class="text-gray-500 break-all">{{ model.aliasedFrom.targetModelId }}</span>
-            </p>
-            <p v-if="rulesInline" class="text-xs text-gray-500">{{ rulesInline }}</p>
-          </div>
-        </template>
-        <template v-else>
-          <div class="flex flex-wrap items-center gap-x-2">
-            <h3 class="text-sm font-semibold text-white">{{ model.display_name ?? model.id }}</h3>
-            <span
-              v-if="(model.display_name ?? model.id) !== model.id"
-              class="font-mono text-[11px] text-gray-500 break-all"
-            >{{ model.id }}</span>
-          </div>
-          <div class="flex flex-wrap gap-1.5 mt-2">
-            <span
-              v-for="binding in model.upstreams"
-              :key="binding.id"
-              class="text-[10px] font-semibold px-2 py-0.5 rounded-full border"
-              :class="providerBadgeClass(binding.kind)"
-              :title="providerMeta(binding.kind).label + ' · ' + binding.name"
-            >{{ binding.name }}</span>
-            <span v-if="model.limits?.max_context_window_tokens" class="text-[10px] font-mono px-2 py-0.5 rounded-full bg-surface-600 text-gray-400">
-              context: {{ formatTokenLimit(model.limits.max_context_window_tokens) }}
-            </span>
-            <span v-if="model.limits?.max_prompt_tokens" class="text-[10px] font-mono px-2 py-0.5 rounded-full bg-surface-600 text-gray-400">
-              prompt: {{ formatTokenLimit(model.limits.max_prompt_tokens) }}
-            </span>
-            <span v-if="model.limits?.max_output_tokens" class="text-[10px] font-mono px-2 py-0.5 rounded-full bg-surface-600 text-gray-400">
-              output: {{ formatTokenLimit(model.limits.max_output_tokens) }}
-            </span>
-          </div>
-        </template>
+        <div class="flex flex-wrap items-center gap-x-2">
+          <h3 class="text-sm font-semibold text-white">{{ model.display_name ?? model.id }}</h3>
+          <span
+            v-if="(model.display_name ?? model.id) !== model.id"
+            class="font-mono text-[11px] text-gray-500 break-all"
+          >{{ model.id }}</span>
+        </div>
+        <div class="flex flex-wrap gap-1.5 mt-2">
+          <span
+            v-for="binding in model.upstreams"
+            :key="binding.id"
+            class="text-[10px] font-semibold px-2 py-0.5 rounded-full border"
+            :class="providerBadgeClass(binding.kind)"
+            :title="providerMeta(binding.kind).label + ' · ' + binding.name"
+          >{{ binding.name }}</span>
+          <span v-if="model.limits?.max_context_window_tokens" class="text-[10px] font-mono px-2 py-0.5 rounded-full bg-surface-600 text-gray-400">
+            context: {{ formatTokenLimit(model.limits.max_context_window_tokens) }}
+          </span>
+          <span v-if="model.limits?.max_prompt_tokens" class="text-[10px] font-mono px-2 py-0.5 rounded-full bg-surface-600 text-gray-400">
+            prompt: {{ formatTokenLimit(model.limits.max_prompt_tokens) }}
+          </span>
+          <span v-if="model.limits?.max_output_tokens" class="text-[10px] font-mono px-2 py-0.5 rounded-full bg-surface-600 text-gray-400">
+            output: {{ formatTokenLimit(model.limits.max_output_tokens) }}
+          </span>
+          <span
+            v-for="badge in (model.aliasedFrom ? aliasBadges(model.aliasedFrom) : [])"
+            :key="`${badge.label}:${badge.value ?? ''}`"
+            class="text-[10px] font-mono px-2 py-0.5 rounded-full border border-white/[0.08] text-gray-400"
+          >{{ badge.label }}{{ badge.value !== undefined ? `: ${badge.value}` : '' }}</span>
+        </div>
       </div>
       <button class="btn-ghost text-[11px] flex shrink-0 items-center gap-1" @click="$emit('clear')">
         <svg class="w-3 h-3" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
diff --git a/apps/web/src/components/models/ModelInfoBar_test.ts b/apps/web/src/components/models/ModelInfoBar_test.ts
deleted file mode 100644
index ac8d62f5a..000000000
--- a/apps/web/src/components/models/ModelInfoBar_test.ts
+++ /dev/null
@@ -1,110 +0,0 @@
-// @vitest-environment happy-dom
-
-import { mount } from '@vue/test-utils';
-import { describe, expect, test } from 'vitest';
-
-import ModelInfoBar from './ModelInfoBar.vue';
-import type { ControlPlaneModel } from '../../api/types.ts';
-
-const realModel: ControlPlaneModel = {
-  id: 'gpt-5.4',
-  display_name: 'GPT-5.4',
-  kind: 'chat',
-  limits: { max_context_window_tokens: 200_000, max_output_tokens: 16_384 },
-  upstreams: [{ id: 'up_oai', kind: 'custom', name: 'OpenAI' }],
-};
-
-const aliasModel: ControlPlaneModel = {
-  id: 'codex-auto-review',
-  display_name: 'Codex Auto Review',
-  kind: 'chat',
-  limits: { max_context_window_tokens: 200_000, max_output_tokens: 16_384 },
-  upstreams: [{ id: 'up_oai', kind: 'custom', name: 'OpenAI' }],
-  aliasedFrom: {
-    targetModelId: 'gpt-5.4',
-    upstreamIds: [],
-    rules: { reasoning: { effort: 'low' } },
-    onConflict: 'real-only',
-    displayName: 'Codex Auto Review',
-  },
-};
-
-describe('ModelInfoBar', () => {
-  describe('real-model row (no aliasedFrom)', () => {
-    test('renders the display-name heading + upstream + limit badges', () => {
-      const wrapper = mount(ModelInfoBar, { props: { model: realModel } });
-      const text = wrapper.text();
-      expect(text).toContain('GPT-5.4');
-      expect(text).toContain('OpenAI');
-      expect(text).toContain('context:');
-      expect(text).toContain('output:');
-    });
-
-    test('does not render the alias-prose summary line', () => {
-      const wrapper = mount(ModelInfoBar, { props: { model: realModel } });
-      // The phrase "low effort" is uniquely produced by the alias path; its
-      // absence on a real-model row guards against the alias branch leaking.
-      expect(wrapper.text()).not.toContain('low effort');
-      expect(wrapper.text()).not.toContain('→');
-    });
-  });
-
-  describe('alias row', () => {
-    test('renders the operator-set displayName as a heading when present', () => {
-      const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
-      const headings = wrapper.findAll('h3');
-      expect(headings).toHaveLength(1);
-      expect(headings[0].text()).toBe('Codex Auto Review');
-    });
-
-    test('omits the heading when displayName is missing', () => {
-      const without: ControlPlaneModel = {
-        ...aliasModel,
-        aliasedFrom: { ...aliasModel.aliasedFrom!, displayName: undefined },
-      };
-      const wrapper = mount(ModelInfoBar, { props: { model: without } });
-      expect(wrapper.findAll('h3')).toHaveLength(0);
-    });
-
-    test('renders the id mapping with the alias id emphasised and target muted', () => {
-      const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
-      const aliasSpan = wrapper.get('.text-white.break-all');
-      const targetSpan = wrapper.get('.text-gray-500.break-all');
-      expect(aliasSpan.text()).toBe('codex-auto-review');
-      expect(targetSpan.text()).toBe('gpt-5.4');
-      // The arrow lives between them.
-      expect(wrapper.text()).toContain('→');
-    });
-
-    test('renders the rules summary on a third line when rules apply', () => {
-      const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
-      const paragraphs = wrapper.findAll('p');
-      expect(paragraphs).toHaveLength(2);
-      expect(paragraphs[1].text()).toBe('low effort');
-      expect(paragraphs[1].classes()).toContain('text-xs');
-      expect(paragraphs[1].classes()).toContain('text-gray-500');
-    });
-
-    test('omits the rules summary line when no rule applies', () => {
-      const empty: ControlPlaneModel = {
-        ...aliasModel,
-        aliasedFrom: { ...aliasModel.aliasedFrom!, rules: {} },
-      };
-      const wrapper = mount(ModelInfoBar, { props: { model: empty } });
-      expect(wrapper.findAll('p')).toHaveLength(1);
-    });
-
-    test('drops the upstream and limit badges that the real-model path renders', () => {
-      const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
-      const text = wrapper.text();
-      expect(text).not.toContain('OpenAI');
-      expect(text).not.toContain('context:');
-      expect(text).not.toContain('output:');
-    });
-
-    test('keeps the Clear button', () => {
-      const wrapper = mount(ModelInfoBar, { props: { model: aliasModel } });
-      expect(wrapper.text()).toContain('Clear');
-    });
-  });
-});

From 33373d2eba242ef89b26ab91459ccb0df1d2623d Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 14:14:39 +0800
Subject: [PATCH 046/170] style(web): shrink alias id-mapping and rules-summary
 to text-xs
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The first line is the title (display_name) and should be the largest.
The id mapping and the rules summary below it are subordinate detail —
both render at text-xs so the title stands out and the two helper lines
read as one block beneath it.
---
 apps/web/src/components/settings/AliasRow.vue | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/apps/web/src/components/settings/AliasRow.vue b/apps/web/src/components/settings/AliasRow.vue
index 913455ad1..cea5d37de 100644
--- a/apps/web/src/components/settings/AliasRow.vue
+++ b/apps/web/src/components/settings/AliasRow.vue
@@ -20,12 +20,12 @@ const rulesInline = computed(() => formatAliasRulesInline(props.alias.rules));
   <div class="flex items-start gap-3 rounded-lg border border-white/5 bg-surface-800/80 px-3 py-2.5">
     <div class="min-w-0 flex-1 flex flex-col gap-1">
       <h4 v-if="alias.display_name" class="text-sm font-semibold text-white">{{ alias.display_name }}</h4>
-      <p class="font-mono text-sm flex flex-wrap items-center gap-x-2">
+      <p class="font-mono text-xs flex flex-wrap items-center gap-x-2">
         <span class="text-white break-all">{{ alias.alias }}</span>
         <span class="text-gray-600">&rarr;</span>
         <span class="text-gray-500 break-all">{{ alias.target_model_id }}</span>
       </p>
-      <p v-if="rulesInline" class="text-sm text-gray-500">{{ rulesInline }}</p>
+      <p v-if="rulesInline" class="text-xs text-gray-500">{{ rulesInline }}</p>
       <p
         v-if="!alias.visible_in_models_list"
         class="text-[10px] uppercase tracking-wide text-amber-300"

From d7433ef95b46c2651f02cc2f77e42b86443e2c34 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 17:17:14 +0800
Subject: [PATCH 047/170] chore(aliases): scrap v1 implementation
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the v1 model-aliases feature in preparation for the v2 redesign
documented in docs/superpowers/specs/2026-06-26-model-aliases-design.md.
The v1 implementation grew an alias rule-application surface (effort,
budget, adaptive, summary, verbosity, serviceTier, anthropicBeta) that
the v2 spec rebuilds from a different angle; clearing v1 first keeps
the v2 changeset small and review-friendly.

Deleted:
- migrations 0046_model_aliases.sql and
  0047_model_aliases_display_name.sql (v2 will reuse the 0046 slot for
  its own schema; local dev databases are rebuilt fresh, no on-disk
  state is preserved across the v1 → v2 boundary).
- control-plane CRUD: routes, repo, types, serialize, tests.
- data-plane: model-aliases/apply, model-aliases/match,
  models/alias-listing.
- dashboard: useModelAliases composable, AliasEditDialog, AliasRow,
  AliasesSettingsCard, and their tests.

Surgical edits:
- protocols/common/models.ts loses PublicModelAliasedFrom and the
  alias-formatting helpers; ChatModelInfo stays (introduced for #115's
  per-model reasoning metadata, unrelated to aliases).
- repo: ModelAliasesRepo and its memory/SQL bindings dropped from the
  Repo contract; the InMemoryRepo test seam goes with them.
- registry: enumerateModelInterpretations and resolveModelForRequest
  return to their pre-alias signatures; getModelsForListing collapses
  into getModels — the catalog no longer fans out alias entries.
- chat/responses/messages/gemini/chat-completions serve + attempt paths
  stop loading aliases, stop applying rules, and stop staging the
  x-floway-alias response header; the staging helper and its companion
  responseHeaders bag on GatewayCtx are removed.
- shared/sanitize.ts drops the alias-aware trace context and the
  traceAllRulesDropped helper used by the passthrough surface.
- control-plane Hono entry and Zod schemas lose the /api/aliases route
  group and its bodies.
- dashboard PublicModel + ControlPlaneModel lose aliasedFrom;
  ModelInfoBar stops rendering alias badges; settings.vue drops the
  AliasesSettingsCard slot.

The translate-pair extension fields (thinking_budget, adaptive_thinking,
reasoning_summary, anthropic_beta) stay in place — they are protocol
extensions that flow regardless of alias source. The
applyAnthropicBetaToHeaders helper becomes unused by the gateway but
stays exported from packages/translate alongside the other Messages-via
helpers it ships with.

Verified: pnpm run lint and pnpm run typecheck both exit 0.
---
 apps/web/src/api/types.ts                     |   6 +-
 .../components/alias-edit/AliasEditDialog.vue | 446 -----------------
 .../alias-edit/AliasEditDialog_test.ts        | 205 --------
 .../src/components/models/ModelInfoBar.vue    |  17 +-
 apps/web/src/components/settings/AliasRow.vue |  62 ---
 .../src/components/settings/AliasRow_test.ts  | 159 ------
 .../settings/AliasesSettingsCard.vue          |  65 ---
 apps/web/src/composables/useModelAliases.ts   |  29 --
 apps/web/src/pages/dashboard/settings.vue     |  33 +-
 apps/web/vitest.config.ts                     |   2 +-
 .../gateway/migrations/0046_model_aliases.sql |  14 -
 .../0047_model_aliases_display_name.sql       |   4 -
 packages/gateway/package.json                 |   2 -
 .../src/control-plane/model-aliases/repo.ts   | 124 -----
 .../control-plane/model-aliases/repo_test.ts  | 172 -------
 .../src/control-plane/model-aliases/routes.ts |  87 ----
 .../model-aliases/routes_test.ts              | 354 -------------
 .../control-plane/model-aliases/serialize.ts  |  31 --
 .../src/control-plane/model-aliases/types.ts  |  34 --
 .../src/control-plane/models/routes.ts        |  18 +-
 .../src/control-plane/models/routes_test.ts   |  34 +-
 packages/gateway/src/control-plane/routes.ts  |   8 +-
 packages/gateway/src/control-plane/schemas.ts |  73 ---
 .../chat/chat-completions/attempt.ts          |   2 +-
 .../chat/chat-completions/attempt_test.ts     |   1 -
 .../demote-developer-to-system_test.ts        |   1 -
 .../demote-interleaved-system-to-user_test.ts |   1 -
 ...le-reasoning-on-forced-tool-choice_test.ts |   1 -
 .../include-usage-stream-options_test.ts      |   1 -
 .../interceptors/normalize-usage_test.ts      |   1 -
 .../vendor-deepseek-normalize_test.ts         |   1 -
 .../vendor-kimi-normalize_test.ts             |   1 -
 .../vendor-qwen-normalize_test.ts             |   1 -
 .../data-plane/chat/chat-completions/serve.ts |  11 -
 .../chat/chat-completions/serve_test.ts       |   1 -
 .../data-plane/chat/gemini/attempt_test.ts    |   1 -
 .../strip-safety-settings_test.ts             |   1 -
 .../strip-unsupported-part-fields_test.ts     |   1 -
 .../strip-unsupported-tools_test.ts           |   1 -
 .../suppress-thought-parts_test.ts            |   1 -
 .../data-plane/chat/gemini/respond_test.ts    |   1 -
 .../src/data-plane/chat/gemini/serve.ts       |  14 -
 .../src/data-plane/chat/gemini/serve_test.ts  |   1 -
 .../src/data-plane/chat/messages/attempt.ts   |  21 +-
 .../data-plane/chat/messages/attempt_test.ts  |   1 -
 .../src/data-plane/chat/messages/http_test.ts |  57 ---
 .../demote-interleaved-system-to-user_test.ts |   1 -
 ...le-reasoning-on-forced-tool-choice_test.ts |   1 -
 .../strip-billing-attribution_test.ts         |   1 -
 .../interceptors/web-search-shim_test.ts      |   1 -
 .../data-plane/chat/messages/respond_test.ts  |   1 -
 .../src/data-plane/chat/messages/serve.ts     |  19 -
 .../data-plane/chat/messages/serve_test.ts    |   1 -
 .../src/data-plane/chat/responses/attempt.ts  |   4 +-
 .../data-plane/chat/responses/attempt_test.ts |   1 -
 .../canonicalize-encrypted-content_test.ts    |   1 -
 .../demote-developer-to-system_test.ts        |   1 -
 .../demote-interleaved-system-to-user_test.ts |   1 -
 ...le-reasoning-on-forced-tool-choice_test.ts |   1 -
 .../interceptors/retry-cyber-policy_test.ts   |   1 -
 .../interceptors/server-tool-shim_test.ts     |   2 -
 .../image-generation-integration_test.ts      |   1 -
 .../server-tools/image-generation.ts          |   9 +-
 .../server-tools/image-generation_test.ts     |   1 -
 .../vendor-deepseek-normalize_test.ts         |   1 -
 .../vendor-qwen-normalize_test.ts             |   1 -
 .../data-plane/chat/responses/serve-prep.ts   |   3 -
 .../src/data-plane/chat/responses/serve.ts    |  13 -
 .../data-plane/chat/responses/serve_test.ts   |   1 -
 .../src/data-plane/chat/shared/candidates.ts  |  37 +-
 .../data-plane/chat/shared/candidates_test.ts |  12 -
 .../src/data-plane/chat/shared/gateway-ctx.ts |  38 +-
 .../data-plane/chat/shared/respond_test.ts    |   1 -
 .../src/data-plane/chat/shared/routing.ts     |  10 +-
 .../src/data-plane/chat/shared/sanitize.ts    |  36 +-
 .../data-plane/chat/shared/sanitize_test.ts   |  18 +-
 .../chat/shared/upstream-telemetry_test.ts    |   1 -
 .../src/data-plane/completions/serve_test.ts  |  47 --
 .../src/data-plane/embeddings/serve_test.ts   |  56 ---
 .../src/data-plane/images/serve_test.ts       |  60 ---
 .../src/data-plane/model-aliases/apply.ts     | 104 ----
 .../data-plane/model-aliases/apply_test.ts    | 203 --------
 .../src/data-plane/model-aliases/match.ts     |  15 -
 .../data-plane/model-aliases/match_test.ts    |  55 --
 .../src/data-plane/models/alias-listing.ts    | 139 ------
 .../gateway/src/data-plane/models/gemini.ts   |  21 +-
 .../src/data-plane/models/gemini_test.ts      |  46 --
 .../gateway/src/data-plane/models/load.ts     |  14 +-
 .../gateway/src/data-plane/models/serve.ts    |   4 +-
 .../src/data-plane/models/serve_test.ts       | 468 ------------------
 .../src/data-plane/providers/registry.ts      | 173 +------
 .../src/data-plane/providers/registry_test.ts | 210 +-------
 .../data-plane/shared/passthrough-serve.ts    |  21 +-
 packages/gateway/src/repo/memory.ts           |  53 --
 packages/gateway/src/repo/sql.ts              |  33 --
 packages/gateway/src/repo/types.ts            |  24 -
 .../gateway/src/test-helpers/gateway-ctx.ts   |  10 +-
 packages/protocols/src/common/models.ts       |  98 ----
 .../src/common/models_alias-display_test.ts   | 100 ----
 99 files changed, 77 insertions(+), 4199 deletions(-)
 delete mode 100644 apps/web/src/components/alias-edit/AliasEditDialog.vue
 delete mode 100644 apps/web/src/components/alias-edit/AliasEditDialog_test.ts
 delete mode 100644 apps/web/src/components/settings/AliasRow.vue
 delete mode 100644 apps/web/src/components/settings/AliasRow_test.ts
 delete mode 100644 apps/web/src/components/settings/AliasesSettingsCard.vue
 delete mode 100644 apps/web/src/composables/useModelAliases.ts
 delete mode 100644 packages/gateway/migrations/0046_model_aliases.sql
 delete mode 100644 packages/gateway/migrations/0047_model_aliases_display_name.sql
 delete mode 100644 packages/gateway/src/control-plane/model-aliases/repo.ts
 delete mode 100644 packages/gateway/src/control-plane/model-aliases/repo_test.ts
 delete mode 100644 packages/gateway/src/control-plane/model-aliases/routes.ts
 delete mode 100644 packages/gateway/src/control-plane/model-aliases/routes_test.ts
 delete mode 100644 packages/gateway/src/control-plane/model-aliases/serialize.ts
 delete mode 100644 packages/gateway/src/control-plane/model-aliases/types.ts
 delete mode 100644 packages/gateway/src/data-plane/model-aliases/apply.ts
 delete mode 100644 packages/gateway/src/data-plane/model-aliases/apply_test.ts
 delete mode 100644 packages/gateway/src/data-plane/model-aliases/match.ts
 delete mode 100644 packages/gateway/src/data-plane/model-aliases/match_test.ts
 delete mode 100644 packages/gateway/src/data-plane/models/alias-listing.ts
 delete mode 100644 packages/protocols/src/common/models_alias-display_test.ts

diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts
index 66216b812..088891e1b 100644
--- a/apps/web/src/api/types.ts
+++ b/apps/web/src/api/types.ts
@@ -6,11 +6,10 @@ import type {
   ModelEndpoints,
   ModelKind,
   ModelPricing,
-  PublicModelAliasedFrom,
 } from '@floway-dev/protocols/common';
 import type { AddressableForm, ModelPrefixConfig } from '@floway-dev/provider/model-prefix';
 
-export type { BillingDimension, ModelEndpointKey, ModelEndpoints, ModelKind, ModelPricing, PublicModelAliasedFrom };
+export type { BillingDimension, ModelEndpointKey, ModelEndpoints, ModelKind, ModelPricing };
 export type { AddressableForm, ModelPrefixConfig };
 
 export type UpstreamProviderKind = 'custom' | 'azure' | 'copilot' | 'codex' | 'claude-code' | 'ollama';
@@ -305,8 +304,6 @@ export interface FlagDef {
 // than redeclaring the shape) makes any future field rename a compile error
 // here instead of a runtime mismatch the next time someone refreshes the page.
 export type { SerializedProxyRecord as ProxyRecord, SerializedBackoffRow as BackoffRow } from '@floway-dev/gateway/control-plane/proxies/serialize';
-export type { SerializedModelAlias as ModelAlias } from '@floway-dev/gateway/control-plane/model-aliases/serialize';
-export type { ModelAliasRules, OnConflict as ModelAliasOnConflict } from '@floway-dev/gateway/control-plane/model-aliases/types';
 
 // 409 body returned by DELETE /api/proxies/:id when the row is referenced
 // by an upstream's fallback list.
@@ -343,7 +340,6 @@ export interface PublicModel {
   endpoints?: Record<string, ModelEndpointInfo>;
   cost?: ModelPricing;
   kind?: ModelKind;
-  aliasedFrom?: PublicModelAliasedFrom;
 }
 
 export interface ControlPlaneModel extends PublicModel {
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
deleted file mode 100644
index db7275abe..000000000
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ /dev/null
@@ -1,446 +0,0 @@
-<script setup lang="ts">
-// Operator editor for one alias. The form is intentionally Goal-2-friendly:
-// every "enum" field below is rendered as a combobox with suggestions
-// pulled from the target model's chat metadata (when available) and from
-// well-known wire values, but the operator can type any value verbatim so
-// a new upstream-side level (e.g. an "xhigh" effort that shipped this
-// morning) flows through without a frontend release.
-
-import { computed, ref } from 'vue';
-
-import { callApi, useApi } from '../../api/client.ts';
-import type { ModelAlias, ModelAliasOnConflict } from '../../api/types.ts';
-import { useModelsStore } from '../../composables/useModels.ts';
-import { useUpstreamsStore } from '../../composables/useUpstreams.ts';
-import { composeAliasDisplayName } from '@floway-dev/protocols/common';
-import { Button, Combobox, Dialog, Input, Select, Switch, TagCombobox } from '@floway-dev/ui';
-
-// Mutable mirror of @floway-dev/protocols ModelAliasRules — the wire shape
-// is `readonly` at the contract boundary, but the form mutates it in place
-// while the operator is editing. The Hono RPC client expects the mutable
-// version too.
-interface MutableRules {
-  reasoning?: {
-    effort?: string;
-    budgetTokens?: number;
-    adaptive?: boolean;
-    summary?: string;
-  };
-  verbosity?: string;
-  serviceTier?: string;
-  anthropicBeta?: string[];
-}
-
-const open = defineModel<boolean>('open', { required: true });
-
-const props = defineProps<{
-  /** null = create; non-null = edit. The alias name is editable in both modes. */
-  record: ModelAlias | null;
-}>();
-
-const emit = defineEmits<{
-  saved: [];
-}>();
-
-const api = useApi();
-const modelsStore = useModelsStore();
-const upstreamsStore = useUpstreamsStore();
-
-const mode = computed<'create' | 'edit'>(() => (props.record ? 'edit' : 'create'));
-
-// --- form state ---
-
-const aliasName = ref(props.record?.alias ?? '');
-const displayName = ref(props.record?.display_name ?? '');
-const targetModelId = ref(props.record?.target_model_id ?? '');
-const upstreamIds = ref<string[]>([...(props.record?.upstream_ids ?? [])]);
-const visibleInModelsList = ref(props.record?.visible_in_models_list ?? true);
-const onConflict = ref<ModelAliasOnConflict>(props.record?.on_conflict ?? 'real-only');
-
-// Reasoning fields are flat: every input is always visible. The wire schema
-// still allows the four facets (effort / budget / adaptive / summary) to
-// coexist; the apply layer's adaptive-first precedence handles the runtime
-// resolution. Forcing mutual exclusivity at the UI level previously meant
-// operators had to nuke an existing knob before setting another, which
-// fought their actual workflow.
-const initialReasoning = props.record?.rules.reasoning;
-const reasoningEffort = ref(initialReasoning?.effort ?? '');
-const reasoningBudgetTokens = ref<string>(initialReasoning?.budgetTokens === undefined ? '' : String(initialReasoning.budgetTokens));
-const reasoningAdaptive = ref(initialReasoning?.adaptive === true);
-const reasoningSummary = ref(initialReasoning?.summary ?? '');
-
-const verbosity = ref(props.record?.rules.verbosity ?? '');
-const serviceTier = ref(props.record?.rules.serviceTier ?? '');
-const anthropicBeta = ref<string[]>([...(props.record?.rules.anthropicBeta ?? [])]);
-
-// --- suggestion sources ---
-//
-// Models list seeds the target-model combobox and feeds the reasoning hint
-// lookup. `chat.reasoning` lives on per-model metadata the operator wired
-// at upstream-config time; surface its supported effort list / budget range
-// as combobox hints once a target id matches a real entry.
-
-const modelOptions = computed(() => (modelsStore.models.value ?? []).map(m => ({
-  value: m.id,
-  label: m.display_name ?? m.id,
-})));
-
-const upstreamItems = computed(() => (upstreamsStore.upstreams.value ?? []).map(u => ({
-  value: u.id,
-  label: u.name,
-  detail: u.id,
-})));
-
-const targetMatch = computed(() => modelsStore.models.value?.find(m => m.id === targetModelId.value));
-
-const targetChat = computed(() => {
-  const match = targetMatch.value;
-  return match && 'chat' in match ? (match as { chat?: { reasoning?: { effort?: { supported: string[] }; budget_tokens?: { min?: number; max?: number }; adaptive?: boolean } } }).chat : undefined;
-});
-
-const effortSuggestions = computed(() => targetChat.value?.reasoning?.effort?.supported ?? []);
-const budgetMin = computed(() => targetChat.value?.reasoning?.budget_tokens?.min);
-const budgetMax = computed(() => targetChat.value?.reasoning?.budget_tokens?.max);
-const adaptiveSupported = computed(() => targetChat.value?.reasoning?.adaptive === true);
-
-// Collected at the bottom of the dialog so a misalignment between the
-// rule and the target model's advertised capability stays visible without
-// crowding the per-field area. Each entry tells the operator which field
-// to revisit; the values still flow through to the upstream verbatim
-// (Goal 2: pass through, don't enum-gate) so a warning is informational,
-// not blocking.
-interface FieldWarning { field: string; message: string }
-const fieldWarnings = computed<FieldWarning[]>(() => {
-  const out: FieldWarning[] = [];
-  const chatReasoning = targetChat.value?.reasoning;
-
-  if (reasoningEffort.value.trim() !== '' && chatReasoning?.effort?.supported) {
-    const value = reasoningEffort.value.trim();
-    const supported = chatReasoning.effort.supported;
-    if (!supported.includes(value)) {
-      out.push({
-        field: 'Effort',
-        message: `"${value}" is not in the target's supported list (${supported.join(', ')}). The rule will still be sent verbatim.`,
-      });
-    }
-  }
-
-  const budgetRaw = reasoningBudgetTokens.value.trim();
-  if (budgetRaw !== '') {
-    const n = Number(budgetRaw);
-    const range = chatReasoning?.budget_tokens;
-    if (Number.isFinite(n) && range) {
-      if (range.min !== undefined && n < range.min) {
-        out.push({ field: 'Budget tokens', message: `${n} is below the target's minimum (${range.min}). The rule will still be sent verbatim.` });
-      } else if (range.max !== undefined && n > range.max) {
-        out.push({ field: 'Budget tokens', message: `${n} is above the target's maximum (${range.max}). The rule will still be sent verbatim.` });
-      }
-    }
-  }
-
-  if (reasoningAdaptive.value && !adaptiveSupported.value) {
-    out.push({
-      field: 'Adaptive reasoning',
-      message: 'Target model does not advertise adaptive reasoning support. The rule will still be sent verbatim.',
-    });
-  }
-
-  return out;
-});
-
-const SUMMARY_HINTS = ['auto', 'concise', 'detailed', 'omitted'];
-const VERBOSITY_HINTS = ['low', 'medium', 'high'];
-const SERVICE_TIER_HINTS = ['auto', 'default', 'flex', 'scale', 'priority', 'fast', 'standard_only'];
-
-// Each on-conflict option carries a one-line explanation surfaced both in
-// the Select popover and in a helper line below the trigger so an operator
-// picks by what happens at request time, not by guessing what `real-only`
-// / `alias-only` mean. Mirrors the Auth Style pattern in CustomConfigPanel.
-interface OnConflictOption {
-  value: ModelAliasOnConflict;
-  label: string;
-  explanation: string;
-}
-
-const onConflictOptions: OnConflictOption[] = [
-  {
-    value: 'real-only',
-    label: 'Real model wins',
-    explanation: "When an upstream serves a real model with the same id as this alias, the real model is used and the alias's rules don't apply on that upstream.",
-  },
-  {
-    value: 'alias-only',
-    label: 'Alias replaces real',
-    explanation: 'The alias always wins, even when an upstream serves a real model with the same id.',
-  },
-  {
-    value: 'both-real-first',
-    label: 'Both, real first',
-    explanation: 'Both entries appear; routing prefers the real model when present, falling back to the alias.',
-  },
-  {
-    value: 'both-alias-first',
-    label: 'Both, alias first',
-    explanation: 'Both entries appear; routing prefers the alias when present, falling back to the real model.',
-  },
-];
-
-const selectedOnConflict = computed(() => onConflictOptions.find(o => o.value === onConflict.value));
-
-// --- display name placeholder ---
-//
-// Shows the operator what the synthesized fallback would look like when
-// the Display name field is left blank. Before a target is picked we hold
-// a teaching example so the three placeholders (alias / target / display
-// name) read as a coherent trio; once a target is set we compute the real
-// synthesized label off the current form state so the placeholder tracks
-// every rule edit live.
-const FALLBACK_PLACEHOLDER_EXAMPLE = 'GPT-5.5 (xhigh effort, fast speed)';
-
-// Mirror of `buildRules` without the validation errors — used purely for
-// the live placeholder so a half-typed budget value (e.g. mid-typing) does
-// not bubble validation text into a UI hint. Invalid intermediate states
-// fall back to the empty rules object.
-const buildRulesForPreview = (): MutableRules => {
-  const rules: MutableRules = {};
-  const reasoning: NonNullable<MutableRules['reasoning']> = {};
-  const effort = reasoningEffort.value.trim();
-  if (effort !== '') reasoning.effort = effort;
-  const budgetRaw = reasoningBudgetTokens.value.trim();
-  if (budgetRaw !== '' && /^\d+$/.test(budgetRaw)) reasoning.budgetTokens = Number(budgetRaw);
-  if (reasoningAdaptive.value) reasoning.adaptive = true;
-  const summary = reasoningSummary.value.trim();
-  if (summary !== '') reasoning.summary = summary;
-  if (Object.keys(reasoning).length > 0) rules.reasoning = reasoning;
-  const verb = verbosity.value.trim();
-  if (verb !== '') rules.verbosity = verb;
-  const tier = serviceTier.value.trim();
-  if (tier !== '') rules.serviceTier = tier;
-  const betas = anthropicBeta.value.map(s => s.trim()).filter(s => s !== '');
-  if (betas.length > 0) rules.anthropicBeta = betas;
-  return rules;
-};
-
-const displayNamePlaceholder = computed(() => {
-  const trimmedTarget = targetModelId.value.trim();
-  if (trimmedTarget === '') return FALLBACK_PLACEHOLDER_EXAMPLE;
-  const targetDisplay = targetMatch.value?.display_name ?? trimmedTarget;
-  return composeAliasDisplayName({
-    targetDisplayName: targetDisplay,
-    rules: buildRulesForPreview(),
-  });
-});
-
-// --- save ---
-
-const saving = ref(false);
-const saveError = ref<string | null>(null);
-
-const trimOrUndef = (s: string): string | undefined => {
-  const t = s.trim();
-  return t === '' ? undefined : t;
-};
-
-const buildRules = (): MutableRules | { error: string } => {
-  const rules: MutableRules = {};
-  const reasoning: NonNullable<MutableRules['reasoning']> = {};
-
-  const effort = trimOrUndef(reasoningEffort.value);
-  if (effort !== undefined) reasoning.effort = effort;
-
-  const budgetRaw = reasoningBudgetTokens.value.trim();
-  if (budgetRaw !== '') {
-    if (!/^\d+$/.test(budgetRaw)) return { error: 'Reasoning budget tokens must be a non-negative integer' };
-    reasoning.budgetTokens = Number(budgetRaw);
-  }
-
-  if (reasoningAdaptive.value) reasoning.adaptive = true;
-
-  const summary = trimOrUndef(reasoningSummary.value);
-  if (summary !== undefined) reasoning.summary = summary;
-
-  if (Object.keys(reasoning).length > 0) rules.reasoning = reasoning;
-
-  const verb = trimOrUndef(verbosity.value);
-  if (verb !== undefined) rules.verbosity = verb;
-  const tier = trimOrUndef(serviceTier.value);
-  if (tier !== undefined) rules.serviceTier = tier;
-  const betas = anthropicBeta.value.map(s => s.trim()).filter(s => s !== '');
-  if (betas.length > 0) rules.anthropicBeta = betas;
-
-  return rules;
-};
-
-const save = async () => {
-  saveError.value = null;
-  const trimmedAlias = aliasName.value.trim();
-  const trimmedTarget = targetModelId.value.trim();
-  if (trimmedAlias === '') { saveError.value = 'Alias name is required'; return; }
-  if (trimmedTarget === '') { saveError.value = 'Target model id is required'; return; }
-
-  const rulesOrErr = buildRules();
-  if ('error' in rulesOrErr) { saveError.value = rulesOrErr.error; return; }
-
-  const displayNameValue = trimOrUndef(displayName.value);
-
-  saving.value = true;
-  try {
-    if (mode.value === 'create') {
-      const { error } = await callApi(() => api.api.aliases.$post({
-        json: {
-          alias: trimmedAlias,
-          targetModelId: trimmedTarget,
-          upstreamIds: [...upstreamIds.value],
-          rules: rulesOrErr,
-          visibleInModelsList: visibleInModelsList.value,
-          onConflict: onConflict.value,
-          ...(displayNameValue !== undefined ? { displayName: displayNameValue } : {}),
-        },
-      }));
-      if (error) { saveError.value = error.message; return; }
-    } else if (props.record) {
-      // PATCH addresses the row at its *original* PK; `alias` in the body
-      // requests a rename when it differs. The backend route handles the
-      // 409-on-collision path and the safe no-op when nothing changed.
-      const { error } = await callApi(() => api.api.aliases[':alias'].$patch({
-        param: { alias: props.record!.alias },
-        json: {
-          alias: trimmedAlias,
-          targetModelId: trimmedTarget,
-          upstreamIds: [...upstreamIds.value],
-          rules: rulesOrErr,
-          visibleInModelsList: visibleInModelsList.value,
-          onConflict: onConflict.value,
-          // Carry an explicit null when the operator cleared the label so the
-          // backend wipes the display_name column rather than preserving the
-          // old value through the absent-field merge.
-          displayName: displayNameValue ?? null,
-        },
-      }));
-      if (error) { saveError.value = error.message; return; }
-    }
-    emit('saved');
-    open.value = false;
-  } finally {
-    saving.value = false;
-  }
-};
-
-const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Alias: ${props.record?.alias ?? ''}`);
-</script>
-
-<template>
-  <Dialog v-model:open="open" :title="title" size="xl">
-    <div class="space-y-5">
-      <p v-if="saveError" class="rounded-md border border-accent-rose/40 bg-accent-rose/10 px-3 py-2 text-sm text-accent-rose">
-        {{ saveError }}
-      </p>
-
-      <div class="grid grid-cols-1 gap-3 sm:grid-cols-2">
-        <div>
-          <label class="mb-1.5 block text-xs font-medium text-gray-500">Alias name</label>
-          <Input v-model="aliasName" placeholder="gpt-5.5-xhigh-fast" class="font-mono" />
-        </div>
-
-        <div>
-          <label class="mb-1.5 block text-xs font-medium text-gray-500">Target model id</label>
-          <Combobox v-model="targetModelId" :items="modelOptions" placeholder="gpt-5.5" input-class="font-mono" />
-        </div>
-      </div>
-
-      <div>
-        <label class="mb-1.5 block text-xs font-medium text-gray-500">Display name <span class="text-gray-600">(optional)</span></label>
-        <Input v-model="displayName" :placeholder="displayNamePlaceholder" />
-      </div>
-
-      <div>
-        <label class="mb-1.5 block text-xs font-medium text-gray-500">Upstreams <span class="text-gray-600">(leave empty to allow any upstream that serves the target)</span></label>
-        <TagCombobox v-model="upstreamIds" :items="upstreamItems" placeholder="Pick an upstream..." empty-text="No upstreams match" />
-      </div>
-
-      <div>
-        <label class="mb-1.5 block text-xs font-medium text-gray-500">On conflict</label>
-        <Select v-model="onConflict" :options="onConflictOptions">
-          <template #description="{ option }">
-            <p class="text-[11px] text-gray-500">{{ option.explanation }}</p>
-          </template>
-        </Select>
-        <p v-if="selectedOnConflict" class="mt-1.5 text-xs text-gray-500">{{ selectedOnConflict.explanation }}</p>
-      </div>
-
-      <div class="grid grid-cols-1 gap-3 sm:grid-cols-2">
-        <div>
-          <label class="mb-1.5 block text-xs font-medium text-gray-500">Effort</label>
-          <Combobox v-model="reasoningEffort" :items="effortSuggestions" placeholder="high" />
-        </div>
-
-        <div>
-          <label class="mb-1.5 block text-xs font-medium text-gray-500">Budget tokens</label>
-          <Input
-            v-model="reasoningBudgetTokens"
-            placeholder="4096"
-            inputmode="numeric"
-            class="font-mono"
-            :min="budgetMin"
-            :max="budgetMax"
-          />
-          <p v-if="budgetMin !== undefined || budgetMax !== undefined" class="mt-1 text-xs text-gray-600">
-            Target range:
-            <template v-if="budgetMin !== undefined">min {{ budgetMin }}</template>
-            <template v-if="budgetMin !== undefined && budgetMax !== undefined">, </template>
-            <template v-if="budgetMax !== undefined">max {{ budgetMax }}</template>
-          </p>
-        </div>
-      </div>
-
-      <div class="grid grid-cols-1 gap-3 sm:grid-cols-2">
-        <div>
-          <label class="mb-1.5 block text-xs font-medium text-gray-500">Adaptive reasoning</label>
-          <div class="flex h-9 items-center gap-2">
-            <Switch v-model="reasoningAdaptive" />
-            <span class="text-sm text-gray-300">Enable</span>
-          </div>
-        </div>
-
-        <div>
-          <label class="mb-1.5 block text-xs font-medium text-gray-500">Summary</label>
-          <Combobox v-model="reasoningSummary" :items="SUMMARY_HINTS" placeholder="auto" />
-        </div>
-      </div>
-
-      <div class="grid grid-cols-1 gap-3 sm:grid-cols-2">
-        <div>
-          <label class="mb-1.5 block text-xs font-medium text-gray-500">Verbosity</label>
-          <Combobox v-model="verbosity" :items="VERBOSITY_HINTS" placeholder="medium" />
-        </div>
-
-        <div>
-          <label class="mb-1.5 block text-xs font-medium text-gray-500">Service tier</label>
-          <Combobox v-model="serviceTier" :items="SERVICE_TIER_HINTS" placeholder="auto" />
-        </div>
-      </div>
-
-      <div>
-        <label class="mb-1.5 block text-xs font-medium text-gray-500">Anthropic beta headers <span class="text-gray-600">(comma- or Enter-separated tokens)</span></label>
-        <TagCombobox v-model="anthropicBeta" :items="[]" placeholder="extended-cache-ttl-2025-04-11" empty-text="Type a header token and press Enter" />
-      </div>
-
-      <div v-if="fieldWarnings.length > 0" class="rounded-md border border-amber-500/30 bg-amber-500/10 px-3 py-2 text-xs text-amber-300 space-y-1">
-        <p v-for="warning in fieldWarnings" :key="warning.field">
-          <span class="font-bold">{{ warning.field }}:</span> {{ warning.message }}
-        </p>
-      </div>
-
-      <div class="flex flex-wrap items-center justify-between gap-3 border-t border-white/[0.06] pt-5">
-        <label class="flex items-center gap-2">
-          <Switch v-model="visibleInModelsList" />
-          <span class="text-sm text-gray-300">Visible in <code class="rounded bg-white/[0.04] px-1 font-mono text-xs">/v1/models</code></span>
-        </label>
-        <div class="flex items-center gap-2">
-          <Button variant="secondary" :disabled="saving" @click="open = false">Cancel</Button>
-          <Button :loading="saving" @click="save">Save</Button>
-        </div>
-      </div>
-    </div>
-  </Dialog>
-</template>
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
deleted file mode 100644
index 99574c248..000000000
--- a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
+++ /dev/null
@@ -1,205 +0,0 @@
-// @vitest-environment happy-dom
-
-import { mount } from '@vue/test-utils';
-import { afterEach, beforeEach, expect, test, vi } from 'vitest';
-import { defineComponent, h, ref } from 'vue';
-
-import type { ModelAlias } from '../../api/types.ts';
-
-// Module-level mocks for the api client + every store the dialog imports.
-// The dialog stays as-is; we substitute the dependencies so the component
-// renders and submits without any real HTTP. callApi is exposed as a spy so
-// tests can read what was posted.
-const createAliasMock = vi.fn(async (_args: { json: unknown }) => new Response(JSON.stringify({}), { status: 201, headers: { 'content-type': 'application/json' } }));
-const patchAliasMock = vi.fn(async (_args: { param: { alias: string }; json: unknown }) => new Response(JSON.stringify({}), { status: 200, headers: { 'content-type': 'application/json' } }));
-
-vi.mock('../../api/client.ts', async () => {
-  const { callApi: realCallApi } = await vi.importActual<typeof import('../../api/client.ts')>('../../api/client.ts');
-  return {
-    useApi: () => ({
-      api: {
-        aliases: Object.assign(
-          { $post: (args: { json: unknown }) => createAliasMock(args) },
-          { ':alias': { $patch: (args: { param: { alias: string }; json: unknown }) => patchAliasMock(args) } },
-        ),
-      },
-    }),
-    callApi: realCallApi,
-  };
-});
-
-vi.mock('../../composables/useModels.ts', () => ({
-  useModelsStore: () => ({
-    models: {
-      value: [
-        { id: 'gpt-5.4', display_name: 'GPT-5.4', object: 'model', type: 'model', limits: {}, kind: 'chat', chat: { reasoning: { effort: { supported: ['low', 'medium', 'high'], default: 'medium' }, budget_tokens: { min: 1024, max: 8192 }, adaptive: true } } },
-        { id: 'claude-opus-4-6', display_name: 'Claude Opus 4.6', object: 'model', type: 'model', limits: {}, kind: 'chat' },
-      ],
-    },
-    loading: { value: false },
-    error: { value: null },
-    load: vi.fn(async () => undefined),
-  }),
-}));
-
-vi.mock('../../composables/useUpstreams.ts', () => ({
-  useUpstreamsStore: () => ({
-    upstreams: {
-      value: [
-        { id: 'up_oai', name: 'OpenAI' },
-        { id: 'up_anth', name: 'Anthropic' },
-      ],
-    },
-    loading: { value: false },
-    load: vi.fn(async () => undefined),
-  }),
-}));
-
-// reka-ui's Dialog mounts via Teleport into document.body and renders a
-// portal — we stub it down to a passthrough so happy-dom mounts the slot
-// content inline where assertions can reach it.
-vi.mock('@floway-dev/ui', async () => {
-  const real = await vi.importActual<typeof import('@floway-dev/ui')>('@floway-dev/ui');
-  const Passthrough = defineComponent({ name: 'Passthrough', setup(_props, { slots }) { return () => h('div', slots.default?.()); } });
-  return { ...real, Dialog: Passthrough };
-});
-
-beforeEach(() => {
-  createAliasMock.mockClear();
-  patchAliasMock.mockClear();
-});
-
-afterEach(() => {
-  vi.clearAllMocks();
-});
-
-test('AliasEditDialog (create mode) posts a payload matching the form state', async () => {
-  const { default: AliasEditDialog } = await import('./AliasEditDialog.vue');
-  const open = ref(true);
-
-  const wrapper = mount(defineComponent({
-    components: { AliasEditDialog },
-    setup() { return { open }; },
-    template: '<AliasEditDialog v-model:open="open" :record="null" />',
-  }));
-
-  // Fill the form: alias name + target id are the only required fields for
-  // the create-mode happy path. Everything else uses its default.
-  const aliasInput = wrapper.find('input[placeholder="gpt-5.5-xhigh-fast"]');
-  expect(aliasInput.exists()).toBe(true);
-  await aliasInput.setValue('opus-fast');
-
-  const targetInput = wrapper.find('input[placeholder="gpt-5.5"]');
-  expect(targetInput.exists()).toBe(true);
-  await targetInput.setValue('claude-opus-4-6');
-
-  // Click Save.
-  const saveBtn = wrapper.findAll('button').find(b => b.text() === 'Save');
-  expect(saveBtn).toBeDefined();
-  await saveBtn!.trigger('click');
-  // Flush microtasks so the async save completes.
-  await new Promise(r => setTimeout(r, 0));
-
-  expect(createAliasMock).toHaveBeenCalledTimes(1);
-  const args = createAliasMock.mock.calls[0]![0];
-  expect(args.json).toMatchObject({
-    alias: 'opus-fast',
-    targetModelId: 'claude-opus-4-6',
-    upstreamIds: [],
-    rules: {},
-    visibleInModelsList: true,
-    onConflict: 'real-only',
-  });
-});
-
-test('AliasEditDialog (edit mode) pre-fills the form and PATCHes the merged shape', async () => {
-  const { default: AliasEditDialog } = await import('./AliasEditDialog.vue');
-  const open = ref(true);
-  const record: ModelAlias = {
-    alias: 'opus-xhigh',
-    target_model_id: 'claude-opus-4-6',
-    upstream_ids: ['up_anth'],
-    rules: { reasoning: { effort: 'xhigh' } },
-    visible_in_models_list: true,
-    on_conflict: 'real-only',
-    display_name: 'Opus XHigh',
-    created_at: 1_700_000_000,
-  };
-
-  const wrapper = mount(defineComponent({
-    components: { AliasEditDialog },
-    setup() { return { open, record }; },
-    template: '<AliasEditDialog v-model:open="open" :record="record" />',
-  }));
-
-  // Alias name input is editable in edit mode — the PK can now be renamed.
-  const aliasInput = wrapper.find('input[placeholder="gpt-5.5-xhigh-fast"]');
-  expect(aliasInput.exists()).toBe(true);
-  expect((aliasInput.element as HTMLInputElement).disabled).toBe(false);
-  expect((aliasInput.element as HTMLInputElement).value).toBe('opus-xhigh');
-
-  // Display name pre-filled — its placeholder is dynamic now (mirrors the
-  // synthesized fallback) so we locate it by its current value instead.
-  const allInputs = wrapper.findAll('input');
-  const displayInput = allInputs.find(i => (i.element as HTMLInputElement).value === 'Opus XHigh');
-  expect(displayInput).toBeDefined();
-
-  // Target id pre-filled.
-  const targetInput = wrapper.find('input[placeholder="gpt-5.5"]');
-  expect((targetInput.element as HTMLInputElement).value).toBe('claude-opus-4-6');
-
-  // Change one field and submit; PATCH carries the merged shape (every editable
-  // field, not just the diff — the route layer merges against the stored row).
-  await targetInput.setValue('gpt-5.4');
-  const saveBtn = wrapper.findAll('button').find(b => b.text() === 'Save');
-  await saveBtn!.trigger('click');
-  await new Promise(r => setTimeout(r, 0));
-
-  expect(patchAliasMock).toHaveBeenCalledTimes(1);
-  const args = patchAliasMock.mock.calls[0]![0];
-  expect(args.param.alias).toBe('opus-xhigh');
-  expect(args.json).toMatchObject({
-    alias: 'opus-xhigh',
-    targetModelId: 'gpt-5.4',
-    upstreamIds: ['up_anth'],
-    rules: { reasoning: { effort: 'xhigh' } },
-    visibleInModelsList: true,
-    onConflict: 'real-only',
-    displayName: 'Opus XHigh',
-  });
-});
-
-test('AliasEditDialog (edit mode) PATCHes the original alias when the operator renames it', async () => {
-  const { default: AliasEditDialog } = await import('./AliasEditDialog.vue');
-  const open = ref(true);
-  const record: ModelAlias = {
-    alias: 'opus-xhigh',
-    target_model_id: 'claude-opus-4-6',
-    upstream_ids: [],
-    rules: {},
-    visible_in_models_list: true,
-    on_conflict: 'real-only',
-    display_name: null,
-    created_at: 1_700_000_000,
-  };
-
-  const wrapper = mount(defineComponent({
-    components: { AliasEditDialog },
-    setup() { return { open, record }; },
-    template: '<AliasEditDialog v-model:open="open" :record="record" />',
-  }));
-
-  const aliasInput = wrapper.find('input[placeholder="gpt-5.5-xhigh-fast"]');
-  await aliasInput.setValue('opus-renamed');
-
-  const saveBtn = wrapper.findAll('button').find(b => b.text() === 'Save');
-  await saveBtn!.trigger('click');
-  await new Promise(r => setTimeout(r, 0));
-
-  expect(patchAliasMock).toHaveBeenCalledTimes(1);
-  const args = patchAliasMock.mock.calls[0]![0];
-  // The PATCH path stays at the row's *original* PK; the rename is requested
-  // via `body.alias`, which the route handler maps to the rename codepath.
-  expect(args.param.alias).toBe('opus-xhigh');
-  expect(args.json).toMatchObject({ alias: 'opus-renamed' });
-});
diff --git a/apps/web/src/components/models/ModelInfoBar.vue b/apps/web/src/components/models/ModelInfoBar.vue
index a70fa7f93..f8bf98b6e 100644
--- a/apps/web/src/components/models/ModelInfoBar.vue
+++ b/apps/web/src/components/models/ModelInfoBar.vue
@@ -1,7 +1,6 @@
 <script setup lang="ts">
-import type { ControlPlaneModel, PublicModelAliasedFrom } from '../../api/types.ts';
+import type { ControlPlaneModel } from '../../api/types.ts';
 import { providerBadgeClass, providerMeta } from '../upstreams/provider-meta.ts';
-import { formatAliasRuleBadges, type AliasRuleBadge } from '@floway-dev/protocols/common';
 
 defineProps<{
   model: ControlPlaneModel;
@@ -14,15 +13,6 @@ const formatTokenLimit = (n: number) => {
   if (n >= 1_000) return `${(n / 1_000).toFixed(n % 1_000 === 0 ? 0 : 1)}k`;
   return n.toString();
 };
-
-// `alias of: <target>` always leads the alias badge sequence so the operator
-// reading the row sees what the alias resolves to before scanning the rule
-// pills. The rule badges follow in the order `formatAliasRuleBadges`
-// returns, keeping dashboard and any future alias-aware tooling in lockstep.
-const aliasBadges = (aliasedFrom: PublicModelAliasedFrom): AliasRuleBadge[] => [
-  { label: 'alias of', value: aliasedFrom.targetModelId },
-  ...formatAliasRuleBadges(aliasedFrom.rules),
-];
 </script>
 
 <template>
@@ -53,11 +43,6 @@ const aliasBadges = (aliasedFrom: PublicModelAliasedFrom): AliasRuleBadge[] => [
           <span v-if="model.limits?.max_output_tokens" class="text-[10px] font-mono px-2 py-0.5 rounded-full bg-surface-600 text-gray-400">
             output: {{ formatTokenLimit(model.limits.max_output_tokens) }}
           </span>
-          <span
-            v-for="badge in (model.aliasedFrom ? aliasBadges(model.aliasedFrom) : [])"
-            :key="`${badge.label}:${badge.value ?? ''}`"
-            class="text-[10px] font-mono px-2 py-0.5 rounded-full border border-white/[0.08] text-gray-400"
-          >{{ badge.label }}{{ badge.value !== undefined ? `: ${badge.value}` : '' }}</span>
         </div>
       </div>
       <button class="btn-ghost text-[11px] flex shrink-0 items-center gap-1" @click="$emit('clear')">
diff --git a/apps/web/src/components/settings/AliasRow.vue b/apps/web/src/components/settings/AliasRow.vue
deleted file mode 100644
index cea5d37de..000000000
--- a/apps/web/src/components/settings/AliasRow.vue
+++ /dev/null
@@ -1,62 +0,0 @@
-<script setup lang="ts">
-import { computed } from 'vue';
-
-import type { ModelAlias } from '../../api/types.ts';
-import { formatAliasRulesInline } from '@floway-dev/protocols/common';
-
-const props = defineProps<{
-  alias: ModelAlias;
-}>();
-
-defineEmits<{
-  edit: [];
-  delete: [];
-}>();
-
-const rulesInline = computed(() => formatAliasRulesInline(props.alias.rules));
-</script>
-
-<template>
-  <div class="flex items-start gap-3 rounded-lg border border-white/5 bg-surface-800/80 px-3 py-2.5">
-    <div class="min-w-0 flex-1 flex flex-col gap-1">
-      <h4 v-if="alias.display_name" class="text-sm font-semibold text-white">{{ alias.display_name }}</h4>
-      <p class="font-mono text-xs flex flex-wrap items-center gap-x-2">
-        <span class="text-white break-all">{{ alias.alias }}</span>
-        <span class="text-gray-600">&rarr;</span>
-        <span class="text-gray-500 break-all">{{ alias.target_model_id }}</span>
-      </p>
-      <p v-if="rulesInline" class="text-xs text-gray-500">{{ rulesInline }}</p>
-      <p
-        v-if="!alias.visible_in_models_list"
-        class="text-[10px] uppercase tracking-wide text-amber-300"
-      >hidden from <code class="font-mono normal-case">/v1/models</code></p>
-    </div>
-
-    <div class="flex shrink-0 items-center gap-1">
-      <button
-        type="button"
-        class="inline-flex h-8 w-8 items-center justify-center rounded-md p-1 text-gray-600 transition-colors hover:bg-white/[0.04] hover:text-accent-cyan"
-        aria-label="Edit alias"
-        title="Edit"
-        @click="$emit('edit')"
-      >
-        <svg class="h-4 w-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-          <path d="M17 3a2.83 2.83 0 1 1 4 4L7.5 20.5 2 22l1.5-5.5Z" />
-          <path d="m15 5 4 4" />
-        </svg>
-      </button>
-      <button
-        type="button"
-        class="inline-flex h-8 w-8 items-center justify-center rounded-md p-1 text-gray-600 transition-colors hover:bg-white/[0.04] hover:text-accent-rose"
-        aria-label="Delete alias"
-        title="Delete"
-        @click="$emit('delete')"
-      >
-        <svg class="h-4 w-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
-          <polyline points="3 6 5 6 21 6" />
-          <path d="M19 6v14a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6m3 0V4a2 2 0 0 1 2-2h4a2 2 0 0 1 2 2v2" />
-        </svg>
-      </button>
-    </div>
-  </div>
-</template>
diff --git a/apps/web/src/components/settings/AliasRow_test.ts b/apps/web/src/components/settings/AliasRow_test.ts
deleted file mode 100644
index 988065fc6..000000000
--- a/apps/web/src/components/settings/AliasRow_test.ts
+++ /dev/null
@@ -1,159 +0,0 @@
-// @vitest-environment happy-dom
-
-import { mount } from '@vue/test-utils';
-import { beforeEach, describe, expect, test, vi } from 'vitest';
-import { defineComponent } from 'vue';
-
-import AliasRow from './AliasRow.vue';
-import type { ModelAlias } from '../../api/types.ts';
-
-const baseAlias: ModelAlias = {
-  alias: 'opus-xhigh',
-  target_model_id: 'claude-opus-4-6',
-  upstream_ids: [],
-  rules: { reasoning: { effort: 'xhigh' } },
-  visible_in_models_list: true,
-  on_conflict: 'real-only',
-  display_name: 'Opus XHigh',
-  created_at: 1_700_000_000,
-};
-
-describe('AliasRow', () => {
-  beforeEach(() => {
-    vi.restoreAllMocks();
-  });
-
-  test('renders the display name, alias id, and target', () => {
-    const wrapper = mount(AliasRow, { props: { alias: baseAlias } });
-    expect(wrapper.text()).toContain('Opus XHigh');
-    expect(wrapper.text()).toContain('opus-xhigh');
-    expect(wrapper.text()).toContain('claude-opus-4-6');
-  });
-
-  test('does not render the on_conflict label as a badge', () => {
-    // The row used to surface `real-only` / `alias-only` as a coloured badge.
-    // Operator feedback was that the inline label was noisy and the same
-    // information lives inside the edit dialog. Asserting absence here pins
-    // the regression — the words must not slip back into the row template.
-    const wrapper = mount(AliasRow, { props: { alias: baseAlias } });
-    expect(wrapper.text()).not.toContain('real-only');
-    expect(wrapper.text()).not.toContain('alias-only');
-  });
-
-  test('does not render per-upstream pills (upstream restriction is shown in the edit dialog instead)', () => {
-    // The row used to surface alias.upstream_ids as small font-mono pills.
-    // Operator feedback pushed all alias detail beyond the 3-line text block
-    // (display name, id mapping, rules summary) into the edit dialog so the
-    // listing reads at a glance. Assert the upstream id strings stay out of
-    // the row text — they live in the dialog now.
-    const wrapper = mount(AliasRow, {
-      props: { alias: { ...baseAlias, upstream_ids: ['up_anth', 'up_oai'] } },
-    });
-    const text = wrapper.text();
-    expect(text).not.toContain('up_anth');
-    expect(text).not.toContain('up_oai');
-  });
-
-  test('falls back to alias name when display_name is null', () => {
-    const wrapper = mount(AliasRow, { props: { alias: { ...baseAlias, display_name: null } } });
-    // alias id appears twice (label fallback + the small font-mono id), but the
-    // important assertion is that the label slot is non-empty.
-    expect(wrapper.text()).toContain('opus-xhigh');
-    expect(wrapper.text()).not.toContain('Opus XHigh');
-  });
-
-  test('emits edit and delete on the matching button clicks', async () => {
-    const wrapper = mount(AliasRow, { props: { alias: baseAlias } });
-    await wrapper.find('[aria-label="Edit alias"]').trigger('click');
-    await wrapper.find('[aria-label="Delete alias"]').trigger('click');
-    expect(wrapper.emitted('edit')).toHaveLength(1);
-    expect(wrapper.emitted('delete')).toHaveLength(1);
-  });
-
-  test('shows a "hidden" badge when visible_in_models_list is false', () => {
-    const wrapper = mount(AliasRow, { props: { alias: { ...baseAlias, visible_in_models_list: false } } });
-    expect(wrapper.text()).toContain('hidden');
-  });
-
-  test('renders the rules summary inline as one comma-joined line', () => {
-    const wrapper = mount(AliasRow, {
-      props: {
-        alias: {
-          ...baseAlias,
-          rules: { reasoning: { effort: 'high' }, verbosity: 'low', serviceTier: 'priority' },
-        },
-      },
-    });
-    // formatAliasRulesInline produces "value label, ..." in the same order
-    // composeAliasDisplayName uses for its parenthesized suffix.
-    expect(wrapper.text()).toContain('high effort, low verbosity, priority tier');
-  });
-
-  test('omits the rules summary line entirely when no rules are set', () => {
-    const wrapper = mount(AliasRow, {
-      props: { alias: { ...baseAlias, rules: {} } },
-    });
-    expect(wrapper.text()).not.toContain('effort');
-    expect(wrapper.text()).not.toContain('verbosity');
-    expect(wrapper.text()).not.toContain('tier');
-  });
-});
-
-// Bare-component smoke test for the card. We mock the composable so the
-// card renders deterministically without an HTTP round-trip; the stub
-// substitutes the same shape useModelAliases exposes.
-describe('AliasesSettingsCard', () => {
-  test('renders empty state when the store has no aliases', async () => {
-    vi.resetModules();
-    vi.doMock('../../composables/useModelAliases.ts', () => ({
-      useModelAliases: () => ({
-        aliases: { value: [] },
-        loading: { value: false },
-        error: { value: null },
-        load: vi.fn(),
-      }),
-    }));
-    vi.doMock('../../api/client.ts', () => ({
-      useApi: () => ({ api: { aliases: { ':alias': { $delete: vi.fn() } } } }),
-      callApi: vi.fn(),
-    }));
-    const { default: AliasesSettingsCard } = await import('./AliasesSettingsCard.vue');
-    const wrapper = mount(AliasesSettingsCard);
-    expect(wrapper.text()).toContain('No aliases configured');
-  });
-
-  test('renders one AliasRow per alias the store holds', async () => {
-    vi.resetModules();
-    const rows: ModelAlias[] = [
-      { ...baseAlias, alias: 'a-one' },
-      { ...baseAlias, alias: 'b-two', display_name: null },
-    ];
-    vi.doMock('../../composables/useModelAliases.ts', () => ({
-      useModelAliases: () => ({
-        aliases: { value: rows },
-        loading: { value: false },
-        error: { value: null },
-        load: vi.fn(),
-      }),
-    }));
-    vi.doMock('../../api/client.ts', () => ({
-      useApi: () => ({ api: { aliases: { ':alias': { $delete: vi.fn() } } } }),
-      callApi: vi.fn(),
-    }));
-    const { default: AliasesSettingsCard } = await import('./AliasesSettingsCard.vue');
-    const wrapper = mount(AliasesSettingsCard);
-    // Each row exposes its delete button by aria-label, so the count is a
-    // reliable proxy for "one AliasRow rendered per alias".
-    expect(wrapper.findAll('[aria-label="Delete alias"]').length).toBe(rows.length);
-    expect(wrapper.text()).toContain('a-one');
-    expect(wrapper.text()).toContain('b-two');
-  });
-});
-
-// Sanity: a stub wrapping the component above guards against template parse
-// regressions (an unknown directive or missing import would explode at mount
-// time even when no real backend is reachable).
-test('the test harness can mount a trivial component', () => {
-  const wrapper = mount(defineComponent({ template: '<span>ok</span>' }));
-  expect(wrapper.text()).toBe('ok');
-});
diff --git a/apps/web/src/components/settings/AliasesSettingsCard.vue b/apps/web/src/components/settings/AliasesSettingsCard.vue
deleted file mode 100644
index da71b374f..000000000
--- a/apps/web/src/components/settings/AliasesSettingsCard.vue
+++ /dev/null
@@ -1,65 +0,0 @@
-<script setup lang="ts">
-import { computed } from 'vue';
-
-import AliasRow from './AliasRow.vue';
-import { callApi, useApi } from '../../api/client.ts';
-import type { ModelAlias } from '../../api/types.ts';
-import { useModelAliases } from '../../composables/useModelAliases.ts';
-import { Spinner } from '@floway-dev/ui';
-
-const emit = defineEmits<{
-  'add': [];
-  'edit': [record: ModelAlias];
-  'changed': [];
-}>();
-
-const api = useApi();
-const aliasesStore = useModelAliases();
-
-const aliases = computed<ModelAlias[]>(() => aliasesStore.aliases.value ?? []);
-
-const deleteAlias = async (record: ModelAlias) => {
-  if (!window.confirm(`Delete alias "${record.alias}"?`)) return;
-  const { error } = await callApi(() => api.api.aliases[':alias'].$delete({ param: { alias: record.alias } }));
-  if (error) {
-    window.alert(`Delete failed: ${error.message}`);
-    return;
-  }
-  emit('changed');
-};
-</script>
-
-<template>
-  <div class="glass-card p-5 sm:p-6 animate-in delay-2">
-    <div class="mb-4 flex flex-col gap-3 sm:flex-row sm:items-start sm:justify-between">
-      <div class="min-w-0">
-        <h3 class="text-white font-semibold mb-1">Aliases</h3>
-        <p class="text-sm text-gray-400">
-          Synthesized model ids that pin a target model plus a request-time rule overlay.
-          Surfaced in <code class="rounded bg-white/[0.04] px-1">/v1/models</code> per the conflict policy.
-        </p>
-      </div>
-      <button class="btn-primary !py-2.5 !px-3 text-xs whitespace-nowrap" @click="emit('add')">Add Alias</button>
-    </div>
-
-    <p v-if="aliasesStore.error.value" class="mb-3 rounded-md border border-accent-rose/40 bg-accent-rose/10 px-3 py-2 text-sm text-accent-rose">
-      Failed to load aliases: {{ aliasesStore.error.value }}
-    </p>
-
-    <p v-if="!aliasesStore.error.value && aliases.length === 0" class="text-sm text-gray-500">
-      No aliases configured. Add one to expose a model id with locked reasoning, service tier, or other rule overrides.
-    </p>
-
-    <div v-else-if="aliases.length > 0" class="space-y-2">
-      <AliasRow
-        v-for="alias in aliases"
-        :key="alias.alias"
-        :alias="alias"
-        @edit="emit('edit', alias)"
-        @delete="deleteAlias(alias)"
-      />
-    </div>
-
-    <Spinner v-if="aliasesStore.loading.value && aliases.length > 0" class="mt-3 h-4 w-4 text-gray-500" />
-  </div>
-</template>
diff --git a/apps/web/src/composables/useModelAliases.ts b/apps/web/src/composables/useModelAliases.ts
deleted file mode 100644
index 880c771f9..000000000
--- a/apps/web/src/composables/useModelAliases.ts
+++ /dev/null
@@ -1,29 +0,0 @@
-import { ref, shallowRef } from 'vue';
-
-import { callApi, useApi } from '../api/client.ts';
-import type { ModelAlias } from '../api/types.ts';
-
-// Module-scoped cache so concurrent callers share one fetch — mirrors the
-// proxies store pattern: settings tabs that mount in parallel reuse a single
-// in-flight request instead of fan-out per-component.
-const aliases = shallowRef<ModelAlias[] | null>(null);
-const loading = ref(false);
-const error = ref<string | null>(null);
-
-export const useModelAliases = () => {
-  const api = useApi();
-
-  const load = async () => {
-    loading.value = true;
-    error.value = null;
-    const { data, error: err } = await callApi<ModelAlias[]>(() => api.api.aliases.$get());
-    loading.value = false;
-    if (err) {
-      error.value = err.message;
-      return;
-    }
-    aliases.value = data;
-  };
-
-  return { aliases, loading, error, load };
-};
diff --git a/apps/web/src/pages/dashboard/settings.vue b/apps/web/src/pages/dashboard/settings.vue
index 636754fee..b8e23dc3d 100644
--- a/apps/web/src/pages/dashboard/settings.vue
+++ b/apps/web/src/pages/dashboard/settings.vue
@@ -4,17 +4,14 @@ import { ref, watch } from 'vue';
 import { useRouter } from 'vue-router';
 
 import { callApi, useApi } from '../../api/client.ts';
-import type { ModelAlias, ProxyRecord, SearchConfig, UpstreamProviderKind, UpstreamRecord } from '../../api/types.ts';
-import AliasEditDialog from '../../components/alias-edit/AliasEditDialog.vue';
+import type { ProxyRecord, SearchConfig, UpstreamProviderKind, UpstreamRecord } from '../../api/types.ts';
 import ProxyEditDialog from '../../components/proxy-edit/ProxyEditDialog.vue';
-import AliasesSettingsCard from '../../components/settings/AliasesSettingsCard.vue';
 import ApiEndpointsSection from '../../components/settings/ApiEndpointsSection.vue';
 import ExportSection from '../../components/settings/ExportSection.vue';
 import ImportSection from '../../components/settings/ImportSection.vue';
 import ProxiesSettingsCard from '../../components/settings/ProxiesSettingsCard.vue';
 import SearchConfigSection from '../../components/settings/SearchConfigSection.vue';
 import UpstreamsSettingsCard from '../../components/settings/UpstreamsSettingsCard.vue';
-import { useModelAliases } from '../../composables/useModelAliases.ts';
 import { useModelsStore } from '../../composables/useModels.ts';
 import { useProxiesStore } from '../../composables/useProxies.ts';
 import { useRuntimeInfo } from '../../composables/useRuntimeInfo.ts';
@@ -34,7 +31,6 @@ export const useSettingsPageData = defineBasicLoader(async () => {
     useUpstreamsStore().load(),
     useModelsStore().load(),
     useProxiesStore().load(),
-    useModelAliases().load(),
     useRuntimeInfo().load(),
   ]);
   return {
@@ -53,8 +49,6 @@ const { upstreams, loading: storeLoading, load } = useUpstreamsStore();
 const modelsStore = useModelsStore();
 const proxiesStore = useProxiesStore();
 const { load: loadProxies } = proxiesStore;
-const aliasesStore = useModelAliases();
-const { load: loadAliases } = aliasesStore;
 const settingsData = useSettingsPageData();
 
 // Local working copy the child reorders via v-model:ordered; reloadAll
@@ -65,24 +59,17 @@ watch(upstreams, list => {
 }, { immediate: true });
 
 const reloadAll = async () => {
-  await Promise.all([load(), modelsStore.load(), loadProxies(), loadAliases()]);
+  await Promise.all([load(), modelsStore.load(), loadProxies()]);
 };
 
-// Proxy + alias editors are hosted as modals — v-if drives the unmount on close
-// so the next open boots from a fresh script setup (no manual reset).
+// Proxy editor is hosted as a modal — v-if drives the unmount on close so the
+// next open boots from a fresh script setup (no manual reset).
 const proxyDialogOpen = ref(false);
 const proxyDialogRecord = ref<ProxyRecord | null>(null);
 const openProxyDialog = (record: ProxyRecord | null): void => {
   proxyDialogRecord.value = record;
   proxyDialogOpen.value = true;
 };
-
-const aliasDialogOpen = ref(false);
-const aliasDialogRecord = ref<ModelAlias | null>(null);
-const openAliasDialog = (record: ModelAlias | null): void => {
-  aliasDialogRecord.value = record;
-  aliasDialogOpen.value = true;
-};
 </script>
 
 <template>
@@ -102,11 +89,6 @@ const openAliasDialog = (record: ModelAlias | null): void => {
           @edit="(record: ProxyRecord) => openProxyDialog(record)"
           @changed="reloadAll"
         />
-        <AliasesSettingsCard
-          @add="() => openAliasDialog(null)"
-          @edit="(record: ModelAlias) => openAliasDialog(record)"
-          @changed="reloadAll"
-        />
         <SearchConfigSection
           :initial-config="settingsData.data.value.searchConfig"
           :initial-error="settingsData.data.value.searchConfigError"
@@ -129,12 +111,5 @@ const openAliasDialog = (record: ModelAlias | null): void => {
       :record="proxyDialogRecord"
       @saved="reloadAll"
     />
-
-    <AliasEditDialog
-      v-if="aliasDialogOpen"
-      v-model:open="aliasDialogOpen"
-      :record="aliasDialogRecord"
-      @saved="reloadAll"
-    />
   </div>
 </template>
diff --git a/apps/web/vitest.config.ts b/apps/web/vitest.config.ts
index 20fd5dbbc..4f478ad04 100644
--- a/apps/web/vitest.config.ts
+++ b/apps/web/vitest.config.ts
@@ -4,7 +4,7 @@ import { defineConfig } from 'vitest/config';
 export default defineConfig({
   // The Vue plugin is required for any test that mounts an SFC; logic-only
   // tests don't need it, but adding it here is cheap and lets component
-  // tests (AliasRow, AliasEditDialog, ...) live next to the rest.
+  // tests live next to the rest.
   plugins: [Vue()],
   test: {
     // happy-dom provides DOM + EventSource for the dump-subscription
diff --git a/packages/gateway/migrations/0046_model_aliases.sql b/packages/gateway/migrations/0046_model_aliases.sql
deleted file mode 100644
index d76687bbd..000000000
--- a/packages/gateway/migrations/0046_model_aliases.sql
+++ /dev/null
@@ -1,14 +0,0 @@
-CREATE TABLE model_aliases (
-  alias TEXT PRIMARY KEY,
-  target_model_id TEXT NOT NULL,
-  upstream_ids_json TEXT NOT NULL DEFAULT '[]',
-  rules_json TEXT NOT NULL DEFAULT '{}',
-  visible_in_models_list INTEGER NOT NULL DEFAULT 1,
-  on_conflict TEXT NOT NULL DEFAULT 'real-only'
-    CHECK (on_conflict IN ('alias-only', 'real-only', 'both-real-first', 'both-alias-first')),
-  created_at INTEGER NOT NULL DEFAULT (unixepoch()),
-  updated_at INTEGER NOT NULL DEFAULT (unixepoch())
-);
-
-INSERT OR IGNORE INTO model_aliases (alias, target_model_id, rules_json, on_conflict)
-VALUES ('codex-auto-review', 'gpt-5.4', '{"reasoning":{"effort":"low"}}', 'real-only');
diff --git a/packages/gateway/migrations/0047_model_aliases_display_name.sql b/packages/gateway/migrations/0047_model_aliases_display_name.sql
deleted file mode 100644
index 5fd9b2591..000000000
--- a/packages/gateway/migrations/0047_model_aliases_display_name.sql
+++ /dev/null
@@ -1,4 +0,0 @@
-ALTER TABLE model_aliases ADD COLUMN display_name TEXT;
-
-UPDATE model_aliases SET display_name = 'Codex Auto Review'
-  WHERE alias = 'codex-auto-review' AND display_name IS NULL;
diff --git a/packages/gateway/package.json b/packages/gateway/package.json
index 99ce27a73..0e763374d 100644
--- a/packages/gateway/package.json
+++ b/packages/gateway/package.json
@@ -11,8 +11,6 @@
       "types": "./src/runtime/channel-broker-contract.ts"
     },
     "./control-plane/proxies/serialize": { "types": "./src/control-plane/proxies/serialize.ts" },
-    "./control-plane/model-aliases/serialize": { "types": "./src/control-plane/model-aliases/serialize.ts" },
-    "./control-plane/model-aliases/types": { "types": "./src/control-plane/model-aliases/types.ts" },
     "./control-plane/pricing/types": { "types": "./src/control-plane/pricing/types.ts" },
     "./data-plane/tools/web-search/types": {
       "import": "./src/data-plane/tools/web-search/types.ts",
diff --git a/packages/gateway/src/control-plane/model-aliases/repo.ts b/packages/gateway/src/control-plane/model-aliases/repo.ts
deleted file mode 100644
index f49c1e639..000000000
--- a/packages/gateway/src/control-plane/model-aliases/repo.ts
+++ /dev/null
@@ -1,124 +0,0 @@
-import type { ModelAlias, OnConflict } from './types.ts';
-import type { SqlDatabase } from '@floway-dev/platform';
-
-interface ModelAliasRow {
-  alias: string;
-  target_model_id: string;
-  upstream_ids_json: string;
-  rules_json: string;
-  visible_in_models_list: number;
-  on_conflict: OnConflict;
-  display_name: string | null;
-  created_at: number;
-}
-
-const ALIAS_COLUMNS = 'alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at';
-
-// The model_aliases table is operator-managed and small (dozens of rows at
-// most), so the data plane reads the full table per request — no cache layer.
-// `ORDER BY alias` makes the read deterministic so `/v1/models` and friends
-// emit alias entries in a stable, operator-predictable order across runtimes.
-export const loadAllAliases = async (db: SqlDatabase): Promise<readonly ModelAlias[]> => {
-  const { results } = await db
-    .prepare(`SELECT ${ALIAS_COLUMNS} FROM model_aliases ORDER BY alias`)
-    .all<ModelAliasRow>();
-  return results.map(toModelAlias);
-};
-
-export const getAliasByName = async (db: SqlDatabase, alias: string): Promise<ModelAlias | null> => {
-  const row = await db
-    .prepare(`SELECT ${ALIAS_COLUMNS} FROM model_aliases WHERE alias = ?`)
-    .bind(alias)
-    .first<ModelAliasRow>();
-  return row ? toModelAlias(row) : null;
-};
-
-// Detects PK collision with a SELECT round-trip rather than catching the
-// INSERT throw — driver error shape differs between node:sqlite and D1.
-export const insertAlias = async (db: SqlDatabase, alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }> => {
-  const existing = await db
-    .prepare('SELECT 1 FROM model_aliases WHERE alias = ?')
-    .bind(alias.alias)
-    .first<{ 1: number }>();
-  if (existing) return { ok: false, reason: 'duplicate' };
-  await db
-    .prepare(`INSERT INTO model_aliases (${ALIAS_COLUMNS}) VALUES (?, ?, ?, ?, ?, ?, ?, ?)`)
-    .bind(...bindValues(alias))
-    .run();
-  return { ok: true };
-};
-
-// UPSERT — on conflict the row is overwritten in place, but `created_at`
-// is preserved (the row's first INSERT wins, matching how `proxies.save`
-// keeps the original creation timestamp on a re-save).
-export const saveAlias = async (db: SqlDatabase, alias: ModelAlias): Promise<void> => {
-  await db
-    .prepare(
-      `INSERT INTO model_aliases (${ALIAS_COLUMNS}) VALUES (?, ?, ?, ?, ?, ?, ?, ?)
-       ON CONFLICT (alias) DO UPDATE SET
-         target_model_id = excluded.target_model_id,
-         upstream_ids_json = excluded.upstream_ids_json,
-         rules_json = excluded.rules_json,
-         visible_in_models_list = excluded.visible_in_models_list,
-         on_conflict = excluded.on_conflict,
-         display_name = excluded.display_name,
-         updated_at = unixepoch()`,
-    )
-    .bind(...bindValues(alias))
-    .run();
-};
-
-export const deleteAlias = async (db: SqlDatabase, alias: string): Promise<{ deleted: boolean }> => {
-  const result = await db.prepare('DELETE FROM model_aliases WHERE alias = ?').bind(alias).run();
-  return { deleted: (result.meta.changes ?? 0) > 0 };
-};
-
-// Updates the PK column in place. A pre-flight SELECT detects the destination
-// collision so the caller gets a structured `duplicate` reason instead of a
-// driver-specific SQLITE_CONSTRAINT thrown error (shape differs between
-// node:sqlite and D1). `meta.changes === 0` after the UPDATE means the source
-// row was gone — propagated as `notFound` for the 404 mapping.
-export const renameAlias = async (db: SqlDatabase, oldAlias: string, newAlias: string): Promise<{ ok: true } | { ok: false; reason: 'duplicate' | 'notFound' }> => {
-  if (oldAlias === newAlias) return { ok: true };
-  const conflict = await db
-    .prepare('SELECT 1 FROM model_aliases WHERE alias = ?')
-    .bind(newAlias)
-    .first<{ 1: number }>();
-  if (conflict) return { ok: false, reason: 'duplicate' };
-  const result = await db
-    .prepare('UPDATE model_aliases SET alias = ?, updated_at = unixepoch() WHERE alias = ?')
-    .bind(newAlias, oldAlias)
-    .run();
-  if ((result.meta.changes ?? 0) === 0) return { ok: false, reason: 'notFound' };
-  return { ok: true };
-};
-
-const bindValues = (alias: ModelAlias): unknown[] => [
-  alias.alias,
-  alias.targetModelId,
-  JSON.stringify(alias.upstreamIds),
-  JSON.stringify(alias.rules),
-  alias.visibleInModelsList ? 1 : 0,
-  alias.onConflict,
-  alias.displayName ?? null,
-  alias.createdAt,
-];
-
-const toModelAlias = (row: ModelAliasRow): ModelAlias => ({
-  alias: row.alias,
-  targetModelId: row.target_model_id,
-  upstreamIds: parseJsonField<string[]>(row.alias, 'upstream_ids_json', row.upstream_ids_json),
-  rules: parseJsonField<ModelAlias['rules']>(row.alias, 'rules_json', row.rules_json),
-  visibleInModelsList: row.visible_in_models_list === 1,
-  onConflict: row.on_conflict,
-  ...(row.display_name !== null ? { displayName: row.display_name } : {}),
-  createdAt: row.created_at,
-});
-
-const parseJsonField = <T>(alias: string, field: string, raw: string): T => {
-  try {
-    return JSON.parse(raw) as T;
-  } catch (cause) {
-    throw new Error(`Malformed model_aliases ${field} for ${alias}`, { cause });
-  }
-};
diff --git a/packages/gateway/src/control-plane/model-aliases/repo_test.ts b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
deleted file mode 100644
index 32ba3aea0..000000000
--- a/packages/gateway/src/control-plane/model-aliases/repo_test.ts
+++ /dev/null
@@ -1,172 +0,0 @@
-import { test } from 'vitest';
-
-import { loadAllAliases, renameAlias } from './repo.ts';
-import { createSqliteTestDb } from '../../repo/test-sqlite.ts';
-import { assertEquals, assertRejects } from '@floway-dev/test-utils';
-
-test('loadAllAliases reads the seed row from a freshly migrated database', async () => {
-  const db = await createSqliteTestDb();
-
-  const aliases = await loadAllAliases(db);
-  assertEquals(aliases.length, 1);
-  const [seed] = aliases;
-  // `createdAt` rides off the migration's `DEFAULT (unixepoch())`, so the
-  // exact value is wall-clock dependent. Assert structurally that it landed
-  // as a number and strip it before comparing the rest of the row.
-  assertEquals(typeof seed.createdAt, 'number');
-  const { createdAt: _createdAt, ...withoutTimestamp } = seed;
-  assertEquals(withoutTimestamp, {
-    alias: 'codex-auto-review',
-    targetModelId: 'gpt-5.4',
-    upstreamIds: [],
-    rules: { reasoning: { effort: 'low' } },
-    visibleInModelsList: true,
-    onConflict: 'real-only',
-    displayName: 'Codex Auto Review',
-  });
-});
-
-test('loadAllAliases parses upstreamIds and rules JSON and coerces visible_in_models_list to a boolean', async () => {
-  const db = await createSqliteTestDb();
-  await db.exec('DELETE FROM model_aliases');
-  await db
-    .prepare(
-      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)',
-    )
-    .bind(
-      'opus-xhigh',
-      'claude-opus-4-6',
-      '["up_priority","up_secondary"]',
-      '{"reasoning":{"effort":"xhigh"},"anthropicBeta":["fine-grained-tool-streaming"]}',
-      0,
-      'alias-only',
-      1_700_000_000,
-    )
-    .run();
-  await db
-    .prepare(
-      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)',
-    )
-    .bind('gpt-5-fast', 'gpt-5.4', '[]', '{"serviceTier":"priority"}', 1, 'both-alias-first', 1_700_000_001)
-    .run();
-
-  const aliases = await loadAllAliases(db);
-  const byAlias = new Map(aliases.map(entry => [entry.alias, entry]));
-
-  assertEquals(byAlias.get('opus-xhigh'), {
-    alias: 'opus-xhigh',
-    targetModelId: 'claude-opus-4-6',
-    upstreamIds: ['up_priority', 'up_secondary'],
-    rules: { reasoning: { effort: 'xhigh' }, anthropicBeta: ['fine-grained-tool-streaming'] },
-    visibleInModelsList: false,
-    onConflict: 'alias-only',
-    createdAt: 1_700_000_000,
-  });
-  assertEquals(byAlias.get('gpt-5-fast'), {
-    alias: 'gpt-5-fast',
-    targetModelId: 'gpt-5.4',
-    upstreamIds: [],
-    rules: { serviceTier: 'priority' },
-    visibleInModelsList: true,
-    onConflict: 'both-alias-first',
-    createdAt: 1_700_000_001,
-  });
-});
-
-test('loadAllAliases reads display_name and omits the field when SQL stored NULL', async () => {
-  const db = await createSqliteTestDb();
-  await db.exec('DELETE FROM model_aliases');
-  await db
-    .prepare(
-      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
-    )
-    .bind('with-label', 'gpt-5.4', '[]', '{}', 1, 'real-only', 'Pretty Label', 1_700_000_000)
-    .run();
-  await db
-    .prepare(
-      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
-    )
-    .bind('no-label', 'gpt-5.4', '[]', '{}', 1, 'real-only', null, 1_700_000_001)
-    .run();
-
-  const byAlias = new Map((await loadAllAliases(db)).map(entry => [entry.alias, entry]));
-  assertEquals(byAlias.get('with-label')?.displayName, 'Pretty Label');
-  // SQL NULL becomes undefined on the typed row so callers can branch on `=== undefined`.
-  assertEquals('displayName' in (byAlias.get('no-label') ?? {}), false);
-});
-
-test('loadAllAliases surfaces malformed rules_json as a descriptive error', async () => {
-  const db = await createSqliteTestDb();
-  await db.exec('DELETE FROM model_aliases');
-  await db
-    .prepare(
-      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)',
-    )
-    .bind('bad-rules', 'gpt-5.4', '[]', '{not json', 1, 'real-only')
-    .run();
-
-  await assertRejects(() => loadAllAliases(db), Error, 'Malformed model_aliases rules_json for bad-rules');
-});
-
-test('loadAllAliases surfaces malformed upstream_ids_json as a descriptive error', async () => {
-  const db = await createSqliteTestDb();
-  await db.exec('DELETE FROM model_aliases');
-  await db
-    .prepare(
-      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict) VALUES (?, ?, ?, ?, ?, ?)',
-    )
-    .bind('bad-upstreams', 'gpt-5.4', '[bad', '{}', 1, 'real-only')
-    .run();
-
-  await assertRejects(() => loadAllAliases(db), Error, 'Malformed model_aliases upstream_ids_json for bad-upstreams');
-});
-
-test('renameAlias updates the PRIMARY KEY in place', async () => {
-  const db = await createSqliteTestDb();
-  await db.exec('DELETE FROM model_aliases');
-  await db
-    .prepare(
-      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, display_name, created_at) VALUES (?, ?, ?, ?, ?, ?, ?, ?)',
-    )
-    .bind('source', 'gpt-5.4', '[]', '{}', 1, 'real-only', 'Source Label', 1_700_000_000)
-    .run();
-
-  const result = await renameAlias(db, 'source', 'renamed');
-  assertEquals(result, { ok: true });
-
-  const remaining = await loadAllAliases(db);
-  assertEquals(remaining.map(a => a.alias), ['renamed']);
-  // Preserved row payload — only the PK changed; createdAt and displayName intact.
-  assertEquals(remaining[0]!.displayName, 'Source Label');
-  assertEquals(remaining[0]!.createdAt, 1_700_000_000);
-});
-
-test('renameAlias returns notFound when the source row is missing', async () => {
-  const db = await createSqliteTestDb();
-  await db.exec('DELETE FROM model_aliases');
-  const result = await renameAlias(db, 'ghost', 'new-name');
-  assertEquals(result, { ok: false, reason: 'notFound' });
-});
-
-test('renameAlias returns duplicate when the destination row already exists', async () => {
-  const db = await createSqliteTestDb();
-  await db.exec('DELETE FROM model_aliases');
-  await db
-    .prepare(
-      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)',
-    )
-    .bind('source', 'gpt-5.4', '[]', '{}', 1, 'real-only', 1_700_000_000)
-    .run();
-  await db
-    .prepare(
-      'INSERT INTO model_aliases (alias, target_model_id, upstream_ids_json, rules_json, visible_in_models_list, on_conflict, created_at) VALUES (?, ?, ?, ?, ?, ?, ?)',
-    )
-    .bind('taken', 'gpt-5.4', '[]', '{}', 1, 'real-only', 1_700_000_001)
-    .run();
-
-  const result = await renameAlias(db, 'source', 'taken');
-  assertEquals(result, { ok: false, reason: 'duplicate' });
-  // Both rows still present.
-  const remaining = (await loadAllAliases(db)).map(a => a.alias).sort();
-  assertEquals(remaining, ['source', 'taken']);
-});
diff --git a/packages/gateway/src/control-plane/model-aliases/routes.ts b/packages/gateway/src/control-plane/model-aliases/routes.ts
deleted file mode 100644
index 611d8913e..000000000
--- a/packages/gateway/src/control-plane/model-aliases/routes.ts
+++ /dev/null
@@ -1,87 +0,0 @@
-import type { Context } from 'hono';
-
-import { aliasToJson } from './serialize.ts';
-import type { ModelAlias } from './types.ts';
-import { type CtxWithJson } from '../../middleware/zod-validator.ts';
-import { getRepo } from '../../repo/index.ts';
-import type { createAliasBody, updateAliasBody } from '../schemas.ts';
-
-export const listAliases = async (c: Context) => {
-  const aliases = await getRepo().modelAliases.loadAll();
-  return c.json(aliases.map(aliasToJson));
-};
-
-export const createAlias = async (c: CtxWithJson<typeof createAliasBody>) => {
-  const body = c.req.valid('json');
-  const record: ModelAlias = {
-    alias: body.alias,
-    targetModelId: body.targetModelId,
-    upstreamIds: body.upstreamIds,
-    rules: body.rules,
-    visibleInModelsList: body.visibleInModelsList,
-    // `real-only` is the safe default: an alias whose target id collides with
-    // a real model id stays hidden until the operator opts the alias into one
-    // of the surfacing modes. Matches the migration's column default.
-    onConflict: body.onConflict ?? 'real-only',
-    ...(body.displayName !== undefined ? { displayName: body.displayName } : {}),
-    createdAt: Math.floor(Date.now() / 1000),
-  };
-
-  const result = await getRepo().modelAliases.create(record);
-  if (!result.ok) {
-    return c.json({ error: { type: 'conflict', message: `Alias "${body.alias}" already exists` } }, 409);
-  }
-
-  return c.json(aliasToJson(record), 201);
-};
-
-export const updateAlias = async (c: CtxWithJson<typeof updateAliasBody>) => {
-  const aliasName = c.req.param('alias')!;
-  const body = c.req.valid('json');
-
-  const repo = getRepo();
-  const existing = await repo.modelAliases.getByAlias(aliasName);
-  if (!existing) return c.json({ error: 'Alias not found' }, 404);
-
-  // Rename runs first so the merged save below targets the row at its new
-  // PK. A no-op (alias unchanged or omitted) returns ok without touching
-  // the row.
-  const nextAlias = body.alias ?? existing.alias;
-  if (nextAlias !== existing.alias) {
-    const renamed = await repo.modelAliases.rename(existing.alias, nextAlias);
-    if (!renamed.ok) {
-      return c.json({ error: { type: 'conflict', message: `Alias "${nextAlias}" already exists` } }, 409);
-    }
-  }
-
-  // Field-by-field merge so an absent field preserves the existing value.
-  // `displayName` accepts an explicit null to clear the operator-set label
-  // back to the synthesized fallback; we use Object.hasOwn to keep the
-  // absent / null distinction that `??` would collapse.
-  const merged: ModelAlias = {
-    alias: nextAlias,
-    targetModelId: body.targetModelId ?? existing.targetModelId,
-    upstreamIds: body.upstreamIds ?? existing.upstreamIds,
-    rules: body.rules ?? existing.rules,
-    visibleInModelsList: body.visibleInModelsList ?? existing.visibleInModelsList,
-    onConflict: body.onConflict ?? existing.onConflict,
-    createdAt: existing.createdAt,
-    ...nextDisplayName(existing, body.displayName),
-  };
-
-  await repo.modelAliases.save(merged);
-  return c.json(aliasToJson(merged));
-};
-
-const nextDisplayName = (existing: ModelAlias, patch: string | null | undefined): { displayName?: string } => {
-  if (patch === undefined) return existing.displayName !== undefined ? { displayName: existing.displayName } : {};
-  if (patch === null) return {};
-  return { displayName: patch };
-};
-
-export const deleteAlias = async (c: Context) => {
-  const aliasName = c.req.param('alias')!;
-  const { deleted } = await getRepo().modelAliases.delete(aliasName);
-  if (!deleted) return c.json({ error: 'Alias not found' }, 404);
-  return c.body(null, 204);
-};
diff --git a/packages/gateway/src/control-plane/model-aliases/routes_test.ts b/packages/gateway/src/control-plane/model-aliases/routes_test.ts
deleted file mode 100644
index bf1cd766c..000000000
--- a/packages/gateway/src/control-plane/model-aliases/routes_test.ts
+++ /dev/null
@@ -1,354 +0,0 @@
-import { test } from 'vitest';
-
-import type { SerializedModelAlias } from './serialize.ts';
-import { requestApp, setupAppTest } from '../../test-helpers.ts';
-import { assertEquals } from '@floway-dev/test-utils';
-
-const authedGet = (adminSession: string): RequestInit => ({
-  method: 'GET',
-  headers: { 'x-floway-session': adminSession },
-});
-
-const authedJson = (adminSession: string, method: 'POST' | 'PATCH' | 'DELETE', body?: unknown): RequestInit => ({
-  method,
-  headers: {
-    'content-type': 'application/json',
-    'x-floway-session': adminSession,
-  },
-  ...(body === undefined ? {} : { body: JSON.stringify(body) }),
-});
-
-const baseCreate = (overrides: Record<string, unknown> = {}) => ({
-  alias: 'opus-xhigh',
-  targetModelId: 'claude-opus-4-6',
-  upstreamIds: [],
-  rules: { reasoning: { effort: 'xhigh' } },
-  visibleInModelsList: true,
-  onConflict: 'real-only',
-  ...overrides,
-});
-
-test('GET /api/aliases returns rows sorted by alias', async () => {
-  const { repo, adminSession } = await setupAppTest();
-  await repo.modelAliases.save({
-    alias: 'zzz-late',
-    targetModelId: 'gpt-5.4',
-    upstreamIds: [],
-    rules: {},
-    visibleInModelsList: true,
-    onConflict: 'real-only',
-    createdAt: 1_700_000_001,
-  });
-  await repo.modelAliases.save({
-    alias: 'aaa-early',
-    targetModelId: 'gpt-5.4',
-    upstreamIds: [],
-    rules: {},
-    visibleInModelsList: true,
-    onConflict: 'real-only',
-    createdAt: 1_700_000_000,
-  });
-
-  const resp = await requestApp('/api/aliases', authedGet(adminSession));
-  assertEquals(resp.status, 200);
-  const list = (await resp.json()) as SerializedModelAlias[];
-  assertEquals(list.map(a => a.alias), ['aaa-early', 'zzz-late']);
-});
-
-test('POST /api/aliases creates a row and echoes the serialized shape', async () => {
-  const { repo, adminSession } = await setupAppTest();
-
-  const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({
-    displayName: 'Opus Extra-High',
-    upstreamIds: ['up_a', 'up_b'],
-    rules: { reasoning: { effort: 'xhigh' }, anthropicBeta: ['fine-grained-tool-streaming'] },
-  })));
-  assertEquals(resp.status, 201);
-  const created = (await resp.json()) as SerializedModelAlias;
-  assertEquals(created.alias, 'opus-xhigh');
-  assertEquals(created.target_model_id, 'claude-opus-4-6');
-  assertEquals(created.upstream_ids, ['up_a', 'up_b']);
-  assertEquals(created.rules, { reasoning: { effort: 'xhigh' }, anthropicBeta: ['fine-grained-tool-streaming'] });
-  assertEquals(created.visible_in_models_list, true);
-  assertEquals(created.on_conflict, 'real-only');
-  assertEquals(created.display_name, 'Opus Extra-High');
-  assertEquals(typeof created.created_at, 'number');
-
-  const stored = await repo.modelAliases.getByAlias('opus-xhigh');
-  assertEquals(stored?.targetModelId, 'claude-opus-4-6');
-  assertEquals(stored?.displayName, 'Opus Extra-High');
-});
-
-test('POST /api/aliases defaults onConflict to real-only when omitted', async () => {
-  const { adminSession } = await setupAppTest();
-
-  const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', {
-    alias: 'no-onconflict',
-    targetModelId: 'gpt-5.4',
-    upstreamIds: [],
-    rules: {},
-    visibleInModelsList: true,
-  }));
-  assertEquals(resp.status, 201);
-  const created = (await resp.json()) as SerializedModelAlias;
-  assertEquals(created.on_conflict, 'real-only');
-});
-
-test('POST /api/aliases returns 409 on duplicate alias', async () => {
-  const { adminSession } = await setupAppTest();
-
-  const first = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate()));
-  assertEquals(first.status, 201);
-
-  const dup = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate()));
-  assertEquals(dup.status, 409);
-  const body = (await dup.json()) as { error: { type: string; message: string } };
-  assertEquals(body.error.type, 'conflict');
-});
-
-test('POST /api/aliases rejects an empty alias name with 400', async () => {
-  const { adminSession } = await setupAppTest();
-  const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({ alias: '' })));
-  assertEquals(resp.status, 400);
-});
-
-test('POST /api/aliases rejects an alias containing whitespace with 400', async () => {
-  const { adminSession } = await setupAppTest();
-  const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({ alias: 'has space' })));
-  assertEquals(resp.status, 400);
-});
-
-test('POST /api/aliases rejects an unknown rule key with 400', async () => {
-  const { adminSession } = await setupAppTest();
-  const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({
-    rules: { reasoning: { effort: 'high' }, mysteryKnob: true } as unknown as Record<string, unknown>,
-  })));
-  assertEquals(resp.status, 400);
-});
-
-test('POST /api/aliases rejects an invalid onConflict value with 400', async () => {
-  const { adminSession } = await setupAppTest();
-  const resp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate({ onConflict: 'mystery-mode' })));
-  assertEquals(resp.status, 400);
-});
-
-test('POST /api/aliases requires admin auth (non-admin api key returns 403)', async () => {
-  const { adminSession, apiKey } = await setupAppTest();
-
-  // Sanity: the admin call succeeds so the failure below pins the auth gate,
-  // not a request-shape mistake shared by both calls.
-  const adminResp = await requestApp('/api/aliases', authedJson(adminSession, 'POST', baseCreate()));
-  assertEquals(adminResp.status, 201);
-
-  const userResp = await requestApp('/api/aliases', {
-    method: 'POST',
-    headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
-    body: JSON.stringify(baseCreate({ alias: 'other' })),
-  });
-  assertEquals(userResp.status, 403);
-});
-
-test('PATCH /api/aliases/:alias merges a partial body and preserves untouched fields', async () => {
-  const { repo, adminSession } = await setupAppTest();
-  await repo.modelAliases.save({
-    alias: 'opus-xhigh',
-    targetModelId: 'claude-opus-4-6',
-    upstreamIds: ['up_a'],
-    rules: { reasoning: { effort: 'xhigh' } },
-    visibleInModelsList: true,
-    onConflict: 'real-only',
-    displayName: 'Existing Label',
-    createdAt: 1_700_000_000,
-  });
-
-  const resp = await requestApp('/api/aliases/opus-xhigh', authedJson(adminSession, 'PATCH', {
-    rules: { reasoning: { effort: 'medium' }, serviceTier: 'priority' },
-  }));
-  assertEquals(resp.status, 200);
-  const updated = (await resp.json()) as SerializedModelAlias;
-  // Patched fields took effect.
-  assertEquals(updated.rules, { reasoning: { effort: 'medium' }, serviceTier: 'priority' });
-  // Untouched fields preserved verbatim.
-  assertEquals(updated.target_model_id, 'claude-opus-4-6');
-  assertEquals(updated.upstream_ids, ['up_a']);
-  assertEquals(updated.visible_in_models_list, true);
-  assertEquals(updated.display_name, 'Existing Label');
-  assertEquals(updated.created_at, 1_700_000_000);
-});
-
-test('PATCH /api/aliases/:alias accepts displayName=null to clear the label', async () => {
-  const { repo, adminSession } = await setupAppTest();
-  await repo.modelAliases.save({
-    alias: 'opus-xhigh',
-    targetModelId: 'claude-opus-4-6',
-    upstreamIds: [],
-    rules: {},
-    visibleInModelsList: true,
-    onConflict: 'real-only',
-    displayName: 'Existing Label',
-    createdAt: 1_700_000_000,
-  });
-
-  const resp = await requestApp('/api/aliases/opus-xhigh', authedJson(adminSession, 'PATCH', { displayName: null }));
-  assertEquals(resp.status, 200);
-  const updated = (await resp.json()) as SerializedModelAlias;
-  assertEquals(updated.display_name, null);
-
-  const stored = await repo.modelAliases.getByAlias('opus-xhigh');
-  assertEquals(stored?.displayName, undefined);
-});
-
-test('PATCH /api/aliases/:alias returns 404 when the alias does not exist', async () => {
-  const { adminSession } = await setupAppTest();
-  const resp = await requestApp('/api/aliases/nope', authedJson(adminSession, 'PATCH', { visibleInModelsList: false }));
-  assertEquals(resp.status, 404);
-});
-
-test('PATCH /api/aliases/:alias renames the row when body.alias differs from the path', async () => {
-  const { repo, adminSession } = await setupAppTest();
-  await repo.modelAliases.save({
-    alias: 'old-name',
-    targetModelId: 'gpt-5.4',
-    upstreamIds: ['up_a'],
-    rules: { reasoning: { effort: 'high' } },
-    visibleInModelsList: true,
-    onConflict: 'real-only',
-    displayName: 'Old Label',
-    createdAt: 1_700_000_000,
-  });
-
-  const resp = await requestApp('/api/aliases/old-name', authedJson(adminSession, 'PATCH', {
-    alias: 'new-name',
-    rules: { reasoning: { effort: 'medium' } },
-  }));
-  assertEquals(resp.status, 200);
-  const updated = (await resp.json()) as SerializedModelAlias;
-  // Response carries the new alias and the patched rules; preserved fields stay intact.
-  assertEquals(updated.alias, 'new-name');
-  assertEquals(updated.target_model_id, 'gpt-5.4');
-  assertEquals(updated.upstream_ids, ['up_a']);
-  assertEquals(updated.rules, { reasoning: { effort: 'medium' } });
-  assertEquals(updated.display_name, 'Old Label');
-  assertEquals(updated.created_at, 1_700_000_000);
-
-  // Repo state: old row gone, new row present.
-  assertEquals(await repo.modelAliases.getByAlias('old-name'), null);
-  const stored = await repo.modelAliases.getByAlias('new-name');
-  assertEquals(stored?.alias, 'new-name');
-  assertEquals(stored?.rules, { reasoning: { effort: 'medium' } });
-});
-
-test('PATCH /api/aliases/:alias returns 409 when body.alias collides with an existing row', async () => {
-  const { repo, adminSession } = await setupAppTest();
-  await repo.modelAliases.save({
-    alias: 'source',
-    targetModelId: 'gpt-5.4',
-    upstreamIds: [],
-    rules: {},
-    visibleInModelsList: true,
-    onConflict: 'real-only',
-    createdAt: 1_700_000_000,
-  });
-  await repo.modelAliases.save({
-    alias: 'taken',
-    targetModelId: 'gpt-5.4',
-    upstreamIds: [],
-    rules: {},
-    visibleInModelsList: true,
-    onConflict: 'real-only',
-    createdAt: 1_700_000_001,
-  });
-
-  const resp = await requestApp('/api/aliases/source', authedJson(adminSession, 'PATCH', { alias: 'taken' }));
-  assertEquals(resp.status, 409);
-  const body = (await resp.json()) as { error: { type: string; message: string } };
-  assertEquals(body.error.type, 'conflict');
-
-  // Both rows untouched.
-  assertEquals((await repo.modelAliases.getByAlias('source'))?.alias, 'source');
-  assertEquals((await repo.modelAliases.getByAlias('taken'))?.alias, 'taken');
-});
-
-test('PATCH /api/aliases/:alias treats body.alias === path as a no-op rename', async () => {
-  const { repo, adminSession } = await setupAppTest();
-  await repo.modelAliases.save({
-    alias: 'same-name',
-    targetModelId: 'gpt-5.4',
-    upstreamIds: [],
-    rules: {},
-    visibleInModelsList: true,
-    onConflict: 'real-only',
-    createdAt: 1_700_000_000,
-  });
-
-  const resp = await requestApp('/api/aliases/same-name', authedJson(adminSession, 'PATCH', {
-    alias: 'same-name',
-    targetModelId: 'claude-opus-4-6',
-  }));
-  assertEquals(resp.status, 200);
-  const updated = (await resp.json()) as SerializedModelAlias;
-  assertEquals(updated.alias, 'same-name');
-  assertEquals(updated.target_model_id, 'claude-opus-4-6');
-});
-
-test('PATCH /api/aliases/:alias requires admin auth', async () => {
-  const { repo, adminSession: _adminSession, apiKey } = await setupAppTest();
-  await repo.modelAliases.save({
-    alias: 'opus-xhigh',
-    targetModelId: 'claude-opus-4-6',
-    upstreamIds: [],
-    rules: {},
-    visibleInModelsList: true,
-    onConflict: 'real-only',
-    createdAt: 1_700_000_000,
-  });
-
-  const userResp = await requestApp('/api/aliases/opus-xhigh', {
-    method: 'PATCH',
-    headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
-    body: JSON.stringify({ visibleInModelsList: false }),
-  });
-  assertEquals(userResp.status, 403);
-});
-
-test('DELETE /api/aliases/:alias returns 204 on success and removes the row', async () => {
-  const { repo, adminSession } = await setupAppTest();
-  await repo.modelAliases.save({
-    alias: 'doomed',
-    targetModelId: 'gpt-5.4',
-    upstreamIds: [],
-    rules: {},
-    visibleInModelsList: true,
-    onConflict: 'real-only',
-    createdAt: 1_700_000_000,
-  });
-
-  const resp = await requestApp('/api/aliases/doomed', authedJson(adminSession, 'DELETE'));
-  assertEquals(resp.status, 204);
-  assertEquals(await repo.modelAliases.getByAlias('doomed'), null);
-});
-
-test('DELETE /api/aliases/:alias returns 404 when the alias does not exist', async () => {
-  const { adminSession } = await setupAppTest();
-  const resp = await requestApp('/api/aliases/nope', authedJson(adminSession, 'DELETE'));
-  assertEquals(resp.status, 404);
-});
-
-test('DELETE /api/aliases/:alias requires admin auth', async () => {
-  const { repo, apiKey } = await setupAppTest();
-  await repo.modelAliases.save({
-    alias: 'doomed',
-    targetModelId: 'gpt-5.4',
-    upstreamIds: [],
-    rules: {},
-    visibleInModelsList: true,
-    onConflict: 'real-only',
-    createdAt: 1_700_000_000,
-  });
-
-  const resp = await requestApp('/api/aliases/doomed', {
-    method: 'DELETE',
-    headers: { 'x-api-key': apiKey.key },
-  });
-  assertEquals(resp.status, 403);
-});
diff --git a/packages/gateway/src/control-plane/model-aliases/serialize.ts b/packages/gateway/src/control-plane/model-aliases/serialize.ts
deleted file mode 100644
index 5889997cc..000000000
--- a/packages/gateway/src/control-plane/model-aliases/serialize.ts
+++ /dev/null
@@ -1,31 +0,0 @@
-// Wire-format projection for the operator-managed model_aliases rows. The
-// dashboard reads the same shape it sends back for create/update; the few
-// snake_cased fields (`visible_in_models_list`, `on_conflict`, `created_at`,
-// `display_name`) follow the rest of the control-plane HTTP surface.
-
-import type { ModelAlias, ModelAliasRules, OnConflict } from './types.ts';
-
-export interface SerializedModelAlias {
-  alias: string;
-  target_model_id: string;
-  upstream_ids: string[];
-  rules: ModelAliasRules;
-  visible_in_models_list: boolean;
-  on_conflict: OnConflict;
-  display_name: string | null;
-  created_at: number;
-}
-
-export const aliasToJson = (alias: ModelAlias): SerializedModelAlias => ({
-  alias: alias.alias,
-  target_model_id: alias.targetModelId,
-  // Defensive copy: the readonly arrays inside ModelAlias are shared with
-  // callers, and JSON serialization would otherwise expose the same backing
-  // array used by `loadAll`.
-  upstream_ids: [...alias.upstreamIds],
-  rules: alias.rules,
-  visible_in_models_list: alias.visibleInModelsList,
-  on_conflict: alias.onConflict,
-  display_name: alias.displayName ?? null,
-  created_at: alias.createdAt,
-});
diff --git a/packages/gateway/src/control-plane/model-aliases/types.ts b/packages/gateway/src/control-plane/model-aliases/types.ts
deleted file mode 100644
index e7d9f64bd..000000000
--- a/packages/gateway/src/control-plane/model-aliases/types.ts
+++ /dev/null
@@ -1,34 +0,0 @@
-// Closed set of request-time mode knobs an operator can lock on a matched
-// alias. Each value is freeform — the gateway does not enum-gate operator
-// input so values pass through to upstream verbatim.
-export type ModelAliasRules = {
-  readonly reasoning?: {
-    readonly effort?: string;
-    readonly budgetTokens?: number;
-    readonly adaptive?: boolean;
-    readonly summary?: string;
-  };
-  readonly verbosity?: string;
-  readonly serviceTier?: string;
-  readonly anthropicBeta?: readonly string[];
-};
-
-export type OnConflict = 'alias-only' | 'real-only' | 'both-real-first' | 'both-alias-first';
-
-export type ModelAlias = {
-  readonly alias: string;
-  readonly targetModelId: string;
-  readonly upstreamIds: readonly string[];
-  readonly rules: ModelAliasRules;
-  readonly visibleInModelsList: boolean;
-  readonly onConflict: OnConflict;
-  // Operator-set human-readable label shown after the upstream display name in
-  // `/v1/models` entries (e.g. "Azure: Codex Auto Review"). When unset, the
-  // listing falls back to the alias target's display name plus a rules-summary
-  // suffix; see `data-plane/model-aliases/display.ts`.
-  readonly displayName?: string;
-  // Unix epoch seconds stamped at row insertion. Surfaced on the
-  // `/v1/models` synthesized alias entry so callers see when an alias was
-  // declared, matching the `created` semantics of the real entries.
-  readonly createdAt: number;
-};
diff --git a/packages/gateway/src/control-plane/models/routes.ts b/packages/gateway/src/control-plane/models/routes.ts
index 9db27e7f4..3a6620496 100644
--- a/packages/gateway/src/control-plane/models/routes.ts
+++ b/packages/gateway/src/control-plane/models/routes.ts
@@ -1,25 +1,21 @@
 import type { Context } from 'hono';
 
-import type { ListedModel } from '../../data-plane/models/alias-listing.ts';
 import { toPublicModel } from '../../data-plane/models/load.ts';
 import { MODEL_LISTING_FAILURE_MESSAGE } from '../../data-plane/models/shared.ts';
-import { getModelsForListing } from '../../data-plane/providers/registry.ts';
+import { getModels } from '../../data-plane/providers/registry.ts';
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
-import { getRepo } from '../../repo/index.ts';
 import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
 import { getCurrentColo } from '../../runtime/runtime-info.ts';
 import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
 import { ProviderModelsUnavailableError } from '@floway-dev/provider';
-import type { UpstreamProviderKind } from '@floway-dev/provider';
+import type { ResolvedModel, UpstreamProviderKind } from '@floway-dev/provider';
 
 // Same DTO as the public /models endpoint, plus one dashboard-only field:
 // `upstreams` lists every provider binding for this model as { kind, id, name }
 // triples. A single model id can be served by mixed provider kinds (e.g. one
 // azure deployment + one custom upstream both expose `gpt-5.5`), so a flat
-// `provider`/`upstream_ids` split would misrepresent that. Alias entries
-// carry a single binding (the upstream that resolves their target) and the
-// `aliasedFrom` provenance flows through `toPublicModel`.
+// `provider`/`upstream_ids` split would misrepresent that.
 interface ControlPlaneModel extends PublicModel {
   upstreams: { kind: UpstreamProviderKind; id: string; name: string }[];
 }
@@ -28,7 +24,7 @@ interface ControlPlaneModelsResponse extends Omit<PublicModelsResponse, 'data'>
   data: ControlPlaneModel[];
 }
 
-const toControlPlaneModel = (model: ListedModel): ControlPlaneModel => ({
+const toControlPlaneModel = (model: ResolvedModel): ControlPlaneModel => ({
   ...toPublicModel(model),
   upstreams: model.providers.map(binding => ({ kind: binding.providerKind, id: binding.upstream, name: binding.upstreamName })),
 });
@@ -39,15 +35,11 @@ export const controlPlaneModels = async (c: Context) => {
     // like the data-plane /models endpoint. On a session request there is no
     // API key, so this resolves to the user's per-user upstream cap: a user who
     // has had an upstream removed must not see its models in the Models tab.
-    // Aliases come from the same repo singleton the data plane uses, so the
-    // dashboard sees exactly the alias rows the runtime would honour.
     const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
-    const aliases = await getRepo().modelAliases.loadAll();
-    const { models } = await getModelsForListing(
+    const models = await getModels(
       effectiveUpstreamIdsFromContext(c),
       fetcherForUpstream,
       backgroundSchedulerFromContext(c),
-      aliases,
     );
     const data = models.map(toControlPlaneModel);
     const response: ControlPlaneModelsResponse = {
diff --git a/packages/gateway/src/control-plane/models/routes_test.ts b/packages/gateway/src/control-plane/models/routes_test.ts
index bab38e182..bc49181de 100644
--- a/packages/gateway/src/control-plane/models/routes_test.ts
+++ b/packages/gateway/src/control-plane/models/routes_test.ts
@@ -1,6 +1,5 @@
 import { test } from 'vitest';
 
-import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
 import { buildCustomUpstreamRecord, copilotModels, requestApp, setupAppTest } from '../../test-helpers.ts';
 import type { UpstreamRecord } from '@floway-dev/provider';
 import { assertEquals, jsonResponse, withMockedFetch } from '@floway-dev/test-utils';
@@ -119,39 +118,10 @@ test('/api/models appends visible alias entries with aliasedFrom alongside real
   const { apiKey, repo } = await setupAppTest();
   await repo.upstreams.save(buildCustomUpstreamRecord({ id: 'up_custom_models', sortOrder: 100 }));
 
-  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
-    {
-      alias: 'codex-auto-review',
-      displayName: 'Codex Auto Review',
-      targetModelId: 'custom-model',
-      upstreamIds: [],
-      rules: { reasoning: { effort: 'low' } },
-      visibleInModelsList: true,
-      onConflict: 'real-only',
-      createdAt: 1_700_000_000,
-    },
-    {
-      alias: 'hidden-alias',
-      targetModelId: 'custom-model',
-      upstreamIds: [],
-      rules: {},
-      visibleInModelsList: false,
-      onConflict: 'real-only',
-      createdAt: 1_700_000_001,
-    },
-  ]);
-
   await withMockedFetch(modelsFetchHandler, async () => {
     const response = await requestApp('/api/models', { headers: { 'x-api-key': apiKey.key } });
     assertEquals(response.status, 200);
-    const body = (await response.json()) as { data: Array<{ id: string; display_name: string; upstreams: Array<{ kind: string; id: string; name: string }>; aliasedFrom?: { targetModelId: string; rules: Record<string, unknown>; displayName?: string } }> };
-    const aliasEntry = body.data.find(model => model.id === 'codex-auto-review');
-    if (!aliasEntry) throw new Error('expected codex-auto-review alias entry on /api/models');
-    assertEquals(aliasEntry.display_name, 'Codex Auto Review');
-    assertEquals(aliasEntry.upstreams, [{ kind: 'custom', id: 'up_custom_models', name: 'Custom Provider' }]);
-    assertEquals(aliasEntry.aliasedFrom?.targetModelId, 'custom-model');
-    assertEquals(aliasEntry.aliasedFrom?.rules, { reasoning: { effort: 'low' } });
-    assertEquals(aliasEntry.aliasedFrom?.displayName, 'Codex Auto Review');
-    assertEquals(body.data.some(model => model.id === 'hidden-alias'), false);
+    const body = (await response.json()) as { data: Array<{ id: string; display_name: string; upstreams: Array<{ kind: string; id: string; name: string }> }> };
+    assertEquals(body.data.some(model => model.id === 'custom-model'), true);
   });
 });
diff --git a/packages/gateway/src/control-plane/routes.ts b/packages/gateway/src/control-plane/routes.ts
index 35ceb71de..94b5f06ff 100644
--- a/packages/gateway/src/control-plane/routes.ts
+++ b/packages/gateway/src/control-plane/routes.ts
@@ -5,11 +5,10 @@ import { authLogin, authLogout, authMe } from './auth/routes.ts';
 import { copilotQuota } from './copilot-quota/routes.ts';
 import { exportData, importData } from './data-transfer/routes.ts';
 import { dumpRoutes } from './dump.ts';
-import { createAlias, deleteAlias, listAliases, updateAlias } from './model-aliases/routes.ts';
 import { controlPlaneModels } from './models/routes.ts';
 import { performanceOverview, performanceTelemetry } from './performance/routes.ts';
 import { createProxy, deleteProxy, listAllBackoffs, listProxies, listProxyBackoffs, resetProxyBackoffs, testProxy, updateProxy } from './proxies/routes.ts';
-import { authLoginBody, changeOwnPasswordBody, claudeCodeAuthorizeUrlBody, claudeCodeImportBody, claudeCodeProbeQuotaBody, claudeCodeRefreshNowBody, claudeCodeReimportBody, claudeCodeSetupTokenImportBody, claudeCodeSetupTokenReimportBody, codexAuthorizeUrlBody, codexImportBody, codexRefreshNowBody, codexReimportBody, copilotAuthPollBody, createAliasBody, createKeyBody, createProxyBody, createUpstreamBody, createUserBody, exportQuery, fetchModelsBody, importBody, performanceQuery, resetBackoffBody, searchConfigSchema, searchUsageQuery, testProxyBody, tokenUsageQuery, updateAliasBody, updateKeyBody, updateProxyBody, updateUpstreamBody, updateUserBody } from './schemas.ts';
+import { authLoginBody, changeOwnPasswordBody, claudeCodeAuthorizeUrlBody, claudeCodeImportBody, claudeCodeProbeQuotaBody, claudeCodeRefreshNowBody, claudeCodeReimportBody, claudeCodeSetupTokenImportBody, claudeCodeSetupTokenReimportBody, codexAuthorizeUrlBody, codexImportBody, codexRefreshNowBody, codexReimportBody, copilotAuthPollBody, createKeyBody, createProxyBody, createUpstreamBody, createUserBody, exportQuery, fetchModelsBody, importBody, performanceQuery, resetBackoffBody, searchConfigSchema, searchUsageQuery, testProxyBody, tokenUsageQuery, updateKeyBody, updateProxyBody, updateUpstreamBody, updateUserBody } from './schemas.ts';
 import { getSearchConfigRoute, putSearchConfigRoute, testSearchConfigRoute } from './search-config/routes.ts';
 import { searchUsage } from './search-usage/routes.ts';
 import { tokenUsage } from './token-usage/routes.ts';
@@ -101,11 +100,6 @@ export const controlPlaneRoutes = new Hono<{ Variables: AuthVars }>()
     .get('/search-config', getSearchConfigRoute)
     .put('/search-config', zValidator('json', searchConfigSchema), putSearchConfigRoute)
     .post('/search-config/test', zValidator('json', searchConfigSchema), testSearchConfigRoute)
-    // Model aliases.
-    .get('/aliases', listAliases)
-    .post('/aliases', zValidator('json', createAliasBody), createAlias)
-    .patch('/aliases/:alias', zValidator('json', updateAliasBody), updateAlias)
-    .delete('/aliases/:alias', deleteAlias)
     .get('/export', zValidator('query', exportQuery), exportData)
     .post('/import', zValidator('json', importBody), importData));
 
diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
index f81c5df42..f718539ee 100644
--- a/packages/gateway/src/control-plane/schemas.ts
+++ b/packages/gateway/src/control-plane/schemas.ts
@@ -635,76 +635,3 @@ export const performanceQuery = z.object({
   bucket: z.enum(['hour', '4h', '8h', 'day', 'all']).optional(),
   timezone_offset_minutes: z.string().optional(),
 });
-
-// --- model aliases ---
-//
-// Operator-managed alias rows wired through `/api/aliases`. The schemas
-// describe the request bodies the dashboard sends; deeper invariants
-// (the alias's target model exists in the catalog, the upstream ids
-// resolve, etc.) are intentionally NOT enforced here — an alias is allowed
-// to point at a target that is currently absent so an operator can pre-
-// stage the row before the upstream is registered, mirroring how the
-// catalog tolerates pending references.
-
-// Mirror the public model-id grammar: letters, digits, `_ . : - / `. Matches
-// the surface ids the dashboard already accepts in the models picker and the
-// `/v1/models` listing, so an alias name is interchangeable with a real id at
-// the request boundary.
-export const MODEL_ALIAS_PATTERN = /^[A-Za-z0-9_.:\-/]+$/;
-
-const aliasNameSchema = z.string().min(1).regex(MODEL_ALIAS_PATTERN, 'alias must be 1+ chars of [A-Za-z0-9_.:-/]');
-
-// Rule field values pass through to the upstream verbatim — the gateway
-// deliberately does not enum-gate operator input here. The Goal-2 contract
-// is that a freshly added enum upstream-side ships through without a
-// gateway code change, so we validate shape (non-empty string, in-range
-// number) but never set membership.
-const aliasReasoningSchema = z.object({
-  effort: z.string().min(1).optional(),
-  budgetTokens: z.number().int().nonnegative().optional(),
-  adaptive: z.boolean().optional(),
-  summary: z.string().min(1).optional(),
-}).strict().refine(
-  r => r.effort !== undefined || r.budgetTokens !== undefined || r.adaptive !== undefined || r.summary !== undefined,
-  { message: 'reasoning must declare at least one of effort, budgetTokens, adaptive, summary' },
-);
-
-const aliasRulesSchema = z.object({
-  reasoning: aliasReasoningSchema.optional(),
-  verbosity: z.string().min(1).optional(),
-  serviceTier: z.string().min(1).optional(),
-  // Each beta header token is a non-empty string. Empty arrays are accepted
-  // (the dashboard sends `[]` when the operator clears every tag) and are
-  // semantically equivalent to omitting the field.
-  anthropicBeta: z.array(z.string().min(1)).optional(),
-}).strict();
-
-const onConflictSchema = z.enum(['alias-only', 'real-only', 'both-real-first', 'both-alias-first']);
-const upstreamIdsSchema = z.array(z.string().min(1));
-
-export const createAliasBody = z.object({
-  alias: aliasNameSchema,
-  targetModelId: z.string().min(1),
-  upstreamIds: upstreamIdsSchema,
-  rules: aliasRulesSchema,
-  visibleInModelsList: z.boolean(),
-  // Defaults to `'real-only'` server-side when omitted so the dashboard's
-  // "create" form does not have to ship a default — the route layer fills it.
-  onConflict: onConflictSchema.optional(),
-  displayName: z.string().min(1).optional(),
-});
-
-// PATCH accepts a partial shape. `alias` is the row's primary key — when
-// present and different from the path param, the handler renames the row
-// (409 on collision). `displayName` is nullable so the operator can clear
-// an existing label back to the synthesized fallback; absent vs. null is
-// meaningful and propagated through to the handler via Object.hasOwn.
-export const updateAliasBody = z.object({
-  alias: aliasNameSchema.optional(),
-  targetModelId: z.string().min(1).optional(),
-  upstreamIds: upstreamIdsSchema.optional(),
-  rules: aliasRulesSchema.optional(),
-  visibleInModelsList: z.boolean().optional(),
-  onConflict: onConflictSchema.optional(),
-  displayName: z.string().min(1).nullable().optional(),
-});
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts b/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts
index 71c3a8288..8d2cda45c 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts
@@ -103,7 +103,7 @@ const callChatCompletionsAsExecuteResult = async (
   headers: Headers,
 ): Promise<ExecuteResult<ProtocolFrame<ChatCompletionsStreamEvent>>> => {
   const { model: _model, ...body } = payload;
-  sanitizeForChatCompletionsUpstream(body as Record<string, unknown>, createSanitizeTraceCtx(candidate.aliasName));
+  sanitizeForChatCompletionsUpstream(body as Record<string, unknown>, createSanitizeTraceCtx());
   const recorder = createUpstreamLatencyRecorder();
   const providerResult = await candidate.binding.provider.callChatCompletions(
     candidate.binding.upstreamModel,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
index 29f9d1b1b..004c95cfb 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
@@ -25,7 +25,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
index b84aea40f..3f2664912 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
@@ -17,7 +17,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
index 4ecb21e72..6f1ef50d4 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
@@ -17,7 +17,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 0ff191572..331856b9b 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -17,7 +17,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
index f8d0c33c5..2af2ecba0 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
@@ -17,7 +17,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
index b9dcb1b9c..d4996ef8c 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
@@ -18,7 +18,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
index 7f96ee5f2..176bad4b2 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
@@ -22,7 +22,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
index 2547a30b4..e2c3e61d8 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
@@ -18,7 +18,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
index e374ea3b4..8a8a7d6d8 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
@@ -17,7 +17,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
index 28e10cfd5..73e8c1afd 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
@@ -1,12 +1,9 @@
 import { chatCompletionsAttempt } from './attempt.ts';
 import { renderChatCompletionsFailure } from './errors.ts';
 import { planChatCompletionsRouting } from './routing.ts';
-import { getRepo } from '../../../repo/index.ts';
-import { applyAliasRulesToChatCompletions } from '../../model-aliases/apply.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
-import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { ExecuteResult } from '@floway-dev/provider';
@@ -21,11 +18,9 @@ export interface ChatCompletionsServeGenerateArgs {
 export const chatCompletionsServe = {
   generate: async (args: ChatCompletionsServeGenerateArgs): Promise<ExecuteResult<ProtocolFrame<ChatCompletionsStreamEvent>>> => {
     const { payload, ctx, store, headers } = args;
-    const aliases = await getRepo().modelAliases.loadAll();
     const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
       upstreamIds: ctx.upstreamIds,
       model: payload.model,
-      aliases,
       pickTarget: endpoints =>
         endpoints.chatCompletions ? 'chat-completions'
           : endpoints.messages ? 'messages'
@@ -49,12 +44,6 @@ export const chatCompletionsServe = {
           : { kind: 'model-missing', model: payload.model, failedUpstreams },
       );
     }
-    // Apply operator-locked alias rules to the inbound IR before the
-    // attempt runs its interceptor chain. The matching `x-floway-alias`
-    // header is staged via Hono's `c.header` so it survives `streamSSE`'s
-    // internal `c.newResponse`.
-    if (candidate.aliasRules) applyAliasRulesToChatCompletions(payload, candidate.aliasRules);
-    if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
     return await chatCompletionsAttempt.generate({ payload, ctx, store, candidate, headers });
   },
 };
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
index 37441a5ba..c3f90f81d 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
@@ -51,7 +51,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
index bc7c7949b..877295494 100644
--- a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
@@ -26,7 +26,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
index 98d15dd39..90c86fb1a 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
@@ -17,7 +17,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
index e74d45a4f..84a282748 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
@@ -17,7 +17,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
index 689ee6d0e..a066ce6c3 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
@@ -17,7 +17,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
index 33e49a791..0cbf60b61 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
@@ -17,7 +17,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
index 61e0132a7..76edee680 100644
--- a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
@@ -28,7 +28,6 @@ const ctx = (): GatewayCtx => ({
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts
index 7d1cf345e..f5daa1d86 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts
@@ -1,12 +1,9 @@
 import { geminiAttempt } from './attempt.ts';
 import { renderGeminiFailure } from './errors.ts';
 import { planGeminiRouting } from './routing.ts';
-import { getRepo } from '../../../repo/index.ts';
-import { applyAliasRulesToGemini } from '../../model-aliases/apply.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
-import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
 import type { ExecuteResult, PlainResult } from '@floway-dev/provider';
@@ -33,11 +30,9 @@ export interface GeminiServeCountTokensArgs {
 export const geminiServe = {
   generate: async (args: GeminiServeGenerateArgs): Promise<ExecuteResult<ProtocolFrame<GeminiStreamEvent>>> => {
     const { payload, ctx, store, model, headers } = args;
-    const aliases = await getRepo().modelAliases.loadAll();
     const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
       upstreamIds: ctx.upstreamIds,
       model,
-      aliases,
       // Gemini has no native upstream target in the provider API; prefer
       // Chat Completions, then Messages, then Responses.
       pickTarget: endpoints => endpoints.chatCompletions ? 'chat-completions' : endpoints.messages ? 'messages' : endpoints.responses ? 'responses' : null,
@@ -60,21 +55,14 @@ export const geminiServe = {
         'generate',
       );
     }
-    // Operator-locked alias rules apply to the Gemini IR before the attempt
-    // runs; the matching `x-floway-alias` header is staged via Hono's
-    // `c.header` so it survives `streamSSE`'s internal `c.newResponse`.
-    if (candidate.aliasRules) applyAliasRulesToGemini(payload, candidate.aliasRules);
-    if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
     return await geminiAttempt.generate({ payload, ctx, store, candidate, headers });
   },
 
   countTokens: async (args: GeminiServeCountTokensArgs): Promise<ExecuteResult<ProtocolFrame<GeminiStreamEvent>> | PlainResult> => {
     const { payload, ctx, store, model, headers } = args;
-    const aliases = await getRepo().modelAliases.loadAll();
     const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
       upstreamIds: ctx.upstreamIds,
       model,
-      aliases,
       // Gemini countTokens has no native upstream support; only providers
       // exposing the Messages endpoint qualify because we translate Gemini
       // → Messages and call Messages count_tokens upstream.
@@ -97,8 +85,6 @@ export const geminiServe = {
         'countTokens',
       );
     }
-    if (candidate.aliasRules) applyAliasRulesToGemini(payload, candidate.aliasRules);
-    if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
     return await geminiAttempt.countTokens({ payload, ctx, store, candidate, headers });
   },
 };
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
index 3ef8114e8..1e834a8be 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
@@ -50,7 +50,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/messages/attempt.ts b/packages/gateway/src/data-plane/chat/messages/attempt.ts
index 3dcde67c7..edf635328 100644
--- a/packages/gateway/src/data-plane/chat/messages/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/messages/attempt.ts
@@ -18,7 +18,6 @@ import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesMessage, MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
 import { type ExecuteResult, type PlainResult } from '@floway-dev/provider';
 import { translateMessagesViaChatCompletions, translateMessagesViaResponses } from '@floway-dev/translate';
-import { applyAnthropicBetaToHeaders } from '@floway-dev/translate/via-messages/anthropic-extensions';
 import { messagesViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
 
 export interface MessagesAttemptGenerateArgs {
@@ -50,21 +49,13 @@ export const messagesAttempt = {
     return await runInterceptors(invocation, ctx, messagesInterceptors, async () => {
       if (candidate.targetApi === 'messages') {
         const { model: _model, ...body } = invocation.payload;
-        // The candidate's `anthropic_beta` alias rule merges onto the
-        // anthropic-beta header (the wire path; the body slot is rejected
-        // by the http entry). Body extensions are stripped just before the
-        // upstream call, after every interceptor has had its say.
-        const outgoingHeaders = new Headers(invocation.headers);
-        if (candidate.aliasRules?.anthropicBeta?.length) {
-          applyAnthropicBetaToHeaders(outgoingHeaders, candidate.aliasRules.anthropicBeta);
-        }
-        sanitizeForMessagesUpstream(body as Record<string, unknown>, createSanitizeTraceCtx(candidate.aliasName));
+        sanitizeForMessagesUpstream(body as Record<string, unknown>, createSanitizeTraceCtx());
         const recorder = createUpstreamLatencyRecorder();
         const providerResult = await candidate.binding.provider.callMessages(
           candidate.binding.upstreamModel,
           body,
           ctx.abortSignal,
-          buildUpstreamCallOptions(candidate, ctx, recorder.record, outgoingHeaders),
+          buildUpstreamCallOptions(candidate, ctx, recorder.record, invocation.headers),
         );
         return await providerStreamResultToExecuteResult(providerResult, candidate, ctx, recorder);
       }
@@ -109,16 +100,12 @@ export const messagesAttempt = {
     const recorder = createUpstreamLatencyRecorder();
     const response = await runInterceptors(invocation, ctx, messagesCountTokensInterceptors, async () => {
       const { model: _model, ...body } = invocation.payload;
-      const outgoingHeaders = new Headers(invocation.headers);
-      if (candidate.aliasRules?.anthropicBeta?.length) {
-        applyAnthropicBetaToHeaders(outgoingHeaders, candidate.aliasRules.anthropicBeta);
-      }
-      sanitizeForMessagesUpstream(body as Record<string, unknown>, createSanitizeTraceCtx(candidate.aliasName));
+      sanitizeForMessagesUpstream(body as Record<string, unknown>, createSanitizeTraceCtx());
       const { response } = await candidate.binding.provider.callMessagesCountTokens(
         candidate.binding.upstreamModel,
         body,
         ctx.abortSignal,
-        buildUpstreamCallOptions(candidate, ctx, recorder.record, outgoingHeaders),
+        buildUpstreamCallOptions(candidate, ctx, recorder.record, invocation.headers),
       );
       return response;
     });
diff --git a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
index 41a96f0de..36fe9e284 100644
--- a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
@@ -25,7 +25,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/messages/http_test.ts b/packages/gateway/src/data-plane/chat/messages/http_test.ts
index cf32509d2..27f403465 100644
--- a/packages/gateway/src/data-plane/chat/messages/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/http_test.ts
@@ -272,60 +272,3 @@ test('POST /v1/messages forwards upstream response headers end-to-end (non-strea
   assertEquals(response.headers.get('anthropic-ratelimit-unified-status'), 'allowed');
   assertEquals(response.headers.get('cf-ray'), 'cf_ray_e2e');
 });
-
-test('POST /v1/messages stamps x-floway-alias when the candidate is alias-matched', async () => {
-  installRepo();
-  const callMessages = vi.fn(async (): Promise<ProviderStreamResult<MessagesStreamEvent>> => ({
-    ok: true, events: makeProtocolFrames(makeMessagesEvents()), modelKey: 'k', headers: new Headers(),
-  }));
-  const candidate = makeCandidate({ callMessages });
-  queueCandidates([{ ...candidate, aliasRules: { reasoning: { effort: 'low' } }, aliasName: 'codex-auto-review' }]);
-
-  const response = await makeApp().request('/v1/messages', {
-    method: 'POST',
-    headers: { 'content-type': 'application/json' },
-    body: JSON.stringify({ model: 'codex-auto-review', max_tokens: 32, messages: [{ role: 'user', content: 'hello' }] }),
-  });
-
-  assertEquals(response.status, 200);
-  assertEquals(response.headers.get('x-floway-alias'), 'codex-auto-review');
-});
-
-test('POST /v1/messages does not set x-floway-alias when no alias matched', async () => {
-  installRepo();
-  const callMessages = vi.fn(async (): Promise<ProviderStreamResult<MessagesStreamEvent>> => ({
-    ok: true, events: makeProtocolFrames(makeMessagesEvents()), modelKey: 'k', headers: new Headers(),
-  }));
-  queueCandidates([makeCandidate({ callMessages })]);
-
-  const response = await makeApp().request('/v1/messages', {
-    method: 'POST',
-    headers: { 'content-type': 'application/json' },
-    body: JSON.stringify({ model: 'test-model', max_tokens: 32, messages: [{ role: 'user', content: 'hello' }] }),
-  });
-
-  assertEquals(response.status, 200);
-  assertEquals(response.headers.get('x-floway-alias'), null);
-});
-
-test('POST /v1/messages applies alias reasoning.effort onto output_config before upstream call', async () => {
-  installRepo();
-  const observedBodies: { output_config?: { effort?: string } }[] = [];
-  const callMessages = vi.fn(async (_model: unknown, body: unknown): Promise<ProviderStreamResult<MessagesStreamEvent>> => {
-    observedBodies.push(body as { output_config?: { effort?: string } });
-    return { ok: true, events: makeProtocolFrames(makeMessagesEvents()), modelKey: 'k', headers: new Headers() };
-  });
-  const candidate = makeCandidate({ callMessages });
-  queueCandidates([{ ...candidate, aliasRules: { reasoning: { effort: 'high' } }, aliasName: 'alias-x' }]);
-
-  const response = await makeApp().request('/v1/messages', {
-    method: 'POST',
-    headers: { 'content-type': 'application/json' },
-    body: JSON.stringify({ model: 'alias-x', max_tokens: 32, messages: [{ role: 'user', content: 'hello' }] }),
-  });
-
-  assertEquals(response.status, 200);
-  const observed = observedBodies[0];
-  if (observed === undefined) throw new Error('expected callMessages to receive a body');
-  assertEquals(observed.output_config?.effort, 'high');
-});
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
index bf29636a7..590a05c7b 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
@@ -18,7 +18,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 9db2fe856..2917537a7 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -18,7 +18,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
index c08720c71..f1e8e49c0 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
@@ -18,7 +18,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
index 8ddfa03e7..f1cf56677 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
@@ -60,7 +60,6 @@ const gatewayCtx = (apiKeyId: string = 'test-key'): GatewayCtx => ({
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/messages/respond_test.ts b/packages/gateway/src/data-plane/chat/messages/respond_test.ts
index 0caef38b6..82191d8af 100644
--- a/packages/gateway/src/data-plane/chat/messages/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/respond_test.ts
@@ -536,7 +536,6 @@ const makeRespondCtx = (): GatewayCtx => ({
   runtimeLocation: 'TEST',
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
   currentColo: 'TEST',
   dump: null,
diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts
index 8b0b337f1..ae9bb5d6c 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve.ts
@@ -1,12 +1,9 @@
 import { messagesAttempt } from './attempt.ts';
 import { renderMessagesFailure } from './errors.ts';
 import { planMessagesRouting } from './routing.ts';
-import { getRepo } from '../../../repo/index.ts';
-import { applyAliasRulesToMessages } from '../../model-aliases/apply.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
-import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
 import type { ExecuteResult, PlainResult } from '@floway-dev/provider';
@@ -28,11 +25,9 @@ export interface MessagesServeCountTokensArgs {
 export const messagesServe = {
   generate: async (args: MessagesServeGenerateArgs): Promise<ExecuteResult<ProtocolFrame<MessagesStreamEvent>>> => {
     const { payload, ctx, store, headers } = args;
-    const aliases = await getRepo().modelAliases.loadAll();
     const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
       upstreamIds: ctx.upstreamIds,
       model: payload.model,
-      aliases,
       pickTarget: endpoints =>
         endpoints.messages ? 'messages'
           : endpoints.responses ? 'responses'
@@ -57,23 +52,14 @@ export const messagesServe = {
         'generate',
       );
     }
-    // Operator-locked alias rules go onto the inbound IR before the attempt
-    // begins so the per-protocol interceptor chain (and any downstream
-    // translate pass) sees the already-injected fields. The matching
-    // `x-floway-alias` header is staged via Hono's `c.header` so it
-    // survives `streamSSE`'s internal `c.newResponse`.
-    if (candidate.aliasRules) applyAliasRulesToMessages(payload, candidate.aliasRules);
-    if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
     return await messagesAttempt.generate({ payload, ctx, store, candidate, headers });
   },
 
   countTokens: async (args: MessagesServeCountTokensArgs): Promise<ExecuteResult<ProtocolFrame<MessagesStreamEvent>> | PlainResult> => {
     const { payload, ctx, store, headers } = args;
-    const aliases = await getRepo().modelAliases.loadAll();
     const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
       upstreamIds: ctx.upstreamIds,
       model: payload.model,
-      aliases,
       pickTarget: endpoints => endpoints.messages ? 'messages' : null,
       scheduler: ctx.backgroundScheduler,
       currentColo: ctx.currentColo,
@@ -93,11 +79,6 @@ export const messagesServe = {
         'countTokens',
       );
     }
-    // count_tokens carries the same alias semantics as generate — operator
-    // rules apply uniformly regardless of endpoint, and the response header
-    // rides out the same way.
-    if (candidate.aliasRules) applyAliasRulesToMessages(payload, candidate.aliasRules);
-    if (candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', candidate.aliasName);
     return await messagesAttempt.countTokens({ payload, ctx, store, candidate, headers });
   },
 };
diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
index bb1dfcfb0..de09f5646 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
@@ -48,7 +48,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/responses/attempt.ts b/packages/gateway/src/data-plane/chat/responses/attempt.ts
index c0fb6a902..c70233e93 100644
--- a/packages/gateway/src/data-plane/chat/responses/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/responses/attempt.ts
@@ -192,7 +192,7 @@ const dispatchResponses = async (
   switch (candidate.targetApi) {
   case 'responses': {
     const { model: _model, ...body } = payload;
-    sanitizeForResponsesUpstream(body as Record<string, unknown>, createSanitizeTraceCtx(candidate.aliasName));
+    sanitizeForResponsesUpstream(body as Record<string, unknown>, createSanitizeTraceCtx());
     const recorder = createUpstreamLatencyRecorder();
     const providerResult = await candidate.binding.provider.callResponses(
       candidate.binding.upstreamModel,
@@ -242,7 +242,7 @@ const callResponsesCompactAsExecuteResult = async (
   headers: Headers,
 ): Promise<ExecuteResult<ProtocolFrame<ResponsesStreamEvent>>> => {
   const { model: _model, stream: _stream, store: _store, ...body } = payload;
-  sanitizeForResponsesUpstream(body as Record<string, unknown>, createSanitizeTraceCtx(candidate.aliasName));
+  sanitizeForResponsesUpstream(body as Record<string, unknown>, createSanitizeTraceCtx());
   const recorder = createUpstreamLatencyRecorder();
   const providerResult = await candidate.binding.provider.callResponsesCompact(
     candidate.binding.upstreamModel,
diff --git a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
index ceba118d7..22ec01ee1 100644
--- a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
@@ -27,7 +27,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
index 444ce069d..914711316 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
@@ -19,7 +19,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
index a2951ef03..fc422467e 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
@@ -19,7 +19,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
index f4f26c112..4210d34ba 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
@@ -19,7 +19,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index ea7b872ab..9618d2f02 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -19,7 +19,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
index 5b9bb4c91..16c220950 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
@@ -47,7 +47,6 @@ const stubCtx = (overrides: { abortSignal?: AbortSignal } = {}): GatewayCtx => (
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
   ...overrides,
 });
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
index ff2e0f910..d48ea7d2b 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
@@ -350,7 +350,6 @@ const makeGatewayCtx = (apiKeyId: string = 'k1'): GatewayCtx => ({
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
@@ -4500,7 +4499,6 @@ test('downstream AbortSignal threads through to provider search / fetchPage and
     dump: null,
     backgroundScheduler: () => {},
     c: stubAuthedContext(),
-    responseHeaders: new Headers(),
     requestStartedAt: 0,
     abortSignal: controller.signal,
   };
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
index 8f1abec4c..e10d24fa7 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
@@ -146,7 +146,6 @@ const gatewayCtx = (): GatewayCtx => ({
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts
index 77904bb63..a66fe9995 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts
@@ -1,5 +1,4 @@
 import { createPerRequestFetcher } from '../../../../../dial/per-request.ts';
-import { getRepo } from '../../../../../repo/index.ts';
 import { sleep } from '../../../../../shared/sleep.ts';
 import { resolveModelForRequest } from '../../../../providers/registry.ts';
 import { appendFailedUpstreams } from '../../../../shared/failed-upstreams.ts';
@@ -536,13 +535,7 @@ const resolveImageBinding = async (
   const endpointPath = isEdit ? '/images/edits' : '/images/generations';
   let resolution;
   try {
-    // The image-generation server-tool runs inside a Responses request; the
-    // outer request's matched alias (if any) has already stamped the
-    // response header. Threading aliases here keeps the second
-    // resolveModelForRequest (for the image tool's own model id) consistent
-    // with how the outer LLM call resolved its candidate.
-    const aliases = await getRepo().modelAliases.loadAll();
-    resolution = await resolveModelForRequest(state.config.model, state.upstreamIds, fetcherForUpstream, state.backgroundScheduler, aliases);
+    resolution = await resolveModelForRequest(state.config.model, state.upstreamIds, fetcherForUpstream, state.backgroundScheduler);
   } catch (e) {
     return { ok: false, error: serverError(e) };
   }
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
index 16f0415e1..284004632 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
@@ -58,7 +58,6 @@ const gatewayCtx = (): GatewayCtx => ({
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
index c873f9225..08534e87b 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
@@ -19,7 +19,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
index 6417306ff..9a51ac553 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
@@ -19,7 +19,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
index ec4a48afa..8b2638334 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
@@ -1,7 +1,6 @@
 import { renderResponsesFailure } from './errors.ts';
 import type { StatefulResponsesStore } from './items/store.ts';
 import { planResponsesRouting } from './routing.ts';
-import { getRepo } from '../../../repo/index.ts';
 import { enumerateProviderCandidates, type ChatCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ModelEndpoints, ProtocolFrame } from '@floway-dev/protocols/common';
@@ -89,11 +88,9 @@ export const prepareResponsesServePlan = async (args: {
 }): Promise<ResponsesServePlan> => {
   const { payload, ctx, store, pickTarget } = args;
   const prepared = await expandPreviousResponseId(payload, store);
-  const aliases = await getRepo().modelAliases.loadAll();
   const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
     upstreamIds: ctx.upstreamIds,
     model: prepared.model,
-    aliases,
     pickTarget,
     scheduler: ctx.backgroundScheduler,
     currentColo: ctx.currentColo,
diff --git a/packages/gateway/src/data-plane/chat/responses/serve.ts b/packages/gateway/src/data-plane/chat/responses/serve.ts
index 81035f20d..bed8b6df3 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve.ts
@@ -2,9 +2,7 @@ import { responsesAttempt } from './attempt.ts';
 import type { ResponsesAttemptResult } from './interceptors/types.ts';
 import type { ResponsesSnapshotMode, StatefulResponsesStore } from './items/store.ts';
 import { prepareResponsesServePlan } from './serve-prep.ts';
-import { applyAliasRulesToResponses } from '../../model-aliases/apply.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
-import { stageGatewayResponseHeader } from '../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { ResponsesPayload, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
 import type { ExecuteResult } from '@floway-dev/provider';
@@ -48,11 +46,6 @@ export const responsesServe = {
               : null,
     });
     if (plan.kind === 'failure') return plan.result;
-    // Operator-locked alias rules apply to the prepared inbound IR before
-    // the attempt runs; the `x-floway-alias` header is staged via Hono's
-    // `c.header` so it survives `streamSSE`'s internal `c.newResponse`.
-    if (plan.candidate.aliasRules) applyAliasRulesToResponses(plan.prepared, plan.candidate.aliasRules);
-    if (plan.candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', plan.candidate.aliasName);
     const effectiveSnapshotMode: ResponsesSnapshotMode = snapshotMode !== 'none' && containsCompactionTrigger(plan.prepared.input)
       ? 'replace'
       : snapshotMode;
@@ -69,12 +62,6 @@ export const responsesServe = {
       pickTarget: endpoints => endpoints.responses ? 'responses' : null,
     });
     if (plan.kind === 'failure') return plan.result;
-    // Alias rules also apply on the compact path. The upstream compact
-    // endpoint silently drops fields like `reasoning` it does not honor;
-    // applying uniformly keeps the operator's intent expressed at the
-    // inbound boundary regardless of which endpoint runs.
-    if (plan.candidate.aliasRules) applyAliasRulesToResponses(plan.prepared, plan.candidate.aliasRules);
-    if (plan.candidate.aliasName) stageGatewayResponseHeader(ctx, 'x-floway-alias', plan.candidate.aliasName);
     return await responsesAttempt.compact({ payload: plan.prepared, ctx, store, candidate: plan.candidate, headers });
   },
 };
diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
index b42f5bdbf..f60be9a2a 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
@@ -59,7 +59,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
   dump: null,
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
-  responseHeaders: new Headers(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/shared/candidates.ts b/packages/gateway/src/data-plane/chat/shared/candidates.ts
index 7a58f9e08..591f44f55 100644
--- a/packages/gateway/src/data-plane/chat/shared/candidates.ts
+++ b/packages/gateway/src/data-plane/chat/shared/candidates.ts
@@ -1,4 +1,3 @@
-import type { ModelAlias, ModelAliasRules } from '../../../control-plane/model-aliases/types.ts';
 import { createPerRequestFetcher } from '../../../dial/per-request.ts';
 import { collectInterpretationOutcomes, enumerateModelInterpretations, listModelProviders } from '../../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
@@ -7,17 +6,7 @@ import type { ChatTargetApi, ProviderCandidate } from '@floway-dev/provider';
 
 export type { ProviderCandidate };
 
-// Wrapper around `ProviderCandidate` that carries the matched alias's
-// operator-locked request-time rules and the alias name. The wrapper lives
-// here (in the gateway) rather than on `ProviderCandidate` itself to keep
-// the `@floway-dev/provider` package unaware of the gateway's alias
-// concept. Downstream attempt logic narrows the candidate when it needs
-// to apply rules or stamp the `x-floway-alias` response header; passthrough
-// consumers continue to treat the candidate as a plain `ProviderCandidate`.
-export type ChatCandidate = ProviderCandidate & {
-  readonly aliasRules?: ModelAliasRules;
-  readonly aliasName?: string;
-};
+export type ChatCandidate = ProviderCandidate;
 
 // Returns the candidates that satisfy both the model resolution and the
 // target-endpoint pick, plus a `sawModel` flag that distinguishes the
@@ -26,16 +15,11 @@ export type ChatCandidate = ProviderCandidate & {
 // whose catalog fetch rejected this round so the caller's failure
 // renderer can surface them parenthetically.
 export const enumerateProviderCandidates = async ({
-  upstreamIds, model, aliases, pickTarget, scheduler, currentColo,
+  upstreamIds, model, pickTarget, scheduler, currentColo,
 }: {
   // null = unrestricted; empty list = no providers visible.
   upstreamIds: readonly string[] | null;
   model: string;
-  // Operator-managed alias table loaded by the caller (typically via
-  // `getRepo().modelAliases.loadAll()`). The fan-out matches each
-  // (provider, lookupId) interpretation against this list; an empty list
-  // is a valid input and produces only literal interpretations.
-  aliases: readonly ModelAlias[];
   pickTarget: (endpoints: ModelEndpoints) => ChatTargetApi | null;
   // Threaded into `resolveModelForProvider` so the per-upstream catalog
   // lookup hits the SWR-cached `fetchUpstreamModelsCached` instead of
@@ -57,28 +41,17 @@ export const enumerateProviderCandidates = async ({
   // `resolveModelForRequest`; first-viable-wins ordering follows configured
   // sort_order across upstreams, with the unprefixed interpretation pushed
   // before the prefixed one within a single upstream.
-  //
-  // Alias matching runs inside `enumerateModelInterpretations`: each
-  // (provider, lookupId) pair is checked against the alias table and the
-  // matched alias's `onConflict` decides what to push. The alias-rewrite
-  // metadata rides out alongside each resolved candidate so the attempt
-  // layer can apply the locked rules.
-  const interpretations = enumerateModelInterpretations(model, providers, aliases);
+  const interpretations = enumerateModelInterpretations(model, providers);
   const { resolutions, failedUpstreams } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
 
   const candidates: ChatCandidate[] = [];
   let sawModel = false;
 
-  for (const { interpretation, provider, resolved } of resolutions) {
+  for (const { provider, resolved } of resolutions) {
     sawModel = true;
     const targetApi = pickTarget(resolved.binding.upstreamModel.endpoints);
     if (!targetApi) continue;
-    const base: ProviderCandidate = { provider, binding: resolved.binding, targetApi, fetcher: fetcherForUpstream(provider.upstream) };
-    candidates.push(
-      interpretation.aliasRules !== undefined
-        ? { ...base, aliasRules: interpretation.aliasRules, aliasName: interpretation.aliasName }
-        : base,
-    );
+    candidates.push({ provider, binding: resolved.binding, targetApi, fetcher: fetcherForUpstream(provider.upstream) });
   }
 
   return { candidates, sawModel, failedUpstreams };
diff --git a/packages/gateway/src/data-plane/chat/shared/candidates_test.ts b/packages/gateway/src/data-plane/chat/shared/candidates_test.ts
index 381d395b9..627b631d5 100644
--- a/packages/gateway/src/data-plane/chat/shared/candidates_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/candidates_test.ts
@@ -54,7 +54,6 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates, sawModel } = await enumerateProviderCandidates({
       upstreamIds: null,
-      aliases: [],
       model: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
@@ -75,7 +74,6 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates, sawModel } = await enumerateProviderCandidates({
       upstreamIds: null,
-      aliases: [],
       model: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
@@ -96,7 +94,6 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates, sawModel } = await enumerateProviderCandidates({
       upstreamIds: null,
-      aliases: [],
       model: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
@@ -116,7 +113,6 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates } = await enumerateProviderCandidates({
       upstreamIds: null,
-      aliases: [],
       model: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
@@ -137,7 +133,6 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates } = await enumerateProviderCandidates({
       upstreamIds: ['up_c', 'up_a'],
-      aliases: [],
       model: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
@@ -160,7 +155,6 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates } = await enumerateProviderCandidates({
       upstreamIds: null,
-      aliases: [],
       model: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
@@ -178,7 +172,6 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates: msgCandidates } = await enumerateProviderCandidates({
       upstreamIds: null,
-      aliases: [],
       model: 'test-model',
       pickTarget: pickMessagesOrResponses,
       scheduler: testScheduler,
@@ -189,7 +182,6 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates: resCandidates } = await enumerateProviderCandidates({
       upstreamIds: null,
-      aliases: [],
       model: 'test-model',
       pickTarget: pickResponses,
       scheduler: testScheduler,
@@ -206,7 +198,6 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates: anyCandidates } = await enumerateProviderCandidates({
       upstreamIds: null,
-      aliases: [],
       model: 'test-model',
       pickTarget: pickAny,
       scheduler: testScheduler,
@@ -217,7 +208,6 @@ describe('enumerateProviderCandidates', () => {
 
     const { candidates: msgCandidates, sawModel } = await enumerateProviderCandidates({
       upstreamIds: null,
-      aliases: [],
       model: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
@@ -255,7 +245,6 @@ describe('enumerateProviderCandidates', () => {
       async () => {
         const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
           upstreamIds: null,
-          aliases: [],
           model: 'test-model',
           pickTarget: pickMessages,
           scheduler: testScheduler,
@@ -299,7 +288,6 @@ describe('enumerateProviderCandidates', () => {
       async () => {
         const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
           upstreamIds: null,
-          aliases: [],
           model: 'test-model',
           pickTarget: pickMessages,
           scheduler: testScheduler,
diff --git a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
index ad0ef2169..9cdbab0c8 100644
--- a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
+++ b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
@@ -6,13 +6,7 @@ import { getCurrentColo } from '../../../runtime/runtime-info.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 
 export interface GatewayCtx {
-  // The inbound Hono context. Carried so the serve layer can stage
-  // response headers via `c.header(name, value)` — the Hono-documented
-  // knob that survives `streamSSE`'s internal `c.newResponse` for the
-  // streaming surfaces. For non-streaming surfaces that build the
-  // outgoing Response via the Web `Response.json` constructor (which
-  // bypasses Hono's context), the same value also lands on
-  // `responseHeaders` so `finalizeGatewayResponse` can stamp it.
+  // The inbound Hono context.
   readonly c: AuthedContext;
   readonly apiKeyId: string;
   readonly upstreamIds: readonly string[] | null;
@@ -31,15 +25,8 @@ export interface GatewayCtx {
   readonly currentColo: string;
   // Null when the api key has no retention configured, in which case
   // `finalizeGatewayResponse` short-circuits the dump tee and returns the
-  // response untouched (entries from `responseHeaders` are still applied).
+  // response untouched.
   readonly dump: DumpAccumulator | null;
-  // Per-request response-header staging for the non-streaming and error
-  // paths that build their Response via the Web `Response.json` constructor
-  // rather than through Hono's `c.json`/`streamSSE`. The serve layer writes
-  // gateway-stamped headers (e.g. `x-floway-alias`) here in lockstep with
-  // its `ctx.c.header(...)` call; `finalizeGatewayResponse` then merges
-  // them onto the outgoing Response.
-  readonly responseHeaders: Headers;
 }
 
 export interface CreateGatewayCtxOptions {
@@ -86,27 +73,12 @@ export const createGatewayCtxFromHono = (c: AuthedContext, opts: CreateGatewayCt
     runtimeLocation: colo,
     currentColo: colo,
     dump,
-    responseHeaders: new Headers(),
   };
 };
 
-// Stage one gateway response header so it lands on the outgoing Response
-// regardless of which builder produced it. Calls Hono's `c.header` (the
-// only knob that survives `streamSSE`'s internal `c.newResponse`) AND
-// stages on the per-ctx `responseHeaders` bag that `finalizeGatewayResponse`
-// merges onto Web-`Response.json`-built non-streaming responses.
-export const stageGatewayResponseHeader = (ctx: GatewayCtx, name: string, value: string): void => {
-  ctx.c.header(name, value);
-  ctx.responseHeaders.set(name, value);
-};
-
-// Apply ctx-stamped response headers onto the outgoing Response and then run
-// the dump-accumulator's finalize tee. Every inbound HTTP wrapper returns its
-// response through this seam so gateway-stamped headers ride out uniformly
-// across happy-path, error, and passthrough paths — including the
-// non-streaming surfaces that build their Response via Web `Response.json`
-// rather than Hono's `c.json`.
+// Run the dump-accumulator's finalize tee on the outgoing Response. Every
+// inbound HTTP wrapper returns its response through this seam so the dump
+// pipeline applies uniformly across happy-path, error, and passthrough paths.
 export const finalizeGatewayResponse = (ctx: GatewayCtx, response: Response): Response => {
-  for (const [name, value] of ctx.responseHeaders) response.headers.set(name, value);
   return ctx.dump?.finalize(response) ?? response;
 };
diff --git a/packages/gateway/src/data-plane/chat/shared/respond_test.ts b/packages/gateway/src/data-plane/chat/shared/respond_test.ts
index 738bb6399..9a5c506a6 100644
--- a/packages/gateway/src/data-plane/chat/shared/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/respond_test.ts
@@ -47,7 +47,6 @@ const setup = (): Harness => {
       backgroundScheduler: promise => { background.push(promise); },
       requestStartedAt,
       c: stubAuthedContext(),
-      responseHeaders: new Headers(),
     }),
   };
 };
diff --git a/packages/gateway/src/data-plane/chat/shared/routing.ts b/packages/gateway/src/data-plane/chat/shared/routing.ts
index 392c50cf5..ddca37cb0 100644
--- a/packages/gateway/src/data-plane/chat/shared/routing.ts
+++ b/packages/gateway/src/data-plane/chat/shared/routing.ts
@@ -1,12 +1,10 @@
 import type { ChatCandidate, ProviderCandidate } from './candidates.ts';
 import type { ChatServeFailure } from './errors.ts';
 
-// Generic over the candidate type so call sites that hand in `ChatCandidate`
-// receive a decision whose surviving candidates retain the alias metadata.
-// The candidate filtering and ordering inside routing is shape-agnostic —
-// it touches `binding.upstream` and `binding.supportsResponsesItemReference`
-// only — so the generic narrows naturally from `ChatCandidate` back out
-// without re-deriving the alias fields.
+// Generic over the candidate type so call sites can narrow back to their
+// concrete shape. The candidate filtering and ordering inside routing is
+// shape-agnostic — it touches `binding.upstream` and
+// `binding.supportsResponsesItemReference` only.
 export type RoutingDecision<T extends ProviderCandidate = ChatCandidate> =
   | { readonly kind: 'success'; readonly candidates: readonly T[] }
   | { readonly kind: 'failure'; readonly failure: ChatServeFailure };
diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize.ts b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
index 57a6959dc..64f404944 100644
--- a/packages/gateway/src/data-plane/chat/shared/sanitize.ts
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
@@ -1,19 +1,12 @@
-import type { ModelAliasRules } from '../../../control-plane/model-aliases/types.ts';
 import { FLOWAY_EXTENSION_FIELDS } from '@floway-dev/protocols/extensions';
 
 export interface SanitizeTraceCtx {
-  readonly aliasName?: string;
-  readonly emit: (line: { alias?: string; field: string; targetProtocol: string }) => void;
+  readonly emit: (line: { field: string; targetProtocol: string }) => void;
 }
 
 // Default per-request trace that flows through the gateway's console logger.
-// `aliasName` rides through to the trace line so an operator inspecting logs
-// can correlate the drop with the matched alias; when no alias matched the
-// field still appears (residue from a client-sent extension), just without
-// alias attribution.
-export const createSanitizeTraceCtx = (aliasName: string | undefined): SanitizeTraceCtx => ({
-  ...(aliasName !== undefined ? { aliasName } : {}),
-  emit: line => console.warn('floway.alias.drop', JSON.stringify(line)),
+export const createSanitizeTraceCtx = (): SanitizeTraceCtx => ({
+  emit: line => console.warn('floway.extension.drop', JSON.stringify(line)),
 });
 
 const stripKeys = (
@@ -26,7 +19,7 @@ const stripKeys = (
   for (const key of keys) {
     if (key in body) {
       delete body[key];
-      trace?.emit({ alias: trace.aliasName, field: `${fieldPrefix}${key}`, targetProtocol });
+      trace?.emit({ field: `${fieldPrefix}${key}`, targetProtocol });
     }
   }
 };
@@ -50,24 +43,3 @@ export const sanitizeForGeminiUpstream = (body: Record<string, unknown>, trace?:
     stripKeys(generationConfig as Record<string, unknown>, FLOWAY_EXTENSION_FIELDS.gemini.generationConfig, 'gemini', trace, 'generationConfig.');
   }
 };
-
-// Walks the alias rules object and emits one trace line per non-empty rule
-// field. Used by inbound surfaces that have no protocol-extension slots for
-// the rules in the first place (embeddings, images, /v1/completions) — the
-// rules are structurally dropped before the upstream call, and this helper
-// gives the operator the same `floway.alias.drop` signal the chat
-// sanitizers produce when they strip extension residue.
-export const traceAllRulesDropped = (
-  rules: ModelAliasRules,
-  targetProtocol: string,
-  trace: SanitizeTraceCtx,
-): void => {
-  if (rules.reasoning) {
-    for (const key of Object.keys(rules.reasoning)) {
-      trace.emit({ alias: trace.aliasName, field: `reasoning.${key}`, targetProtocol });
-    }
-  }
-  if (rules.verbosity !== undefined) trace.emit({ alias: trace.aliasName, field: 'verbosity', targetProtocol });
-  if (rules.serviceTier !== undefined) trace.emit({ alias: trace.aliasName, field: 'serviceTier', targetProtocol });
-  if (rules.anthropicBeta?.length) trace.emit({ alias: trace.aliasName, field: 'anthropicBeta', targetProtocol });
-};
diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
index da2e72d0d..0dd52330d 100644
--- a/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
@@ -11,20 +11,22 @@ import { assertEquals } from '@floway-dev/test-utils';
 
 type TraceLine = { alias?: string; field: string; targetProtocol: string };
 
-const makeTrace = (aliasName?: string): { ctx: SanitizeTraceCtx; lines: TraceLine[] } => {
+const makeTrace = (): { ctx: SanitizeTraceCtx; lines: TraceLine[] } => {
   const lines: TraceLine[] = [];
   return {
-    ctx: { aliasName, emit: line => lines.push(line) },
+    ctx: { emit: line => lines.push(line) },
     lines,
   };
 };
 
 test('sanitizeForMessagesUpstream strips verbosity and emits one trace line', () => {
   const body: Record<string, unknown> = { verbosity: 'low', model: 'x' };
-  const { ctx, lines } = makeTrace('codex-auto-review');
+  const { ctx, lines } = makeTrace();
   sanitizeForMessagesUpstream(body, ctx);
   assertEquals(body, { model: 'x' });
-  assertEquals(lines, [{ alias: 'codex-auto-review', field: 'verbosity', targetProtocol: 'messages' }]);
+  assertEquals(lines.length, 1);
+  assertEquals(lines[0].field, 'verbosity');
+  assertEquals(lines[0].targetProtocol, 'messages');
 });
 
 test('sanitizeForChatCompletionsUpstream strips Floway extensions and leaves native fields', () => {
@@ -34,11 +36,11 @@ test('sanitizeForChatCompletionsUpstream strips Floway extensions and leaves nat
     reasoning_effort: 'high',
     model: 'x',
   };
-  const { ctx, lines } = makeTrace('alias-1');
+  const { ctx, lines } = makeTrace();
   sanitizeForChatCompletionsUpstream(body, ctx);
   assertEquals(body, { reasoning_effort: 'high', model: 'x' });
   assertEquals(lines.length, 2);
-  assertEquals(lines.every(l => l.alias === 'alias-1' && l.targetProtocol === 'chat-completions'), true);
+  assertEquals(lines.every(l => l.targetProtocol === 'chat-completions'), true);
   const droppedFields = lines.map(l => l.field).sort();
   assertEquals(droppedFields, ['anthropic_beta', 'thinking_budget']);
 });
@@ -54,13 +56,13 @@ test('sanitizeForGeminiUpstream walks top-level and generationConfig', () => {
     generationConfig: { verbosity: 'low', thinkingConfig: { thinkingBudget: 100 } },
     anthropicBeta: ['ctx-1m'],
   };
-  const { ctx, lines } = makeTrace('alias-g');
+  const { ctx, lines } = makeTrace();
   sanitizeForGeminiUpstream(body, ctx);
   assertEquals(body, { generationConfig: { thinkingConfig: { thinkingBudget: 100 } } });
   assertEquals(lines.length, 2);
   const droppedFields = lines.map(l => l.field).sort();
   assertEquals(droppedFields, ['anthropicBeta', 'generationConfig.verbosity']);
-  assertEquals(lines.every(l => l.alias === 'alias-g' && l.targetProtocol === 'gemini'), true);
+  assertEquals(lines.every(l => l.targetProtocol === 'gemini'), true);
 });
 
 test('sanitizer is idempotent — a second run emits no additional traces', () => {
diff --git a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
index d3e9ad8b0..bdef8b1f1 100644
--- a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
@@ -26,7 +26,6 @@ const baseCtx = (overrides: Partial<GatewayCtx> = {}): GatewayCtx => {
     upstreamIds: null,
     wantsStream: true,
     c: stubAuthedContext(),
-    responseHeaders: new Headers(),
     requestStartedAt: 0,
     runtimeLocation: 'TEST',
     currentColo: 'TEST',
diff --git a/packages/gateway/src/data-plane/completions/serve_test.ts b/packages/gateway/src/data-plane/completions/serve_test.ts
index 1dd90c6ed..1cd2263bb 100644
--- a/packages/gateway/src/data-plane/completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/completions/serve_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import { initDumpBroker, initDumpStore } from '../../dump/registry.ts';
 import { installDumpStubs } from '../../dump/test-fixtures.ts';
-import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
 import { buildCustomUpstreamRecord, flushAsyncWork, requestApp, setupAppTest } from '../../test-helpers.ts';
 import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
 import { assertEquals, assertExists, jsonResponse, withMockedFetch } from '@floway-dev/test-utils';
@@ -398,49 +397,3 @@ test('/v1/completions streaming records usage row, request_total+upstream_succes
     assertEquals(frames[3]?.type, 'done');
   }
 });
-
-// Alias header coverage for /v1/completions: the matched alias name rides
-// out on `x-floway-alias`. Non-streaming path uses passthrough's `json`
-// branch; the streaming path stamps the same header via Hono's `c.header`
-// before `streamSSE` builds the response.
-test('/v1/completions stamps x-floway-alias when the request hits an aliased model', async () => {
-  const { apiKey, repo } = await setupAppTest();
-  await registerCompletionsUpstream(repo);
-  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
-    {
-      alias: 'completions-alias',
-      targetModelId: 'davinci-002',
-      upstreamIds: [],
-      rules: {},
-      visibleInModelsList: true,
-      onConflict: 'real-only',
-      createdAt: 0,
-    },
-  ]);
-
-  await withMockedFetch(
-    request => {
-      const url = new URL(request.url);
-      if (url.hostname === 'passthrough.example.com' && url.pathname === '/v1/completions') {
-        return jsonResponse({
-          id: 'cmpl_resp',
-          object: 'text_completion',
-          created: 1,
-          model: 'davinci-002',
-          choices: [{ index: 0, text: ' world', finish_reason: 'stop' }],
-          usage: { prompt_tokens: 5, completion_tokens: 1, total_tokens: 6 },
-        });
-      }
-      throw new Error(`Unhandled fetch ${request.url}`);
-    },
-    async () => {
-      const response = await requestApp('/v1/completions', {
-        method: 'POST',
-        headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
-        body: JSON.stringify({ model: 'completions-alias', prompt: 'hello' }),
-      });
-      assertEquals(response.status, 200);
-      assertEquals(response.headers.get('x-floway-alias'), 'completions-alias');
-    },
-  );
-});
diff --git a/packages/gateway/src/data-plane/embeddings/serve_test.ts b/packages/gateway/src/data-plane/embeddings/serve_test.ts
index c6c44f61b..bf86dc9a7 100644
--- a/packages/gateway/src/data-plane/embeddings/serve_test.ts
+++ b/packages/gateway/src/data-plane/embeddings/serve_test.ts
@@ -1,6 +1,5 @@
 import { test } from 'vitest';
 
-import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
 import { buildCustomUpstreamRecord, copilotModels, flushAsyncWork, requestApp, setupAppTest } from '../../test-helpers.ts';
 import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
 import { jsonResponse, withMockedFetch, assertEquals, assertExists } from '@floway-dev/test-utils';
@@ -496,58 +495,3 @@ test('/v1/embeddings rejects malformed body at the provider-independent boundary
     },
   );
 });
-
-// Critical alias header coverage for the passthrough surface: the matched
-// alias name must ride out on `x-floway-alias` so downstream observers can
-// tell a real-model hit from an alias-routed one. Goes through Hono's
-// `c.header` in `passthroughServe`, mirroring the chat path.
-test('/v1/embeddings stamps x-floway-alias when the request hits an aliased model', async () => {
-  const { apiKey, repo } = await setupAppTest();
-  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
-    {
-      alias: 'embed-alias',
-      targetModelId: 'text-embedding-real',
-      upstreamIds: [],
-      rules: {},
-      visibleInModelsList: true,
-      onConflict: 'real-only',
-      createdAt: 0,
-    },
-  ]);
-
-  await withMockedFetch(
-    request => {
-      const url = new URL(request.url);
-      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
-      if (url.pathname === '/copilot_internal/v2/token') {
-        return jsonResponse({
-          token: 'copilot-access-token',
-          expires_at: 4102444800,
-          refresh_in: 3600,
-          endpoints: { api: 'https://api.individual.githubcopilot.com' },
-        });
-      }
-      if (url.pathname === '/models') {
-        return jsonResponse(copilotModels([{ id: 'text-embedding-real', supported_endpoints: ['/embeddings'] }]));
-      }
-      if (url.pathname === '/embeddings') {
-        return jsonResponse({
-          object: 'list',
-          model: 'text-embedding-real',
-          data: [{ object: 'embedding', index: 0, embedding: [0.1] }],
-          usage: { prompt_tokens: 1, total_tokens: 1 },
-        });
-      }
-      throw new Error(`Unhandled fetch ${request.url}`);
-    },
-    async () => {
-      const response = await requestApp('/v1/embeddings', {
-        method: 'POST',
-        headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
-        body: JSON.stringify({ model: 'embed-alias', input: 'hello' }),
-      });
-      assertEquals(response.status, 200);
-      assertEquals(response.headers.get('x-floway-alias'), 'embed-alias');
-    },
-  );
-});
diff --git a/packages/gateway/src/data-plane/images/serve_test.ts b/packages/gateway/src/data-plane/images/serve_test.ts
index f241ad89d..85b5f1adf 100644
--- a/packages/gateway/src/data-plane/images/serve_test.ts
+++ b/packages/gateway/src/data-plane/images/serve_test.ts
@@ -1,6 +1,5 @@
 import { test } from 'vitest';
 
-import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
 import { buildCustomUpstreamRecord, copilotModels, flushAsyncWork, requestApp, setupAppTest } from '../../test-helpers.ts';
 import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
 import { jsonResponse, withMockedFetch, assertEquals, assertExists } from '@floway-dev/test-utils';
@@ -234,62 +233,3 @@ test('/v1/images/edits forwards a multipart request through an Azure model and r
   const usageRows = await repo.usage.listAll();
   assertEquals(usageRows.some(row => row.model === 'gpt-image-2' && row.tokens.input === 7 && row.tokens.output === 11), true);
 });
-
-// Alias header coverage for /v1/images/generations: an alias whose target is
-// an image-generation model must surface its name on `x-floway-alias` for
-// downstream observability.
-test('/v1/images/generations stamps x-floway-alias when the request hits an aliased model', async () => {
-  const { apiKey, repo } = await setupAppTest();
-  clearInProcessCopilotTokenCache();
-  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
-    {
-      alias: 'image-alias',
-      targetModelId: 'gpt-image-2',
-      upstreamIds: [],
-      rules: {},
-      visibleInModelsList: true,
-      onConflict: 'real-only',
-      createdAt: 0,
-    },
-  ]);
-  await repo.upstreams.save(buildCustomUpstreamRecord({
-    id: 'up_images',
-    name: 'Custom Image Provider',
-    sortOrder: 100,
-    config: {
-      baseUrl: 'https://images.example.com',
-      authStyle: 'bearer',
-      apiKey: 'sk-images',
-      endpoints: {},
-    },
-  }));
-
-  await withMockedFetch(
-    request => {
-      const url = new URL(request.url);
-      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
-      if (url.pathname === '/copilot_internal/v2/token') {
-        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
-      }
-      if (url.hostname === 'api.individual.githubcopilot.com' && url.pathname === '/models') {
-        return jsonResponse(copilotModels([{ id: 'copilot-chat', supported_endpoints: ['/chat/completions'] }]));
-      }
-      if (url.hostname === 'images.example.com' && url.pathname === '/v1/models') {
-        return jsonResponse({ data: [{ id: 'gpt-image-2' }] });
-      }
-      if (url.hostname === 'images.example.com' && url.pathname === '/v1/images/generations') {
-        return jsonResponse({ data: [{ b64_json: 'aGVsbG8=' }] });
-      }
-      throw new Error(`Unhandled fetch ${request.url}`);
-    },
-    async () => {
-      const response = await requestApp('/v1/images/generations', {
-        method: 'POST',
-        headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
-        body: JSON.stringify({ model: 'image-alias', prompt: 'hi' }),
-      });
-      assertEquals(response.status, 200);
-      assertEquals(response.headers.get('x-floway-alias'), 'image-alias');
-    },
-  );
-});
diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts
deleted file mode 100644
index 0791c5f7b..000000000
--- a/packages/gateway/src/data-plane/model-aliases/apply.ts
+++ /dev/null
@@ -1,104 +0,0 @@
-import type { ModelAliasRules } from '../../control-plane/model-aliases/types.ts';
-import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
-import type { GeminiPayload } from '@floway-dev/protocols/gemini';
-import type { MessagesPayload, MessagesThinkingDisplay } from '@floway-dev/protocols/messages';
-import type { ResponsesPayload } from '@floway-dev/protocols/responses';
-import { mapSummaryToAnthropicDisplay } from '@floway-dev/translate/via-messages/anthropic-extensions';
-
-// Each function writes the alias rules into the inbound IR's slot best suited
-// to the host protocol: native when the protocol can express the concept,
-// extension otherwise. Writes overwrite any user-supplied value — aliases are
-// operator-locked.
-
-export const applyAliasRulesToChatCompletions = (payload: ChatCompletionsPayload, rules: ModelAliasRules): void => {
-  // reasoning.effort is native; budget/adaptive/summary ride on extension slots
-  // because Chat Completions has no native expression for those.
-  if (rules.reasoning?.effort !== undefined) payload.reasoning_effort = rules.reasoning.effort;
-  if (rules.reasoning?.budgetTokens !== undefined) payload.thinking_budget = rules.reasoning.budgetTokens;
-  if (rules.reasoning?.adaptive === true) payload.adaptive_thinking = true;
-  if (rules.reasoning?.summary !== undefined) payload.reasoning_summary = rules.reasoning.summary;
-  if (rules.verbosity !== undefined) payload.verbosity = rules.verbosity;
-  if (rules.serviceTier !== undefined) payload.service_tier = rules.serviceTier;
-  if (rules.anthropicBeta?.length) payload.anthropic_beta = [...rules.anthropicBeta];
-};
-
-export const applyAliasRulesToResponses = (payload: ResponsesPayload, rules: ModelAliasRules): void => {
-  // reasoning.{effort, summary} and text.verbosity / service_tier are native;
-  // budget/adaptive ride on extension slots; anthropic_beta only matters when
-  // this Responses inbound lands on a Messages upstream.
-  if (rules.reasoning?.effort !== undefined) payload.reasoning = { ...payload.reasoning, effort: rules.reasoning.effort };
-  if (rules.reasoning?.summary !== undefined) payload.reasoning = { ...payload.reasoning, summary: rules.reasoning.summary };
-  if (rules.reasoning?.budgetTokens !== undefined) payload.thinking_budget = rules.reasoning.budgetTokens;
-  if (rules.reasoning?.adaptive === true) payload.adaptive_thinking = true;
-  if (rules.verbosity !== undefined) payload.text = { ...payload.text, verbosity: rules.verbosity };
-  if (rules.serviceTier !== undefined) payload.service_tier = rules.serviceTier;
-  if (rules.anthropicBeta?.length) payload.anthropic_beta = [...rules.anthropicBeta];
-};
-
-export const applyAliasRulesToMessages = (payload: MessagesPayload, rules: ModelAliasRules): void => {
-  // Anthropic has natives for effort, thinking, and service_tier; only
-  // verbosity is a Floway extension on this inbound. anthropic_beta is the
-  // wire header — the attempt layer reads `candidate.aliasRules.anthropicBeta`
-  // and merges via mergeAnthropicBetaTokens, so we do not stamp the body here.
-  if (rules.reasoning?.effort !== undefined) {
-    payload.output_config = { ...payload.output_config, effort: rules.reasoning.effort };
-  }
-  // The dashboard's tagged radio enforces mutual exclusivity between
-  // adaptive and budgetTokens; if both arrive through the raw API the apply
-  // step picks adaptive (matches the translate-layer adaptive-first policy).
-  if (rules.reasoning?.adaptive === true) {
-    payload.thinking = { type: 'adaptive' };
-  } else if (rules.reasoning?.budgetTokens !== undefined) {
-    payload.thinking = { type: 'enabled', budget_tokens: rules.reasoning.budgetTokens };
-  }
-  if (rules.reasoning?.summary !== undefined) {
-    const display = mapSummaryToAnthropicDisplay(rules.reasoning.summary);
-    // summary='auto' maps to undefined and is an explicit no-op on the
-    // Messages path — the operator chose "let upstream default decide", so
-    // we neither synthesize a thinking block nor overwrite a user-supplied
-    // thinking.display. Every other summary value enforces operator-locked
-    // overwrite.
-    if (display !== undefined) {
-      // When no prior thinking branch ran (no effort/budget/adaptive in this
-      // rule), synthesize `thinking: {type:'enabled', display}` so the
-      // operator's summary intent survives — Anthropic discards `display`
-      // without `type`. Matches `buildMessagesThinkingFromExtensions`.
-      const base = payload.thinking ?? { type: 'enabled' as const };
-      payload.thinking = { ...base, display: display as MessagesThinkingDisplay };
-    }
-  }
-  if (rules.verbosity !== undefined) payload.verbosity = rules.verbosity;
-  if (rules.serviceTier !== undefined) payload.service_tier = rules.serviceTier;
-};
-
-export const applyAliasRulesToGemini = (payload: GeminiPayload, rules: ModelAliasRules): void => {
-  // All four reasoning knobs ride on the native thinkingConfig; verbosity and
-  // serviceTier ride on extension slots under generationConfig. anthropicBeta
-  // doesn't surface on Gemini-inbound bodies — the gemini-via-messages
-  // translator doesn't read it, and the Messages attempt reads it off the
-  // candidate's aliasRules directly when stamping the outbound header.
-  const hasThinking = rules.reasoning?.effort !== undefined
-    || rules.reasoning?.budgetTokens !== undefined
-    || rules.reasoning?.adaptive === true
-    || rules.reasoning?.summary !== undefined;
-  const hasGenerationConfig = hasThinking || rules.verbosity !== undefined || rules.serviceTier !== undefined;
-
-  if (hasGenerationConfig) {
-    const generationConfig = { ...payload.generationConfig };
-    const thinkingConfig = { ...generationConfig.thinkingConfig };
-    if (rules.reasoning?.effort !== undefined) thinkingConfig.thinkingLevel = rules.reasoning.effort;
-    if (rules.reasoning?.budgetTokens !== undefined) thinkingConfig.thinkingBudget = rules.reasoning.budgetTokens;
-    if (rules.reasoning?.adaptive === true) thinkingConfig.thinkingBudget = -1;
-    if (rules.reasoning?.summary !== undefined) {
-      // Gemini exposes a single boolean for summary; map summary='omitted' to
-      // false and every other value (auto / concise / detailed / freeform) to
-      // true. Operators that want to fall back to Gemini's account default
-      // simply omit `reasoning.summary` from the rule.
-      thinkingConfig.includeThoughts = rules.reasoning.summary !== 'omitted';
-    }
-    if (hasThinking) generationConfig.thinkingConfig = thinkingConfig;
-    if (rules.verbosity !== undefined) generationConfig.verbosity = rules.verbosity;
-    if (rules.serviceTier !== undefined) generationConfig.serviceTier = rules.serviceTier;
-    payload.generationConfig = generationConfig;
-  }
-};
diff --git a/packages/gateway/src/data-plane/model-aliases/apply_test.ts b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
deleted file mode 100644
index c62bac7f7..000000000
--- a/packages/gateway/src/data-plane/model-aliases/apply_test.ts
+++ /dev/null
@@ -1,203 +0,0 @@
-import { describe, expect, test } from 'vitest';
-
-import {
-  applyAliasRulesToChatCompletions,
-  applyAliasRulesToGemini,
-  applyAliasRulesToMessages,
-  applyAliasRulesToResponses,
-} from './apply.ts';
-import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
-import type { GeminiPayload } from '@floway-dev/protocols/gemini';
-import type { MessagesPayload } from '@floway-dev/protocols/messages';
-import type { ResponsesPayload } from '@floway-dev/protocols/responses';
-
-// Empty-shaped payload helpers; the apply functions only touch the alias-rule
-// slots so the rest can stay structurally minimal.
-const cc = (overrides: Partial<ChatCompletionsPayload> = {}): ChatCompletionsPayload => ({ model: 'x', messages: [], ...overrides });
-const resp = (overrides: Partial<ResponsesPayload> = {}): ResponsesPayload => ({ model: 'x', input: 'hi', ...overrides });
-const msg = (overrides: Partial<MessagesPayload> = {}): MessagesPayload => ({ model: 'x', messages: [], max_tokens: 1, ...overrides });
-const gem = (overrides: Partial<GeminiPayload> = {}): GeminiPayload => ({ ...overrides });
-
-describe('applyAliasRulesToChatCompletions', () => {
-  test('writes effort to native reasoning_effort and overrides user value', () => {
-    const payload = cc({ reasoning_effort: 'low' });
-    applyAliasRulesToChatCompletions(payload, { reasoning: { effort: 'high' } });
-    expect(payload.reasoning_effort).toBe('high');
-  });
-
-  test('writes budgetTokens to extension thinking_budget', () => {
-    const payload = cc();
-    applyAliasRulesToChatCompletions(payload, { reasoning: { budgetTokens: 4096 } });
-    expect(payload.thinking_budget).toBe(4096);
-  });
-
-  test('writes adaptive to extension adaptive_thinking', () => {
-    const payload = cc();
-    applyAliasRulesToChatCompletions(payload, { reasoning: { adaptive: true } });
-    expect(payload.adaptive_thinking).toBe(true);
-  });
-
-  test('writes summary to extension reasoning_summary', () => {
-    const payload = cc();
-    applyAliasRulesToChatCompletions(payload, { reasoning: { summary: 'detailed' } });
-    expect(payload.reasoning_summary).toBe('detailed');
-  });
-
-  test('writes verbosity, serviceTier, anthropicBeta to their slots', () => {
-    const payload = cc();
-    applyAliasRulesToChatCompletions(payload, {
-      verbosity: 'low', serviceTier: 'flex', anthropicBeta: ['ctx-1m'],
-    });
-    expect(payload.verbosity).toBe('low');
-    expect(payload.service_tier).toBe('flex');
-    expect(payload.anthropic_beta).toEqual(['ctx-1m']);
-  });
-
-  test('leaves payload untouched when rules carry no fields', () => {
-    const payload = cc({ reasoning_effort: 'medium', verbosity: 'high' });
-    applyAliasRulesToChatCompletions(payload, {});
-    expect(payload.reasoning_effort).toBe('medium');
-    expect(payload.verbosity).toBe('high');
-  });
-});
-
-describe('applyAliasRulesToResponses', () => {
-  test('writes effort to native reasoning.effort and overrides user value', () => {
-    const payload = resp({ reasoning: { effort: 'low' } });
-    applyAliasRulesToResponses(payload, { reasoning: { effort: 'high' } });
-    expect(payload.reasoning?.effort).toBe('high');
-  });
-
-  test('writes summary to native reasoning.summary', () => {
-    const payload = resp();
-    applyAliasRulesToResponses(payload, { reasoning: { summary: 'detailed' } });
-    expect(payload.reasoning?.summary).toBe('detailed');
-  });
-
-  test('writes budgetTokens to extension thinking_budget', () => {
-    const payload = resp();
-    applyAliasRulesToResponses(payload, { reasoning: { budgetTokens: 4096 } });
-    expect(payload.thinking_budget).toBe(4096);
-  });
-
-  test('writes adaptive to extension adaptive_thinking', () => {
-    const payload = resp();
-    applyAliasRulesToResponses(payload, { reasoning: { adaptive: true } });
-    expect(payload.adaptive_thinking).toBe(true);
-  });
-
-  test('writes verbosity to native text.verbosity, preserving format', () => {
-    const payload = resp({ text: { format: { type: 'json_object' } } });
-    applyAliasRulesToResponses(payload, { verbosity: 'low' });
-    expect(payload.text?.verbosity).toBe('low');
-    expect(payload.text?.format).toEqual({ type: 'json_object' });
-  });
-
-  test('writes serviceTier to native service_tier', () => {
-    const payload = resp();
-    applyAliasRulesToResponses(payload, { serviceTier: 'flex' });
-    expect(payload.service_tier).toBe('flex');
-  });
-
-  test('writes anthropicBeta to extension slot', () => {
-    const payload = resp();
-    applyAliasRulesToResponses(payload, { anthropicBeta: ['ctx-1m'] });
-    expect(payload.anthropic_beta).toEqual(['ctx-1m']);
-  });
-});
-
-describe('applyAliasRulesToMessages', () => {
-  test('writes effort to native output_config.effort', () => {
-    const payload = msg();
-    applyAliasRulesToMessages(payload, { reasoning: { effort: 'high' } });
-    expect(payload.output_config?.effort).toBe('high');
-  });
-
-  test('writes budgetTokens to thinking.enabled', () => {
-    const payload = msg();
-    applyAliasRulesToMessages(payload, { reasoning: { budgetTokens: 4096 } });
-    expect(payload.thinking).toEqual({ type: 'enabled', budget_tokens: 4096 });
-  });
-
-  test('writes adaptive to thinking.type=adaptive', () => {
-    const payload = msg();
-    applyAliasRulesToMessages(payload, { reasoning: { adaptive: true } });
-    expect(payload.thinking).toEqual({ type: 'adaptive' });
-  });
-
-  test('writes summary to thinking.display (mapped from OpenAI vocabulary)', () => {
-    const payload = msg({ thinking: { type: 'enabled', budget_tokens: 1024 } });
-    applyAliasRulesToMessages(payload, { reasoning: { summary: 'detailed' } });
-    expect(payload.thinking).toEqual({ type: 'enabled', budget_tokens: 1024, display: 'summarized' });
-  });
-
-  test('writes serviceTier to native service_tier', () => {
-    const payload = msg();
-    applyAliasRulesToMessages(payload, { serviceTier: 'priority' });
-    expect(payload.service_tier).toBe('priority');
-  });
-
-  test('writes verbosity to the extension slot', () => {
-    const payload = msg();
-    applyAliasRulesToMessages(payload, { verbosity: 'low' });
-    expect(payload.verbosity).toBe('low');
-  });
-
-  test('adaptive overrides budgetTokens when both arrive on the same call', () => {
-    // The write-side validator forbids both, but if both still arrive the
-    // adaptive choice has to win to match the translate-layer policy.
-    const payload = msg();
-    applyAliasRulesToMessages(payload, { reasoning: { budgetTokens: 1024, adaptive: true } });
-    expect(payload.thinking).toEqual({ type: 'adaptive' });
-  });
-});
-
-describe('applyAliasRulesToGemini', () => {
-  test('writes effort to generationConfig.thinkingConfig.thinkingLevel', () => {
-    const payload = gem();
-    applyAliasRulesToGemini(payload, { reasoning: { effort: 'high' } });
-    expect(payload.generationConfig?.thinkingConfig?.thinkingLevel).toBe('high');
-  });
-
-  test('writes budgetTokens to generationConfig.thinkingConfig.thinkingBudget', () => {
-    const payload = gem();
-    applyAliasRulesToGemini(payload, { reasoning: { budgetTokens: 4096 } });
-    expect(payload.generationConfig?.thinkingConfig?.thinkingBudget).toBe(4096);
-  });
-
-  test('writes adaptive to generationConfig.thinkingConfig.thinkingBudget = -1', () => {
-    const payload = gem();
-    applyAliasRulesToGemini(payload, { reasoning: { adaptive: true } });
-    expect(payload.generationConfig?.thinkingConfig?.thinkingBudget).toBe(-1);
-  });
-
-  test('writes summary to generationConfig.thinkingConfig.includeThoughts when not omitted', () => {
-    const payload = gem();
-    applyAliasRulesToGemini(payload, { reasoning: { summary: 'detailed' } });
-    expect(payload.generationConfig?.thinkingConfig?.includeThoughts).toBe(true);
-  });
-
-  test('writes summary=omitted to generationConfig.thinkingConfig.includeThoughts=false', () => {
-    const payload = gem();
-    applyAliasRulesToGemini(payload, { reasoning: { summary: 'omitted' } });
-    expect(payload.generationConfig?.thinkingConfig?.includeThoughts).toBe(false);
-  });
-
-  test('writes verbosity to generationConfig.verbosity extension', () => {
-    const payload = gem();
-    applyAliasRulesToGemini(payload, { verbosity: 'low' });
-    expect(payload.generationConfig?.verbosity).toBe('low');
-  });
-
-  test('writes serviceTier to generationConfig.serviceTier extension', () => {
-    const payload = gem();
-    applyAliasRulesToGemini(payload, { serviceTier: 'flex' });
-    expect(payload.generationConfig?.serviceTier).toBe('flex');
-  });
-
-  test('preserves existing thinkingConfig entries when adding a new one', () => {
-    const payload = gem({ generationConfig: { thinkingConfig: { thinkingBudget: 1024 } } });
-    applyAliasRulesToGemini(payload, { reasoning: { summary: 'detailed' } });
-    expect(payload.generationConfig?.thinkingConfig).toEqual({ thinkingBudget: 1024, includeThoughts: true });
-  });
-});
diff --git a/packages/gateway/src/data-plane/model-aliases/match.ts b/packages/gateway/src/data-plane/model-aliases/match.ts
deleted file mode 100644
index edb31d071..000000000
--- a/packages/gateway/src/data-plane/model-aliases/match.ts
+++ /dev/null
@@ -1,15 +0,0 @@
-import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
-
-// Lookup an alias for the (post-prefix-strip) lookupId against the upstream's
-// id. An empty `upstreamIds` filter on the alias means "match any upstream";
-// a non-empty filter must include the upstream's id.
-export const matchAlias = (
-  lookupId: string,
-  upstreamId: string,
-  aliases: readonly ModelAlias[],
-): ModelAlias | undefined => {
-  const hit = aliases.find(a => a.alias === lookupId);
-  if (!hit) return undefined;
-  if (hit.upstreamIds.length > 0 && !hit.upstreamIds.includes(upstreamId)) return undefined;
-  return hit;
-};
diff --git a/packages/gateway/src/data-plane/model-aliases/match_test.ts b/packages/gateway/src/data-plane/model-aliases/match_test.ts
deleted file mode 100644
index b3fbf5596..000000000
--- a/packages/gateway/src/data-plane/model-aliases/match_test.ts
+++ /dev/null
@@ -1,55 +0,0 @@
-import { describe, expect, test } from 'vitest';
-
-import { matchAlias } from './match.ts';
-import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
-
-const make = (overrides: Partial<ModelAlias>): ModelAlias => ({
-  alias: 'a',
-  targetModelId: 't',
-  upstreamIds: [],
-  rules: {},
-  visibleInModelsList: true,
-  onConflict: 'real-only',
-  createdAt: 0,
-  ...overrides,
-});
-
-describe('matchAlias', () => {
-  test('matches by exact lookupId when alias has no upstream filter', () => {
-    const aliases = [make({ alias: 'codex-auto-review', targetModelId: 'gpt-5.4' })];
-    expect(matchAlias('codex-auto-review', 'up-1', aliases)?.alias).toBe('codex-auto-review');
-  });
-
-  test('does not match when lookupId differs', () => {
-    const aliases = [make({ alias: 'codex-auto-review' })];
-    expect(matchAlias('something-else', 'up-1', aliases)).toBeUndefined();
-  });
-
-  test('respects upstreamIds allowlist (member matches)', () => {
-    const aliases = [make({ alias: 'a', upstreamIds: ['up-1', 'up-2'] })];
-    expect(matchAlias('a', 'up-1', aliases)).toBeDefined();
-    expect(matchAlias('a', 'up-2', aliases)).toBeDefined();
-  });
-
-  test('respects upstreamIds allowlist (non-member misses)', () => {
-    const aliases = [make({ alias: 'a', upstreamIds: ['up-1'] })];
-    expect(matchAlias('a', 'up-3', aliases)).toBeUndefined();
-  });
-
-  test('empty upstreamIds means match-any', () => {
-    const aliases = [make({ alias: 'a', upstreamIds: [] })];
-    expect(matchAlias('a', 'anywhere', aliases)).toBeDefined();
-  });
-
-  test('returns the first matching alias entry verbatim', () => {
-    const aliases = [
-      make({ alias: 'a', targetModelId: 'first', rules: { reasoning: { effort: 'low' } } }),
-      make({ alias: 'a', targetModelId: 'second' }),
-    ];
-    expect(matchAlias('a', 'up-x', aliases)).toEqual(aliases[0]);
-  });
-
-  test('returns undefined for an empty alias list', () => {
-    expect(matchAlias('a', 'up-x', [])).toBeUndefined();
-  });
-});
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
deleted file mode 100644
index 72c0fdcf7..000000000
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ /dev/null
@@ -1,139 +0,0 @@
-import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
-import { unionEndpoints } from '../providers/registry.ts';
-import { composeAliasDisplayName, kindForEndpoints, type PublicModel } from '@floway-dev/protocols/common';
-import type { ModelProviderInstance, ProviderModelRecord, ResolvedModel, UpstreamModel } from '@floway-dev/provider';
-
-// One emission slot for an alias: a (provider, addressable form) pair where
-// the provider's raw catalog carries the alias target id, plus the matched
-// UpstreamModel so the synthesized listing entry can borrow the target's
-// limits, owner, and cost without re-querying.
-interface AliasListingEmission {
-  provider: ModelProviderInstance;
-  form: 'unprefixed' | 'prefixed';
-  target: UpstreamModel;
-}
-
-// A `ResolvedModel` that may carry an `aliasedFrom` provenance — what
-// `getModelsForListing` returns when alias entries have been interleaved into
-// the catalog. Each listing endpoint's mapper (`toPublicModel`,
-// `toControlPlaneModel`, `toGeminiModel`) reads the same shape, so the alias
-// fan-out happens exactly once instead of being re-implemented per surface.
-export type ListedModel = ResolvedModel & {
-  readonly aliasedFrom?: NonNullable<PublicModel['aliasedFrom']>;
-};
-
-// Per-upstream alias enumeration. An alias with empty `upstreamIds` matches
-// every reachable provider; a non-empty list narrows the candidate set. Per
-// provider, the alias emits one entry per `listed` form when its target sits
-// in the upstream's raw catalog. Upstreams that do not carry the target — or
-// whose operator disabled the target — drop the alias entirely for that row.
-const aliasListingEmissions = (
-  alias: ModelAlias,
-  providers: readonly ModelProviderInstance[],
-  rawCatalogs: ReadonlyMap<string, readonly UpstreamModel[]>,
-): AliasListingEmission[] => {
-  const out: AliasListingEmission[] = [];
-  const upstreamFilter = alias.upstreamIds.length > 0 ? new Set(alias.upstreamIds) : null;
-  for (const provider of providers) {
-    if (upstreamFilter !== null && !upstreamFilter.has(provider.upstream)) continue;
-    const catalog = rawCatalogs.get(provider.upstream);
-    if (catalog === undefined) continue;
-    const disabled = new Set(provider.disabledPublicModelIds);
-    const target = catalog.find(m => m.id === alias.targetModelId && !disabled.has(m.id));
-    if (target === undefined) continue;
-    const cfg = provider.modelPrefix;
-    if (cfg === null) {
-      out.push({ provider, form: 'unprefixed', target });
-    } else {
-      for (const form of cfg.listed) {
-        out.push({ provider, form, target });
-      }
-    }
-  }
-  return out;
-};
-
-// Turn an alias emission into a `ListedModel` that walks the same listing
-// pipeline as real catalog entries. The synthesized `providers` array carries
-// a single binding pointing at the alias's target on this upstream, so the
-// dashboard's per-binding view renders correctly without alias-specific
-// branching. `aliasedFrom` rides out as the public protocol extension.
-//
-// Display name: the alias-local part (operator displayName, or
-// `${target.display_name} (rules summary)`) lives by itself for the
-// `unprefixed` listing form; the `prefixed` form mirrors the real-model path
-// in `registry.ts` and prepends `${provider.name}: ` so the upstream is
-// visible at a glance.
-//
-// Public id: bare alias name for the unprefixed form; provider prefix + alias
-// name for the prefixed form. Mirrors how real models are surfaced in the
-// same listing pass.
-const aliasEmissionToListedModel = (alias: ModelAlias, emission: AliasListingEmission): ListedModel => {
-  const { provider, target, form } = emission;
-  const aliasLocalName = composeAliasDisplayName({
-    aliasDisplayName: alias.displayName,
-    targetDisplayName: target.display_name ?? target.id,
-    rules: alias.rules,
-  });
-  const cfg = provider.modelPrefix;
-  const publicId = form === 'prefixed' && cfg !== null ? `${cfg.prefix}${alias.alias}` : alias.alias;
-  const record: ProviderModelRecord = {
-    upstream: provider.upstream,
-    upstreamName: provider.name,
-    providerKind: provider.providerKind,
-    provider: provider.provider,
-    upstreamModel: target,
-    enabledFlags: target.enabledFlags,
-    supportsResponsesItemReference: provider.supportsResponsesItemReference,
-  };
-  const { providerData: _providerData, endpoints, id: _targetId, display_name: _targetDisplay, created: _targetCreated, ...rest } = target;
-  return {
-    ...rest,
-    id: publicId,
-    display_name: form === 'prefixed' ? `${provider.name}: ${aliasLocalName}` : aliasLocalName,
-    created: alias.createdAt,
-    endpoints: { ...endpoints },
-    providers: [record],
-    aliasedFrom: {
-      targetModelId: alias.targetModelId,
-      upstreamIds: alias.upstreamIds,
-      rules: alias.rules,
-      onConflict: alias.onConflict,
-      ...(alias.displayName !== undefined ? { displayName: alias.displayName } : {}),
-    },
-  };
-};
-
-// Single-pass alias fan-out used by every listing surface. Visibility filter
-// honoured here. Emissions whose synthesized public id collides — two
-// no-prefix upstreams both serving the alias target, or two prefix-aliased
-// upstreams sharing a prefix — merge into one row with the bindings
-// appended, mirroring how `mergeIntoCatalog` collapses duplicate real-model
-// ids; the dashboard then renders a single alias row whose `upstreams` lists
-// every backing binding instead of N identical rows.
-export const synthesizeListedAliases = (
-  aliases: readonly ModelAlias[],
-  providers: readonly ModelProviderInstance[],
-  rawCatalogs: ReadonlyMap<string, readonly UpstreamModel[]>,
-): ListedModel[] => {
-  const byId = new Map<string, ListedModel>();
-  for (const alias of aliases) {
-    if (!alias.visibleInModelsList) continue;
-    for (const emission of aliasListingEmissions(alias, providers, rawCatalogs)) {
-      const next = aliasEmissionToListedModel(alias, emission);
-      const existing = byId.get(next.id);
-      if (existing === undefined) {
-        byId.set(next.id, next);
-        continue;
-      }
-      const endpoints = unionEndpoints(existing.endpoints, next.endpoints);
-      byId.set(next.id, {
-        ...existing,
-        endpoints,
-        kind: kindForEndpoints(endpoints),
-        providers: [...existing.providers, ...next.providers],
-      });
-    }
-  }
-  return [...byId.values()];
-};
diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts
index f01579f9a..ab9242bd1 100644
--- a/packages/gateway/src/data-plane/models/gemini.ts
+++ b/packages/gateway/src/data-plane/models/gemini.ts
@@ -1,14 +1,12 @@
 import type { Context } from 'hono';
 
 import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts';
-import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
-import { getRepo } from '../../repo/index.ts';
 import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
 import { getCurrentColo } from '../../runtime/runtime-info.ts';
 import { geminiStatusForHttpStatus } from '../chat/gemini/errors.ts';
-import { getModelsForListing } from '../providers/registry.ts';
+import { getModels } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { ModelPricing } from '@floway-dev/protocols/common';
 import { ProviderModelsUnavailableError } from '@floway-dev/provider';
@@ -32,10 +30,6 @@ interface GeminiModel {
   cost?: ModelPricing;
 }
 
-// Gemini's Model resource is closed (no `aliasedFrom` extension), so an alias
-// arrives here through `getModelsForListing` looking like any other chat
-// model — `id`, `display_name`, `limits`, `cost` already finalized by
-// `synthesizeListedAliases` — and the mapper has no alias-specific branch.
 const toGeminiModel = (model: InternalModel): GeminiModel => {
   const limits = model.limits;
   const inputTokenLimit = limits.max_prompt_tokens ?? limits.max_context_window_tokens;
@@ -72,20 +66,16 @@ const loadGeminiModels = async (
   upstreamFilter: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
-  aliases: readonly ModelAlias[],
 ): Promise<GeminiModel[]> => {
-  const { models } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler, aliases);
-  // Only chat models are representable in the Gemini /models shape — alias
-  // entries whose target is non-chat fall out of this filter just like real
-  // non-chat catalog entries do.
+  const models = await getModels(upstreamFilter, fetcherForUpstream, scheduler);
+  // Only chat models are representable in the Gemini /models shape.
   return models.filter(model => model.kind === 'chat').map(toGeminiModel);
 };
 
 export const serveGeminiModels = async (c: Context): Promise<Response> => {
   try {
     const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
-    const aliases = await getRepo().modelAliases.loadAll();
-    return Response.json({ models: await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), aliases) });
+    return Response.json({ models: await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c)) });
   } catch (error) {
     return geminiModelLoadError(error);
   }
@@ -98,8 +88,7 @@ export const serveGeminiModelInfo = async (c: Context): Promise<Response> => {
   const modelId = rawModelId.replace(/^models\//, '');
   try {
     const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
-    const aliases = await getRepo().modelAliases.loadAll();
-    const model = (await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), aliases)).find(candidate => candidate.baseModelId === modelId || candidate.name === `models/${modelId}`);
+    const model = (await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c))).find(candidate => candidate.baseModelId === modelId || candidate.name === `models/${modelId}`);
     if (!model) return geminiError(404, `Model not found: ${modelId}`);
     return Response.json(model);
   } catch (error) {
diff --git a/packages/gateway/src/data-plane/models/gemini_test.ts b/packages/gateway/src/data-plane/models/gemini_test.ts
index 584054328..7f4cedaec 100644
--- a/packages/gateway/src/data-plane/models/gemini_test.ts
+++ b/packages/gateway/src/data-plane/models/gemini_test.ts
@@ -1,6 +1,5 @@
 import { test } from 'vitest';
 
-import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
 import { buildCustomUpstreamRecord, copilotModels, requestApp, setupAppTest } from '../../test-helpers.ts';
 import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
 import { jsonResponse, withMockedFetch, assertEquals } from '@floway-dev/test-utils';
@@ -409,48 +408,3 @@ test('/v1beta/models hides malformed upstream response bodies', async () => {
     },
   );
 });
-
-// Gemini's `Model` resource is closed (no `aliasedFrom` extension), so the
-// `/v1beta/models` surface advertises an alias entry as a synthetic Gemini
-// model carrying the alias id and the target's display fields. This test
-// guards the synthetic shape — name, displayName, supportedGenerationMethods
-// — so a future refactor of `loadGeminiModels` cannot silently drop the
-// alias entries.
-test('/v1beta/models appends visible aliases as synthetic Gemini model entries', async () => {
-  const { repo, apiKey } = await setupAppTest();
-
-  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
-    {
-      alias: 'codex-auto-review',
-      targetModelId: 'gpt-gemini-list',
-      upstreamIds: [],
-      rules: { reasoning: { effort: 'low' } },
-      visibleInModelsList: true,
-      onConflict: 'real-only',
-      createdAt: 1_700_000_000,
-    },
-  ]);
-
-  await withMockedFetch(
-    request => {
-      const url = new URL(request.url);
-      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
-      if (url.pathname === '/copilot_internal/v2/token') {
-        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
-      }
-      if (url.pathname === '/models') {
-        return jsonResponse(copilotModels([{ id: 'gpt-gemini-list', display_name: 'GPT Gemini List' }]));
-      }
-      throw new Error(`Unhandled fetch ${request.url}`);
-    },
-    async () => {
-      const response = await requestApp('/v1beta/models', { headers: { 'x-api-key': apiKey.key } });
-      assertEquals(response.status, 200);
-      const body = await response.json() as { models: Array<{ name: string; displayName: string; supportedGenerationMethods: string[] }> };
-      const aliasEntry = body.models.find(m => m.name === 'models/codex-auto-review');
-      if (!aliasEntry) throw new Error('expected codex-auto-review alias entry');
-      assertEquals(aliasEntry.displayName, 'GPT Gemini List (low effort)');
-      assertEquals(aliasEntry.supportedGenerationMethods, ['generateContent', 'streamGenerateContent', 'countTokens']);
-    },
-  );
-});
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index 39c6649a0..027ba38db 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -1,15 +1,9 @@
-import type { ListedModel } from './alias-listing.ts';
-import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
-import { getModelsForListing } from '../providers/registry.ts';
+import { getModels } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
 import type { Fetcher, InternalModel } from '@floway-dev/provider';
 
-// Maps a single listed catalog entry (real or alias) to the wire DTO. Alias
-// entries arrive with `aliasedFrom` pre-populated by
-// `synthesizeListedAliases`; this mapper just rides it through so every
-// listing surface sees the same provenance field.
-export const toPublicModel = (model: InternalModel & { aliasedFrom?: ListedModel['aliasedFrom'] }): PublicModel => {
+export const toPublicModel = (model: InternalModel): PublicModel => {
   const info: PublicModel = {
     id: model.id,
     object: 'model',
@@ -24,7 +18,6 @@ export const toPublicModel = (model: InternalModel & { aliasedFrom?: ListedModel
     info.created_at = new Date(model.created * 1000).toISOString();
   }
   if (model.cost) info.cost = model.cost;
-  if (model.aliasedFrom) info.aliasedFrom = model.aliasedFrom;
   if (model.chat) info.chat = model.chat;
   return info;
 };
@@ -33,9 +26,8 @@ export const loadModels = async (
   upstreamFilter: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
-  aliases: readonly ModelAlias[],
 ): Promise<PublicModelsResponse> => {
-  const { models } = await getModelsForListing(upstreamFilter, fetcherForUpstream, scheduler, aliases);
+  const models = await getModels(upstreamFilter, fetcherForUpstream, scheduler);
   const data = models.map(toPublicModel);
   return {
     object: 'list',
diff --git a/packages/gateway/src/data-plane/models/serve.ts b/packages/gateway/src/data-plane/models/serve.ts
index 60736266b..9b8b510f9 100644
--- a/packages/gateway/src/data-plane/models/serve.ts
+++ b/packages/gateway/src/data-plane/models/serve.ts
@@ -7,7 +7,6 @@ import { loadModels } from './load.ts';
 import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts';
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
-import { getRepo } from '../../repo/index.ts';
 import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
 import { getCurrentColo } from '../../runtime/runtime-info.ts';
 import { ProviderModelsUnavailableError } from '@floway-dev/provider';
@@ -15,8 +14,7 @@ import { ProviderModelsUnavailableError } from '@floway-dev/provider';
 export const models = async (c: Context) => {
   try {
     const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
-    const aliases = await getRepo().modelAliases.loadAll();
-    return Response.json(await loadModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), aliases));
+    return Response.json(await loadModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c)));
   } catch (e) {
     // Upstream HTTP/parse failures squash to a generic message so we do not
     // leak upstream identity. Other registry-thrown errors (e.g. the "no
diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index 9af4d7981..1408f10a6 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -1,6 +1,5 @@
 import { test } from 'vitest';
 
-import type { MemoryModelAliasesRepo } from '../../repo/memory.ts';
 import { buildCopilotUpstreamRecord, buildCustomUpstreamRecord, copilotModels, requestApp, setupAppTest } from '../../test-helpers.ts';
 import { clearInProcessCopilotTokenCache } from '@floway-dev/provider-copilot';
 import { jsonResponse, withMockedFetch, assertEquals } from '@floway-dev/test-utils';
@@ -587,470 +586,3 @@ test('/v1/models returns the last real error when every account model load fails
     },
   );
 });
-
-// /v1/models alias-listing coverage. Each test exercises one slice of the
-// spec's visibility contract: visible alias appears with `aliasedFrom`,
-// hidden alias does not appear, alias-with-disabled-target is still listed,
-// the `aliasedFrom` shape matches the spec byte-for-byte.
-test('/v1/models appends a visible alias with aliasedFrom after the real entries', async () => {
-  const { repo, apiKey } = await setupAppTest();
-
-  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
-    {
-      alias: 'codex-auto-review',
-      targetModelId: 'gpt-5.4',
-      upstreamIds: [],
-      rules: { reasoning: { effort: 'low' } },
-      visibleInModelsList: true,
-      onConflict: 'real-only',
-      createdAt: 1_700_000_000,
-    },
-  ]);
-
-  await repo.upstreams.save(buildCustomUpstreamRecord({
-    id: 'up_oai',
-    name: 'Test OpenAI',
-    sortOrder: 100,
-    config: {
-      baseUrl: 'https://oai.example.com',
-      authStyle: 'bearer',
-      apiKey: 'sk-test',
-      endpoints: { chatCompletions: {} },
-    },
-  }));
-
-  await withMockedFetch(
-    request => {
-      const url = new URL(request.url);
-      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
-      if (url.pathname === '/copilot_internal/v2/token') {
-        return jsonResponse({
-          token: 'copilot-access-token',
-          expires_at: 4102444800,
-          refresh_in: 3600,
-          endpoints: { api: 'https://api.individual.githubcopilot.com' },
-        });
-      }
-      if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') {
-        return jsonResponse(copilotModels([]));
-      }
-      if (url.pathname === '/v1/models' && url.hostname === 'oai.example.com') {
-        return jsonResponse({
-          object: 'list',
-          data: [{ id: 'gpt-5.4', owned_by: 'openai' }],
-        });
-      }
-      throw new Error(`Unhandled fetch ${request.url}`);
-    },
-    async () => {
-      const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
-      assertEquals(response.status, 200);
-      const body = await response.json() as { data: Array<{ id: string; owned_by?: string; aliasedFrom?: unknown }> };
-      const ids = body.data.map(m => m.id);
-      assertEquals(ids[ids.length - 1], 'codex-auto-review');
-      const aliasEntry = body.data.find(m => m.id === 'codex-auto-review');
-      if (!aliasEntry) throw new Error('expected codex-auto-review alias entry');
-      assertEquals(aliasEntry.aliasedFrom, {
-        targetModelId: 'gpt-5.4',
-        upstreamIds: [],
-        rules: { reasoning: { effort: 'low' } },
-        onConflict: 'real-only',
-      });
-      assertEquals(aliasEntry.owned_by, 'openai');
-    },
-  );
-});
-
-// `displayName` propagates verbatim when the operator set it; absence on the
-// wire (the prior test) means "synthesize from target name + rules summary".
-test('/v1/models forwards the operator-set displayName on the aliasedFrom payload', async () => {
-  const { repo, apiKey } = await setupAppTest();
-
-  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
-    {
-      alias: 'codex-auto-review',
-      targetModelId: 'gpt-5.4',
-      upstreamIds: [],
-      rules: { reasoning: { effort: 'low' } },
-      visibleInModelsList: true,
-      onConflict: 'real-only',
-      displayName: 'Codex Auto Review',
-      createdAt: 1_700_000_000,
-    },
-  ]);
-
-  await repo.upstreams.save(buildCustomUpstreamRecord({
-    id: 'up_oai',
-    name: 'Test OpenAI',
-    sortOrder: 100,
-    config: {
-      baseUrl: 'https://oai.example.com',
-      authStyle: 'bearer',
-      apiKey: 'sk-test',
-      endpoints: { chatCompletions: {} },
-    },
-  }));
-
-  await withMockedFetch(
-    request => {
-      const url = new URL(request.url);
-      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
-      if (url.pathname === '/copilot_internal/v2/token') {
-        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
-      }
-      if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') {
-        return jsonResponse(copilotModels([]));
-      }
-      if (url.pathname === '/v1/models' && url.hostname === 'oai.example.com') {
-        return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
-      }
-      throw new Error(`Unhandled fetch ${request.url}`);
-    },
-    async () => {
-      const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
-      const body = await response.json() as { data: Array<{ id: string; aliasedFrom?: { displayName?: string } }> };
-      const aliasEntry = body.data.find(m => m.id === 'codex-auto-review');
-      if (!aliasEntry) throw new Error('expected codex-auto-review alias entry');
-      assertEquals(aliasEntry.aliasedFrom?.displayName, 'Codex Auto Review');
-    },
-  );
-});
-
-test('/v1/models omits aliases marked visibleInModelsList=false', async () => {
-  const { repo, apiKey } = await setupAppTest();
-
-  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
-    {
-      alias: 'hidden-alias',
-      targetModelId: 'gpt-5.4',
-      upstreamIds: [],
-      rules: {},
-      visibleInModelsList: false,
-      onConflict: 'real-only',
-      createdAt: 0,
-    },
-  ]);
-
-  await repo.upstreams.save(buildCustomUpstreamRecord({
-    id: 'up_oai',
-    name: 'Test OpenAI',
-    sortOrder: 100,
-    config: {
-      baseUrl: 'https://oai.example.com',
-      authStyle: 'bearer',
-      apiKey: 'sk-test',
-      endpoints: { chatCompletions: {} },
-    },
-  }));
-
-  await withMockedFetch(
-    request => {
-      const url = new URL(request.url);
-      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
-      if (url.pathname === '/copilot_internal/v2/token') {
-        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
-      }
-      if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') {
-        return jsonResponse(copilotModels([]));
-      }
-      if (url.pathname === '/v1/models' && url.hostname === 'oai.example.com') {
-        return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
-      }
-      throw new Error(`Unhandled fetch ${request.url}`);
-    },
-    async () => {
-      const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
-      const body = await response.json() as { data: Array<{ id: string }> };
-      assertEquals(body.data.map(m => m.id).includes('hidden-alias'), false);
-    },
-  );
-});
-
-test('/v1/models omits an alias whose target is not in any reachable upstream catalog', async () => {
-  const { repo, apiKey } = await setupAppTest();
-
-  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
-    {
-      alias: 'orphan-alias',
-      targetModelId: 'never-resolves',
-      upstreamIds: ['up_oai'],
-      rules: {},
-      visibleInModelsList: true,
-      onConflict: 'real-only',
-      createdAt: 0,
-    },
-  ]);
-
-  await repo.upstreams.save(buildCustomUpstreamRecord({
-    id: 'up_oai',
-    name: 'Test OpenAI',
-    sortOrder: 100,
-    config: {
-      baseUrl: 'https://oai.example.com',
-      authStyle: 'bearer',
-      apiKey: 'sk-test',
-      endpoints: { chatCompletions: {} },
-    },
-  }));
-
-  await withMockedFetch(
-    request => {
-      const url = new URL(request.url);
-      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
-      if (url.pathname === '/copilot_internal/v2/token') {
-        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
-      }
-      if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') {
-        return jsonResponse(copilotModels([]));
-      }
-      if (url.pathname === '/v1/models' && url.hostname === 'oai.example.com') {
-        return jsonResponse({ object: 'list', data: [] });
-      }
-      throw new Error(`Unhandled fetch ${request.url}`);
-    },
-    async () => {
-      const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
-      const body = await response.json() as { data: Array<{ id: string }> };
-      // Per-upstream alias enumeration: an alias whose target cannot be served
-      // by any reachable upstream produces zero entries — there is no surface
-      // form to attach the alias to. A request for `orphan-alias` still
-      // returns the canonical user-facing model-missing error.
-      assertEquals(body.data.map(m => m.id).includes('orphan-alias'), false);
-    },
-  );
-});
-
-test('/v1/models emits the alias on each reachable upstream + listed form; prefixed entries carry the upstream label, unprefixed entries do not', async () => {
-  const { repo, apiKey } = await setupAppTest();
-
-  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
-    {
-      alias: 'codex-auto-review',
-      targetModelId: 'gpt-5.4',
-      upstreamIds: [],
-      rules: { reasoning: { effort: 'low' } },
-      visibleInModelsList: true,
-      onConflict: 'real-only',
-      displayName: 'Codex Auto Review',
-      createdAt: 1_700_000_000,
-    },
-  ]);
-
-  await repo.upstreams.save(buildCustomUpstreamRecord({
-    id: 'up_azure',
-    name: 'Azure',
-    sortOrder: 100,
-    config: {
-      baseUrl: 'https://azure.example.com',
-      authStyle: 'bearer',
-      apiKey: 'sk-azure',
-      endpoints: { chatCompletions: {} },
-    },
-    modelPrefix: { prefix: 'azure/', addressable: ['unprefixed', 'prefixed'], listed: ['unprefixed', 'prefixed'] },
-  }));
-
-  await withMockedFetch(
-    request => {
-      const url = new URL(request.url);
-      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
-      if (url.pathname === '/copilot_internal/v2/token') {
-        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
-      }
-      if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([]));
-      if (url.pathname === '/v1/models' && url.hostname === 'azure.example.com') {
-        return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4', display_name: 'GPT-5.4' }] });
-      }
-      throw new Error(`Unhandled fetch ${request.url}`);
-    },
-    async () => {
-      const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
-      const body = await response.json() as { data: Array<{ id: string; display_name: string; aliasedFrom?: unknown }> };
-      // Both addressable forms appear because the upstream listed both.
-      const bare = body.data.find(m => m.id === 'codex-auto-review');
-      const prefixed = body.data.find(m => m.id === 'azure/codex-auto-review');
-      if (!bare || !prefixed) throw new Error('expected both bare and prefixed alias entries');
-      assertEquals(bare.display_name, 'Codex Auto Review');
-      assertEquals(prefixed.display_name, 'Azure: Codex Auto Review');
-    },
-  );
-});
-
-test('/v1/models falls back to target display_name + rules summary when the alias has no displayName', async () => {
-  const { repo, apiKey } = await setupAppTest();
-
-  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
-    {
-      alias: 'codex-auto-review',
-      targetModelId: 'gpt-5.4',
-      upstreamIds: [],
-      rules: { reasoning: { effort: 'low' } },
-      visibleInModelsList: true,
-      onConflict: 'real-only',
-      createdAt: 1_700_000_000,
-    },
-  ]);
-
-  await repo.upstreams.save(buildCustomUpstreamRecord({
-    id: 'up_azure',
-    name: 'Azure',
-    sortOrder: 100,
-    config: {
-      baseUrl: 'https://azure.example.com',
-      authStyle: 'bearer',
-      apiKey: 'sk-azure',
-      endpoints: { chatCompletions: {} },
-    },
-  }));
-
-  await withMockedFetch(
-    request => {
-      const url = new URL(request.url);
-      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
-      if (url.pathname === '/copilot_internal/v2/token') {
-        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
-      }
-      if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([]));
-      if (url.pathname === '/v1/models' && url.hostname === 'azure.example.com') {
-        return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4', display_name: 'GPT-5.4' }] });
-      }
-      throw new Error(`Unhandled fetch ${request.url}`);
-    },
-    async () => {
-      const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
-      const body = await response.json() as { data: Array<{ id: string; display_name: string }> };
-      const entry = body.data.find(m => m.id === 'codex-auto-review');
-      if (!entry) throw new Error('expected codex-auto-review alias entry');
-      assertEquals(entry.display_name, 'GPT-5.4 (low effort)');
-    },
-  );
-});
-
-test('/v1/models honours alias upstreamIds — only emits on the named upstream', async () => {
-  const { repo, apiKey } = await setupAppTest();
-
-  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
-    {
-      alias: 'codex-auto-review',
-      targetModelId: 'gpt-5.4',
-      upstreamIds: ['up_azure'],
-      rules: {},
-      visibleInModelsList: true,
-      onConflict: 'real-only',
-      createdAt: 1_700_000_000,
-    },
-  ]);
-
-  await repo.upstreams.save(buildCustomUpstreamRecord({
-    id: 'up_azure',
-    name: 'Azure',
-    sortOrder: 100,
-    config: {
-      baseUrl: 'https://azure.example.com',
-      authStyle: 'bearer',
-      apiKey: 'sk-azure',
-      endpoints: { chatCompletions: {} },
-    },
-  }));
-  await repo.upstreams.save(buildCustomUpstreamRecord({
-    id: 'up_other',
-    name: 'Other',
-    sortOrder: 200,
-    config: {
-      baseUrl: 'https://other.example.com',
-      authStyle: 'bearer',
-      apiKey: 'sk-other',
-      endpoints: { chatCompletions: {} },
-    },
-  }));
-
-  await withMockedFetch(
-    request => {
-      const url = new URL(request.url);
-      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
-      if (url.pathname === '/copilot_internal/v2/token') {
-        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
-      }
-      if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([]));
-      // Both upstreams expose gpt-5.4 — but the alias is restricted to up_azure.
-      if (url.pathname === '/v1/models' && url.hostname === 'azure.example.com') {
-        return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
-      }
-      if (url.pathname === '/v1/models' && url.hostname === 'other.example.com') {
-        return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
-      }
-      throw new Error(`Unhandled fetch ${request.url}`);
-    },
-    async () => {
-      const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
-      const body = await response.json() as { data: Array<{ id: string; display_name: string }> };
-      const aliasRows = body.data.filter(m => m.id === 'codex-auto-review');
-      assertEquals(aliasRows.length, 1);
-      assertEquals(aliasRows[0].display_name, 'gpt-5.4');
-    },
-  );
-});
-
-test('/v1/models merges alias emissions whose synthesized public id collides — one row, multiple backing upstreams', async () => {
-  const { repo, apiKey } = await setupAppTest();
-
-  (repo.modelAliases as MemoryModelAliasesRepo).setAll([
-    {
-      alias: 'codex-auto-review',
-      displayName: 'Codex Auto Review',
-      targetModelId: 'gpt-5.4',
-      upstreamIds: [],
-      rules: { reasoning: { effort: 'low' } },
-      visibleInModelsList: true,
-      onConflict: 'real-only',
-      createdAt: 1_700_000_000,
-    },
-  ]);
-
-  // Two no-prefix upstreams both serve gpt-5.4 — without dedupe, the alias
-  // would emit two `codex-auto-review` rows. With dedupe, the dashboard sees
-  // one row whose `upstreams` field lists both bindings, exactly like real
-  // models that exist on multiple upstreams.
-  await repo.upstreams.save(buildCustomUpstreamRecord({
-    id: 'up_alpha',
-    name: 'Alpha',
-    sortOrder: 100,
-    config: {
-      baseUrl: 'https://alpha.example.com',
-      authStyle: 'bearer',
-      apiKey: 'sk-alpha',
-      endpoints: { chatCompletions: {} },
-    },
-  }));
-  await repo.upstreams.save(buildCustomUpstreamRecord({
-    id: 'up_beta',
-    name: 'Beta',
-    sortOrder: 200,
-    config: {
-      baseUrl: 'https://beta.example.com',
-      authStyle: 'bearer',
-      apiKey: 'sk-beta',
-      endpoints: { chatCompletions: {} },
-    },
-  }));
-
-  await withMockedFetch(
-    request => {
-      const url = new URL(request.url);
-      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
-      if (url.pathname === '/copilot_internal/v2/token') {
-        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
-      }
-      if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') return jsonResponse(copilotModels([]));
-      if (url.pathname === '/v1/models' && (url.hostname === 'alpha.example.com' || url.hostname === 'beta.example.com')) {
-        return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4' }] });
-      }
-      throw new Error(`Unhandled fetch ${request.url}`);
-    },
-    async () => {
-      const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
-      const body = await response.json() as { data: Array<{ id: string }> };
-      const rows = body.data.filter(m => m.id === 'codex-auto-review');
-      assertEquals(rows.length, 1);
-    },
-  );
-});
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index 1264eed0d..49be73da7 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -1,8 +1,5 @@
 import { fetchUpstreamModelsCached } from './models-cache.ts';
-import type { ModelAlias, ModelAliasRules } from '../../control-plane/model-aliases/types.ts';
 import { getRepo } from '../../repo/index.ts';
-import { matchAlias } from '../model-aliases/match.ts';
-import { synthesizeListedAliases, type ListedModel } from '../models/alias-listing.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import { type ModelEndpointKey, type ModelEndpoints, kindForEndpoints } from '@floway-dev/protocols/common';
 import type { InternalModel, ModelProviderInstance, ProviderModelRecord, ResolvedModel, Fetcher, UpstreamModel, UpstreamProviderKind, UpstreamRecord } from '@floway-dev/provider';
@@ -21,10 +18,6 @@ interface ProviderModelsResult {
   // order as the input `providers` list so the model-missing renderer can
   // surface a stable, dashboard-aligned list.
   failedUpstreams: string[];
-  // Raw per-upstream catalogs collected during the fan-out. Aliases consume
-  // this to enumerate per-upstream entries by addressable form without paying
-  // a second round-trip.
-  rawCatalogs: Map<string, readonly UpstreamModel[]>;
 }
 
 const NO_UPSTREAM_CONFIGURED_MESSAGE = 'No upstream provider configured — connect GitHub Copilot or add a Custom/Azure upstream in the dashboard';
@@ -148,7 +141,6 @@ const collectProviderModels = async (
   scheduler: BackgroundScheduler,
 ): Promise<ProviderModelsResult> => {
   const byId = new Map<string, ResolvedModel>();
-  const rawCatalogs = new Map<string, readonly UpstreamModel[]>();
   let sawSuccess = false;
   let lastError: unknown = null;
   const failedUpstreams: string[] = [];
@@ -180,7 +172,6 @@ const collectProviderModels = async (
     }
     sawSuccess = true;
     const { instance, models: providedModels } = result.value;
-    rawCatalogs.set(instance.upstream, providedModels);
     // Operator-disabled public model ids vanish entirely for this upstream:
     // dropped before they reach the catalog map, so they appear in no /models
     // listing and resolve to nothing for routing. The disable is per-upstream,
@@ -215,7 +206,7 @@ const collectProviderModels = async (
     }
   }
 
-  return { models: [...byId.values()], sawSuccess, lastError, failedUpstreams, rawCatalogs };
+  return { models: [...byId.values()], sawSuccess, lastError, failedUpstreams };
 };
 
 // Public-facing model-id ordering, applied in getModels() to every list that
@@ -271,41 +262,6 @@ export const getModels = async (
   return [];
 };
 
-// Returns the merged public model list AND the per-upstream raw catalogs and
-// provider instances. Listing surfaces (`/v1/models`, `/api/models`, Gemini
-// `/models`) use the same call so alias entries — synthesized once via
-// `synthesizeListedAliases` against the same `(providers, rawCatalogs)` pair —
-// are interleaved into the catalog before it returns. Per-surface mappers
-// then walk one uniform `ListedModel[]` instead of re-implementing alias
-// fan-out three times.
-export interface PublicModelsListing {
-  models: ListedModel[];
-  providers: readonly ModelProviderInstance[];
-  rawCatalogs: ReadonlyMap<string, readonly UpstreamModel[]>;
-}
-
-export const getModelsForListing = async (
-  upstreamFilter: readonly string[] | null,
-  fetcherForUpstream: (upstreamId: string) => Fetcher,
-  scheduler: BackgroundScheduler,
-  aliases: readonly ModelAlias[],
-): Promise<PublicModelsListing> => {
-  const providers = await listModelProviders(upstreamFilter);
-  if (providers.length === 0) {
-    throw new Error(NO_UPSTREAM_CONFIGURED_MESSAGE);
-  }
-
-  const { models, sawSuccess, lastError, rawCatalogs } = await collectProviderModels(providers, fetcherForUpstream, scheduler);
-
-  if (sawSuccess) {
-    const real = models.sort((a, b) => compareModelIds(a.id, b.id));
-    const aliasEntries = synthesizeListedAliases(aliases, providers, rawCatalogs);
-    return { models: [...real, ...aliasEntries], providers, rawCatalogs };
-  }
-  if (lastError) throw lastError;
-  return { models: [], providers, rawCatalogs };
-};
-
 export const getInternalModels = async (
   upstreamFilter: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,
@@ -327,37 +283,14 @@ export interface ProviderModelResolution {
   id: string;
   model: UpstreamModel;
   binding: ProviderModelRecord;
-  // Set when this resolution came from an alias-rewrite interpretation. The
-  // gateway-side passthrough callers (embeddings/images/completions) stamp
-  // this onto the `x-floway-alias` response header so alias-served calls are
-  // observable without enabling any extra mode.
-  aliasName?: string;
-  // Operator-locked rules carried alongside `aliasName`. Set in lockstep so
-  // passthrough callers can trace the dropped rule fields without re-finding
-  // the matched alias by name.
-  aliasRules?: ModelAliasRules;
 }
 
 export interface ModelInterpretation {
   provider: ModelProviderInstance;
   // The bare id to query the upstream's catalog with. Equals the inbound
   // model id for the unprefixed surface; equals `inbound.slice(prefix.length)`
-  // for the prefixed surface. For an alias-rewrite interpretation it equals
-  // the matched alias's `targetModelId`.
+  // for the prefixed surface.
   lookupId: string;
-  // Operator-locked request-time rules carried alongside an alias-rewrite
-  // interpretation. Set only when this interpretation is the alias-rewrite
-  // half of a matched alias; the real-name interpretation in the same
-  // `conflictGroup` (and every non-aliased interpretation) leaves this
-  // undefined.
-  aliasRules?: ModelAliasRules;
-  // The alias name as authored by the operator. Set in lockstep with
-  // `aliasRules` and carried out for the `x-floway-alias` response header.
-  aliasName?: string;
-  // Identity-keyed group shared by the two interpretations a single
-  // `onConflict: 'real-only'` alias emits. The post-resolution prune uses
-  // this to drop the alias-rewrite member when both halves resolved.
-  conflictGroup?: { readonly originalLookupId: string };
 }
 
 // Expands one inbound model id into every (provider, catalog-lookup-id) pair
@@ -365,90 +298,29 @@ export interface ModelInterpretation {
 // when the inbound id literally equals one of the public-id surfaces the
 // upstream advertises (bare and/or prefixed, per `modelPrefix.addressable`).
 // The unprefixed interpretation is always pushed first when both apply.
-//
-// Each (provider, lookupId) candidate is then matched against the global
-// alias table — semantic P, post-prefix-strip — and the matched alias's
-// `onConflict` decides whether to push the real-name interpretation, the
-// alias-rewrite interpretation, or both (in either order). When neither
-// the alias nor the alias's target id is exposed by the upstream catalog,
-// the fan-out still emits both interpretations and resolution simply
-// drops the half that misses.
 export const enumerateModelInterpretations = (
   modelId: string,
   providers: readonly ModelProviderInstance[],
-  aliases: readonly ModelAlias[],
 ): ModelInterpretation[] => {
   const out: ModelInterpretation[] = [];
   for (const provider of providers) {
     const cfg = provider.modelPrefix;
     if (cfg === null || cfg.addressable.includes('unprefixed')) {
-      pushInterpretation(out, provider, modelId, aliases);
+      out.push({ provider, lookupId: modelId });
     }
     if (cfg !== null && cfg.addressable.includes('prefixed') && modelId.startsWith(cfg.prefix)) {
-      pushInterpretation(out, provider, modelId.slice(cfg.prefix.length), aliases);
+      out.push({ provider, lookupId: modelId.slice(cfg.prefix.length) });
     }
   }
   return out;
 };
 
-const pushInterpretation = (
-  out: ModelInterpretation[],
-  provider: ModelProviderInstance,
-  lookupId: string,
-  aliases: readonly ModelAlias[],
-): void => {
-  const alias = matchAlias(lookupId, provider.upstream, aliases);
-  if (!alias) {
-    out.push({ provider, lookupId });
-    return;
-  }
-  const aliasInterp: ModelInterpretation = {
-    provider,
-    lookupId: alias.targetModelId,
-    aliasRules: alias.rules,
-    aliasName: alias.alias,
-  };
-  const realInterp: ModelInterpretation = { provider, lookupId };
-  switch (alias.onConflict) {
-  case 'alias-only':
-    out.push(aliasInterp);
-    return;
-  case 'real-only': {
-    // Both halves enter the resolution pass; the post-resolution prune
-    // drops the alias-rewrite member when the real-name resolved too.
-    // Identity-keyed group so the prune step can rejoin them without
-    // re-deriving an alias key.
-    const group = { originalLookupId: lookupId };
-    out.push({ ...realInterp, conflictGroup: group });
-    out.push({ ...aliasInterp, conflictGroup: group });
-    return;
-  }
-  case 'both-real-first':
-    out.push(realInterp);
-    out.push(aliasInterp);
-    return;
-  case 'both-alias-first':
-    out.push(aliasInterp);
-    out.push(realInterp);
-    return;
-  default: {
-    const exhaustive: never = alias.onConflict;
-    throw new Error(`pushInterpretation: unhandled onConflict '${exhaustive as string}'`);
-  }
-  }
-};
-
 // Fan out per-interpretation against the SWR cache and collect the resolved
 // matches plus a deduped list of upstreams whose catalog fetch rejected.
 // Shared by `resolveModelForRequest` and `enumerateProviderCandidates`; the
 // per-caller divergence (passthrough vs LLM-candidate shape) happens after
 // this returns. Cancellation (`AbortError`) propagates so the per-request
 // abort signal cannot be masked by a slow upstream's rejection.
-//
-// Each successful resolution carries its source `interpretation` back to
-// the caller so the alias-rewrite metadata (`aliasRules`, `aliasName`)
-// rides through to the candidate, and so the `real-only` post-resolution
-// prune can rejoin the two halves of a conflict group.
 export const collectInterpretationOutcomes = async (
   interpretations: readonly ModelInterpretation[],
   fetcherForUpstream: (upstreamId: string) => Fetcher,
@@ -483,31 +355,7 @@ export const collectInterpretationOutcomes = async (
     resolutions.push({ interpretation, provider: interpretation.provider, resolved });
   }
 
-  // `onConflict: 'real-only'`: when both halves of a conflict group
-  // resolved, drop the alias-rewrite half so the real-name match is the
-  // only one downstream sees. When only the alias-rewrite half resolved
-  // (the upstream has no model named after the alias itself), keep it —
-  // the operator's intent is to fall back to the alias when no real model
-  // collides.
-  const droppedInterpretations = new Set<ModelInterpretation>();
-  const byGroup = new Map<{ readonly originalLookupId: string }, ModelInterpretation[]>();
-  for (const { interpretation } of resolutions) {
-    const group = interpretation.conflictGroup;
-    if (!group) continue;
-    const list = byGroup.get(group) ?? [];
-    list.push(interpretation);
-    byGroup.set(group, list);
-  }
-  for (const members of byGroup.values()) {
-    if (members.length < 2) continue;
-    const aliasRewriteMember = members.find(i => i.aliasRules !== undefined);
-    if (aliasRewriteMember) droppedInterpretations.add(aliasRewriteMember);
-  }
-
-  return {
-    resolutions: resolutions.filter(r => !droppedInterpretations.has(r.interpretation)),
-    failedUpstreams,
-  };
+  return { resolutions, failedUpstreams };
 };
 
 export const resolveModelForRequest = async (
@@ -515,22 +363,15 @@ export const resolveModelForRequest = async (
   upstreamFilter: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
-  aliases: readonly ModelAlias[] = [],
 ): Promise<ModelResolution> => {
   const providers = await listModelProviders(upstreamFilter);
   if (providers.length === 0) {
     throw new Error(NO_UPSTREAM_CONFIGURED_MESSAGE);
   }
 
-  const interpretations = enumerateModelInterpretations(modelId, providers, aliases);
+  const interpretations = enumerateModelInterpretations(modelId, providers);
   const { resolutions, failedUpstreams } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
-  // Project each resolution's alias-rewrite interpretation onto the
-  // returned ProviderModelResolution so passthrough callers can stamp the
-  // `x-floway-alias` header without re-deriving the match.
-  const matches: ProviderModelResolution[] = resolutions.map(r =>
-    r.interpretation.aliasName !== undefined
-      ? { ...r.resolved, aliasName: r.interpretation.aliasName, aliasRules: r.interpretation.aliasRules }
-      : r.resolved);
+  const matches: ProviderModelResolution[] = resolutions.map(r => r.resolved);
   return { matches, failedUpstreams };
 };
 
diff --git a/packages/gateway/src/data-plane/providers/registry_test.ts b/packages/gateway/src/data-plane/providers/registry_test.ts
index 9823aeb98..c330abfee 100644
--- a/packages/gateway/src/data-plane/providers/registry_test.ts
+++ b/packages/gateway/src/data-plane/providers/registry_test.ts
@@ -2,7 +2,6 @@ import { describe, expect, test } from 'vitest';
 
 import { clearInFlightForTesting } from './models-cache.ts';
 import { compareModelIds, enumerateModelInterpretations, getInternalModels, listModelProviders, resolveModelForProvider, resolveModelForRequest } from './registry.ts';
-import type { ModelAlias } from '../../control-plane/model-aliases/types.ts';
 import { buildCopilotUpstreamRecord, buildCustomUpstreamRecord, copilotModels, setupAppTest } from '../../test-helpers.ts';
 import { directFetcher, type ModelProviderInstance } from '@floway-dev/provider';
 import { createCopilotProvider } from '@floway-dev/provider-copilot';
@@ -610,20 +609,20 @@ describe('enumerateModelInterpretations', () => {
     // A: no prefix, bare always accepted. B: prefixed-only addressable — bare
     // is not accepted. C: dual-addressable, bare accepted; the prefixed form
     // does not apply because `gpt-4o` does not start with `cx/`.
-    assertEquals(shape(enumerateModelInterpretations('gpt-4o', [A, B, C], [])), [
+    assertEquals(shape(enumerateModelInterpretations('gpt-4o', [A, B, C])), [
       { upstream: 'A', lookupId: 'gpt-4o' },
       { upstream: 'C', lookupId: 'gpt-4o' },
     ]);
   });
 
   test('prefix-only-addressable upstream strips the prefix when it matches', () => {
-    assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [B], [])), [
+    assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [B])), [
       { upstream: 'B', lookupId: 'gpt-4o' },
     ]);
   });
 
   test('prefix-only-addressable upstream is silent when the prefix does not match', () => {
-    assertEquals(enumerateModelInterpretations('gpt-4o', [B], []), []);
+    assertEquals(enumerateModelInterpretations('gpt-4o', [B]), []);
   });
 
   test('dual-addressable upstream produces two interpretations when the prefix matches', () => {
@@ -634,7 +633,7 @@ describe('enumerateModelInterpretations', () => {
       upstream: 'D', name: 'd',
       modelPrefix: { prefix: 'or/', addressable: ['unprefixed', 'prefixed'], listed: ['prefixed'] },
     });
-    assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [D], [])), [
+    assertEquals(shape(enumerateModelInterpretations('or/gpt-4o', [D])), [
       { upstream: 'D', lookupId: 'or/gpt-4o' },
       { upstream: 'D', lookupId: 'gpt-4o' },
     ]);
@@ -654,7 +653,7 @@ describe('enumerateModelInterpretations', () => {
       modelPrefix: { prefix: 'aa/bb/', addressable: ['prefixed'], listed: ['prefixed'] },
     });
     const Z = fakeProvider({ upstream: 'Z', name: 'z', modelPrefix: null });
-    assertEquals(shape(enumerateModelInterpretations('aa/bb/gpt-5', [X, Y, Z], [])), [
+    assertEquals(shape(enumerateModelInterpretations('aa/bb/gpt-5', [X, Y, Z])), [
       { upstream: 'X', lookupId: 'bb/gpt-5' },
       { upstream: 'Y', lookupId: 'gpt-5' },
       { upstream: 'Z', lookupId: 'aa/bb/gpt-5' },
@@ -907,202 +906,3 @@ describe('catalog listing under modelPrefix', () => {
     );
   });
 });
-
-// Synthetic-catalog alias matching against a single provider. Verifies that
-// each `onConflict` mode emits the right interpretation shape from
-// `enumerateModelInterpretations`. The downstream `collectInterpretationOutcomes`
-// pass is exercised in the e2e suite below.
-describe('enumerateModelInterpretations with alias matching', () => {
-  const provider = fakeProvider({ upstream: 'U', name: 'u', modelPrefix: null });
-
-  const makeAlias = (over: Partial<ModelAlias>): ModelAlias => ({
-    alias: 'codex-auto-review',
-    targetModelId: 'gpt-5.4',
-    upstreamIds: [],
-    rules: { reasoning: { effort: 'low' } },
-    visibleInModelsList: true,
-    onConflict: 'real-only',
-    createdAt: 0,
-    ...over,
-  });
-
-  test('alias-only emits exactly the alias-rewrite interpretation, with rules', () => {
-    const aliases = [makeAlias({ onConflict: 'alias-only' })];
-    const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
-    assertEquals(out.length, 1);
-    assertEquals(out[0].lookupId, 'gpt-5.4');
-    assertEquals(out[0].aliasRules, { reasoning: { effort: 'low' } });
-    assertEquals(out[0].aliasName, 'codex-auto-review');
-    assertEquals(out[0].conflictGroup, undefined);
-  });
-
-  test('real-only emits both halves, tagged with a shared conflictGroup', () => {
-    const aliases = [makeAlias({ onConflict: 'real-only' })];
-    const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
-    assertEquals(out.length, 2);
-    // Real first, alias second — the prune step removes the alias when
-    // real resolved, so real-first keeps the natural iteration order.
-    assertEquals(out[0].lookupId, 'codex-auto-review');
-    assertEquals(out[0].aliasRules, undefined);
-    assertEquals(out[1].lookupId, 'gpt-5.4');
-    assertEquals(out[1].aliasRules, { reasoning: { effort: 'low' } });
-    expect(out[0].conflictGroup).toBeDefined();
-    expect(out[0].conflictGroup).toBe(out[1].conflictGroup);
-  });
-
-  test('both-real-first emits real then alias, neither group-tagged', () => {
-    const aliases = [makeAlias({ onConflict: 'both-real-first' })];
-    const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
-    assertEquals(out.length, 2);
-    assertEquals(out[0].lookupId, 'codex-auto-review');
-    assertEquals(out[0].aliasRules, undefined);
-    assertEquals(out[1].lookupId, 'gpt-5.4');
-    assertEquals(out[1].aliasRules, { reasoning: { effort: 'low' } });
-    assertEquals(out[0].conflictGroup, undefined);
-    assertEquals(out[1].conflictGroup, undefined);
-  });
-
-  test('both-alias-first emits alias then real, neither group-tagged', () => {
-    const aliases = [makeAlias({ onConflict: 'both-alias-first' })];
-    const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
-    assertEquals(out.length, 2);
-    assertEquals(out[0].lookupId, 'gpt-5.4');
-    assertEquals(out[0].aliasRules, { reasoning: { effort: 'low' } });
-    assertEquals(out[1].lookupId, 'codex-auto-review');
-    assertEquals(out[1].aliasRules, undefined);
-  });
-
-  test('upstreamIds filter skips the alias on providers outside the allowlist', () => {
-    const aliases = [makeAlias({ onConflict: 'alias-only', upstreamIds: ['OTHER'] })];
-    const out = enumerateModelInterpretations('codex-auto-review', [provider], aliases);
-    // The alias only applies to OTHER, so this provider sees a literal
-    // (no-rewrite) interpretation.
-    assertEquals(out.length, 1);
-    assertEquals(out[0].lookupId, 'codex-auto-review');
-    assertEquals(out[0].aliasRules, undefined);
-  });
-
-  test('prefix-strip happens before alias matching (semantic P)', () => {
-    // Configure the provider with a prefix; the inbound `cx/codex-auto-review`
-    // strips to `codex-auto-review` and matches the alias. The alias-rewrite
-    // interpretation carries the target id `gpt-5.4`.
-    const prefixedProvider = fakeProvider({
-      upstream: 'P', name: 'p',
-      modelPrefix: { prefix: 'cx/', addressable: ['prefixed'], listed: ['prefixed'] },
-    });
-    const aliases = [makeAlias({ onConflict: 'alias-only' })];
-    const out = enumerateModelInterpretations('cx/codex-auto-review', [prefixedProvider], aliases);
-    assertEquals(out.length, 1);
-    assertEquals(out[0].lookupId, 'gpt-5.4');
-    assertEquals(out[0].aliasName, 'codex-auto-review');
-  });
-});
-
-// E2E coverage of the post-resolution prune. Uses a real Azure-backed
-// catalog (resolved without HTTP) so the conflict pruning behavior is
-// observed end-to-end via `resolveModelForRequest`.
-describe('resolveModelForRequest applies alias onConflict pruning', () => {
-  // Helper that stages a single Azure upstream exposing both the real
-  // alias-named model and the alias's target model.
-  const stageBothNamesUpstream = async (): Promise<void> => {
-    const { repo } = await setupAppTest();
-    await repo.upstreams.deleteAll();
-    await repo.upstreams.save({
-      id: 'up_a',
-      provider: 'azure',
-      name: 'A',
-      enabled: true,
-      sortOrder: 1,
-      createdAt: '2026-05-21T00:00:00.000Z',
-      updatedAt: '2026-05-21T00:00:00.000Z',
-      config: {
-        endpoint: 'https://a.openai.azure.com',
-        apiKey: 'az-key',
-        models: [
-          { upstreamModelId: 'codex-auto-review', endpoints: { chatCompletions: {} } },
-          { upstreamModelId: 'gpt-5.4', endpoints: { chatCompletions: {} } },
-        ],
-      },
-      flagOverrides: {},
-      disabledPublicModelIds: [],
-      proxyFallbackList: [],
-      modelPrefix: null,
-      state: null,
-    });
-  };
-
-  // Helper that stages a single Azure upstream exposing ONLY the alias's
-  // target model (no real `codex-auto-review` collision).
-  const stageTargetOnlyUpstream = async (): Promise<void> => {
-    const { repo } = await setupAppTest();
-    await repo.upstreams.deleteAll();
-    await repo.upstreams.save({
-      id: 'up_a',
-      provider: 'azure',
-      name: 'A',
-      enabled: true,
-      sortOrder: 1,
-      createdAt: '2026-05-21T00:00:00.000Z',
-      updatedAt: '2026-05-21T00:00:00.000Z',
-      config: {
-        endpoint: 'https://a.openai.azure.com',
-        apiKey: 'az-key',
-        models: [
-          { upstreamModelId: 'gpt-5.4', endpoints: { chatCompletions: {} } },
-        ],
-      },
-      flagOverrides: {},
-      disabledPublicModelIds: [],
-      proxyFallbackList: [],
-      modelPrefix: null,
-      state: null,
-    });
-  };
-
-  const aliasOf = (onConflict: ModelAlias['onConflict']): ModelAlias => ({
-    alias: 'codex-auto-review',
-    targetModelId: 'gpt-5.4',
-    upstreamIds: [],
-    rules: { reasoning: { effort: 'low' } },
-    visibleInModelsList: true,
-    onConflict,
-    createdAt: 0,
-  });
-
-  test('alias-only resolves to a single match against the alias target id', async () => {
-    await stageBothNamesUpstream();
-    const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('alias-only')]);
-    assertEquals(resolved.matches.length, 1);
-    assertEquals(resolved.matches[0].id, 'gpt-5.4');
-  });
-
-  test('real-only drops the alias-rewrite resolution when the real-name resolves too', async () => {
-    await stageBothNamesUpstream();
-    const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('real-only')]);
-    assertEquals(resolved.matches.length, 1);
-    assertEquals(resolved.matches[0].id, 'codex-auto-review');
-  });
-
-  test('real-only keeps the alias-rewrite resolution when the real-name catalog lookup misses', async () => {
-    await stageTargetOnlyUpstream();
-    const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('real-only')]);
-    assertEquals(resolved.matches.length, 1);
-    assertEquals(resolved.matches[0].id, 'gpt-5.4');
-  });
-
-  test('both-real-first resolves to two matches, real first', async () => {
-    await stageBothNamesUpstream();
-    const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('both-real-first')]);
-    assertEquals(resolved.matches.length, 2);
-    assertEquals(resolved.matches[0].id, 'codex-auto-review');
-    assertEquals(resolved.matches[1].id, 'gpt-5.4');
-  });
-
-  test('both-alias-first resolves to two matches, alias first', async () => {
-    await stageBothNamesUpstream();
-    const resolved = await resolveModelForRequest('codex-auto-review', null, () => directFetcher, testScheduler, [aliasOf('both-alias-first')]);
-    assertEquals(resolved.matches.length, 2);
-    assertEquals(resolved.matches[0].id, 'gpt-5.4');
-    assertEquals(resolved.matches[1].id, 'codex-auto-review');
-  });
-});
diff --git a/packages/gateway/src/data-plane/shared/passthrough-serve.ts b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
index 755a3a230..1add1a115 100644
--- a/packages/gateway/src/data-plane/shared/passthrough-serve.ts
+++ b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
@@ -21,11 +21,8 @@ import { createUpstreamLatencyRecorder, recordPerformanceError, recordPerformanc
 import { recordTokenUsage } from './telemetry/usage.ts';
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import type { AuthedContext } from '../../middleware/auth.ts';
-import { getRepo } from '../../repo/index.ts';
 import type { TokenUsage } from '../../repo/types.ts';
 import type { GatewayCtx } from '../chat/shared/gateway-ctx.ts';
-import { stageGatewayResponseHeader } from '../chat/shared/gateway-ctx.ts';
-import { createSanitizeTraceCtx, traceAllRulesDropped } from '../chat/shared/sanitize.ts';
 import { type StreamCompletion, writeSSEFrames } from '../chat/shared/stream/sse.ts';
 import { resolveModelForRequest } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
@@ -127,22 +124,12 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
 
   try {
     const fetcherForUpstream = await createPerRequestFetcher(ctx.currentColo);
-    // Aliases pass through so a `(model, lookupId)` interpretation can rewrite
-    // to the alias's target id even for non-LLM-shaped endpoints. The alias
-    // rules themselves never apply here — the inbound payload (embeddings,
-    // images, /v1/completions) has no protocol-extension slots for the rule
-    // knobs. We still surface the matched alias name on the
-    // `x-floway-alias` response header (staged via Hono's `c.header` so it
-    // survives `streamSSE`'s internal `c.newResponse` on the streaming
-    // `/v1/completions` path) and trace one log line per dropped rule so an
-    // operator can confirm the rewrite ran.
-    const aliases = await getRepo().modelAliases.loadAll();
     // Each match is one (upstream, upstream-catalog id) pair that interprets
     // the inbound public id. Iteration order follows configured sort_order
     // across upstreams, with the unprefixed interpretation pushed before the
     // prefixed one within a single upstream. The first match whose binding
     // satisfies the endpoint capability wins.
-    const { matches, failedUpstreams } = await resolveModelForRequest(model, ctx.upstreamIds, fetcherForUpstream, ctx.backgroundScheduler, aliases);
+    const { matches, failedUpstreams } = await resolveModelForRequest(model, ctx.upstreamIds, fetcherForUpstream, ctx.backgroundScheduler);
     if (matches.length === 0) {
       ctx.dump?.error('gateway');
       return passthroughApiError(c, appendFailedUpstreams(`Model ${model} is not available on any configured upstream.`, failedUpstreams), 404);
@@ -150,12 +137,6 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
 
     for (const match of matches) {
       if (!bindingServesEndpoint(match.binding)) continue;
-      if (match.aliasName !== undefined) {
-        stageGatewayResponseHeader(ctx, 'x-floway-alias', match.aliasName);
-        if (match.aliasRules) {
-          traceAllRulesDropped(match.aliasRules, sourceApi, createSanitizeTraceCtx(match.aliasName));
-        }
-      }
 
       const recorder = createUpstreamLatencyRecorder();
       const { response, modelKey } = await call(match.binding, {
diff --git a/packages/gateway/src/repo/memory.ts b/packages/gateway/src/repo/memory.ts
index d7492938f..5a85dba39 100644
--- a/packages/gateway/src/repo/memory.ts
+++ b/packages/gateway/src/repo/memory.ts
@@ -13,7 +13,6 @@ import type {
   ApiKeyRepo,
   BackoffRow,
   CachedModelsRow,
-  ModelAliasesRepo,
   ModelsCacheRepo,
   PerformanceDimensions,
   PerformanceErrorSample,
@@ -40,7 +39,6 @@ import type {
   UsersRepo,
 } from './types.ts';
 import { serializeStoredState } from './upstream-json.ts';
-import type { ModelAlias } from '../control-plane/model-aliases/types.ts';
 import { latencyBucketForMs } from '../shared/performance-histogram.ts';
 import { generateSessionToken } from '../shared/session-tokens.ts';
 import { assertWebSearchProviderName } from '../shared/web-search-providers.ts';
@@ -898,7 +896,6 @@ export class InMemoryRepo implements Repo {
   proxyBackoffs: ProxyBackoffRepo;
   responsesItems: ResponsesItemsRepo;
   responsesSnapshots: ResponsesSnapshotsRepo;
-  modelAliases: ModelAliasesRepo;
 
   constructor() {
     this.users = new MemoryUsersRepo();
@@ -914,55 +911,5 @@ export class InMemoryRepo implements Repo {
     this.proxyBackoffs = new MemoryProxyBackoffRepo();
     this.responsesItems = new MemoryResponsesItemsRepo();
     this.responsesSnapshots = new MemoryResponsesSnapshotsRepo();
-    this.modelAliases = new MemoryModelAliasesRepo();
-  }
-}
-
-// Test-only in-memory backing for the alias table. Mirrors SqlModelAliasesRepo:
-// `loadAll` returns rows sorted by alias, `create` rejects PK collisions,
-// `save` upserts in place. `setAll` is the test seam: tests that pre-populate
-// the table for read-only data-plane assertions reach for it directly.
-export class MemoryModelAliasesRepo implements ModelAliasesRepo {
-  private rows = new Map<string, ModelAlias>();
-
-  loadAll(): Promise<readonly ModelAlias[]> {
-    return Promise.resolve([...this.rows.values()].sort((a, b) => a.alias.localeCompare(b.alias)));
-  }
-
-  getByAlias(alias: string): Promise<ModelAlias | null> {
-    return Promise.resolve(this.rows.get(alias) ?? null);
-  }
-
-  create(alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }> {
-    if (this.rows.has(alias.alias)) return Promise.resolve({ ok: false, reason: 'duplicate' });
-    this.rows.set(alias.alias, alias);
-    return Promise.resolve({ ok: true });
-  }
-
-  save(alias: ModelAlias): Promise<void> {
-    // Preserve the original row's createdAt on an upsert so re-saves do not
-    // overwrite the local deployment's first-seen timestamp.
-    const existing = this.rows.get(alias.alias);
-    const preserved = existing ? { ...alias, createdAt: existing.createdAt } : alias;
-    this.rows.set(preserved.alias, preserved);
-    return Promise.resolve();
-  }
-
-  rename(oldAlias: string, newAlias: string): Promise<{ ok: true } | { ok: false; reason: 'duplicate' | 'notFound' }> {
-    if (oldAlias === newAlias) return Promise.resolve({ ok: true });
-    if (this.rows.has(newAlias)) return Promise.resolve({ ok: false, reason: 'duplicate' });
-    const existing = this.rows.get(oldAlias);
-    if (!existing) return Promise.resolve({ ok: false, reason: 'notFound' });
-    this.rows.delete(oldAlias);
-    this.rows.set(newAlias, { ...existing, alias: newAlias });
-    return Promise.resolve({ ok: true });
-  }
-
-  delete(alias: string): Promise<{ deleted: boolean }> {
-    return Promise.resolve({ deleted: this.rows.delete(alias) });
-  }
-
-  setAll(rows: readonly ModelAlias[]): void {
-    this.rows = new Map(rows.map(row => [row.alias, row]));
   }
 }
diff --git a/packages/gateway/src/repo/sql.ts b/packages/gateway/src/repo/sql.ts
index 75f814178..b716d07e4 100644
--- a/packages/gateway/src/repo/sql.ts
+++ b/packages/gateway/src/repo/sql.ts
@@ -7,7 +7,6 @@ import type {
   ApiKeyRepo,
   BackoffRow,
   CachedModelsRow,
-  ModelAliasesRepo,
   ModelsCacheRepo,
   PerformanceDimensions,
   PerformanceErrorSample,
@@ -35,8 +34,6 @@ import type {
   UsersRepo,
 } from './types.ts';
 import { serializeStoredConfig, serializeStoredState } from './upstream-json.ts';
-import { deleteAlias, getAliasByName, insertAlias, loadAllAliases, renameAlias, saveAlias } from '../control-plane/model-aliases/repo.ts';
-import type { ModelAlias } from '../control-plane/model-aliases/types.ts';
 import { latencyBucketForMs } from '../shared/performance-histogram.ts';
 import { generateSessionToken } from '../shared/session-tokens.ts';
 import { assertWebSearchProviderName } from '../shared/web-search-providers.ts';
@@ -1602,7 +1599,6 @@ export class SqlRepo implements Repo {
   proxyBackoffs: ProxyBackoffRepo;
   responsesItems: ResponsesItemsRepo;
   responsesSnapshots: ResponsesSnapshotsRepo;
-  modelAliases: ModelAliasesRepo;
 
   constructor(db: SqlDatabase) {
     this.users = new SqlUsersRepo(db);
@@ -1618,34 +1614,5 @@ export class SqlRepo implements Repo {
     this.proxyBackoffs = new SqlProxyBackoffRepo(db);
     this.responsesItems = new SqlResponsesItemsRepo(db);
     this.responsesSnapshots = new SqlResponsesSnapshotsRepo(db);
-    this.modelAliases = new SqlModelAliasesRepo(db);
-  }
-}
-
-class SqlModelAliasesRepo implements ModelAliasesRepo {
-  constructor(private db: SqlDatabase) {}
-
-  loadAll(): Promise<readonly ModelAlias[]> {
-    return loadAllAliases(this.db);
-  }
-
-  getByAlias(alias: string): Promise<ModelAlias | null> {
-    return getAliasByName(this.db, alias);
-  }
-
-  create(alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }> {
-    return insertAlias(this.db, alias);
-  }
-
-  save(alias: ModelAlias): Promise<void> {
-    return saveAlias(this.db, alias);
-  }
-
-  rename(oldAlias: string, newAlias: string): Promise<{ ok: true } | { ok: false; reason: 'duplicate' | 'notFound' }> {
-    return renameAlias(this.db, oldAlias, newAlias);
-  }
-
-  delete(alias: string): Promise<{ deleted: boolean }> {
-    return deleteAlias(this.db, alias);
   }
 }
diff --git a/packages/gateway/src/repo/types.ts b/packages/gateway/src/repo/types.ts
index 7d10f90ca..0341d41ef 100644
--- a/packages/gateway/src/repo/types.ts
+++ b/packages/gateway/src/repo/types.ts
@@ -1,4 +1,3 @@
-import type { ModelAlias } from '../control-plane/model-aliases/types.ts';
 import type { HistogramBucket } from '../shared/performance-histogram.ts';
 import type { WebSearchProviderName } from '../shared/web-search-providers.ts';
 import type { BillingDimension, ModelPricing } from '@floway-dev/protocols/common';
@@ -333,27 +332,4 @@ export interface Repo {
   proxyBackoffs: ProxyBackoffRepo;
   responsesItems: ResponsesItemsRepo;
   responsesSnapshots: ResponsesSnapshotsRepo;
-  modelAliases: ModelAliasesRepo;
-}
-
-// Operator-managed alias table; small (dozens of rows at most) and read
-// per request, so the repo deliberately exposes only a full-table fetch
-// plus the targeted mutations the control-plane CRUD needs.
-export interface ModelAliasesRepo {
-  loadAll(): Promise<readonly ModelAlias[]>;
-  getByAlias(alias: string): Promise<ModelAlias | null>;
-  // INSERT-only — fails with `duplicate` on PK conflict so the route layer
-  // surfaces 409 to the dashboard instead of silently overwriting an
-  // existing row.
-  create(alias: ModelAlias): Promise<{ ok: true } | { ok: false; reason: 'duplicate' }>;
-  // UPSERT — used by the PATCH update path; preserves created_at on re-save
-  // and bumps updated_at.
-  save(alias: ModelAlias): Promise<void>;
-  // Updates the PK in place. Returns `notFound` when the source row is
-  // missing, `duplicate` when the destination name already exists; the
-  // route layer maps those to 404 / 409. SQLite (and D1) permit UPDATEing
-  // a PRIMARY KEY column.
-  rename(oldAlias: string, newAlias: string): Promise<{ ok: true } | { ok: false; reason: 'duplicate' | 'notFound' }>;
-  // Returns whether a row was actually removed; routes treat false as 404.
-  delete(alias: string): Promise<{ deleted: boolean }>;
 }
diff --git a/packages/gateway/src/test-helpers/gateway-ctx.ts b/packages/gateway/src/test-helpers/gateway-ctx.ts
index 047981ed1..43358b7fb 100644
--- a/packages/gateway/src/test-helpers/gateway-ctx.ts
+++ b/packages/gateway/src/test-helpers/gateway-ctx.ts
@@ -1,11 +1,10 @@
 import type { GatewayCtx } from '../data-plane/chat/shared/gateway-ctx.ts';
 import type { AuthedContext } from '../middleware/auth.ts';
 
-// Minimal stub for the Hono `c` carried on `GatewayCtx`. Only `c.header`
-// is touched by the serve layer (to stamp `x-floway-alias`); unit tests
-// that don't exercise the alias branch never call it. Integration tests
-// that need real Hono behavior build the ctx via `createGatewayCtxFromHono`
-// against a real `makeApp()` request rather than going through this stub.
+// Minimal stub for the Hono `c` carried on `GatewayCtx`. Unit tests rarely
+// touch any methods on it; integration tests that need real Hono behavior
+// build the ctx via `createGatewayCtxFromHono` against a real `makeApp()`
+// request rather than going through this stub.
 export const stubAuthedContext = (): AuthedContext =>
   ({ header: () => {} } as unknown as AuthedContext);
 
@@ -25,6 +24,5 @@ export const mockGatewayCtx = (overrides: Partial<GatewayCtx> = {}): GatewayCtx
   dump: null,
   backgroundScheduler: promise => { void promise; },
   requestStartedAt: 0,
-  responseHeaders: new Headers(),
   ...overrides,
 });
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index 9a73c11d5..54243d56d 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -134,107 +134,9 @@ export interface PublicModel {
   };
   kind: ModelKind;
   cost?: ModelPricing;
-  // Floway protocol extension. Present on synthesized alias entries the
-  // gateway appends to the listing. Clients that do not know about the
-  // field ignore it; alias-aware clients (dashboard, CLI shims) render the
-  // alias's target id and rules from this payload directly.
-  aliasedFrom?: PublicModelAliasedFrom;
   chat?: ChatModelInfo;
 }
 
-export interface PublicModelAliasedFrom {
-  targetModelId: string;
-  upstreamIds: readonly string[];
-  rules: {
-    reasoning?: {
-      effort?: string;
-      budgetTokens?: number;
-      adaptive?: boolean;
-      summary?: string;
-    };
-    verbosity?: string;
-    serviceTier?: string;
-    anthropicBeta?: readonly string[];
-  };
-  onConflict: 'alias-only' | 'real-only' | 'both-real-first' | 'both-alias-first';
-  // Operator-set display name. Absent (undefined) when the operator left the
-  // field blank — alias-aware UIs then synthesize a label from the target's
-  // display name and the inline rules summary instead.
-  displayName?: string;
-}
-
-// One badge per rule field on an alias, in a `${label}` / `${label}: ${value}`
-// shape the dashboard renders inline next to the model row. Returned in a
-// deterministic order so the badge sequence stays stable across surfaces and
-// across JSON key arrivals. Boolean toggles render label-only (no colon);
-// every other field renders as `${label}: ${value}`. The inline-prose form
-// (`composeAliasDisplayName`'s suffix and `formatAliasRulesInline`) uses its
-// own compact wording — the two surfaces deliberately diverge so the inline
-// summary stays compact while the badge view stays self-describing.
-export interface AliasRuleBadge {
-  label: string;
-  value?: string;
-}
-
-export const formatAliasRuleBadges = (rules: PublicModelAliasedFrom['rules']): AliasRuleBadge[] => {
-  const out: AliasRuleBadge[] = [];
-  if (rules.reasoning?.effort !== undefined) out.push({ label: 'effort', value: rules.reasoning.effort });
-  if (rules.reasoning?.budgetTokens !== undefined) out.push({ label: 'reasoning budget', value: `${rules.reasoning.budgetTokens}tk` });
-  if (rules.reasoning?.adaptive === true) out.push({ label: 'adaptive reasoning' });
-  if (rules.reasoning?.summary !== undefined) out.push({ label: 'reasoning summary', value: rules.reasoning.summary });
-  if (rules.verbosity !== undefined) out.push({ label: 'verbosity', value: rules.verbosity });
-  if (rules.serviceTier !== undefined) out.push({ label: 'service tier', value: rules.serviceTier });
-  if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
-    out.push({ label: 'anthropic beta', value: [...rules.anthropicBeta].sort().join('/') });
-  }
-  return out;
-};
-
-// Inline-prose parts for an alias's rules, in a deterministic order. Each
-// entry uses the compact `value label` wording (e.g. `low effort`,
-// `4096tk reasoning`) so it fits both alongside the target name in narrow
-// listings and on its own as a standalone summary line. The dashboard's
-// per-badge view uses `formatAliasRuleBadges` for the self-describing
-// `label: value` form. `anthropicBeta` tokens are sorted so two operators
-// carrying the same set in different orders see the same label.
-const aliasRulesInlineParts = (rules: PublicModelAliasedFrom['rules']): string[] => {
-  const parts: string[] = [];
-  if (rules.reasoning?.effort !== undefined) parts.push(`${rules.reasoning.effort} effort`);
-  if (rules.reasoning?.budgetTokens !== undefined) parts.push(`${rules.reasoning.budgetTokens}tk reasoning`);
-  if (rules.reasoning?.adaptive === true) parts.push('adaptive reasoning');
-  if (rules.reasoning?.summary !== undefined) parts.push(`${rules.reasoning.summary} summary`);
-  if (rules.verbosity !== undefined) parts.push(`${rules.verbosity} verbosity`);
-  if (rules.serviceTier !== undefined) parts.push(`${rules.serviceTier} tier`);
-  if (rules.anthropicBeta !== undefined && rules.anthropicBeta.length > 0) {
-    parts.push([...rules.anthropicBeta].sort().join('/'));
-  }
-  return parts;
-};
-
-// Compose the alias-local display name — what the operator named the alias
-// (when set) or a synthesized target + rules summary. Independent of which
-// upstream is surfacing the alias; the prefixed listing form prepends the
-// upstream display name at the call site, mirroring the real-model path in
-// the gateway's provider registry. The parenthesized rules suffix shares
-// its parts with `formatAliasRulesInline` so the two surfaces never drift.
-export const composeAliasDisplayName = (input: {
-  aliasDisplayName?: string;
-  targetDisplayName: string;
-  rules: PublicModelAliasedFrom['rules'];
-}): string => {
-  if (input.aliasDisplayName !== undefined) return input.aliasDisplayName;
-  const parts = aliasRulesInlineParts(input.rules);
-  const suffix = parts.length > 0 ? ` (${parts.join(', ')})` : '';
-  return `${input.targetDisplayName}${suffix}`;
-};
-
-// Joined rules summary without the parentheses — what the dashboard's alias
-// row renders on its third line. Empty string when no rule applies; callers
-// should drop the line entirely in that case rather than rendering blank.
-export const formatAliasRulesInline = (rules: PublicModelAliasedFrom['rules']): string => {
-  return aliasRulesInlineParts(rules).join(', ');
-};
-
 export interface PublicModelsResponse {
   // OpenAI container
   object: 'list';
diff --git a/packages/protocols/src/common/models_alias-display_test.ts b/packages/protocols/src/common/models_alias-display_test.ts
deleted file mode 100644
index 7c7d4c49c..000000000
--- a/packages/protocols/src/common/models_alias-display_test.ts
+++ /dev/null
@@ -1,100 +0,0 @@
-import { describe, expect, test } from 'vitest';
-
-import { composeAliasDisplayName, formatAliasRulesInline } from './models.ts';
-
-describe('composeAliasDisplayName', () => {
-  test('uses alias displayName when set, suppressing the rules summary', () => {
-    expect(
-      composeAliasDisplayName({
-        aliasDisplayName: 'Codex Auto Review',
-        targetDisplayName: 'GPT-5.4',
-        rules: { reasoning: { effort: 'low' } },
-      }),
-    ).toBe('Codex Auto Review');
-  });
-
-  test('omits the rules suffix when rules are empty', () => {
-    expect(
-      composeAliasDisplayName({
-        targetDisplayName: 'GPT-5.4',
-        rules: {},
-      }),
-    ).toBe('GPT-5.4');
-  });
-
-  test('formats each rule field with its canonical suffix when alias displayName is missing', () => {
-    const target = 'GPT-5.4';
-    expect(composeAliasDisplayName({ targetDisplayName: target, rules: { reasoning: { effort: 'high' } } })).toBe('GPT-5.4 (high effort)');
-    expect(composeAliasDisplayName({ targetDisplayName: target, rules: { reasoning: { budgetTokens: 4096 } } })).toBe('GPT-5.4 (4096tk reasoning)');
-    expect(composeAliasDisplayName({ targetDisplayName: target, rules: { reasoning: { adaptive: true } } })).toBe('GPT-5.4 (adaptive reasoning)');
-    expect(composeAliasDisplayName({ targetDisplayName: target, rules: { reasoning: { summary: 'detailed' } } })).toBe('GPT-5.4 (detailed summary)');
-    expect(composeAliasDisplayName({ targetDisplayName: target, rules: { verbosity: 'low' } })).toBe('GPT-5.4 (low verbosity)');
-    expect(composeAliasDisplayName({ targetDisplayName: target, rules: { serviceTier: 'priority' } })).toBe('GPT-5.4 (priority tier)');
-  });
-
-  test('sorts anthropicBeta tokens and joins with slashes', () => {
-    expect(
-      composeAliasDisplayName({
-        targetDisplayName: 'Claude',
-        rules: { anthropicBeta: ['extended-thinking', 'fast-mode-2026-02-01'] },
-      }),
-    ).toBe('Claude (extended-thinking/fast-mode-2026-02-01)');
-    expect(
-      composeAliasDisplayName({
-        targetDisplayName: 'Claude',
-        rules: { anthropicBeta: ['fast-mode-2026-02-01', 'extended-thinking'] },
-      }),
-    ).toBe('Claude (extended-thinking/fast-mode-2026-02-01)');
-  });
-
-  test('drops anthropicBeta when the token list is empty', () => {
-    expect(
-      composeAliasDisplayName({
-        targetDisplayName: 'Claude',
-        rules: { anthropicBeta: [] },
-      }),
-    ).toBe('Claude');
-  });
-
-  test('joins multiple fields with comma in deterministic order', () => {
-    expect(
-      composeAliasDisplayName({
-        targetDisplayName: 'GPT-5.4',
-        rules: {
-          reasoning: { effort: 'low', summary: 'concise' },
-          verbosity: 'high',
-          serviceTier: 'flex',
-        },
-      }),
-    ).toBe('GPT-5.4 (low effort, concise summary, high verbosity, flex tier)');
-  });
-});
-
-describe('formatAliasRulesInline', () => {
-  test('returns empty string when no rule applies', () => {
-    expect(formatAliasRulesInline({})).toBe('');
-  });
-
-  test('returns each rule field with the same compact wording as the parenthesized suffix, sans parens', () => {
-    expect(formatAliasRulesInline({ reasoning: { effort: 'low' } })).toBe('low effort');
-    expect(formatAliasRulesInline({ reasoning: { budgetTokens: 4096 } })).toBe('4096tk reasoning');
-    expect(formatAliasRulesInline({ reasoning: { adaptive: true } })).toBe('adaptive reasoning');
-    expect(formatAliasRulesInline({ reasoning: { summary: 'detailed' } })).toBe('detailed summary');
-  });
-
-  test('joins multiple fields with comma in the same order composeAliasDisplayName uses', () => {
-    expect(
-      formatAliasRulesInline({
-        reasoning: { effort: 'low', summary: 'detailed' },
-        verbosity: 'high',
-        serviceTier: 'fast',
-      }),
-    ).toBe('low effort, detailed summary, high verbosity, fast tier');
-  });
-
-  test('sorts anthropicBeta tokens and joins with slashes', () => {
-    expect(
-      formatAliasRulesInline({ anthropicBeta: ['fast-mode-2026-02-01', 'extended-thinking'] }),
-    ).toBe('extended-thinking/fast-mode-2026-02-01');
-  });
-});

From c2fee82c4445adb6d874e0b7deded14128f159b5 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 17:19:22 +0800
Subject: [PATCH 048/170] feat(aliases): v2 migration + seed

Single table with JSON `targets` column. Each alias picks a kind
(chat/embedding/image), a selection strategy (first-available/random),
optional display name, visible-in-models flag, and a list of target
entries with per-target rules.

Seed `codex-auto-review` to "prefer the real model, else gpt-5.4 with
low reasoning effort". The real-id-first target makes the alias an
intentional shadow of the real model: when an upstream exposes the
real `codex-auto-review`, it wins; otherwise the configured fallback
kicks in.
---
 .../gateway/migrations/0046_model_aliases.sql | 36 +++++++++++++++++++
 1 file changed, 36 insertions(+)
 create mode 100644 packages/gateway/migrations/0046_model_aliases.sql

diff --git a/packages/gateway/migrations/0046_model_aliases.sql b/packages/gateway/migrations/0046_model_aliases.sql
new file mode 100644
index 000000000..ea20c4d81
--- /dev/null
+++ b/packages/gateway/migrations/0046_model_aliases.sql
@@ -0,0 +1,36 @@
+CREATE TABLE model_aliases (
+  name TEXT PRIMARY KEY,
+  kind TEXT NOT NULL CHECK (kind IN ('chat', 'embedding', 'image')),
+  selection TEXT NOT NULL CHECK (selection IN ('random', 'first-available')),
+  display_name TEXT,
+  visible_in_models_list INTEGER NOT NULL DEFAULT 1 CHECK (visible_in_models_list IN (0, 1)),
+  targets TEXT NOT NULL,
+  sort_order INTEGER NOT NULL DEFAULT 0,
+  created_at TEXT NOT NULL,
+  updated_at TEXT NOT NULL
+);
+
+CREATE INDEX idx_model_aliases_sort ON model_aliases (sort_order, created_at);
+
+INSERT INTO model_aliases (
+  name,
+  kind,
+  selection,
+  display_name,
+  visible_in_models_list,
+  targets,
+  sort_order,
+  created_at,
+  updated_at
+)
+VALUES (
+  'codex-auto-review',
+  'chat',
+  'first-available',
+  'Codex Auto Review',
+  1,
+  json('[{"target_model_id":"codex-auto-review","rules":{}},{"target_model_id":"gpt-5.4","rules":{"reasoning":{"effort":"low"}}}]'),
+  0,
+  strftime('%Y-%m-%dT%H:%M:%fZ', 'now'),
+  strftime('%Y-%m-%dT%H:%M:%fZ', 'now')
+);

From 7c8034bf3fdfabb415b6e00d75233750536e7e1e Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 17:36:44 +0800
Subject: [PATCH 049/170] feat(aliases): backend types + repo + routes
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Wire up the v2 model-alias backend on top of the new schema:

- packages/protocols/src/common/aliases.ts — shared snake_case wire DTO
  (ModelAlias, AliasTarget, ChatAliasRules, AliasKind, AliasSelection,
  ReasoningEffort, ReasoningSummary, Verbosity, ServiceTier). Re-exported
  through @floway-dev/protocols/common so the gateway and the dashboard
  consume one source of truth.
- ModelAliasRecord + ModelAliasesRepo in repo/types.ts. Repo contract:
  list / getByName / insert (throws on PK collision) / update(oldName,
  record) with atomic rename semantics / delete / deleteAll.
- SqlModelAliasesRepo + MemoryModelAliasesRepo implementing the contract.
  SQL persists targets as a JSON column; rename runs INSERT-then-DELETE
  through the batch primitive so D1 / sql.js apply it atomically.
- control-plane/model-aliases/{serialize,routes}.ts — camelCase <->
  snake_case projection plus four CRUD handlers (GET / POST / PUT :name /
  DELETE :name). POST collisions return 409, rename collisions return
  409, missing rows return 404, deletes are idempotent (204 whether or
  not a row existed).
- Zod schemas createAliasBody + updateAliasBody with a superRefine pass
  that gates per-target rules on the alias-level kind: chat-kind parses
  through chatAliasRulesSchema, other kinds require empty rules.
- Routes registered inside the admin-only group alongside upstreams /
  proxies.
- Tests: cross-backend repo scenarios (memory + sql.js), RPC-client
  route scenarios, and snake/camel round-trip.

Scope-limited to the backend surface; the data-plane resolver,
/v1/models alias listing, and dashboard pieces stay on separate
follow-up tasks.
---
 .../control-plane/model-aliases/repo_test.ts  | 153 +++++++++++++
 .../src/control-plane/model-aliases/routes.ts |  73 ++++++
 .../model-aliases/routes_test.ts              | 213 ++++++++++++++++++
 .../control-plane/model-aliases/serialize.ts  |  51 +++++
 .../model-aliases/serialize_test.ts           |  62 +++++
 packages/gateway/src/control-plane/routes.ts  |   9 +-
 packages/gateway/src/control-plane/schemas.ts |  83 +++++++
 packages/gateway/src/repo/memory.ts           |  54 +++++
 packages/gateway/src/repo/sql.ts              | 151 ++++++++++++-
 packages/gateway/src/repo/types.ts            |  36 ++-
 packages/protocols/src/common/aliases.ts      |  79 +++++++
 packages/protocols/src/common/index.ts        |   1 +
 12 files changed, 962 insertions(+), 3 deletions(-)
 create mode 100644 packages/gateway/src/control-plane/model-aliases/repo_test.ts
 create mode 100644 packages/gateway/src/control-plane/model-aliases/routes.ts
 create mode 100644 packages/gateway/src/control-plane/model-aliases/routes_test.ts
 create mode 100644 packages/gateway/src/control-plane/model-aliases/serialize.ts
 create mode 100644 packages/gateway/src/control-plane/model-aliases/serialize_test.ts
 create mode 100644 packages/protocols/src/common/aliases.ts

diff --git a/packages/gateway/src/control-plane/model-aliases/repo_test.ts b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
new file mode 100644
index 000000000..8a56e5f96
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
@@ -0,0 +1,153 @@
+// Cross-backend tests for the model aliases repo. Every scenario runs on both
+// backends: memory pins the repo contract, while SQL (sql.js applying every
+// migration) catches schema drift, JSON round-trips, and rename atomicity.
+
+import { test } from 'vitest';
+
+import { InMemoryRepo } from '../../repo/memory.ts';
+import { SqlRepo } from '../../repo/sql.ts';
+import { createSqliteTestDb } from '../../repo/test-sqlite.ts';
+import type { ModelAliasRecord, Repo } from '../../repo/types.ts';
+import { assertEquals, assertExists, assertRejects } from '@floway-dev/test-utils';
+
+const REPO_BACKENDS: Array<readonly [string, () => Promise<Repo>]> = [
+  ['memory', async () => new InMemoryRepo()],
+  ['sql', async () => new SqlRepo(await createSqliteTestDb())],
+];
+
+const aliasFixture = (overrides: Partial<ModelAliasRecord> = {}): ModelAliasRecord => ({
+  name: 'gpt-fast',
+  kind: 'chat',
+  selection: 'first-available',
+  displayName: null,
+  visibleInModelsList: true,
+  targets: [
+    { target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } },
+  ],
+  sortOrder: 0,
+  createdAt: '2026-06-26T00:00:00.000Z',
+  updatedAt: '2026-06-26T00:00:00.000Z',
+  ...overrides,
+});
+
+for (const [backend, makeRepo] of REPO_BACKENDS) {
+  // The 0046 migration seeds `codex-auto-review`; every test starts from a
+  // known-empty state so assertions on row counts and ordering stay stable.
+  const freshRepo = async (): Promise<Repo> => {
+    const repo = await makeRepo();
+    await repo.modelAliases.deleteAll();
+    return repo;
+  };
+
+  test(`[${backend}] insert then list returns the row`, async () => {
+    const repo = await freshRepo();
+    await repo.modelAliases.insert(aliasFixture());
+    const list = await repo.modelAliases.list();
+    assertEquals(list.length, 1);
+    assertEquals(list[0].name, 'gpt-fast');
+    assertEquals(list[0].targets[0].target_model_id, 'gpt-5.4');
+  });
+
+  test(`[${backend}] insert collision throws`, async () => {
+    const repo = await freshRepo();
+    await repo.modelAliases.insert(aliasFixture());
+    await assertRejects(() => repo.modelAliases.insert(aliasFixture()));
+  });
+
+  test(`[${backend}] getByName returns null when no row matches`, async () => {
+    const repo = await freshRepo();
+    assertEquals(await repo.modelAliases.getByName('nope'), null);
+  });
+
+  test(`[${backend}] update with same name preserves createdAt and refreshes updatedAt`, async () => {
+    const repo = await freshRepo();
+    await repo.modelAliases.insert(aliasFixture({ createdAt: '2026-01-01T00:00:00.000Z', updatedAt: '2026-01-01T00:00:00.000Z' }));
+    await repo.modelAliases.update('gpt-fast', aliasFixture({
+      createdAt: '2026-01-01T00:00:00.000Z',
+      updatedAt: '2026-06-26T12:00:00.000Z',
+      displayName: 'GPT Fast',
+    }));
+    const after = await repo.modelAliases.getByName('gpt-fast');
+    assertExists(after);
+    assertEquals(after.createdAt, '2026-01-01T00:00:00.000Z');
+    assertEquals(after.updatedAt, '2026-06-26T12:00:00.000Z');
+    assertEquals(after.displayName, 'GPT Fast');
+  });
+
+  test(`[${backend}] update with different name (rename) moves the row`, async () => {
+    const repo = await freshRepo();
+    await repo.modelAliases.insert(aliasFixture({ createdAt: '2026-01-01T00:00:00.000Z' }));
+    await repo.modelAliases.update('gpt-fast', aliasFixture({
+      name: 'gpt-fastest',
+      createdAt: '2026-01-01T00:00:00.000Z',
+      updatedAt: '2026-06-26T12:00:00.000Z',
+    }));
+    assertEquals(await repo.modelAliases.getByName('gpt-fast'), null);
+    const renamed = await repo.modelAliases.getByName('gpt-fastest');
+    assertExists(renamed);
+    assertEquals(renamed.createdAt, '2026-01-01T00:00:00.000Z');
+  });
+
+  test(`[${backend}] rename to an existing name throws and leaves both rows intact`, async () => {
+    const repo = await freshRepo();
+    await repo.modelAliases.insert(aliasFixture({ name: 'gpt-fast' }));
+    await repo.modelAliases.insert(aliasFixture({ name: 'gpt-slow' }));
+    await assertRejects(() => repo.modelAliases.update('gpt-fast', aliasFixture({ name: 'gpt-slow' })));
+    assertExists(await repo.modelAliases.getByName('gpt-fast'));
+    assertExists(await repo.modelAliases.getByName('gpt-slow'));
+  });
+
+  test(`[${backend}] update on a missing name throws`, async () => {
+    const repo = await freshRepo();
+    await assertRejects(() => repo.modelAliases.update('nope', aliasFixture({ name: 'nope' })));
+  });
+
+  test(`[${backend}] delete returns true when present, false when absent`, async () => {
+    const repo = await freshRepo();
+    await repo.modelAliases.insert(aliasFixture());
+    assertEquals(await repo.modelAliases.delete('gpt-fast'), true);
+    assertEquals(await repo.modelAliases.delete('gpt-fast'), false);
+  });
+
+  test(`[${backend}] list orders by (sortOrder, createdAt)`, async () => {
+    const repo = await freshRepo();
+    await repo.modelAliases.insert(aliasFixture({ name: 'a', sortOrder: 1, createdAt: '2026-01-01T00:00:00.000Z' }));
+    await repo.modelAliases.insert(aliasFixture({ name: 'b', sortOrder: 0, createdAt: '2026-02-01T00:00:00.000Z' }));
+    await repo.modelAliases.insert(aliasFixture({ name: 'c', sortOrder: 0, createdAt: '2026-01-15T00:00:00.000Z' }));
+    const list = await repo.modelAliases.list();
+    assertEquals(list.map(r => r.name), ['c', 'b', 'a']);
+  });
+
+  test(`[${backend}] targets JSON round-trips multi-target chat rules`, async () => {
+    const repo = await freshRepo();
+    await repo.modelAliases.insert(aliasFixture({
+      name: 'multi',
+      targets: [
+        { target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'high', adaptive: true } } },
+        { target_model_id: 'gpt-4.1', rules: { verbosity: 'low', serviceTier: 'priority' } },
+        { target_model_id: 'gpt-3.5', rules: {} },
+      ],
+    }));
+    const row = await repo.modelAliases.getByName('multi');
+    assertExists(row);
+    assertEquals(row.targets.length, 3);
+    assertEquals(row.targets[0].rules, { reasoning: { effort: 'high', adaptive: true } });
+    assertEquals(row.targets[1].rules, { verbosity: 'low', serviceTier: 'priority' });
+    assertEquals(row.targets[2].rules, {});
+  });
+
+  test(`[${backend}] visibleInModelsList=false round-trips`, async () => {
+    const repo = await freshRepo();
+    await repo.modelAliases.insert(aliasFixture({ visibleInModelsList: false }));
+    const row = await repo.modelAliases.getByName('gpt-fast');
+    assertEquals(row?.visibleInModelsList, false);
+  });
+
+  test(`[${backend}] deleteAll wipes every row`, async () => {
+    const repo = await freshRepo();
+    await repo.modelAliases.insert(aliasFixture({ name: 'a' }));
+    await repo.modelAliases.insert(aliasFixture({ name: 'b' }));
+    await repo.modelAliases.deleteAll();
+    assertEquals((await repo.modelAliases.list()).length, 0);
+  });
+}
diff --git a/packages/gateway/src/control-plane/model-aliases/routes.ts b/packages/gateway/src/control-plane/model-aliases/routes.ts
new file mode 100644
index 000000000..49956533c
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/routes.ts
@@ -0,0 +1,73 @@
+// Admin-only CRUD for model aliases. Wire shape (snake_case) is documented in
+// `@floway-dev/protocols/common`; this layer translates between the wire DTO
+// and the camelCase `ModelAliasRecord` the repo stores.
+
+import type { Context } from 'hono';
+
+import { recordToWire, wireToRecord } from './serialize.ts';
+import { type CtxWithJson } from '../../middleware/zod-validator.ts';
+import { getRepo } from '../../repo/index.ts';
+import type { ModelAliasRecord } from '../../repo/types.ts';
+import type { createAliasBody, updateAliasBody } from '../schemas.ts';
+
+// Default position for a newly created alias: one past the highest sortOrder
+// currently stored. The -1 seed makes an empty list yield 0.
+const nextSortOrder = (existing: readonly ModelAliasRecord[]): number =>
+  1 + existing.reduce((highest, { sortOrder }) => Math.max(highest, sortOrder), -1);
+
+// GET /api/aliases — every alias, in the repo's list order, as wire DTOs.
+export const listAliases = async (c: Context) => {
+  return c.json((await getRepo().modelAliases.list()).map(recordToWire));
+};
+
+// POST /api/aliases — create an alias. Responds 409 when the name is already
+// taken, otherwise 201 with the stored row in wire shape.
+export const createAlias = async (c: CtxWithJson<typeof createAliasBody>) => {
+  const body = c.req.valid('json');
+  const repo = getRepo();
+
+  if (await repo.modelAliases.getByName(body.name)) {
+    return c.json({ error: `Alias ${body.name} already exists` }, 409);
+  }
+
+  // Append after the current maximum unless the client picked a position. The
+  // table is only listed in that case; an explicit sort_order skips the scan.
+  const sortOrder = body.sort_order ?? nextSortOrder(await repo.modelAliases.list());
+  const now = new Date().toISOString();
+  const record = wireToRecord(body, { sortOrder, createdAt: now, updatedAt: now });
+
+  await repo.modelAliases.insert(record);
+  return c.json(recordToWire(record), 201);
+};
+
+// PUT /api/aliases/:name — replace the alias stored under the path name. A
+// body name that differs from the path name renames the row.
+export const updateAlias = async (c: CtxWithJson<typeof updateAliasBody>) => {
+  const oldName = c.req.param('name') ?? '';
+  const body = c.req.valid('json');
+  const repo = getRepo();
+
+  const current = await repo.modelAliases.getByName(oldName);
+  if (!current) return c.json({ error: 'Alias not found' }, 404);
+
+  // A rename must not land on a name that another row already owns.
+  if (body.name !== oldName && (await repo.modelAliases.getByName(body.name))) {
+    return c.json({ error: `Alias ${body.name} already exists` }, 409);
+  }
+
+  // sortOrder falls back to the stored value when the body omits it; createdAt
+  // is always carried over, and updatedAt is stamped with the current time.
+  const sortOrder = body.sort_order ?? current.sortOrder;
+  const updatedAt = new Date().toISOString();
+  const next = wireToRecord(body, { sortOrder, createdAt: current.createdAt, updatedAt });
+  await repo.modelAliases.update(oldName, next);
+  return c.json(recordToWire(next));
+};
+
+// DELETE /api/aliases/:name — idempotent. The repo's "row removed?" result is
+// ignored on purpose, so the reply is 204 whether or not the alias existed.
+export const deleteAlias = async (c: Context) => {
+  const target = c.req.param('name') ?? '';
+  await getRepo().modelAliases.delete(target);
+  return c.body(null, 204);
+};
diff --git a/packages/gateway/src/control-plane/model-aliases/routes_test.ts b/packages/gateway/src/control-plane/model-aliases/routes_test.ts
new file mode 100644
index 000000000..328cb88ed
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/routes_test.ts
@@ -0,0 +1,213 @@
+import { test } from 'vitest';
+
+import { requestApp, setupAppTest } from '../../test-helpers.ts';
+import type { ModelAlias } from '@floway-dev/protocols/common';
+import { assertEquals, assertExists } from '@floway-dev/test-utils';
+
+// RequestInit for an admin-session call. With no body the verb defaults to
+// GET and no payload is attached; otherwise the body is JSON-encoded and the
+// verb defaults to POST. An explicit `method` always wins.
+const authed = (adminSession: string, body?: unknown, method?: string): RequestInit => {
+  const headers = { 'content-type': 'application/json', 'x-floway-session': adminSession };
+  if (body === undefined) return { method: method ?? 'GET', headers };
+  return { method: method ?? 'POST', headers, body: JSON.stringify(body) };
+};
+
+// Verb-specific wrappers: PUT goes through authed(); DELETE sends only the session header.
+const putAuthed = (adminSession: string, body: unknown): RequestInit => authed(adminSession, body, 'PUT');
+const deleteAuthed = (adminSession: string): RequestInit => {
+  return { method: 'DELETE', headers: { 'x-floway-session': adminSession } };
+};
+
+// Valid payload for a single-target chat alias named `gpt-fast`. `overrides`
+// is spread last, so each key it carries replaces the default wholesale.
+const baseBody = (overrides: Record<string, unknown> = {}) => {
+  const target = { target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } };
+  const defaults = {
+    name: 'gpt-fast', kind: 'chat', selection: 'first-available',
+    display_name: null, visible_in_models_list: true,
+    targets: [target],
+  };
+  return { ...defaults, ...overrides };
+};
+
+test('GET /api/aliases lists every row in sort order', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.deleteAll();
+  await repo.modelAliases.insert({
+    name: 'b', kind: 'chat', selection: 'random', displayName: null, visibleInModelsList: true,
+    targets: [{ target_model_id: 'm1', rules: {} }],
+    sortOrder: 1, createdAt: '2026-01-01T00:00:00.000Z', updatedAt: '2026-01-01T00:00:00.000Z',
+  });
+  await repo.modelAliases.insert({
+    name: 'a', kind: 'chat', selection: 'random', displayName: null, visibleInModelsList: true,
+    targets: [{ target_model_id: 'm2', rules: {} }],
+    sortOrder: 0, createdAt: '2026-01-02T00:00:00.000Z', updatedAt: '2026-01-02T00:00:00.000Z',
+  });
+
+  const resp = await requestApp('/api/aliases', authed(adminSession));
+  assertEquals(resp.status, 200);
+  const list = (await resp.json()) as ModelAlias[];
+  assertEquals(list.map(r => r.name), ['a', 'b']);
+});
+
+test('POST /api/aliases creates an alias and returns the snake_case wire shape', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.deleteAll();
+
+  const resp = await requestApp('/api/aliases', authed(adminSession, baseBody()));
+  assertEquals(resp.status, 201);
+  const created = (await resp.json()) as ModelAlias;
+  assertEquals(created.name, 'gpt-fast');
+  assertEquals(created.visible_in_models_list, true);
+  assertEquals(created.targets[0].target_model_id, 'gpt-5.4');
+
+  const stored = await repo.modelAliases.getByName('gpt-fast');
+  assertExists(stored);
+  assertEquals(stored.visibleInModelsList, true);
+});
+
+test('POST /api/aliases rejects a name collision with 409', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.deleteAll();
+  await requestApp('/api/aliases', authed(adminSession, baseBody()));
+
+  const resp = await requestApp('/api/aliases', authed(adminSession, baseBody()));
+  assertEquals(resp.status, 409);
+  const body = (await resp.json()) as { error?: string };
+  assertEquals(body.error?.includes('already exists'), true);
+});
+
+test('PUT /api/aliases/:name updates rules and refreshes updated_at', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.deleteAll();
+  await requestApp('/api/aliases', authed(adminSession, baseBody()));
+  const before = await repo.modelAliases.getByName('gpt-fast');
+  assertExists(before);
+  await new Promise(resolve => setTimeout(resolve, 5));
+
+  const resp = await requestApp(
+    '/api/aliases/gpt-fast',
+    putAuthed(adminSession, baseBody({ display_name: 'GPT Fast', targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'high' } } }] })),
+  );
+  assertEquals(resp.status, 200);
+  const updated = (await resp.json()) as ModelAlias;
+  assertEquals(updated.display_name, 'GPT Fast');
+  assertEquals(updated.targets[0].rules, { reasoning: { effort: 'high' } });
+  // createdAt is preserved; updatedAt is fresh.
+  assertEquals(updated.created_at, before.createdAt);
+  if (updated.updated_at === before.updatedAt) throw new Error('updated_at did not refresh');
+});
+
+test('PUT /api/aliases/:name with a different body.name renames the row', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.deleteAll();
+  await requestApp('/api/aliases', authed(adminSession, baseBody()));
+
+  const resp = await requestApp(
+    '/api/aliases/gpt-fast',
+    putAuthed(adminSession, baseBody({ name: 'gpt-fastest' })),
+  );
+  assertEquals(resp.status, 200);
+  assertEquals(await repo.modelAliases.getByName('gpt-fast'), null);
+  assertExists(await repo.modelAliases.getByName('gpt-fastest'));
+});
+
+test('PUT /api/aliases/:name rename collision returns 409', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.deleteAll();
+  await requestApp('/api/aliases', authed(adminSession, baseBody({ name: 'gpt-fast' })));
+  await requestApp('/api/aliases', authed(adminSession, baseBody({ name: 'gpt-slow' })));
+
+  const resp = await requestApp(
+    '/api/aliases/gpt-fast',
+    putAuthed(adminSession, baseBody({ name: 'gpt-slow' })),
+  );
+  assertEquals(resp.status, 409);
+});
+
+test('PUT /api/aliases/:name on a missing alias returns 404', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.deleteAll();
+
+  const resp = await requestApp('/api/aliases/nope', putAuthed(adminSession, baseBody({ name: 'nope' })));
+  assertEquals(resp.status, 404);
+});
+
+test('DELETE /api/aliases/:name returns 204 when present', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.deleteAll();
+  await requestApp('/api/aliases', authed(adminSession, baseBody()));
+
+  const resp = await requestApp('/api/aliases/gpt-fast', deleteAuthed(adminSession));
+  assertEquals(resp.status, 204);
+  assertEquals(await repo.modelAliases.getByName('gpt-fast'), null);
+});
+
+test('DELETE /api/aliases/:name is idempotent — 204 even when the row is absent', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.deleteAll();
+
+  const resp = await requestApp('/api/aliases/missing', deleteAuthed(adminSession));
+  assertEquals(resp.status, 204);
+});
+
+test('POST /api/aliases rejects an empty targets array with 400', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.deleteAll();
+
+  const resp = await requestApp('/api/aliases', authed(adminSession, baseBody({ targets: [] })));
+  assertEquals(resp.status, 400);
+});
+
+test('POST /api/aliases rejects an empty target_model_id with 400', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.deleteAll();
+
+  const resp = await requestApp(
+    '/api/aliases',
+    authed(adminSession, baseBody({ targets: [{ target_model_id: '', rules: {} }] })),
+  );
+  assertEquals(resp.status, 400);
+});
+
+test('POST /api/aliases rejects non-empty rules on kind=embedding with 400', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.deleteAll();
+
+  const resp = await requestApp(
+    '/api/aliases',
+    authed(adminSession, baseBody({
+      kind: 'embedding',
+      targets: [{ target_model_id: 'text-embedding-3', rules: { verbosity: 'low' } }],
+    })),
+  );
+  assertEquals(resp.status, 400);
+});
+
+test('POST /api/aliases accepts kind=embedding with empty rules', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.deleteAll();
+
+  const resp = await requestApp(
+    '/api/aliases',
+    authed(adminSession, baseBody({
+      kind: 'embedding',
+      targets: [{ target_model_id: 'text-embedding-3', rules: {} }],
+    })),
+  );
+  assertEquals(resp.status, 201);
+});
+
+test('POST /api/aliases rejects unknown reasoning fields on a chat target with 400', async () => {
+  const { repo, adminSession } = await setupAppTest();
+  await repo.modelAliases.deleteAll();
+
+  const resp = await requestApp(
+    '/api/aliases',
+    authed(adminSession, baseBody({
+      targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { bogus: 1 } } }],
+    })),
+  );
+  assertEquals(resp.status, 400);
+});
diff --git a/packages/gateway/src/control-plane/model-aliases/serialize.ts b/packages/gateway/src/control-plane/model-aliases/serialize.ts
new file mode 100644
index 000000000..a3ca87ec3
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/serialize.ts
@@ -0,0 +1,51 @@
+// Snake_case wire <-> camelCase record conversion for model aliases. The wire
+// shape (`ModelAlias`) lives in `@floway-dev/protocols/common` so the
+// dashboard and the control plane share one source of truth; this file is
+// the only place those two shapes meet.
+
+import type { ModelAliasRecord } from '../../repo/types.ts';
+import type { AliasKind, AliasSelection, AliasTarget, ModelAlias } from '@floway-dev/protocols/common';
+
+// Project a stored record onto the snake_case wire DTO. Fields are copied
+// one-for-one; `targets` is passed through by reference, not cloned.
+export const recordToWire = (record: ModelAliasRecord): ModelAlias => {
+  const { name, kind, selection, displayName, visibleInModelsList } = record;
+  const { targets, sortOrder, createdAt, updatedAt } = record;
+  return {
+    name, kind, selection, display_name: displayName,
+    visible_in_models_list: visibleInModelsList,
+    targets, sort_order: sortOrder, created_at: createdAt, updated_at: updatedAt,
+  };
+};
+
+// Wire payload accepted by the create / update body schemas. Only `sort_order`
+// is optional, and wireToRecord never reads it: the caller resolves the final
+// sort order and both timestamps and hands them over as a separate argument.
+export interface ModelAliasWireInput {
+  name: string;
+  kind: AliasKind;
+  selection: AliasSelection;
+  display_name: string | null;
+  visible_in_models_list: boolean;
+  targets: AliasTarget[];
+  sort_order?: number;
+}
+
+// Assemble a stored record from a validated wire payload plus caller-supplied
+// metadata. `wire.sort_order` is deliberately not read here: the caller
+// resolves the final position and passes it in as `meta.sortOrder`, alongside
+// both timestamps. `targets` is passed through by reference.
+export const wireToRecord = (
+  wire: ModelAliasWireInput,
+  meta: { sortOrder: number; createdAt: string; updatedAt: string },
+): ModelAliasRecord => {
+  const { name, kind, selection, targets } = wire;
+  const { sortOrder, createdAt, updatedAt } = meta;
+  return {
+    name, kind, selection,
+    displayName: wire.display_name,
+    visibleInModelsList: wire.visible_in_models_list,
+    targets,
+    sortOrder, createdAt, updatedAt,
+  };
+};
diff --git a/packages/gateway/src/control-plane/model-aliases/serialize_test.ts b/packages/gateway/src/control-plane/model-aliases/serialize_test.ts
new file mode 100644
index 000000000..72be080ca
--- /dev/null
+++ b/packages/gateway/src/control-plane/model-aliases/serialize_test.ts
@@ -0,0 +1,62 @@
+import { test } from 'vitest';
+
+import { recordToWire, wireToRecord } from './serialize.ts';
+import type { ModelAliasRecord } from '../../repo/types.ts';
+import { assertEquals } from '@floway-dev/test-utils';
+
+const record: ModelAliasRecord = {
+  name: 'codex-auto-review',
+  kind: 'chat',
+  selection: 'first-available',
+  displayName: 'Codex Auto Review',
+  visibleInModelsList: true,
+  targets: [
+    { target_model_id: 'codex-auto-review', rules: {} },
+    { target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } },
+  ],
+  sortOrder: 3,
+  createdAt: '2026-06-26T00:00:00.000Z',
+  updatedAt: '2026-06-26T12:00:00.000Z',
+};
+
+test('recordToWire flips camelCase fields to snake_case', () => {
+  const wire = recordToWire(record);
+  assertEquals(wire.name, 'codex-auto-review');
+  assertEquals(wire.kind, 'chat');
+  assertEquals(wire.selection, 'first-available');
+  assertEquals(wire.display_name, 'Codex Auto Review');
+  assertEquals(wire.visible_in_models_list, true);
+  assertEquals(wire.sort_order, 3);
+  assertEquals(wire.created_at, '2026-06-26T00:00:00.000Z');
+  assertEquals(wire.updated_at, '2026-06-26T12:00:00.000Z');
+  assertEquals(wire.targets, record.targets);
+});
+
+test('wireToRecord roundtrips back to the original record', () => {
+  const wire = recordToWire(record);
+  const roundTripped = wireToRecord(wire, {
+    sortOrder: wire.sort_order,
+    createdAt: wire.created_at,
+    updatedAt: wire.updated_at,
+  });
+  assertEquals(roundTripped, record);
+});
+
+test('wireToRecord uses meta sortOrder when the wire payload omits it', () => {
+  const { sort_order: _drop, ...partial } = recordToWire(record);
+  const built = wireToRecord(partial, {
+    sortOrder: 7,
+    createdAt: '2026-01-01T00:00:00.000Z',
+    updatedAt: '2026-06-26T12:00:00.000Z',
+  });
+  assertEquals(built.sortOrder, 7);
+  assertEquals(built.createdAt, '2026-01-01T00:00:00.000Z');
+});
+
+test('wireToRecord preserves a null display_name', () => {
+  const built = wireToRecord(
+    { ...recordToWire(record), display_name: null },
+    { sortOrder: 0, createdAt: 'x', updatedAt: 'y' },
+  );
+  assertEquals(built.displayName, null);
+});
diff --git a/packages/gateway/src/control-plane/routes.ts b/packages/gateway/src/control-plane/routes.ts
index 94b5f06ff..c5aa25202 100644
--- a/packages/gateway/src/control-plane/routes.ts
+++ b/packages/gateway/src/control-plane/routes.ts
@@ -5,10 +5,11 @@ import { authLogin, authLogout, authMe } from './auth/routes.ts';
 import { copilotQuota } from './copilot-quota/routes.ts';
 import { exportData, importData } from './data-transfer/routes.ts';
 import { dumpRoutes } from './dump.ts';
+import { createAlias, deleteAlias, listAliases, updateAlias } from './model-aliases/routes.ts';
 import { controlPlaneModels } from './models/routes.ts';
 import { performanceOverview, performanceTelemetry } from './performance/routes.ts';
 import { createProxy, deleteProxy, listAllBackoffs, listProxies, listProxyBackoffs, resetProxyBackoffs, testProxy, updateProxy } from './proxies/routes.ts';
-import { authLoginBody, changeOwnPasswordBody, claudeCodeAuthorizeUrlBody, claudeCodeImportBody, claudeCodeProbeQuotaBody, claudeCodeRefreshNowBody, claudeCodeReimportBody, claudeCodeSetupTokenImportBody, claudeCodeSetupTokenReimportBody, codexAuthorizeUrlBody, codexImportBody, codexRefreshNowBody, codexReimportBody, copilotAuthPollBody, createKeyBody, createProxyBody, createUpstreamBody, createUserBody, exportQuery, fetchModelsBody, importBody, performanceQuery, resetBackoffBody, searchConfigSchema, searchUsageQuery, testProxyBody, tokenUsageQuery, updateKeyBody, updateProxyBody, updateUpstreamBody, updateUserBody } from './schemas.ts';
+import { authLoginBody, changeOwnPasswordBody, claudeCodeAuthorizeUrlBody, claudeCodeImportBody, claudeCodeProbeQuotaBody, claudeCodeRefreshNowBody, claudeCodeReimportBody, claudeCodeSetupTokenImportBody, claudeCodeSetupTokenReimportBody, codexAuthorizeUrlBody, codexImportBody, codexRefreshNowBody, codexReimportBody, copilotAuthPollBody, createAliasBody, createKeyBody, createProxyBody, createUpstreamBody, createUserBody, exportQuery, fetchModelsBody, importBody, performanceQuery, resetBackoffBody, searchConfigSchema, searchUsageQuery, testProxyBody, tokenUsageQuery, updateAliasBody, updateKeyBody, updateProxyBody, updateUpstreamBody, updateUserBody } from './schemas.ts';
 import { getSearchConfigRoute, putSearchConfigRoute, testSearchConfigRoute } from './search-config/routes.ts';
 import { searchUsage } from './search-usage/routes.ts';
 import { tokenUsage } from './token-usage/routes.ts';
@@ -97,6 +98,12 @@ export const controlPlaneRoutes = new Hono<{ Variables: AuthVars }>()
     .get('/proxies/:id/backoffs', listProxyBackoffs)
     .patch('/proxies/:id', zValidator('json', updateProxyBody), updateProxy)
     .delete('/proxies/:id', deleteProxy)
+    // Model aliases. Admin-only — alias config is gateway-wide tenant state,
+    // and the data-plane resolver runs above prefix routing for every request.
+    .get('/aliases', listAliases)
+    .post('/aliases', zValidator('json', createAliasBody), createAlias)
+    .put('/aliases/:name', zValidator('json', updateAliasBody), updateAlias)
+    .delete('/aliases/:name', deleteAlias)
     .get('/search-config', getSearchConfigRoute)
     .put('/search-config', zValidator('json', searchConfigSchema), putSearchConfigRoute)
     .post('/search-config/test', zValidator('json', searchConfigSchema), testSearchConfigRoute)
diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
index f718539ee..0fce131b4 100644
--- a/packages/gateway/src/control-plane/schemas.ts
+++ b/packages/gateway/src/control-plane/schemas.ts
@@ -593,6 +593,89 @@ export const searchConfigSchema = z.object({
   jina: z.object({ apiKey: z.string() }),
 });
 
+// --- model aliases ---
+
+// Per-target chat rules. Field names mirror the IR slot each value overlays —
+// `reasoning.effort` / `verbosity` / `serviceTier` flow verbatim onto the
+// outbound request, so the schema does not narrow them against the target's
+// advertised capability metadata (the spec calls for verbatim forwarding so
+// the operator can drive a feature the catalog doesn't yet advertise).
+const chatAliasReasoningSchema = z.object({
+  effort: z.enum(['none', 'low', 'medium', 'high', 'xhigh']).optional(),
+  budget_tokens: z.number().int().nonnegative().optional(),
+  adaptive: z.boolean().optional(),
+  summary: z.enum(['auto', 'concise', 'detailed', 'none']).optional(),
+  mandatory: z.boolean().optional(),
+}).strict();
+
+const chatAliasRulesSchema = z.object({
+  reasoning: chatAliasReasoningSchema.optional(),
+  verbosity: z.enum(['low', 'medium', 'high']).optional(),
+  serviceTier: z.enum(['default', 'flex', 'priority', 'scale', 'fast']).optional(),
+}).strict();
+
+// Rules are validated against the alias-level kind in a superRefine pass on
+// the body schema below — chat-kind aliases accept ChatAliasRules; other kinds
+// require an empty object. Each target_model_id is opaque (no `/` semantics
+// inside the alias layer), so the only structural check is non-emptiness.
+const aliasTargetSchema = z.object({
+  target_model_id: z.string().min(1),
+  rules: z.record(z.string(), z.unknown()),
+});
+
+const aliasBaseShape = {
+  name: z.string().min(1),
+  kind: z.enum(['chat', 'embedding', 'image']),
+  selection: z.enum(['random', 'first-available']),
+  display_name: z.string().min(1).nullable(),
+  visible_in_models_list: z.boolean(),
+  targets: z.array(aliasTargetSchema).min(1),
+  sort_order: z.number().int().optional(),
+};
+
+const aliasBodyCore = z.object(aliasBaseShape);
+
+// Cross-field check attached via superRefine: every target's `rules` is
+// validated against the alias-level `kind`. Chat aliases run `rules` through
+// chatAliasRulesSchema and re-report each inner issue under that target's
+// path; embedding / image aliases must carry an empty `rules` object.
+const aliasBodyRulesRefinement = (
+  value: z.infer<typeof aliasBodyCore>,
+  ctx: z.core.$RefinementCtx,
+): void => {
+  for (const [index, { rules }] of value.targets.entries()) {
+    const rulesPath = ['targets', index, 'rules'];
+    if (value.kind !== 'chat') {
+      if (Object.keys(rules).length === 0) continue;
+      ctx.issues.push({
+        code: 'custom',
+        message: `rules must be empty for kind=${value.kind}`,
+        path: rulesPath,
+        input: rules,
+      });
+      continue;
+    }
+
+    const parsed = chatAliasRulesSchema.safeParse(rules);
+    if (parsed.success) continue;
+    for (const { message, path } of parsed.error.issues) {
+      ctx.issues.push({
+        code: 'custom',
+        message,
+        path: [...rulesPath, ...path],
+        input: rules,
+      });
+    }
+  }
+};
+
+// Create and update share the same body shape — the difference is operational:
+// create rejects PK collisions, update reads the path `:name` as the old name
+// and treats a different `body.name` as a rename. Splitting them keeps the
+// type names self-documenting at the RPC-client surface.
+export const createAliasBody = aliasBodyCore.superRefine(aliasBodyRulesRefinement);
+export const updateAliasBody = aliasBodyCore.superRefine(aliasBodyRulesRefinement);
+
 // --- data transfer ---
 
 export const importBody = z.object({
diff --git a/packages/gateway/src/repo/memory.ts b/packages/gateway/src/repo/memory.ts
index 5a85dba39..49e30c5ef 100644
--- a/packages/gateway/src/repo/memory.ts
+++ b/packages/gateway/src/repo/memory.ts
@@ -13,6 +13,8 @@ import type {
   ApiKeyRepo,
   BackoffRow,
   CachedModelsRow,
+  ModelAliasesRepo,
+  ModelAliasRecord,
   ModelsCacheRepo,
   PerformanceDimensions,
   PerformanceErrorSample,
@@ -882,6 +884,56 @@ class MemoryProxyBackoffRepo implements ProxyBackoffRepo {
 
 const cloneBackoffRow = (row: BackoffRow): BackoffRow => ({ ...row });
 
+// Copy a record so callers and the store never share mutable state. The
+// top-level fields are copied by spread; `targets` (plain JSON, including the
+// nested rule objects) gets a full structuredClone.
+const cloneModelAliasRecord = (record: ModelAliasRecord): ModelAliasRecord => {
+  const targets = structuredClone(record.targets);
+  return { ...record, targets };
+};
+
+// In-memory alias store keyed by name. Precondition failures in insert/update
+// reject the returned promise (as the async SQL repo does) rather than throw.
+class MemoryModelAliasesRepo implements ModelAliasesRepo {
+  private store = new Map<string, ModelAliasRecord>();
+
+  list(): Promise<ModelAliasRecord[]> {
+    const rows = [...this.store.values()].map(cloneModelAliasRecord);
+    rows.sort((a, b) => a.sortOrder - b.sortOrder || a.createdAt.localeCompare(b.createdAt));
+    return Promise.resolve(rows);
+  }
+
+  getByName(name: string): Promise<ModelAliasRecord | null> {
+    const found = this.store.get(name);
+    return Promise.resolve(found ? cloneModelAliasRecord(found) : null);
+  }
+
+  insert(record: ModelAliasRecord): Promise<void> {
+    if (this.store.has(record.name)) return Promise.reject(new Error(`alias ${record.name} already exists`));
+    this.store.set(record.name, cloneModelAliasRecord(record));
+    return Promise.resolve();
+  }
+
+  update(oldName: string, record: ModelAliasRecord): Promise<void> {
+    if (!this.store.has(oldName)) return Promise.reject(new Error(`alias ${oldName} not found`));
+    if (oldName !== record.name && this.store.has(record.name)) {
+      return Promise.reject(new Error(`alias ${record.name} already exists`));
+    }
+    this.store.delete(oldName);
+    this.store.set(record.name, cloneModelAliasRecord(record));
+    return Promise.resolve();
+  }
+
+  delete(name: string): Promise<boolean> {
+    return Promise.resolve(this.store.delete(name));
+  }
+
+  deleteAll(): Promise<void> {
+    this.store.clear();
+    return Promise.resolve();
+  }
+}
+
 export class InMemoryRepo implements Repo {
   apiKeys: ApiKeyRepo;
   users: UsersRepo;
@@ -894,6 +946,7 @@ export class InMemoryRepo implements Repo {
   upstreams: UpstreamRepo;
   proxies: ProxyRepo;
   proxyBackoffs: ProxyBackoffRepo;
+  modelAliases: ModelAliasesRepo;
   responsesItems: ResponsesItemsRepo;
   responsesSnapshots: ResponsesSnapshotsRepo;
 
@@ -909,6 +962,7 @@ export class InMemoryRepo implements Repo {
     this.upstreams = new MemoryUpstreamRepo();
     this.proxies = new MemoryProxyRepo(this.upstreams);
     this.proxyBackoffs = new MemoryProxyBackoffRepo();
+    this.modelAliases = new MemoryModelAliasesRepo();
     this.responsesItems = new MemoryResponsesItemsRepo();
     this.responsesSnapshots = new MemoryResponsesSnapshotsRepo();
   }
diff --git a/packages/gateway/src/repo/sql.ts b/packages/gateway/src/repo/sql.ts
index b716d07e4..16645772e 100644
--- a/packages/gateway/src/repo/sql.ts
+++ b/packages/gateway/src/repo/sql.ts
@@ -7,6 +7,8 @@ import type {
   ApiKeyRepo,
   BackoffRow,
   CachedModelsRow,
+  ModelAliasesRepo,
+  ModelAliasRecord,
   ModelsCacheRepo,
   PerformanceDimensions,
   PerformanceErrorSample,
@@ -38,7 +40,7 @@ import { latencyBucketForMs } from '../shared/performance-histogram.ts';
 import { generateSessionToken } from '../shared/session-tokens.ts';
 import { assertWebSearchProviderName } from '../shared/web-search-providers.ts';
 import type { SqlDatabase, SqlPreparedStatement, SqlResult } from '@floway-dev/platform';
-import { BILLING_DIMENSIONS, type BillingDimension, type ModelPricing, resolveEffectivePricing, unitPriceForDimension } from '@floway-dev/protocols/common';
+import { BILLING_DIMENSIONS, type AliasKind, type AliasSelection, type AliasTarget, type BillingDimension, type ModelPricing, resolveEffectivePricing, unitPriceForDimension } from '@floway-dev/protocols/common';
 import type { ProxyFallbackEntry, ModelPrefixConfig, UpstreamModel, UpstreamProviderKind, UpstreamRecord } from '@floway-dev/provider';
 import { normalizeModelPrefix } from '@floway-dev/provider';
 
@@ -1585,6 +1587,151 @@ const toBackoffRow = (row: BackoffRowDb): BackoffRow => ({
   lastErrorAt: row.last_error_at,
 });
 
+interface ModelAliasRow {
+  name: string;
+  kind: string;
+  selection: string;
+  display_name: string | null;
+  visible_in_models_list: number;
+  targets: string;
+  sort_order: number;
+  created_at: string;
+  updated_at: string;
+}
+
+const MODEL_ALIAS_COLUMNS = 'name, kind, selection, display_name, visible_in_models_list, targets, sort_order, created_at, updated_at';
+
+// Decode the `targets` column; throws on invalid JSON or a non-array value.
+const parseAliasTargets = (raw: string, name: string): AliasTarget[] => {
+  try {
+    const decoded: unknown = JSON.parse(raw);
+    if (Array.isArray(decoded)) return decoded as AliasTarget[];
+  } catch (cause) {
+    throw new Error(`model_aliases.targets JSON is malformed for ${name}`, { cause });
+  }
+  throw new Error(`model_aliases.targets is not an array for ${name}`);
+};
+
+// Map a raw model_aliases row onto the camelCase record: any non-zero
+// visibility value becomes true and the `targets` JSON text is decoded.
+const toModelAliasRecord = (row: ModelAliasRow): ModelAliasRecord => {
+  const { name, display_name: displayName, sort_order: sortOrder } = row;
+  return {
+    name, kind: row.kind as AliasKind, selection: row.selection as AliasSelection, displayName,
+    visibleInModelsList: row.visible_in_models_list !== 0,
+    targets: parseAliasTargets(row.targets, name),
+    sortOrder, createdAt: row.created_at, updatedAt: row.updated_at,
+  };
+};
+
+// SQL-backed alias repo over the `model_aliases` table. Reads are mapped
+// through toModelAliasRecord; writes store the visibility flag as 1/0 and
+// `targets` as JSON text.
+class SqlModelAliasesRepo implements ModelAliasesRepo {
+  constructor(private db: SqlDatabase) {}
+
+  // Every row, ordered by sort_order and then created_at.
+  async list(): Promise<ModelAliasRecord[]> {
+    const query = `SELECT ${MODEL_ALIAS_COLUMNS} FROM model_aliases ORDER BY sort_order, created_at`;
+    const { results } = await this.db.prepare(query).all<ModelAliasRow>();
+    return results.map(toModelAliasRecord);
+  }
+
+  // The row stored under `name`, or null when there is none.
+  async getByName(name: string): Promise<ModelAliasRecord | null> {
+    const query = `SELECT ${MODEL_ALIAS_COLUMNS} FROM model_aliases WHERE name = ?`;
+    const row = await this.db.prepare(query).bind(name).first<ModelAliasRow>();
+    if (!row) return null;
+    return toModelAliasRecord(row);
+  }
+
+  // Add a new row. No existence pre-check happens here; a name collision is
+  // left to the INSERT itself.
+  async insert(record: ModelAliasRecord): Promise<void> {
+    await this.insertStatement(record).run();
+  }
+
+  // Replace the row stored under `oldName` with `record`; a different
+  // `record.name` renames the row. Throws `alias <oldName> not found` when
+  // no row is stored under `oldName`.
+  async update(oldName: string, record: ModelAliasRecord): Promise<void> {
+    if (oldName === record.name) {
+      // Name unchanged: overwrite every other column in place.
+      const result = await this.db
+        .prepare(
+          `UPDATE model_aliases SET
+             kind = ?,
+             selection = ?,
+             display_name = ?,
+             visible_in_models_list = ?,
+             targets = ?,
+             sort_order = ?,
+             created_at = ?,
+             updated_at = ?
+           WHERE name = ?`,
+        )
+        .bind(
+          record.kind,
+          record.selection,
+          record.displayName,
+          record.visibleInModelsList ? 1 : 0,
+          JSON.stringify(record.targets),
+          record.sortOrder,
+          record.createdAt,
+          record.updatedAt,
+          oldName,
+        )
+        .run();
+      const changed = result.meta.changes ?? 0;
+      if (changed === 0) throw new Error(`alias ${oldName} not found`);
+      return;
+    }
+
+    // Rename. Look the source row up first so a missing oldName fails before
+    // any write is issued. INSERT(new) and DELETE(old) are then handed to
+    // runStatements together; a collision on `record.name` surfaces as the
+    // INSERT's own error.
+    const source = await this.getByName(oldName);
+    if (!source) throw new Error(`alias ${oldName} not found`);
+
+    const addNew = this.insertStatement(record);
+    const dropOld = this.db.prepare('DELETE FROM model_aliases WHERE name = ?').bind(oldName);
+    await runStatements(this.db, [addNew, dropOld]);
+  }
+
+  // Remove the row stored under `name`. Resolves to true only when a row was
+  // actually deleted.
+  async delete(name: string): Promise<boolean> {
+    const statement = this.db.prepare('DELETE FROM model_aliases WHERE name = ?').bind(name);
+    const { meta } = await statement.run();
+    return (meta.changes ?? 0) > 0;
+  }
+
+  // Remove every row.
+  async deleteAll(): Promise<void> {
+    await this.db.prepare('DELETE FROM model_aliases').run();
+  }
+
+  // Build — without executing — an INSERT bound to every column of `record`,
+  // in MODEL_ALIAS_COLUMNS order. Shared by insert() and the rename branch of
+  // update() so the two bind lists cannot drift apart.
+  private insertStatement(record: ModelAliasRecord) {
+    return this.db
+      .prepare(`INSERT INTO model_aliases (${MODEL_ALIAS_COLUMNS}) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`)
+      .bind(
+        record.name,
+        record.kind,
+        record.selection,
+        record.displayName,
+        record.visibleInModelsList ? 1 : 0,
+        JSON.stringify(record.targets),
+        record.sortOrder,
+        record.createdAt,
+        record.updatedAt,
+      );
+  }
+}
+
 export class SqlRepo implements Repo {
   users: UsersRepo;
   sessions: SessionsRepo;
@@ -1597,6 +1744,7 @@ export class SqlRepo implements Repo {
   upstreams: UpstreamRepo;
   proxies: ProxyRepo;
   proxyBackoffs: ProxyBackoffRepo;
+  modelAliases: ModelAliasesRepo;
   responsesItems: ResponsesItemsRepo;
   responsesSnapshots: ResponsesSnapshotsRepo;
 
@@ -1612,6 +1760,7 @@ export class SqlRepo implements Repo {
     this.upstreams = new SqlUpstreamRepo(db);
     this.proxies = new SqlProxyRepo(db);
     this.proxyBackoffs = new SqlProxyBackoffRepo(db);
+    this.modelAliases = new SqlModelAliasesRepo(db);
     this.responsesItems = new SqlResponsesItemsRepo(db);
     this.responsesSnapshots = new SqlResponsesSnapshotsRepo(db);
   }
diff --git a/packages/gateway/src/repo/types.ts b/packages/gateway/src/repo/types.ts
index 0341d41ef..3ca32e5c0 100644
--- a/packages/gateway/src/repo/types.ts
+++ b/packages/gateway/src/repo/types.ts
@@ -1,6 +1,6 @@
 import type { HistogramBucket } from '../shared/performance-histogram.ts';
 import type { WebSearchProviderName } from '../shared/web-search-providers.ts';
-import type { BillingDimension, ModelPricing } from '@floway-dev/protocols/common';
+import type { AliasKind, AliasSelection, AliasTarget, BillingDimension, ModelPricing } from '@floway-dev/protocols/common';
 import type { UpstreamModel, UpstreamRecord } from '@floway-dev/provider';
 
 export interface ApiKey {
@@ -264,6 +264,39 @@ export interface ProxyBackoffRepo {
   deleteAll(): Promise<void>;
 }
 
+// One alias row. The wire DTO (`ModelAlias` in @floway-dev/protocols/common)
+// is the snake_case projection of this record; conversion lives in
+// control-plane/model-aliases/serialize.ts.
+export interface ModelAliasRecord {
+  name: string;
+  kind: AliasKind;
+  selection: AliasSelection;
+  // null = derive at render time from targets + rules.
+  displayName: string | null;
+  visibleInModelsList: boolean;
+  // Order is meaningful for selection=first-available; preserved (but
+  // ignored) for selection=random.
+  targets: AliasTarget[];
+  sortOrder: number;
+  createdAt: string;
+  updatedAt: string;
+}
+
+export interface ModelAliasesRepo {
+  list(): Promise<ModelAliasRecord[]>;
+  getByName(name: string): Promise<ModelAliasRecord | null>;
+  // Throws on primary-key collision so the route layer can surface a 409.
+  insert(record: ModelAliasRecord): Promise<void>;
+  // Replaces the row keyed by `oldName`. When oldName === record.name the
+  // call is a plain UPDATE; when they differ this is a rename, executed as
+  // INSERT(new) + DELETE(old) inside one transaction so dependent reads
+  // stay consistent. Throws when `oldName` does not exist, or when the
+  // rename target already collides with a different row.
+  update(oldName: string, record: ModelAliasRecord): Promise<void>;
+  delete(name: string): Promise<boolean>;
+  deleteAll(): Promise<void>;
+}
+
 export interface StoredResponsesItem {
   id: string;
   apiKeyId: string | null;
@@ -330,6 +363,7 @@ export interface Repo {
   upstreams: UpstreamRepo;
   proxies: ProxyRepo;
   proxyBackoffs: ProxyBackoffRepo;
+  modelAliases: ModelAliasesRepo;
   responsesItems: ResponsesItemsRepo;
   responsesSnapshots: ResponsesSnapshotsRepo;
 }
diff --git a/packages/protocols/src/common/aliases.ts b/packages/protocols/src/common/aliases.ts
new file mode 100644
index 000000000..ac212f91e
--- /dev/null
+++ b/packages/protocols/src/common/aliases.ts
@@ -0,0 +1,79 @@
+// Wire-level types for model aliases. Lives in @floway-dev/protocols because
+// both the gateway control plane and the dashboard SPA need the same DTO
+// shape — keeping it here means a single source of truth for snake_case
+// field names and the JSON-serializable rule shapes.
+//
+// An alias is a named virtual model id that resolves at request time to one
+// of N target model ids, optionally overlaying protocol-rule overrides
+// (reasoning effort, verbosity, service tier, ...) onto the request IR.
+// Resolution runs above prefix routing and never re-enters itself, which
+// makes recursive aliasing impossible by construction.
+
+// Endpoint family the alias serves. An alias belongs to exactly one kind;
+// rules are only allowed when the kind admits them (today that is `chat`).
+export type AliasKind = 'chat' | 'embedding' | 'image';
+
+// Target-picking strategy applied to the pool of currently-routable targets:
+//
+// - `first-available` — pick the first target in declaration order whose
+//   target_model_id resolves to an enabled upstream binding.
+// - `random` — pick uniformly at random from the same pool.
+//
+// When the pool is empty both strategies surface the same 404 to the caller.
+export type AliasSelection = 'random' | 'first-available';
+
+// Discrete reasoning-effort presets understood across upstreams. `xhigh`
+// matches the wire value Anthropic / OpenAI use for the highest tier.
+export type ReasoningEffort = 'none' | 'low' | 'medium' | 'high' | 'xhigh';
+
+// Reasoning-summary verbosity hint emitted on the Responses / Chat surface.
+export type ReasoningSummary = 'auto' | 'concise' | 'detailed' | 'none';
+
+// Output verbosity hint (OpenAI Responses `verbosity`).
+export type Verbosity = 'low' | 'medium' | 'high';
+
+// Per-request service tier the upstream advertises (Anthropic `fast`,
+// OpenAI `priority` / `flex` / `scale`, default tier).
+export type ServiceTier = 'default' | 'flex' | 'priority' | 'scale' | 'fast';
+
+// Rule overlay applied to a chat-kind alias target. Every field is optional;
+// an absent field leaves the inbound request value untouched. Rule values
+// are forwarded verbatim to the upstream — the gateway does not narrow them
+// against the target's advertised capability metadata.
+export interface ChatAliasRules {
+  reasoning?: {
+    effort?: ReasoningEffort;
+    budget_tokens?: number;
+    adaptive?: boolean;
+    summary?: ReasoningSummary;
+    mandatory?: boolean;
+  };
+  verbosity?: Verbosity;
+  serviceTier?: ServiceTier;
+}
+
+// Rule overlay union keyed by `AliasKind`. Embedding and image targets carry
+// an empty record today; the schema reserves the slot so per-kind rules can
+// grow later without a fresh migration.
+export type AliasRules = ChatAliasRules | Record<string, never>;
+
+// One target row inside an alias's `targets` list. Order is meaningful for
+// `first-available` selection and preserved (but ignored) for `random`.
+export interface AliasTarget {
+  target_model_id: string;
+  rules: AliasRules;
+}
+
+// Wire DTO returned by `/api/aliases`. snake_case to match the rest of the
+// control plane; `display_name === null` means "derive at render time".
+export interface ModelAlias {
+  name: string;
+  kind: AliasKind;
+  selection: AliasSelection;
+  display_name: string | null;
+  visible_in_models_list: boolean;
+  targets: AliasTarget[];
+  sort_order: number;
+  created_at: string;
+  updated_at: string;
+}
diff --git a/packages/protocols/src/common/index.ts b/packages/protocols/src/common/index.ts
index 51f8ef53b..1af2533bb 100644
--- a/packages/protocols/src/common/index.ts
+++ b/packages/protocols/src/common/index.ts
@@ -1,3 +1,4 @@
+export * from './aliases.ts';
 export * from './capabilities.ts';
 export * from './models.ts';
 export * from './openai-stream.ts';

From e6c5e0c6373ffa51e98dca4919743e0569e71519 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 18:04:42 +0800
Subject: [PATCH 050/170] feat(aliases): data-plane resolver + per-protocol
 rule overlay
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`resolveAlias` runs once per request, above prefix routing. It looks up
the inbound model name in the alias repo, narrows on the kind matching
the inbound endpoint group (chat / embedding / image), pre-filters the
target list to entries currently mapping to an enabled upstream binding,
then picks one — `pool[0]` for `first-available`, uniform-random for
`random`. A kind mismatch returns null so the literal id falls through
to prefix routing's miss surface; an all-unroutable hit throws
`AliasNoTargetAvailableError` with the canonical
`alias 'X' has N target(s); none currently map to an enabled upstream
binding` message. The shadow-the-real-model pattern (an alias whose
first target equals its own name) works automatically — a resolved
target id never re-enters the alias layer, so the target string is fed
verbatim back into the existing prefix router.

Four per-protocol apply helpers stamp `ChatAliasRules` onto each chat
IR. `applyChatRulesToChatCompletions` sets `reasoning_effort` /
`thinking_budget` / `adaptive_thinking` / `reasoning_summary` /
`verbosity` / `service_tier`. `applyChatRulesToResponses` lands the
same controls onto `reasoning.*` / `thinking_budget` / `text.verbosity`
/ `service_tier`. `applyChatRulesToMessages` routes effort to
`output_config.effort`, budget / adaptive to `thinking.*`, and uses the
existing `speed: 'fast'` ↔ `service_tier: 'fast'` bridge so a native
Messages target sees Fast Mode through its own field.
`applyChatRulesToGemini` collapses reasoning onto
`generationConfig.thinkingConfig` (effort → `thinkingLevel`, budget →
`thinkingBudget`, adaptive → `thinkingBudget: -1`) and pushes
verbosity / serviceTier through the existing extension surface. Rule
values pass through verbatim — the gateway does not enum-gate against
catalog metadata.

All four chat serves and the embeddings / images passthrough endpoints
call the resolver before candidate enumeration; the resolved id replaces
`payload.model` (or the URL-carried `model` arg on Gemini) and the
`x-floway-alias` header is staged on a new `responseHeaders` bag on
GatewayCtx (applied by `finalizeGatewayResponse`).
`AliasNoTargetAvailableError` is lifted into a new
`alias-no-target-available` ChatServeFailure variant so each chat
protocol's existing failure renderer surfaces it as a 404 in the
protocol-native error envelope.

Tests:
- resolve_test.ts (9 tests) — kind match / mismatch, available pool
  filtering, first-available vs random, shadow pattern + fallback,
  embedding-kind endpoint filtering, no-target-available throw.
- apply_test.ts (16 tests) — empty rules, full overlay, overwrite, and
  cross-protocol bridge per protocol.
- One alias integration test per chat protocol in the existing
  serve_test.ts files — mocks the resolver to inject a resolution and
  asserts the resolved id reaches candidate enumeration and the rule
  overrides land on the IR.

Out of scope (separate tasks per the design): /v1/models alias listing
+ PublicModelAliasedFrom, dashboard UI. /v1/completions does not
participate in alias resolution today — the AliasKind enum
(chat/embedding/image) doesn't model it and its IR has no rule-overlay
surface; revisit when there's a real need.
---
 .../chat/chat-completions/attempt_test.ts     |   1 +
 .../chat/chat-completions/errors.ts           |   2 +
 .../demote-developer-to-system_test.ts        |   1 +
 .../demote-interleaved-system-to-user_test.ts |   1 +
 ...le-reasoning-on-forced-tool-choice_test.ts |   1 +
 .../include-usage-stream-options_test.ts      |   1 +
 .../interceptors/normalize-usage_test.ts      |   1 +
 .../vendor-deepseek-normalize_test.ts         |   1 +
 .../vendor-kimi-normalize_test.ts             |   1 +
 .../vendor-qwen-normalize_test.ts             |   1 +
 .../data-plane/chat/chat-completions/serve.ts |   8 +
 .../chat/chat-completions/serve_test.ts       |  58 ++++-
 .../data-plane/chat/gemini/attempt_test.ts    |   1 +
 .../src/data-plane/chat/gemini/errors.ts      |   2 +
 .../strip-safety-settings_test.ts             |   1 +
 .../strip-unsupported-part-fields_test.ts     |   1 +
 .../strip-unsupported-tools_test.ts           |   1 +
 .../suppress-thought-parts_test.ts            |   1 +
 .../data-plane/chat/gemini/respond_test.ts    |   1 +
 .../src/data-plane/chat/gemini/serve.ts       |  20 +-
 .../src/data-plane/chat/gemini/serve_test.ts  |  51 +++-
 .../data-plane/chat/messages/attempt_test.ts  |   1 +
 .../src/data-plane/chat/messages/errors.ts    |   2 +
 .../demote-interleaved-system-to-user_test.ts |   1 +
 ...le-reasoning-on-forced-tool-choice_test.ts |   1 +
 .../strip-billing-attribution_test.ts         |   1 +
 .../interceptors/web-search-shim_test.ts      |   1 +
 .../data-plane/chat/messages/respond_test.ts  |   1 +
 .../src/data-plane/chat/messages/serve.ts     |  14 +
 .../data-plane/chat/messages/serve_test.ts    |  53 +++-
 .../data-plane/chat/responses/attempt_test.ts |   1 +
 .../src/data-plane/chat/responses/errors.ts   |   2 +
 .../canonicalize-encrypted-content_test.ts    |   1 +
 .../demote-developer-to-system_test.ts        |   1 +
 .../demote-interleaved-system-to-user_test.ts |   1 +
 ...le-reasoning-on-forced-tool-choice_test.ts |   1 +
 .../interceptors/retry-cyber-policy_test.ts   |   2 +
 .../interceptors/server-tool-shim_test.ts     |   2 +
 .../image-generation-integration_test.ts      |   1 +
 .../server-tools/image-generation_test.ts     |   1 +
 .../vendor-deepseek-normalize_test.ts         |   1 +
 .../vendor-qwen-normalize_test.ts             |   1 +
 .../data-plane/chat/responses/serve-prep.ts   |   8 +
 .../data-plane/chat/responses/serve_test.ts   |  55 +++-
 .../src/data-plane/chat/shared/errors.ts      |   9 +-
 .../src/data-plane/chat/shared/gateway-ctx.ts |  12 +
 .../data-plane/chat/shared/respond_test.ts    |   1 +
 .../chat/shared/upstream-telemetry_test.ts    |   1 +
 .../src/data-plane/embeddings/serve.ts        |  14 +-
 .../gateway/src/data-plane/images/serve.ts    |  26 +-
 .../src/data-plane/model-aliases/apply.ts     | 122 +++++++++
 .../data-plane/model-aliases/apply_test.ts    | 198 ++++++++++++++
 .../src/data-plane/model-aliases/resolve.ts   | 132 ++++++++++
 .../data-plane/model-aliases/resolve_test.ts  | 241 ++++++++++++++++++
 .../model-aliases/serve-integration.ts        | 102 ++++++++
 .../gateway/src/test-helpers/gateway-ctx.ts   |   1 +
 56 files changed, 1158 insertions(+), 10 deletions(-)
 create mode 100644 packages/gateway/src/data-plane/model-aliases/apply.ts
 create mode 100644 packages/gateway/src/data-plane/model-aliases/apply_test.ts
 create mode 100644 packages/gateway/src/data-plane/model-aliases/resolve.ts
 create mode 100644 packages/gateway/src/data-plane/model-aliases/resolve_test.ts
 create mode 100644 packages/gateway/src/data-plane/model-aliases/serve-integration.ts

diff --git a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
index 004c95cfb..bf3e3ef3c 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
@@ -23,6 +23,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/errors.ts b/packages/gateway/src/data-plane/chat/chat-completions/errors.ts
index 9d7c12a5f..ea5b3673c 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/errors.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/errors.ts
@@ -34,5 +34,7 @@ export const renderChatCompletionsFailure = (
     return openAiErrorResult(404, appendFailedUpstreams(`Model ${failure.model} is not available on any configured upstream.`, failure.failedUpstreams));
   case 'model-unsupported':
     return openAiErrorResult(400, appendFailedUpstreams(`Model ${failure.model} does not support the /chat/completions endpoint.`, failure.failedUpstreams));
+  case 'alias-no-target-available':
+    return openAiErrorResult(404, `alias '${failure.aliasName}' has ${failure.targetCount} target(s); none currently map to an enabled upstream binding`);
   }
 };
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
index 3f2664912..e4edd7951 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
index 6f1ef50d4..40cc5b48c 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 331856b9b..928b18b72 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
index 2af2ecba0..fb02a7314 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
index d4996ef8c..497a49cd8 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
index 176bad4b2..963125131 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
@@ -20,6 +20,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
index e2c3e61d8..3a6ee205f 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
index 8a8a7d6d8..f1c536f5a 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
index 73e8c1afd..3f04c8b8b 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
@@ -1,6 +1,8 @@
 import { chatCompletionsAttempt } from './attempt.ts';
 import { renderChatCompletionsFailure } from './errors.ts';
 import { planChatCompletionsRouting } from './routing.ts';
+import { AliasNoTargetAvailableError, aliasFailureFromError } from '../../model-aliases/resolve.ts';
+import { resolveAndApplyAliasForChatCompletions } from '../../model-aliases/serve-integration.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -18,6 +20,12 @@ export interface ChatCompletionsServeGenerateArgs {
 export const chatCompletionsServe = {
   generate: async (args: ChatCompletionsServeGenerateArgs): Promise<ExecuteResult<ProtocolFrame<ChatCompletionsStreamEvent>>> => {
     const { payload, ctx, store, headers } = args;
+    try {
+      await resolveAndApplyAliasForChatCompletions(payload, ctx);
+    } catch (error) {
+      if (error instanceof AliasNoTargetAvailableError) return renderChatCompletionsFailure(aliasFailureFromError(error));
+      throw error;
+    }
     const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
       upstreamIds: ctx.upstreamIds,
       model: payload.model,
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
index c3f90f81d..9117e2f80 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
@@ -16,11 +16,13 @@ import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-d
 // Mock the candidates seam so each test hands the serve exactly the
 // provider candidates it wants.
 const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
+const lastCandidatesCall: { model?: string } = {};
 vi.mock('../shared/candidates.ts', async importOriginal => {
   const original = await importOriginal<typeof import('../shared/candidates.ts')>();
   return {
     ...original,
-    enumerateProviderCandidates: vi.fn(async () => {
+    enumerateProviderCandidates: vi.fn(async (args: { model: string }) => {
+      lastCandidatesCall.model = args.model;
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('serve_test: no candidates enqueued');
       return next;
@@ -28,6 +30,22 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
   };
 });
 
+// Mock the alias resolver so the integration test can inject a resolution
+// without standing up the full per-request fetcher + registry stack.
+const aliasResolutionQueue: ({ targetModelId: string; rules: Record<string, unknown>; aliasName: string } | null | Error)[] = [];
+vi.mock('../../model-aliases/resolve.ts', async importOriginal => {
+  const original = await importOriginal<typeof import('../../model-aliases/resolve.ts')>();
+  return {
+    ...original,
+    resolveAlias: vi.fn(async () => {
+      if (aliasResolutionQueue.length === 0) return null;
+      const next = aliasResolutionQueue.shift()!;
+      if (next instanceof Error) throw next;
+      return next;
+    }),
+  };
+});
+
 const { chatCompletionsServe } = await import('./serve.ts');
 
 const API_KEY_ID = 'key_chat_completions_serve_test';
@@ -49,6 +67,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
@@ -281,3 +300,40 @@ test('generate renders model-missing when no candidates are available', async ()
   assertEquals(body.error.type, 'invalid_request_error');
   assertEquals(body.error.message, 'Model unknown-model is not available on any configured upstream.');
 });
+
+test('alias resolution swaps the inbound model id for the target and overlays rules onto the IR', async () => {
+  installRepo();
+  aliasResolutionQueue.push({
+    targetModelId: 'gpt-5.4',
+    rules: { reasoning: { effort: 'low' }, verbosity: 'low' },
+    aliasName: 'gpt-fast',
+  });
+  const capturedBodies: ChatCompletionsPayload[] = [];
+  const callChatCompletions = vi.fn(async (_model: unknown, body: unknown): Promise<ProviderStreamResult<ChatCompletionsStreamEvent>> => {
+    capturedBodies.push(body as ChatCompletionsPayload);
+    return { ok: true, events: makeProtocolFrames(makeChatCompletionsEvents()), modelKey: 'gpt-5.4', headers: new Headers() };
+  });
+  queueCandidates([makeCandidate({ upstream: 'up_a', callChatCompletions })]);
+
+  const result = await chatCompletionsServe.generate({
+    payload: makePayload({ model: 'gpt-fast' }),
+    ctx: makeGatewayCtx(),
+    store: createNonResponsesSourceStore(API_KEY_ID),
+    headers: new Headers(),
+  });
+
+  assertEquals(result.type, 'events');
+  if (result.type !== 'events') throw new Error('unreachable');
+  await collectEvents(result.events);
+
+  // The resolved target id, not the alias name, must reach candidate
+  // enumeration so prefix routing addresses the real upstream model.
+  assertEquals(lastCandidatesCall.model, 'gpt-5.4');
+  // The alias rule overrides must land on the IR before the upstream call.
+  // (The attempt strips `model` from the body — the provider re-stamps it
+  // from `candidate.binding.upstreamModel.id` — so we only verify the rule
+  // fields here.)
+  const observed = capturedBodies[0]!;
+  assertEquals(observed.reasoning_effort, 'low');
+  assertEquals(observed.verbosity, 'low');
+});
diff --git a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
index 877295494..0e562021e 100644
--- a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
@@ -24,6 +24,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/gemini/errors.ts b/packages/gateway/src/data-plane/chat/gemini/errors.ts
index ccfede92c..f1f231d31 100644
--- a/packages/gateway/src/data-plane/chat/gemini/errors.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/errors.ts
@@ -53,5 +53,7 @@ export const renderGeminiFailure = (
     return geminiRpcErrorResult(404, appendFailedUpstreams(`Model ${failure.model} is not available on any configured upstream.`, failure.failedUpstreams));
   case 'model-unsupported':
     return geminiRpcErrorResult(400, appendFailedUpstreams(`Model ${failure.model} does not support ${endpoint === 'countTokens' ? 'countTokens' : 'the Gemini generateContent endpoint'}.`, failure.failedUpstreams));
+  case 'alias-no-target-available':
+    return geminiRpcErrorResult(404, `alias '${failure.aliasName}' has ${failure.targetCount} target(s); none currently map to an enabled upstream binding`);
   }
 };
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
index 90c86fb1a..225c690da 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
index 84a282748..f7f0beb55 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
index a066ce6c3..b3ca33289 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
index 0cbf60b61..a9978fa9f 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
@@ -15,6 +15,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
index 76edee680..11629b1a9 100644
--- a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
@@ -26,6 +26,7 @@ const ctx = (): GatewayCtx => ({
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts
index f5daa1d86..091143618 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts
@@ -1,6 +1,8 @@
 import { geminiAttempt } from './attempt.ts';
 import { renderGeminiFailure } from './errors.ts';
 import { planGeminiRouting } from './routing.ts';
+import { AliasNoTargetAvailableError, aliasFailureFromError } from '../../model-aliases/resolve.ts';
+import { resolveAndApplyAliasForGemini } from '../../model-aliases/serve-integration.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -29,7 +31,14 @@ export interface GeminiServeCountTokensArgs {
 
 export const geminiServe = {
   generate: async (args: GeminiServeGenerateArgs): Promise<ExecuteResult<ProtocolFrame<GeminiStreamEvent>>> => {
-    const { payload, ctx, store, model, headers } = args;
+    const { payload, ctx, store, headers } = args;
+    let model: string;
+    try {
+      model = await resolveAndApplyAliasForGemini(args.model, payload, ctx);
+    } catch (error) {
+      if (error instanceof AliasNoTargetAvailableError) return renderGeminiFailure(aliasFailureFromError(error), 'generate');
+      throw error;
+    }
     const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
       upstreamIds: ctx.upstreamIds,
       model,
@@ -59,7 +68,14 @@ export const geminiServe = {
   },
 
   countTokens: async (args: GeminiServeCountTokensArgs): Promise<ExecuteResult<ProtocolFrame<GeminiStreamEvent>> | PlainResult> => {
-    const { payload, ctx, store, model, headers } = args;
+    const { payload, ctx, store, headers } = args;
+    let model: string;
+    try {
+      model = await resolveAndApplyAliasForGemini(args.model, payload, ctx);
+    } catch (error) {
+      if (error instanceof AliasNoTargetAvailableError) return renderGeminiFailure(aliasFailureFromError(error), 'countTokens');
+      throw error;
+    }
     const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
       upstreamIds: ctx.upstreamIds,
       model,
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
index 1e834a8be..43f2bd256 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
@@ -15,11 +15,13 @@ import { directFetcher, type ProviderCallResult, type ProviderStreamResult, type
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
+const lastCandidatesCall: { model?: string } = {};
 vi.mock('../shared/candidates.ts', async importOriginal => {
   const original = await importOriginal<typeof import('../shared/candidates.ts')>();
   return {
     ...original,
-    enumerateProviderCandidates: vi.fn(async () => {
+    enumerateProviderCandidates: vi.fn(async (args: { model: string }) => {
+      lastCandidatesCall.model = args.model;
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('serve_test: no candidates enqueued');
       return next;
@@ -27,6 +29,22 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
   };
 });
 
+// Queue-driven stand-in for `resolveAlias`: a test enqueues the resolution (or
+// Error) the next call should produce, avoiding the real fetcher + registry stack.
+const aliasResolutionQueue: ({ targetModelId: string; rules: Record<string, unknown>; aliasName: string } | null | Error)[] = [];
+vi.mock('../../model-aliases/resolve.ts', async importOriginal => {
+  const actual = await importOriginal<typeof import('../../model-aliases/resolve.ts')>();
+  const resolveAlias = vi.fn(async () => {
+    // An empty queue and an explicit `null` entry both yield `null`.
+    const queued = aliasResolutionQueue.shift() ?? null;
+    if (queued instanceof Error) throw queued;
+    return queued;
+  });
+  // Every other export of the real module (e.g. the error class and failure
+  // helper that serve.ts imports) is passed through unchanged.
+  return { ...actual, resolveAlias };
+});
+
 const { geminiServe } = await import('./serve.ts');
 
 const API_KEY_ID = 'key_gemini_serve_test';
@@ -48,6 +66,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
@@ -321,3 +340,33 @@ test('countTokens renders a Google RPC NOT_FOUND when no Messages-capable candid
   const upstreamError = expectType(result, 'api-error');
   assertEquals(upstreamError.status, 404);
 });
+
+test('alias resolution swaps the inbound model id for the target and overlays rules onto the Gemini IR', async () => {
+  const aliasName = 'gemini-fast';
+  const targetModelId = 'gpt-5.4';
+  installRepo();
+  aliasResolutionQueue.push({ targetModelId, rules: { reasoning: { effort: 'high', budget_tokens: 1024 }, verbosity: 'low' }, aliasName });
+  const callChatCompletions = vi.fn(async (): Promise<ProviderStreamResult<ChatCompletionsStreamEvent>> => {
+    return { ok: true, events: makeProtocolFrames(makeChatCompletionsEvents()), modelKey: targetModelId, headers: new Headers() };
+  });
+  queueCandidates([makeCandidate({ targetApi: 'chat-completions', callChatCompletions })]);
+
+  // Keep a handle on the payload: the assertions below read the rule overlay back off this same object.
+  const payload = makePayload();
+  const result = await geminiServe.generate({
+    payload,
+    ctx: makeGatewayCtx(),
+    store: createNonResponsesSourceStore(API_KEY_ID),
+    model: aliasName,
+    headers: new Headers(),
+  });
+  await collectEvents(expectType(result, 'events').events);
+
+  // Candidate enumeration must receive the resolved target id rather than the alias name.
+  assertEquals(lastCandidatesCall.model, targetModelId);
+  // The alias rules are expected on the Gemini payload itself.
+  const thinkingConfig = payload.generationConfig?.thinkingConfig;
+  assertEquals(thinkingConfig?.thinkingLevel, 'high');
+  assertEquals(thinkingConfig?.thinkingBudget, 1024);
+  assertEquals(payload.generationConfig?.verbosity, 'low');
+});
diff --git a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
index 36fe9e284..1495df93b 100644
--- a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
@@ -23,6 +23,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/messages/errors.ts b/packages/gateway/src/data-plane/chat/messages/errors.ts
index 6881616ec..9c4d5f976 100644
--- a/packages/gateway/src/data-plane/chat/messages/errors.ts
+++ b/packages/gateway/src/data-plane/chat/messages/errors.ts
@@ -38,5 +38,7 @@ export const renderMessagesFailure = (
     return anthropicErrorResult(404, 'not_found_error', appendFailedUpstreams(`Model ${failure.model} is not available on any configured upstream.`, failure.failedUpstreams));
   case 'model-unsupported':
     return anthropicErrorResult(400, 'invalid_request_error', appendFailedUpstreams(`Model ${failure.model} does not support the ${endpointPath} endpoint.`, failure.failedUpstreams));
+  case 'alias-no-target-available':
+    return anthropicErrorResult(404, 'not_found_error', `alias '${failure.aliasName}' has ${failure.targetCount} target(s); none currently map to an enabled upstream binding`);
   }
 };
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
index 590a05c7b..da3a72d81 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 2917537a7..bd317db35 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
index f1e8e49c0..90dfa9a3e 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
@@ -16,6 +16,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
index f1cf56677..396efd981 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
@@ -58,6 +58,7 @@ const gatewayCtx = (apiKeyId: string = 'test-key'): GatewayCtx => ({
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/messages/respond_test.ts b/packages/gateway/src/data-plane/chat/messages/respond_test.ts
index 82191d8af..fba59f4b2 100644
--- a/packages/gateway/src/data-plane/chat/messages/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/respond_test.ts
@@ -539,6 +539,7 @@ const makeRespondCtx = (): GatewayCtx => ({
   requestStartedAt: 0,
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
 });
 
 const messagesEventsForRespond = (): readonly MessagesStreamEvent[] => [
diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts
index ae9bb5d6c..1a7b22372 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve.ts
@@ -1,6 +1,8 @@
 import { messagesAttempt } from './attempt.ts';
 import { renderMessagesFailure } from './errors.ts';
 import { planMessagesRouting } from './routing.ts';
+import { AliasNoTargetAvailableError, aliasFailureFromError } from '../../model-aliases/resolve.ts';
+import { resolveAndApplyAliasForMessages } from '../../model-aliases/serve-integration.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -25,6 +27,12 @@ export interface MessagesServeCountTokensArgs {
 export const messagesServe = {
   generate: async (args: MessagesServeGenerateArgs): Promise<ExecuteResult<ProtocolFrame<MessagesStreamEvent>>> => {
     const { payload, ctx, store, headers } = args;
+    try {
+      await resolveAndApplyAliasForMessages(payload, ctx);
+    } catch (error) {
+      if (error instanceof AliasNoTargetAvailableError) return renderMessagesFailure(aliasFailureFromError(error), 'generate');
+      throw error;
+    }
     const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
       upstreamIds: ctx.upstreamIds,
       model: payload.model,
@@ -57,6 +65,12 @@ export const messagesServe = {
 
   countTokens: async (args: MessagesServeCountTokensArgs): Promise<ExecuteResult<ProtocolFrame<MessagesStreamEvent>> | PlainResult> => {
     const { payload, ctx, store, headers } = args;
+    try {
+      await resolveAndApplyAliasForMessages(payload, ctx);
+    } catch (error) {
+      if (error instanceof AliasNoTargetAvailableError) return renderMessagesFailure(aliasFailureFromError(error), 'countTokens');
+      throw error;
+    }
     const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
       upstreamIds: ctx.upstreamIds,
       model: payload.model,
diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
index de09f5646..8b07302f9 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
@@ -13,11 +13,13 @@ import { defaultsForProvider, directFetcher, type ProviderCallResult, type Provi
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
+const lastCandidatesCall: { model?: string } = {};
 vi.mock('../shared/candidates.ts', async importOriginal => {
   const original = await importOriginal<typeof import('../shared/candidates.ts')>();
   return {
     ...original,
-    enumerateProviderCandidates: vi.fn(async () => {
+    enumerateProviderCandidates: vi.fn(async (args: { model: string }) => {
+      lastCandidatesCall.model = args.model;
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('serve_test: no candidates enqueued');
       return next;
@@ -25,6 +27,22 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
   };
 });
 
+// Queue-driven stand-in for `resolveAlias`: a test enqueues the resolution (or
+// Error) the next call should produce, avoiding the real fetcher + registry stack.
+const aliasResolutionQueue: ({ targetModelId: string; rules: Record<string, unknown>; aliasName: string } | null | Error)[] = [];
+vi.mock('../../model-aliases/resolve.ts', async importOriginal => {
+  const actual = await importOriginal<typeof import('../../model-aliases/resolve.ts')>();
+  const resolveAlias = vi.fn(async () => {
+    // An empty queue and an explicit `null` entry both yield `null`.
+    const queued = aliasResolutionQueue.shift() ?? null;
+    if (queued instanceof Error) throw queued;
+    return queued;
+  });
+  // Every other export of the real module (e.g. the error class and failure
+  // helper that serve.ts imports) is passed through unchanged.
+  return { ...actual, resolveAlias };
+});
+
 const { messagesServe } = await import('./serve.ts');
 
 const API_KEY_ID = 'key_messages_serve_test';
@@ -46,6 +64,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
@@ -436,3 +455,35 @@ test('copilot binding strips x-anthropic-billing-header system block via the def
   assertEquals(observed.system.length, 1);
   assertEquals(observed.system[0].text, 'You are a helpful assistant.');
 });
+
+test('alias resolution swaps the inbound model id for the target and overlays rules onto the Messages IR', async () => {
+  const aliasName = 'claude-fast';
+  const targetModelId = 'claude-opus-4-7';
+  installRepo();
+  aliasResolutionQueue.push({ targetModelId, rules: { reasoning: { effort: 'high', budget_tokens: 2048 }, serviceTier: 'fast' }, aliasName });
+  // Capture each upstream request body, re-attaching a model id so it fits `MessagesPayload`.
+  const capturedBodies: MessagesPayload[] = [];
+  const callMessages = vi.fn(async (_model: unknown, body: unknown): Promise<ProviderStreamResult<MessagesStreamEvent>> => {
+    const wireBody = body as Omit<MessagesPayload, 'model'>;
+    capturedBodies.push({ ...wireBody, model: targetModelId });
+    return { ok: true, events: makeProtocolFrames(makeMessagesResultEvents()), modelKey: targetModelId };
+  });
+  queueCandidates([makeCandidate({ upstream: 'up_cf', callMessages })]);
+
+  const result = await messagesServe.generate({
+    payload: makePayload({ model: aliasName }),
+    ctx: makeGatewayCtx(),
+    store: createNonResponsesSourceStore(API_KEY_ID),
+    headers: new Headers(),
+  });
+  await collectEvents(assertResultType(result, 'events').events);
+
+  // Candidate enumeration must receive the resolved target id rather than the alias name.
+  assertEquals(lastCandidatesCall.model, targetModelId);
+  const sentBody = capturedBodies[0]!;
+  assertEquals(sentBody.output_config?.effort, 'high');
+  assertEquals(sentBody.thinking?.budget_tokens, 2048);
+  // The serviceTier=fast → speed=fast bridge lands the alias rule on
+  // Anthropic's native Fast Mode field.
+  assertEquals(sentBody.speed, 'fast');
+});
diff --git a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
index 22ec01ee1..0bf10177a 100644
--- a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
@@ -25,6 +25,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/errors.ts b/packages/gateway/src/data-plane/chat/responses/errors.ts
index 215f218a0..1af6c42d0 100644
--- a/packages/gateway/src/data-plane/chat/responses/errors.ts
+++ b/packages/gateway/src/data-plane/chat/responses/errors.ts
@@ -34,5 +34,7 @@ export const renderResponsesFailure = (
     return openAiErrorResult(404, appendFailedUpstreams(`Model ${failure.model} is not available on any configured upstream.`, failure.failedUpstreams));
   case 'model-unsupported':
     return openAiErrorResult(400, appendFailedUpstreams(`Model ${failure.model} does not support the /responses endpoint.`, failure.failedUpstreams));
+  case 'alias-no-target-available':
+    return openAiErrorResult(404, `alias '${failure.aliasName}' has ${failure.targetCount} target(s); none currently map to an enabled upstream binding`);
   }
 };
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
index 914711316..a1db87741 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
index fc422467e..3dc2ad972 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
index 4210d34ba..b5b7bed38 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 9618d2f02..b9bc85ed8 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
index 16c220950..1505ea5bb 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
@@ -45,6 +45,7 @@ const stubCtx = (overrides: { abortSignal?: AbortSignal } = {}): GatewayCtx => (
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
@@ -160,6 +161,7 @@ const performanceFor = (modelKey: string) => ({
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
 });
 
 const upstreamCyberPolicyError = (message: string): ExecuteResult<ProtocolFrame<ResponsesStreamEvent>> => ({
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
index d48ea7d2b..1842ff08b 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
@@ -348,6 +348,7 @@ const makeGatewayCtx = (apiKeyId: string = 'k1'): GatewayCtx => ({
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
@@ -4497,6 +4498,7 @@ test('downstream AbortSignal threads through to provider search / fetchPage and
     runtimeLocation: 'TEST',
     currentColo: 'TEST',
     dump: null,
+    responseHeaders: new Headers(),
     backgroundScheduler: () => {},
     c: stubAuthedContext(),
     requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
index e10d24fa7..94859eb9c 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
@@ -144,6 +144,7 @@ const gatewayCtx = (): GatewayCtx => ({
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
index 284004632..f2513a078 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
@@ -56,6 +56,7 @@ const gatewayCtx = (): GatewayCtx => ({
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
index 08534e87b..31624e32a 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
index 9a51ac553..06e39fa79 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
@@ -17,6 +17,7 @@ const stubCtx: GatewayCtx = {
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
diff --git a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
index 8b2638334..1d13c81cf 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
@@ -1,6 +1,8 @@
 import { renderResponsesFailure } from './errors.ts';
 import type { StatefulResponsesStore } from './items/store.ts';
 import { planResponsesRouting } from './routing.ts';
+import { AliasNoTargetAvailableError, aliasFailureFromError } from '../../model-aliases/resolve.ts';
+import { resolveAndApplyAliasForResponses } from '../../model-aliases/serve-integration.ts';
 import { enumerateProviderCandidates, type ChatCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ModelEndpoints, ProtocolFrame } from '@floway-dev/protocols/common';
@@ -88,6 +90,12 @@ export const prepareResponsesServePlan = async (args: {
 }): Promise<ResponsesServePlan> => {
   const { payload, ctx, store, pickTarget } = args;
   const prepared = await expandPreviousResponseId(payload, store);
+  try {
+    await resolveAndApplyAliasForResponses(prepared, ctx);
+  } catch (error) {
+    if (error instanceof AliasNoTargetAvailableError) return { kind: 'failure', result: renderResponsesFailure(aliasFailureFromError(error)) };
+    throw error;
+  }
   const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
     upstreamIds: ctx.upstreamIds,
     model: prepared.model,
diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
index f60be9a2a..548c49b8b 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
@@ -23,11 +23,13 @@ import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-d
 // `model-missing` failure tests queue an empty list and expect `sawModel:
 // false` so the serve renders 404 rather than 400.
 const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
+const lastCandidatesCall: { model?: string } = {};
 vi.mock('../shared/candidates.ts', async importOriginal => {
   const original = await importOriginal<typeof import('../shared/candidates.ts')>();
   return {
     ...original,
-    enumerateProviderCandidates: vi.fn(async () => {
+    enumerateProviderCandidates: vi.fn(async (args: { model: string }) => {
+      lastCandidatesCall.model = args.model;
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('serve_test: no candidates enqueued');
       return next;
@@ -35,6 +37,22 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
   };
 });
 
+// Queue-driven stand-in for `resolveAlias`: a test enqueues the resolution (or
+// Error) the next call should produce, avoiding the real fetcher + registry stack.
+const aliasResolutionQueue: ({ targetModelId: string; rules: Record<string, unknown>; aliasName: string } | null | Error)[] = [];
+vi.mock('../../model-aliases/resolve.ts', async importOriginal => {
+  const actual = await importOriginal<typeof import('../../model-aliases/resolve.ts')>();
+  const resolveAlias = vi.fn(async () => {
+    // An empty queue and an explicit `null` entry both yield `null`.
+    const queued = aliasResolutionQueue.shift() ?? null;
+    if (queued instanceof Error) throw queued;
+    return queued;
+  });
+  // Every other export of the real module (e.g. the error class and failure
+  // helper that serve-prep.ts imports) is passed through unchanged.
+  return { ...actual, resolveAlias };
+});
+
 const { responsesServe } = await import('./serve.ts');
 const { expandPreviousResponseId } = await import('./serve-prep.ts');
 
@@ -57,6 +75,7 @@ const makeGatewayCtx = (): GatewayCtx => ({
   runtimeLocation: 'TEST',
   currentColo: 'TEST',
   dump: null,
+  responseHeaders: new Headers(),
   backgroundScheduler: () => {},
   c: stubAuthedContext(),
   requestStartedAt: 0,
@@ -646,3 +665,37 @@ test('generate treats compaction_trigger-bearing input as compaction: snapshot r
   if (!Array.isArray(receivedInput)) throw new Error('expected the wire input to be an array');
   assertEquals((receivedInput.at(-1) as { type?: unknown })?.type, 'compaction_trigger');
 });
+
+test('alias resolution swaps the inbound model id for the target and overlays rules onto the Responses IR', async () => {
+  const aliasName = 'gpt-fast';
+  const targetModelId = 'gpt-5.4';
+  installRepo();
+  // Rules cover reasoning, verbosity and service tier; each is asserted on the captured body below.
+  aliasResolutionQueue.push({ targetModelId, rules: { reasoning: { effort: 'high', summary: 'detailed' }, verbosity: 'medium', serviceTier: 'priority' }, aliasName });
+  const capturedBodies: ResponsesPayload[] = [];
+  const callResponses = vi.fn(async (_model: unknown, body: unknown): Promise<ProviderStreamResult<ResponsesStreamEvent>> => {
+    capturedBodies.push(body as ResponsesPayload);
+    return { ok: true, events: makeProtocolFrames([{ type: 'response.completed', sequence_number: 0, response: makeResponsesResult() }]), modelKey: targetModelId, headers: new Headers() };
+  });
+  queueCandidates([makeCandidate({ upstream: 'up_a', callResponses })]);
+
+  const result = await responsesServe.generate({
+    payload: makePayload({ model: aliasName }),
+    ctx: makeGatewayCtx(),
+    store: createResponsesHttpStore(API_KEY_ID, true),
+    headers: new Headers(),
+  });
+
+  assertEquals(result.type, 'events');
+  if (result.type !== 'events') throw new Error('unreachable');
+  await collectEvents(result.events);
+
+  // Candidate enumeration must receive the resolved target id rather than the alias name.
+  assertEquals(lastCandidatesCall.model, targetModelId);
+  const sentBody = capturedBodies[0]!;
+  // Each alias rule is expected on its Responses wire field.
+  assertEquals(sentBody.reasoning?.effort, 'high');
+  assertEquals(sentBody.reasoning?.summary, 'detailed');
+  assertEquals(sentBody.text?.verbosity, 'medium');
+  assertEquals(sentBody.service_tier, 'priority');
+});
diff --git a/packages/gateway/src/data-plane/chat/shared/errors.ts b/packages/gateway/src/data-plane/chat/shared/errors.ts
index 7a98db1db..896ff1a65 100644
--- a/packages/gateway/src/data-plane/chat/shared/errors.ts
+++ b/packages/gateway/src/data-plane/chat/shared/errors.ts
@@ -9,7 +9,14 @@ export type ChatServeFailure =
   | { readonly kind: 'model-missing'; readonly model: string; readonly failedUpstreams?: readonly string[] }
   | { readonly kind: 'model-unsupported'; readonly model: string; readonly failedUpstreams?: readonly string[] }
   | { readonly kind: 'item-not-found'; readonly itemId: string }
-  | { readonly kind: 'routing-unavailable'; readonly message: string };
+  | { readonly kind: 'routing-unavailable'; readonly message: string }
+  // Alias name resolved, but no entry in its targets list currently maps to
+  // an enabled upstream binding that exposes the inbound endpoint group.
+  // Rendered as a 404 carrying the canonical
+  // `alias '<name>' has N target(s); none currently map to an enabled
+  // upstream binding` message — every protocol's renderer treats this as a
+  // model-not-found surface.
+  | { readonly kind: 'alias-no-target-available'; readonly aliasName: string; readonly targetCount: number };
 
 class ChatServeFailureError extends Error {
   readonly failure: ChatServeFailure;
diff --git a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
index 9cdbab0c8..4ef208a42 100644
--- a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
+++ b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
@@ -27,6 +27,13 @@ export interface GatewayCtx {
   // `finalizeGatewayResponse` short-circuits the dump tee and returns the
   // response untouched.
   readonly dump: DumpAccumulator | null;
+  // Headers staged by the gateway during request processing (e.g. the
+  // `x-floway-alias` header the alias resolver stamps when it picked a
+  // target). `finalizeGatewayResponse` writes each entry onto the outbound
+  // response just before returning it, so the headers ride along regardless
+  // of whether the responder built the Response via Hono's streamSSE,
+  // `Response.json`, or a raw `new Response(...)`.
+  readonly responseHeaders: Headers;
 }
 
 export interface CreateGatewayCtxOptions {
@@ -73,12 +80,17 @@ export const createGatewayCtxFromHono = (c: AuthedContext, opts: CreateGatewayCt
     runtimeLocation: colo,
     currentColo: colo,
     dump,
+    responseHeaders: new Headers(),
   };
 };
 
 // Run the dump-accumulator's finalize tee on the outgoing Response. Every
 // inbound HTTP wrapper returns its response through this seam so the dump
 // pipeline applies uniformly across happy-path, error, and passthrough paths.
+// Headers staged on `ctx.responseHeaders` (today: `x-floway-alias`) are copied
+// onto the response first, replacing same-named ones. NOTE(review): `headers.set`
+// throws on a Response with immutable headers (e.g. one straight from `fetch`) — confirm none reach here.
 export const finalizeGatewayResponse = (ctx: GatewayCtx, response: Response): Response => {
+  for (const [name, value] of ctx.responseHeaders) response.headers.set(name, value);
   return ctx.dump?.finalize(response) ?? response;
 };
diff --git a/packages/gateway/src/data-plane/chat/shared/respond_test.ts b/packages/gateway/src/data-plane/chat/shared/respond_test.ts
index 9a5c506a6..58c8c454a 100644
--- a/packages/gateway/src/data-plane/chat/shared/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/respond_test.ts
@@ -44,6 +44,7 @@ const setup = (): Harness => {
       runtimeLocation: 'TEST',
       currentColo: 'TEST',
       dump: null,
+      responseHeaders: new Headers(),
       backgroundScheduler: promise => { background.push(promise); },
       requestStartedAt,
       c: stubAuthedContext(),
diff --git a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
index bdef8b1f1..756dccda8 100644
--- a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
@@ -30,6 +30,7 @@ const baseCtx = (overrides: Partial<GatewayCtx> = {}): GatewayCtx => {
     runtimeLocation: 'TEST',
     currentColo: 'TEST',
     dump: null,
+    responseHeaders: new Headers(),
     abortSignal: downstream.signal,
     downstreamAbortController: downstream,
     backgroundScheduler: promise => { void promise; },
diff --git a/packages/gateway/src/data-plane/embeddings/serve.ts b/packages/gateway/src/data-plane/embeddings/serve.ts
index 9c33e6736..3d83cd1e9 100644
--- a/packages/gateway/src/data-plane/embeddings/serve.ts
+++ b/packages/gateway/src/data-plane/embeddings/serve.ts
@@ -5,6 +5,8 @@ import type { Context } from 'hono';
 
 import { createGatewayCtxFromHono, finalizeGatewayResponse } from '../chat/shared/gateway-ctx.ts';
 import { readRequestBody } from '../chat/shared/request-body.ts';
+import { AliasNoTargetAvailableError } from '../model-aliases/resolve.ts';
+import { resolveAliasForPassthrough } from '../model-aliases/serve-integration.ts';
 import { passthroughApiError, passthroughServe } from '../shared/passthrough-serve.ts';
 import { tokenUsageFromEmbeddingsBody } from '../shared/telemetry/usage.ts';
 
@@ -53,11 +55,21 @@ export const embeddings = async (c: Context): Promise<Response> => {
   }
 
   ctx.dump?.requestedModel(request.model);
+  let resolvedModel: string;
+  try {
+    resolvedModel = await resolveAliasForPassthrough(request.model, 'embedding', ctx);
+  } catch (error) {
+    if (error instanceof AliasNoTargetAvailableError) {
+      ctx.dump?.error('gateway');
+      return finalizeGatewayResponse(ctx, passthroughApiError(c, error.message, 404));
+    }
+    throw error;
+  }
   const response = await passthroughServe({
     c,
     ctx,
     sourceApi: '/embeddings',
-    model: request.model,
+    model: resolvedModel,
     bindingServesEndpoint: binding => binding.upstreamModel.endpoints.embeddings !== undefined,
     call: async (binding, opts) => {
       const { model: _model, ...body } = request.body;
diff --git a/packages/gateway/src/data-plane/images/serve.ts b/packages/gateway/src/data-plane/images/serve.ts
index 58f8a7a25..ed3c7891d 100644
--- a/packages/gateway/src/data-plane/images/serve.ts
+++ b/packages/gateway/src/data-plane/images/serve.ts
@@ -12,6 +12,8 @@ import type { Context } from 'hono';
 
 import { createGatewayCtxFromHono, finalizeGatewayResponse } from '../chat/shared/gateway-ctx.ts';
 import { readRequestBody } from '../chat/shared/request-body.ts';
+import { AliasNoTargetAvailableError } from '../model-aliases/resolve.ts';
+import { resolveAliasForPassthrough } from '../model-aliases/serve-integration.ts';
 import { passthroughApiError, passthroughServe } from '../shared/passthrough-serve.ts';
 import { tokenUsageFromImagesBody } from '../shared/telemetry/usage.ts';
 
@@ -52,11 +54,21 @@ export const imagesGenerations = async (c: Context): Promise<Response> => {
   }
 
   ctx.dump?.requestedModel(request.model);
+  let resolvedModel: string;
+  try {
+    resolvedModel = await resolveAliasForPassthrough(request.model, 'image', ctx);
+  } catch (error) {
+    if (error instanceof AliasNoTargetAvailableError) {
+      ctx.dump?.error('gateway');
+      return finalizeGatewayResponse(ctx, passthroughApiError(c, error.message, 404));
+    }
+    throw error;
+  }
   const response = await passthroughServe({
     c,
     ctx,
     sourceApi: '/images/generations',
-    model: request.model,
+    model: resolvedModel,
     bindingServesEndpoint: binding => binding.upstreamModel.endpoints.imagesGenerations !== undefined,
     call: (binding, opts) => {
       const { model: _model, ...body } = request.body;
@@ -91,11 +103,21 @@ export const imagesEdits = async (c: Context): Promise<Response> => {
   }
 
   ctx.dump?.requestedModel(modelRaw);
+  let resolvedModel: string;
+  try {
+    resolvedModel = await resolveAliasForPassthrough(modelRaw, 'image', ctx);
+  } catch (error) {
+    if (error instanceof AliasNoTargetAvailableError) {
+      ctx.dump?.error('gateway');
+      return finalizeGatewayResponse(ctx, passthroughApiError(c, error.message, 404));
+    }
+    throw error;
+  }
   const response = await passthroughServe({
     c,
     ctx,
     sourceApi: '/images/edits',
-    model: modelRaw,
+    model: resolvedModel,
     bindingServesEndpoint: binding => binding.upstreamModel.endpoints.imagesEdits !== undefined,
     call: (binding, opts) => {
       // ModelProvider.callImagesEdits takes ownership of the FormData and
diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts
new file mode 100644
index 000000000..9208d1492
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/apply.ts
@@ -0,0 +1,122 @@
+// Per-protocol rule overlay. Given a resolved alias's ChatAliasRules,
+// stamp the rule values onto the inbound IR. Alias rules are authoritative
+// — an existing IR field is OVERWRITTEN by a rule that names it. Rules the
+// target IR cannot express are silently dropped; the runtime never tries
+// to enum-gate a value against a model's advertised capabilities. The
+// catalog-warning surface lives in the dashboard.
+
+import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
+import type { ChatAliasRules } from '@floway-dev/protocols/common';
+import type { GeminiPayload } from '@floway-dev/protocols/gemini';
+import type { MessagesPayload } from '@floway-dev/protocols/messages';
+import type { ResponsesPayload } from '@floway-dev/protocols/responses';
+
+// Type guard: true when `rules.reasoning` is present, narrowing it to a non-optional
+// field so callers can destructure it. Its individual sub-keys may still be undefined.
+const hasReasoning = (rules: ChatAliasRules): rules is ChatAliasRules & { reasoning: NonNullable<ChatAliasRules['reasoning']> } =>
+  rules.reasoning !== undefined;
+
+// Chat Completions carries every knob as a flat top-level field: each rule key maps 1:1 onto `body`.
+export const applyChatRulesToChatCompletions = (body: ChatCompletionsPayload, rules: ChatAliasRules): void => {
+  if (hasReasoning(rules)) {
+    if (rules.reasoning.effort !== undefined) body.reasoning_effort = rules.reasoning.effort;
+    if (rules.reasoning.budget_tokens !== undefined) body.thinking_budget = rules.reasoning.budget_tokens;
+    if (rules.reasoning.adaptive !== undefined) body.adaptive_thinking = rules.reasoning.adaptive;
+    if (rules.reasoning.summary !== undefined) body.reasoning_summary = rules.reasoning.summary;
+  }
+  if (rules.verbosity !== undefined) body.verbosity = rules.verbosity;
+  if (rules.serviceTier !== undefined) body.service_tier = rules.serviceTier;
+};
+
+export const applyChatRulesToResponses = (body: ResponsesPayload, rules: ChatAliasRules): void => {
+  if (hasReasoning(rules)) {
+    const { effort, budget_tokens: budgetTokens, adaptive, summary } = rules.reasoning;
+    // `effort` / `summary` live on the nested `reasoning` object; write them into
+    // a copy so sibling keys already present on the inbound body survive.
+    if (effort !== undefined || summary !== undefined) {
+      const merged = { ...body.reasoning };
+      if (effort !== undefined) merged.effort = effort;
+      if (summary !== undefined) merged.summary = summary;
+      body.reasoning = merged;
+    }
+    if (budgetTokens !== undefined) body.thinking_budget = budgetTokens;
+    if (adaptive !== undefined) body.adaptive_thinking = adaptive;
+  }
+  // Verbosity is nested under `text`; the service tier is a flat field.
+  const { verbosity, serviceTier } = rules;
+  if (verbosity !== undefined) body.text = { ...body.text, verbosity };
+  if (serviceTier !== undefined) body.service_tier = serviceTier;
+};
+
+export const applyChatRulesToMessages = (body: MessagesPayload, rules: ChatAliasRules): void => {
+  if (hasReasoning(rules)) {
+    const { effort, budget_tokens, adaptive } = rules.reasoning;
+    // Anthropic stores explicit effort in `output_config.effort`; budget /
+    // adaptive ride on `thinking.*`. They are written separately so both can be
+    // set in one overlay (effort fixed + budget pinned, e.g.) without clobbering.
+    if (effort !== undefined) body.output_config = { ...body.output_config, effort };
+    if (adaptive === true) {
+      // Adaptive wins over any budget, including one already on the inbound
+      // body: strip `budget_tokens` so it is not sent with `type: 'adaptive'`.
+      const { budget_tokens: _staleBudget, ...thinking } = { ...body.thinking };
+      body.thinking = { ...thinking, type: 'adaptive' };
+    } else if (budget_tokens !== undefined) {
+      body.thinking = { ...body.thinking, type: 'enabled', budget_tokens };
+    }
+  }
+  if (rules.verbosity !== undefined) body.verbosity = rules.verbosity;
+  if (rules.serviceTier !== undefined) {
+    // The cross-protocol bridge in translate maps `speed: 'fast'` ↔
+    // `service_tier: 'fast'`; on a native Messages target the alias rule
+    // `serviceTier: 'fast'` lands on `speed` so the upstream sees Fast Mode
+    // through its native field. Other tier values pass through on
+    // `service_tier` since Messages's native enum (`auto`/`standard_only`)
+    // doesn't model them.
+    if (rules.serviceTier === 'fast') {
+      body.speed = 'fast';
+    } else {
+      body.service_tier = rules.serviceTier;
+    }
+  }
+};
+
+// Lookup from the discrete `ReasoningEffort` presets to Gemini's `thinkingLevel`
+// values: `none` becomes `minimal`, the other four keep their names. An effort
+// with no entry here yields `undefined` on lookup — Gemini's wire reads from a
+// fixed enum and an unknown tier would just be rejected upstream.
+const GEMINI_THINKING_LEVEL_BY_EFFORT: Record<string, 'minimal' | 'low' | 'medium' | 'high' | 'xhigh'> = {
+  none: 'minimal',
+  low: 'low',
+  medium: 'medium',
+  high: 'high',
+  xhigh: 'xhigh',
+};
+
+// Overlay alias rules onto a Gemini payload, mutating `body` in place. Every
+// write lands under `generationConfig`, which is replaced by a shallow copy
+// on each update rather than mutated.
+export const applyChatRulesToGemini = (body: GeminiPayload, rules: ChatAliasRules): void => {
+  if (hasReasoning(rules)) {
+    const { effort, budget_tokens: budgetTokens, adaptive } = rules.reasoning;
+    // All three reasoning controls share one `thinkingConfig` sub-object; start
+    // from a copy of the inbound one so keys no rule names are carried over.
+    const thinkingConfig = { ...body.generationConfig?.thinkingConfig };
+    // Adaptive wins and is encoded as budget=-1 (Gemini's adaptive sentinel);
+    // otherwise an explicit budget, when given, pins the count.
+    const budget = adaptive === true ? -1 : budgetTokens;
+    if (budget !== undefined) thinkingConfig.thinkingBudget = budget;
+    // Effort selects the level preset; efforts absent from the table are skipped.
+    const level = effort === undefined ? undefined : GEMINI_THINKING_LEVEL_BY_EFFORT[effort];
+    if (level !== undefined) thinkingConfig.thinkingLevel = level;
+    // Leave `generationConfig` alone when there is nothing to write.
+    if (Object.keys(thinkingConfig).length > 0) {
+      body.generationConfig = { ...body.generationConfig, thinkingConfig };
+    }
+  }
+  // Verbosity and service tier are direct `generationConfig` keys. Each write
+  // spreads the current value first, so a `thinkingConfig` stored above (and
+  // any inbound keys) is kept.
+  const { verbosity, serviceTier } = rules;
+  if (verbosity !== undefined) body.generationConfig = { ...body.generationConfig, verbosity };
+  if (serviceTier !== undefined) body.generationConfig = { ...body.generationConfig, serviceTier };
+};
diff --git a/packages/gateway/src/data-plane/model-aliases/apply_test.ts b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
new file mode 100644
index 000000000..72d4f6b35
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
@@ -0,0 +1,198 @@
+// Behavioral coverage for the per-protocol rule overlay. Each protocol's
+// apply helper is exercised against an inbound payload IR; alias rules are
+// authoritative — an existing IR field is OVERWRITTEN by a matching rule
+// — and rules the IR cannot express are silently dropped.
+
+import { test } from 'vitest';
+
+import { applyChatRulesToChatCompletions, applyChatRulesToGemini, applyChatRulesToMessages, applyChatRulesToResponses } from './apply.ts';
+import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
+import type { GeminiPayload } from '@floway-dev/protocols/gemini';
+import type { MessagesPayload } from '@floway-dev/protocols/messages';
+import type { ResponsesPayload } from '@floway-dev/protocols/responses';
+import { assertEquals } from '@floway-dev/test-utils';
+
+const ccPayload = (overrides: Partial<ChatCompletionsPayload> = {}): ChatCompletionsPayload => ({
+  model: 'gpt-5.4',
+  messages: [{ role: 'user', content: 'hi' }],
+  ...overrides, // spread last: caller-supplied fields replace the defaults above
+});
+
+const resPayload = (overrides: Partial<ResponsesPayload> = {}): ResponsesPayload => ({
+  model: 'gpt-5.4',
+  input: 'hi',
+  ...overrides, // spread last: caller-supplied fields replace the defaults above
+});
+
+const msgPayload = (overrides: Partial<MessagesPayload> = {}): MessagesPayload => ({
+  model: 'claude-opus-4-7',
+  max_tokens: 32,
+  messages: [{ role: 'user', content: 'hi' }],
+  ...overrides, // spread last: caller-supplied fields replace the defaults above
+});
+
+const gemPayload = (overrides: Partial<GeminiPayload> = {}): GeminiPayload => ({
+  contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
+  ...overrides, // spread last: caller-supplied fields replace the defaults above
+});
+
+// ── Chat Completions overlay ──
+
+test('chat-completions: empty rules leave the payload unchanged', () => {
+  const req = ccPayload({ reasoning_effort: 'high', verbosity: 'low', service_tier: 'priority' });
+  applyChatRulesToChatCompletions(req, {});
+  assertEquals(req.reasoning_effort, 'high');
+  assertEquals(req.verbosity, 'low');
+  assertEquals(req.service_tier, 'priority');
+});
+
+test('chat-completions: rules stamp every supported field onto the IR', () => {
+  const req = ccPayload();
+  applyChatRulesToChatCompletions(req, {
+    reasoning: { effort: 'high', budget_tokens: 1024, adaptive: true, summary: 'detailed' },
+    verbosity: 'low',
+    serviceTier: 'priority',
+  });
+  assertEquals(req.reasoning_effort, 'high');
+  assertEquals(req.thinking_budget, 1024);
+  assertEquals(req.adaptive_thinking, true);
+  assertEquals(req.reasoning_summary, 'detailed');
+  assertEquals(req.verbosity, 'low');
+  assertEquals(req.service_tier, 'priority');
+});
+
+test('chat-completions: alias rules overwrite existing IR fields', () => {
+  const req = ccPayload({ reasoning_effort: 'low', verbosity: 'high', service_tier: 'default' });
+  applyChatRulesToChatCompletions(req, {
+    reasoning: { effort: 'xhigh' },
+    verbosity: 'low',
+    serviceTier: 'priority',
+  });
+  assertEquals(req.reasoning_effort, 'xhigh');
+  assertEquals(req.verbosity, 'low');
+  assertEquals(req.service_tier, 'priority');
+});
+
+// ── Responses overlay ──
+
+test('responses: empty rules leave the payload unchanged', () => {
+  const req = resPayload({ reasoning: { effort: 'high' }, text: { verbosity: 'low' }, service_tier: 'priority' });
+  applyChatRulesToResponses(req, {});
+  assertEquals(req.reasoning?.effort, 'high');
+  assertEquals(req.text?.verbosity, 'low');
+  assertEquals(req.service_tier, 'priority');
+});
+
+test('responses: rules stamp every supported field onto the IR', () => {
+  const req = resPayload();
+  applyChatRulesToResponses(req, {
+    reasoning: { effort: 'high', budget_tokens: 1024, adaptive: true, summary: 'concise' },
+    verbosity: 'medium',
+    serviceTier: 'flex',
+  });
+  assertEquals(req.reasoning?.effort, 'high');
+  assertEquals(req.reasoning?.summary, 'concise');
+  assertEquals(req.thinking_budget, 1024);
+  assertEquals(req.adaptive_thinking, true);
+  assertEquals(req.text?.verbosity, 'medium');
+  assertEquals(req.service_tier, 'flex');
+});
+
+test('responses: alias rules overwrite existing reasoning + service_tier fields', () => {
+  const req = resPayload({ reasoning: { effort: 'low', summary: 'auto' }, service_tier: 'default', text: { verbosity: 'high' } });
+  applyChatRulesToResponses(req, {
+    reasoning: { effort: 'xhigh', summary: 'detailed' },
+    verbosity: 'low',
+    serviceTier: 'priority',
+  });
+  assertEquals(req.reasoning?.effort, 'xhigh');
+  assertEquals(req.reasoning?.summary, 'detailed');
+  assertEquals(req.text?.verbosity, 'low');
+  assertEquals(req.service_tier, 'priority');
+});
+
+// ── Messages overlay ──
+
+test('messages: empty rules leave the payload unchanged', () => {
+  const req = msgPayload({ output_config: { effort: 'high' }, thinking: { type: 'enabled', budget_tokens: 512 }, speed: 'fast' });
+  applyChatRulesToMessages(req, {});
+  assertEquals(req.output_config?.effort, 'high');
+  assertEquals(req.thinking?.budget_tokens, 512);
+  assertEquals(req.speed, 'fast');
+});
+
+test('messages: effort lands on output_config, budget+adaptive land on thinking', () => {
+  const req = msgPayload();
+  applyChatRulesToMessages(req, {
+    reasoning: { effort: 'high', budget_tokens: 2048 },
+    verbosity: 'low',
+  });
+  assertEquals(req.output_config?.effort, 'high');
+  assertEquals(req.thinking?.type, 'enabled');
+  assertEquals(req.thinking?.budget_tokens, 2048);
+  assertEquals(req.verbosity, 'low');
+});
+
+test('messages: adaptive=true sets thinking.type=adaptive and ignores budget_tokens', () => {
+  const req = msgPayload();
+  applyChatRulesToMessages(req, { reasoning: { adaptive: true, budget_tokens: 4096 } });
+  assertEquals(req.thinking?.type, 'adaptive');
+});
+
+test('messages: serviceTier=fast maps to speed=fast (cross-protocol bridge)', () => {
+  const req = msgPayload();
+  applyChatRulesToMessages(req, { serviceTier: 'fast' });
+  assertEquals(req.speed, 'fast');
+  assertEquals(req.service_tier, undefined);
+});
+
+test('messages: non-fast serviceTier lands on service_tier directly', () => {
+  const req = msgPayload();
+  applyChatRulesToMessages(req, { serviceTier: 'priority' });
+  assertEquals(req.service_tier, 'priority');
+  assertEquals(req.speed, undefined);
+});
+
+test('messages: alias rules overwrite existing thinking + output_config fields', () => {
+  const req = msgPayload({ output_config: { effort: 'low' }, thinking: { type: 'enabled', budget_tokens: 100 } });
+  applyChatRulesToMessages(req, { reasoning: { effort: 'xhigh', budget_tokens: 9999 } });
+  assertEquals(req.output_config?.effort, 'xhigh');
+  assertEquals(req.thinking?.budget_tokens, 9999);
+});
+
+// ── Gemini overlay ──
+
+test('gemini: empty rules leave the payload unchanged', () => {
+  const req = gemPayload({ generationConfig: { thinkingConfig: { thinkingBudget: 256 }, verbosity: 'low' } });
+  applyChatRulesToGemini(req, {});
+  assertEquals(req.generationConfig?.thinkingConfig?.thinkingBudget, 256);
+  assertEquals(req.generationConfig?.verbosity, 'low');
+});
+
+test('gemini: effort maps to thinkingLevel; budget lands on thinkingBudget', () => {
+  const req = gemPayload();
+  applyChatRulesToGemini(req, {
+    reasoning: { effort: 'high', budget_tokens: 1024 },
+    verbosity: 'medium',
+    serviceTier: 'flex',
+  });
+  assertEquals(req.generationConfig?.thinkingConfig?.thinkingLevel, 'high');
+  assertEquals(req.generationConfig?.thinkingConfig?.thinkingBudget, 1024);
+  assertEquals(req.generationConfig?.verbosity, 'medium');
+  assertEquals(req.generationConfig?.serviceTier, 'flex');
+});
+
+test('gemini: adaptive=true encodes thinkingBudget=-1 and overrides any budget_tokens', () => {
+  const req = gemPayload();
+  applyChatRulesToGemini(req, { reasoning: { adaptive: true, budget_tokens: 9999 } });
+  assertEquals(req.generationConfig?.thinkingConfig?.thinkingBudget, -1);
+});
+
+test('gemini: alias rules overwrite existing generationConfig fields', () => {
+  const req = gemPayload({ generationConfig: { thinkingConfig: { thinkingBudget: 100, thinkingLevel: 'low' }, verbosity: 'high', serviceTier: 'default' } });
+  applyChatRulesToGemini(req, { reasoning: { effort: 'xhigh', budget_tokens: 2048 }, verbosity: 'low', serviceTier: 'priority' });
+  assertEquals(req.generationConfig?.thinkingConfig?.thinkingLevel, 'xhigh');
+  assertEquals(req.generationConfig?.thinkingConfig?.thinkingBudget, 2048);
+  assertEquals(req.generationConfig?.verbosity, 'low');
+  assertEquals(req.generationConfig?.serviceTier, 'priority');
+});
diff --git a/packages/gateway/src/data-plane/model-aliases/resolve.ts b/packages/gateway/src/data-plane/model-aliases/resolve.ts
new file mode 100644
index 000000000..6ea5c73c0
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/resolve.ts
@@ -0,0 +1,132 @@
+// Alias resolver. Runs once per request, above prefix routing. The target
+// string it returns is fed verbatim back into the existing prefix-router
+// (enumerateModelInterpretations → resolveModelForProvider); alias names
+// never re-enter the alias layer, so recursion is impossible by
+// construction and the shadow-the-real-model pattern (an alias whose first
+// target is its own name) Just Works.
+
+import { createPerRequestFetcher } from '../../dial/per-request.ts';
+import type { ModelAliasesRepo, ModelAliasRecord } from '../../repo/types.ts';
+import { collectInterpretationOutcomes, enumerateModelInterpretations, listModelProviders } from '../providers/registry.ts';
+import type { BackgroundScheduler } from '@floway-dev/platform';
+import type { AliasKind, AliasRules, ModelEndpointKey } from '@floway-dev/protocols/common';
+
+// Endpoint family the inbound request belongs to. Mirrors `AliasKind` but
+// named in the data-plane vocabulary so the resolver argument site reads as
+// "this is the request's endpoint group", not "this is some alias".
+export type AliasEndpointKind = AliasKind;
+
+// `ModelEndpoints` keys each inbound `AliasEndpointKind` accepts. A target row
+// counts as routable when it resolves to a binding whose `endpoints` map has at
+// least one of these keys; for chat, the source serve picks the actual upstream
+// target API when it runs. NOTE(review): no Gemini-specific key is listed under
+// `chat` — confirm Gemini-protocol traffic is served through one of these three.
+const ENDPOINTS_FOR_KIND: Record<AliasEndpointKind, readonly ModelEndpointKey[]> = {
+  chat: ['chatCompletions', 'responses', 'messages'],
+  embedding: ['embeddings'],
+  image: ['imagesGenerations', 'imagesEdits'],
+};
+
+export interface AliasResolution {
+  readonly targetModelId: string;
+  readonly rules: AliasRules;
+  // Original alias name, for the `x-floway-alias` response header and dump
+  // attribution.
+  readonly aliasName: string;
+}
+
+// Raised when the alias name matched a record but none of its targets resolves
+// to an enabled upstream binding exposing the inbound endpoint. Each protocol's
+// serve seam catches it and answers 404 in that protocol's own error envelope;
+// `aliasName` / `targetCount` stay readable as fields for structured reporting.
+export class AliasNoTargetAvailableError extends Error {
+  readonly aliasName: string;
+  readonly targetCount: number;
+  constructor(aliasName: string, targetCount: number) {
+    const detail = `alias '${aliasName}' has ${targetCount} target(s); none currently map to an enabled upstream binding`;
+    super(detail);
+    this.name = 'AliasNoTargetAvailableError';
+    this.aliasName = aliasName;
+    this.targetCount = targetCount;
+  }
+}
+
+// Adapter from the thrown error to the `alias-no-target-available` failure
+// shape, so a chat serve can pass it to its existing failure renderer rather
+// than branching on the error class. Copies the two fields across; the message
+// text itself is not carried.
+export const aliasFailureFromError = (error: AliasNoTargetAvailableError): { kind: 'alias-no-target-available'; aliasName: string; targetCount: number } => {
+  const { aliasName, targetCount } = error;
+  return { kind: 'alias-no-target-available', aliasName, targetCount };
+};
+
+interface ResolveAliasArgs {
+  readonly modelName: string;
+  readonly endpointKind: AliasEndpointKind;
+  // Upstream cap intersected from the per-user + per-api-key whitelists.
+  // null means unrestricted; matches the same parameter on
+  // `enumerateProviderCandidates` / `listModelProviders`.
+  readonly upstreamIds: readonly string[] | null;
+  readonly scheduler: BackgroundScheduler;
+  readonly currentColo: string;
+  // Injected so tests can hand in a stub; the per-request ctx already owns
+  // a concrete one via `getRepo().modelAliases`.
+  readonly repo: ModelAliasesRepo;
+}
+
+// Yes/no probe for one alias target: does `targetModelId` currently resolve to a
+// binding exposing at least one endpoint accepted for `endpointKind`? Mirrors the
+// resolution path `enumerateProviderCandidates` takes, reduced to a boolean.
+const candidateIsRoutable = async (
+  targetModelId: string,
+  endpointKind: AliasEndpointKind,
+  upstreamIds: readonly string[] | null,
+  scheduler: BackgroundScheduler,
+  currentColo: string,
+): Promise<boolean> => {
+  const fetcher = await createPerRequestFetcher(currentColo);
+  const providers = await listModelProviders(upstreamIds);
+  if (providers.length === 0) return false;
+  const outcomes = await collectInterpretationOutcomes(enumerateModelInterpretations(targetModelId, providers), fetcher, scheduler);
+  const wantedKeys = ENDPOINTS_FOR_KIND[endpointKind];
+  for (const { resolved } of outcomes.resolutions) {
+    const { endpoints } = resolved.binding.upstreamModel;
+    if (wantedKeys.some(key => endpoints[key] !== undefined)) return true;
+  }
+  return false;
+};
+
+// Availability is probed once per target, with all probes in flight together.
+// Survivors keep `record.targets` order, so selection=first-available stays
+// deterministic and selection=random draws only from routable targets.
+const buildAvailablePool = async (
+  record: ModelAliasRecord,
+  endpointKind: AliasEndpointKind,
+  upstreamIds: readonly string[] | null,
+  scheduler: BackgroundScheduler,
+  currentColo: string,
+): Promise<ModelAliasRecord['targets']> => {
+  const probes = record.targets.map(({ target_model_id: id }) => candidateIsRoutable(id, endpointKind, upstreamIds, scheduler, currentColo));
+  const routable = await Promise.all(probes);
+  return record.targets.filter((_target, position) => routable[position] === true);
+};
+
+export const resolveAlias = async (args: ResolveAliasArgs): Promise<AliasResolution | null> => {
+  const { modelName, endpointKind, upstreamIds, scheduler, currentColo, repo } = args;
+  // No record under this name: return null so the caller knows it is not an alias.
+  const record = await repo.getByName(modelName);
+  if (!record) return null;
+
+  // Kind-mismatch is silent: the literal string falls through to prefix
+  // routing, which will 404 on its own if nothing in the catalog matches.
+  if (record.kind !== endpointKind) return null;
+
+  const pool = await buildAvailablePool(record, endpointKind, upstreamIds, scheduler, currentColo);
+  const index = record.selection === 'first-available' ? 0 : Math.floor(Math.random() * pool.length);
+  const picked = pool[index];
+  // `picked` is undefined exactly when the pool is empty; guarding on the element
+  // (rather than `pool.length`) also narrows the indexed access for the compiler.
+  if (picked === undefined) throw new AliasNoTargetAvailableError(record.name, record.targets.length);
+
+  return { targetModelId: picked.target_model_id, rules: picked.rules, aliasName: record.name };
+};
diff --git a/packages/gateway/src/data-plane/model-aliases/resolve_test.ts b/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
new file mode 100644
index 000000000..428b339d1
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
@@ -0,0 +1,241 @@
+// Behavioral coverage for the alias resolver. Mocks `providers/registry.ts`
+// + the per-request fetcher so each test can hand-script which target
+// model ids look routable; the resolver itself runs unmocked, so its
+// filter logic (kind match, availability, selection strategy) is the
+// thing under test.
+
+import { test, vi } from 'vitest';
+
+import type { ModelAliasRecord, ModelAliasesRepo } from '../../repo/types.ts';
+import { stubAuthedContext } from '../../test-helpers/gateway-ctx.ts';
+import type { ModelInterpretation, ProviderModelResolution } from '../providers/registry.ts';
+import { directFetcher } from '@floway-dev/provider';
+import { assert, assertEquals, assertRejects } from '@floway-dev/test-utils';
+
+// Shared state for the registry mock below. The mock stands in for the real
+// `listModelProviders` (which reads the global repo) and the real
+// `collectInterpretationOutcomes` (which goes through the per-request
+// fetcher cache), so each test decides which ids resolve and with which endpoints.
+const routableModels = new Map<string, { endpoints: Record<string, unknown> }>(); // keyed by lookup id; reset by setRoutable()
+const ALWAYS_ROUTABLE_ENDPOINTS = { chatCompletions: {}, responses: {}, messages: {} }; // chat-family keys only, no `embeddings`
+
+vi.mock('../providers/registry.ts', () => ({
+  listModelProviders: vi.fn(async () => [{ upstream: 'u_test', name: 'u_test', modelPrefix: null }]), // one fake provider
+  enumerateModelInterpretations: vi.fn((modelId: string, providers: readonly { upstream: string }[]): ModelInterpretation[] =>
+    providers.map(p => ({ provider: p, lookupId: modelId } as unknown as ModelInterpretation))), // one interpretation per provider, id passed through unchanged
+  collectInterpretationOutcomes: vi.fn(async (interpretations: readonly { provider: { upstream: string }; lookupId: string }[]) => ({
+    resolutions: interpretations
+      .filter(i => routableModels.has(i.lookupId)) // only ids a test registered in routableModels resolve
+      .map(i => ({
+        interpretation: i,
+        provider: i.provider,
+        resolved: {
+          id: i.lookupId,
+          model: { id: i.lookupId, endpoints: routableModels.get(i.lookupId)!.endpoints },
+          binding: { upstream: i.provider.upstream, upstreamModel: { id: i.lookupId, endpoints: routableModels.get(i.lookupId)!.endpoints } }, // same endpoint record on model and binding
+        } as unknown as ProviderModelResolution, // partial shape: only the fields populated above exist
+      })),
+    failedUpstreams: [], // the mock never reports an upstream failure
+  })),
+}));
+
+vi.mock('../../dial/per-request.ts', () => ({
+  createPerRequestFetcher: vi.fn(async () => () => directFetcher), // every upstream gets `directFetcher`
+}));
+
+const { resolveAlias, AliasNoTargetAvailableError } = await import('./resolve.ts'); // loaded after the vi.mock registrations above
+
+const stubRepoFor = (record: ModelAliasRecord | null): ModelAliasesRepo => ({ // in-memory repo holding at most one record
+  list: async () => (record === null ? [] : [record]),
+  getByName: async name => (record !== null && record.name === name ? structuredClone(record) : null), // clone: callers never share the fixture
+  insert: async () => { throw new Error('insert should not be called from resolver tests'); },
+  update: async () => { throw new Error('update should not be called from resolver tests'); },
+  delete: async () => false,
+  deleteAll: async () => {},
+});
+
+const aliasRecord = (overrides: Partial<ModelAliasRecord> = {}): ModelAliasRecord => ({ // default fixture: chat alias with one target
+  name: 'gpt-fast',
+  kind: 'chat',
+  selection: 'first-available',
+  displayName: null,
+  visibleInModelsList: true,
+  targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } }],
+  sortOrder: 0,
+  createdAt: '2026-06-26T00:00:00.000Z',
+  updatedAt: '2026-06-26T00:00:00.000Z',
+  ...overrides, // spread last so a test can replace any default above
+});
+
+const RESOLVE_DEFAULTS = { // resolver args shared by the tests; each test adds `modelName` and `repo`
+  endpointKind: 'chat' as const,
+  upstreamIds: null,
+  scheduler: () => {}, // no-op scheduler
+  currentColo: 'TEST',
+};
+
+const setRoutable = (...ids: string[]): void => { // replaces the routable set; no args empties the catalog
+  routableModels.clear();
+  ids.forEach(id => routableModels.set(id, { endpoints: ALWAYS_ROUTABLE_ENDPOINTS }));
+};
+
+// Reference the otherwise-unused `stubAuthedContext` import so the unused-import warning stays quiet.
+void stubAuthedContext;
+
+test('returns null when no alias matches the inbound name', async () => {
+  setRoutable('gpt-5.4'); // catalog is non-empty, so the null below comes from the repo miss
+  const result = await resolveAlias({
+    ...RESOLVE_DEFAULTS,
+    modelName: 'not-an-alias',
+    repo: stubRepoFor(null), // empty repo: getByName resolves null for every name
+  });
+  assertEquals(result, null);
+});
+
+test('returns the target and rules when kind matches and a single target is available', async () => {
+  setRoutable('gpt-5.4');
+  const result = await resolveAlias({
+    ...RESOLVE_DEFAULTS,
+    modelName: 'gpt-fast',
+    repo: stubRepoFor(aliasRecord()), // default fixture: 'gpt-fast' -> 'gpt-5.4' with a low-effort rule
+  });
+  assert(result !== null);
+  assertEquals(result.targetModelId, 'gpt-5.4');
+  assertEquals(result.aliasName, 'gpt-fast');
+  assertEquals(result.rules, { reasoning: { effort: 'low' } }); // rules come back verbatim from the picked target
+});
+
+test('returns null when the alias kind does not match the inbound endpoint group', async () => {
+  setRoutable('gpt-5.4');
+  const result = await resolveAlias({
+    ...RESOLVE_DEFAULTS,
+    endpointKind: 'embedding', // the fixture alias is kind 'chat'
+    modelName: 'gpt-fast',
+    repo: stubRepoFor(aliasRecord()),
+  });
+  assertEquals(result, null); // mismatch is a silent miss, not an error
+});
+
+test('throws AliasNoTargetAvailableError when the alias exists but no target is currently routable', async () => {
+  setRoutable(); // no ids marked routable: both targets fail the availability probe
+  await assertRejects(
+    () => resolveAlias({
+      ...RESOLVE_DEFAULTS,
+      modelName: 'gpt-fast',
+      repo: stubRepoFor(aliasRecord({
+        targets: [
+          { target_model_id: 'gpt-5.4', rules: {} },
+          { target_model_id: 'gpt-5.5', rules: {} },
+        ],
+      })),
+    }),
+    AliasNoTargetAvailableError, // expected error class
+    "alias 'gpt-fast' has 2 target(s); none currently map to an enabled upstream binding",
+  );
+});
+
+test('first-available skips unroutable rows and picks the first available, not the first listed', async () => {
+  setRoutable('gpt-5.5'); // only the middle target is routable; `gpt-5.4` is not in the catalog
+  const result = await resolveAlias({
+    ...RESOLVE_DEFAULTS,
+    modelName: 'gpt-fast',
+    repo: stubRepoFor(aliasRecord({
+      targets: [
+        { target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'high' } } },
+        { target_model_id: 'gpt-5.5', rules: { reasoning: { effort: 'low' } } },
+        { target_model_id: 'gpt-6', rules: {} }, // also not routable
+      ],
+    })),
+  });
+  assert(result !== null);
+  assertEquals(result.targetModelId, 'gpt-5.5');
+  assertEquals(result.rules, { reasoning: { effort: 'low' } }); // rules belong to the picked row, not the first row
+});
+
+test('random selection picks every available target across enough iterations', async () => {
+  setRoutable('a', 'b');
+  const seen = new Set<string>(); // distinct target ids observed so far
+  for (let i = 0; i < 100; i += 1) {
+    const result = await resolveAlias({
+      ...RESOLVE_DEFAULTS,
+      modelName: 'gpt-fast',
+      repo: stubRepoFor(aliasRecord({
+        selection: 'random',
+        targets: [
+          { target_model_id: 'a', rules: {} },
+          { target_model_id: 'b', rules: {} },
+        ],
+      })),
+    });
+    assert(result !== null);
+    seen.add(result.targetModelId);
+    if (seen.size === 2) break; // both targets observed; stop early
+  }
+  // Two targets, both routable, up to 100 draws: seeing only one of them
+  // has probability 2 * (1/2)^100. Treat fewer than two distinct picks as
+  // a real regression in the selection logic, not coincidence.
+  assertEquals(seen.size, 2);
+});
+
+test('shadow pattern: alias whose first target equals its own name picks the real model when present', async () => {
+  setRoutable('codex-auto-review'); // the real model IS in the catalog
+  const result = await resolveAlias({
+    ...RESOLVE_DEFAULTS,
+    modelName: 'codex-auto-review',
+    repo: stubRepoFor(aliasRecord({
+      name: 'codex-auto-review',
+      targets: [
+        { target_model_id: 'codex-auto-review', rules: {} }, // first target is the alias's own name
+        { target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } },
+      ],
+    })),
+  });
+  assert(result !== null);
+  assertEquals(result.targetModelId, 'codex-auto-review');
+  assertEquals(result.rules, {}); // the self-named row carries no rules
+});
+
+test('shadow pattern: alias falls back to the second target when the real model is not in the catalog', async () => {
+  setRoutable('gpt-5.4'); // only the fallback is routable
+  const result = await resolveAlias({
+    ...RESOLVE_DEFAULTS,
+    modelName: 'codex-auto-review',
+    repo: stubRepoFor(aliasRecord({
+      name: 'codex-auto-review',
+      targets: [
+        { target_model_id: 'codex-auto-review', rules: {} }, // not routable in this test
+        { target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } },
+      ],
+    })),
+  });
+  assert(result !== null);
+  assertEquals(result.targetModelId, 'gpt-5.4');
+  assertEquals(result.rules, { reasoning: { effort: 'low' } }); // fallback row's rules apply
+});
+
+test('embedding-kind alias accepts only embedding endpoints', async () => {
+  routableModels.clear(); // populate the map directly: setRoutable() cannot assign per-model endpoints
+  routableModels.set('text-embedding-3', { endpoints: { embeddings: {} } }); // embeddings endpoint only
+  routableModels.set('gpt-5.4', { endpoints: ALWAYS_ROUTABLE_ENDPOINTS }); // chat-family endpoints only
+
+  const okResult = await resolveAlias({
+    ...RESOLVE_DEFAULTS,
+    endpointKind: 'embedding',
+    modelName: 'embed-fast',
+    repo: stubRepoFor(aliasRecord({ name: 'embed-fast', kind: 'embedding', targets: [{ target_model_id: 'text-embedding-3', rules: {} }] })),
+  });
+  assert(okResult !== null);
+  assertEquals(okResult.targetModelId, 'text-embedding-3');
+
+  await assertRejects(
+    () => resolveAlias({
+      ...RESOLVE_DEFAULTS,
+      endpointKind: 'embedding',
+      modelName: 'embed-fast',
+      // gpt-5.4 is in the catalog but exposes no `embeddings` endpoint, so
+      // it cannot satisfy an embedding-kind alias.
+      repo: stubRepoFor(aliasRecord({ name: 'embed-fast', kind: 'embedding', targets: [{ target_model_id: 'gpt-5.4', rules: {} }] })),
+    }),
+    AliasNoTargetAvailableError,
+  );
+});
diff --git a/packages/gateway/src/data-plane/model-aliases/serve-integration.ts b/packages/gateway/src/data-plane/model-aliases/serve-integration.ts
new file mode 100644
index 000000000..f14308403
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/serve-integration.ts
@@ -0,0 +1,102 @@
+// Per-protocol alias preamble helpers. Each protocol's serve calls
+// `resolveAndApplyAliasFor<Protocol>` immediately after parsing the inbound
+// payload and before `enumerateProviderCandidates`. The helper:
+//   1. looks up the inbound model name in the alias repo,
+//   2. on a hit whose kind matches `chat`, picks one target row,
+//   3. stamps the row's rules onto the IR (overwriting any matching field),
+//   4. stages the `x-floway-alias` response header on the gateway ctx, and
+//   5. writes the resolved target_model_id into `payload.model` (Gemini,
+//      whose id lives on the URL path, returns it to the caller instead).
+//
+// On a miss (the name is not an alias of kind=chat) the payload is left
+// untouched; callers continue with the literal name and the catalog's miss
+// surface renders if nothing matches. Throws `AliasNoTargetAvailableError`
+// when the alias exists but every target is currently unroutable — caught
+// at the serve seam and rendered via the protocol's failure renderer.
+
+import { applyChatRulesToChatCompletions, applyChatRulesToGemini, applyChatRulesToMessages, applyChatRulesToResponses } from './apply.ts';
+import { resolveAlias, type AliasResolution } from './resolve.ts';
+import { getRepo } from '../../repo/index.ts';
+import type { GatewayCtx } from '../chat/shared/gateway-ctx.ts';
+import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
+import type { ChatAliasRules } from '@floway-dev/protocols/common';
+import type { GeminiPayload } from '@floway-dev/protocols/gemini';
+import type { MessagesPayload } from '@floway-dev/protocols/messages';
+import type { ResponsesPayload } from '@floway-dev/protocols/responses';
+
+const ALIAS_RESPONSE_HEADER = 'x-floway-alias'; // response header set to the alias name whenever an alias resolves
+
+// Shared front half of every chat-protocol helper: runs the resolver with
+// `endpointKind: 'chat'` and, on a hit, stages the alias name under the
+// alias response header. The resolution (or null) is handed back as-is so
+// each caller overlays rules itself; kind filtering is the resolver's job.
+const resolveChatAlias = async (modelName: string, ctx: GatewayCtx): Promise<AliasResolution | null> => {
+  const { upstreamIds, backgroundScheduler: scheduler, currentColo, responseHeaders } = ctx;
+  const hit = await resolveAlias({
+    modelName,
+    endpointKind: 'chat',
+    upstreamIds,
+    scheduler,
+    currentColo,
+    repo: getRepo().modelAliases,
+  });
+  if (hit !== null) responseHeaders.set(ALIAS_RESPONSE_HEADER, hit.aliasName);
+  return hit;
+};
+
+// Narrows the generic `AliasRules` union to `ChatAliasRules` for the four
+// `resolveAndApplyAliasFor*` helpers before they call a per-protocol
+// overlay. This is a compile-time cast with no runtime check; it relies on
+// every chat-kind alias target carrying `ChatAliasRules` per the wire schema.
+const asChatRules = (rules: AliasResolution['rules']): ChatAliasRules => rules as ChatAliasRules;
+
+export const resolveAndApplyAliasForChatCompletions = async (payload: ChatCompletionsPayload, ctx: GatewayCtx): Promise<void> => {
+  const hit = await resolveChatAlias(payload.model, ctx); // also stages the alias header on a hit
+  if (hit === null) return; // not a chat alias: payload stays exactly as sent
+  payload.model = hit.targetModelId; // in-place swap to the picked target id
+  applyChatRulesToChatCompletions(payload, asChatRules(hit.rules));
+};
+
+export const resolveAndApplyAliasForResponses = async (payload: ResponsesPayload, ctx: GatewayCtx): Promise<void> => {
+  const hit = await resolveChatAlias(payload.model, ctx); // also stages the alias header on a hit
+  if (hit === null) return; // not a chat alias: payload stays exactly as sent
+  payload.model = hit.targetModelId; // in-place swap to the picked target id
+  applyChatRulesToResponses(payload, asChatRules(hit.rules));
+};
+
+export const resolveAndApplyAliasForMessages = async (payload: MessagesPayload, ctx: GatewayCtx): Promise<void> => {
+  const hit = await resolveChatAlias(payload.model, ctx); // also stages the alias header on a hit
+  if (hit === null) return; // not a chat alias: payload stays exactly as sent
+  payload.model = hit.targetModelId; // in-place swap to the picked target id
+  applyChatRulesToMessages(payload, asChatRules(hit.rules));
+};
+
+// Gemini variant: the model id arrives from the URL path rather than the
+// body, so it is passed in explicitly and the effective id is returned —
+// the alias target on a hit, the original `model` otherwise. Rules are
+// still overlaid onto `payload` in place.
+export const resolveAndApplyAliasForGemini = async (model: string, payload: GeminiPayload, ctx: GatewayCtx): Promise<string> => {
+  const hit = await resolveChatAlias(model, ctx);
+  if (hit === null) return model;
+  applyChatRulesToGemini(payload, asChatRules(hit.rules));
+  return hit.targetModelId;
+};
+
+// Alias resolution for passthrough endpoints (embeddings, images). No rule
+// overlay happens here: the resolver runs only to swap in the target id and
+// stage the alias response header. Yields the target_model_id on a hit and
+// the inbound `model` unchanged on a miss; `AliasNoTargetAvailableError`
+// from the resolver propagates when the alias has no routable target.
+export const resolveAliasForPassthrough = async (model: string, endpointKind: 'embedding' | 'image', ctx: GatewayCtx): Promise<string> => {
+  const { upstreamIds, backgroundScheduler: scheduler, currentColo } = ctx;
+  const hit = await resolveAlias({
+    modelName: model,
+    endpointKind,
+    upstreamIds,
+    scheduler,
+    currentColo,
+    repo: getRepo().modelAliases,
+  });
+  if (hit !== null) ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, hit.aliasName);
+  return hit === null ? model : hit.targetModelId;
+};
diff --git a/packages/gateway/src/test-helpers/gateway-ctx.ts b/packages/gateway/src/test-helpers/gateway-ctx.ts
index 43358b7fb..782d87370 100644
--- a/packages/gateway/src/test-helpers/gateway-ctx.ts
+++ b/packages/gateway/src/test-helpers/gateway-ctx.ts
@@ -24,5 +24,6 @@ export const mockGatewayCtx = (overrides: Partial<GatewayCtx> = {}): GatewayCtx
   dump: null,
   backgroundScheduler: promise => { void promise; },
   requestStartedAt: 0,
+  responseHeaders: new Headers(),
   ...overrides,
 });

From 67ec5a2380de8778664abd4627585568c7e469e4 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 18:18:46 +0800
Subject: [PATCH 051/170] feat(aliases): /v1/models listing + protocol shape
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Make operator-defined aliases visible in every model-listing surface
without changing how they resolve at request time. The path is the
same one a real catalog row takes — `PublicModel` + an extra
`aliasedFrom` block that an alias-aware UI uses to render the
alias-of relationship inline.

Protocol surface
- `PublicModelAliasedFrom` carries `name`, `kind`, `selection`, and
  every configured `targets[]` row — including targets that the live
  catalog can not currently serve. The dashboard needs the full
  configuration so it can warn about unavailable targets without a
  second control-plane round trip.
- Pure display helpers move next to `ChatAliasRules`:
  `composeAliasDisplayName(targetId, rules)`,
  `formatAliasRulesInline(rules)`, `formatAliasRuleBadges(rules)`.
  All three walk one ordered builder so an alias configured with
  `effort + verbosity` reads identically as a badge sequence, as a
  comma-joined caption, and as the parenthesized suffix on the
  derived display name. Boolean toggles render as dedicated words
  (`adaptive`, `non-adaptive`, `mandatory reasoning`) so the inline
  prose stays readable when several rules are set together.

Core synthesis
- `synthesizeListedAliases({ aliases, realModels })` is a pure
  transformation: one `PublicModel` per visible alias, hidden aliases
  (`visible_in_models_list = false`) dropped, entries sorted by
  `(sort_order, name)`.
- Single-target aliases inherit their sole target's `chat`/`cost`
  metadata, narrowed by rules — a fixed `reasoning.effort` collapses
  the reported effort set to that one value, a fixed budget collapses
  the reported range to a single point.
- Multi-target aliases use the INTERSECTION across every available
  target. A capability survives only when every available target
  declares it, so whichever target gets picked at request time is
  guaranteed to support the catalog's claims. Unavailable targets
  (not in `realModels`, or kind-mismatched) are excluded from the
  intersection but still appear in `aliasedFrom.targets`.
- Aliases with no available targets still emit a row — capability
  metadata stays absent so clients see no inherited claims; the
  dashboard's no-target warning explains the situation.

Wire-up
- `loadModels` now loads the alias repo alongside `getModels` and
  merges the two lists. Alias entries follow real entries; an alias
  whose `name` collides with a real id replaces the real entry in
  the final catalog, because two entries with the same `id` would
  break OpenAI client deduplication and the operator added the alias
  deliberately.
- The same merge is applied at `/api/models` (dashboard, where alias
  rows surface an empty `upstreams: []` since they have no upstream
  binding of their own) and `/v1beta/models` (Gemini, narrowed to
  chat-kind entries).

Tests
- `aliases_test.ts` exercises every helper, including the
  parenthesization rule (no rule = bare id, any rule = parens) and
  the canonical field order.
- `alias-listing_test.ts` covers single-target narrowing,
  multi-target intersection (including drop-on-mismatch and
  unavailable-target subsets), the visibility gate, kind-mismatch
  exclusion, the no-target row, the operator-set display-name
  override, the alias-vs-real id collision, and the
  `(sort_order, name)` ordering.
- `serve_test.ts` gains an integration case that asserts the merged
  payload at `/v1/models` carries `aliasedFrom`, the alias entry
  replaces the colliding real id, and hidden aliases stay out.
---
 .../src/control-plane/models/routes.ts        |  26 ++-
 .../src/data-plane/models/alias-listing.ts    | 201 ++++++++++++++++++
 .../data-plane/models/alias-listing_test.ts   | 186 ++++++++++++++++
 .../gateway/src/data-plane/models/gemini.ts   |  33 ++-
 .../gateway/src/data-plane/models/load.ts     |  21 +-
 .../src/data-plane/models/load_test.ts        |   5 +
 .../gateway/src/data-plane/models/serve.ts    |   3 +-
 .../src/data-plane/models/serve_test.ts       |  92 ++++++++
 packages/protocols/src/common/aliases.ts      |  49 +++++
 packages/protocols/src/common/aliases_test.ts |  56 +++++
 packages/protocols/src/common/models.ts       |  18 ++
 11 files changed, 675 insertions(+), 15 deletions(-)
 create mode 100644 packages/gateway/src/data-plane/models/alias-listing.ts
 create mode 100644 packages/gateway/src/data-plane/models/alias-listing_test.ts
 create mode 100644 packages/protocols/src/common/aliases_test.ts

diff --git a/packages/gateway/src/control-plane/models/routes.ts b/packages/gateway/src/control-plane/models/routes.ts
index 3a6620496..752fe0105 100644
--- a/packages/gateway/src/control-plane/models/routes.ts
+++ b/packages/gateway/src/control-plane/models/routes.ts
@@ -1,10 +1,12 @@
 import type { Context } from 'hono';
 
+import { synthesizeListedAliases } from '../../data-plane/models/alias-listing.ts';
 import { toPublicModel } from '../../data-plane/models/load.ts';
 import { MODEL_LISTING_FAILURE_MESSAGE } from '../../data-plane/models/shared.ts';
 import { getModels } from '../../data-plane/providers/registry.ts';
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
+import { getRepo } from '../../repo/index.ts';
 import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
 import { getCurrentColo } from '../../runtime/runtime-info.ts';
 import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
@@ -15,7 +17,9 @@ import type { ResolvedModel, UpstreamProviderKind } from '@floway-dev/provider';
 // `upstreams` lists every provider binding for this model as { kind, id, name }
 // triples. A single model id can be served by mixed provider kinds (e.g. one
 // azure deployment + one custom upstream both expose `gpt-5.5`), so a flat
-// `provider`/`upstream_ids` split would misrepresent that.
+// `provider`/`upstream_ids` split would misrepresent that. Alias-synthesized
+// rows carry an empty list — they do not bind to an upstream directly; their
+// targets live under `aliasedFrom`.
 interface ControlPlaneModel extends PublicModel {
   upstreams: { kind: UpstreamProviderKind; id: string; name: string }[];
 }
@@ -36,12 +40,20 @@ export const controlPlaneModels = async (c: Context) => {
     // API key, so this resolves to the user's per-user upstream cap: a user who
     // has had an upstream removed must not see its models in the Models tab.
     const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
-    const models = await getModels(
-      effectiveUpstreamIdsFromContext(c),
-      fetcherForUpstream,
-      backgroundSchedulerFromContext(c),
-    );
-    const data = models.map(toControlPlaneModel);
+    const [models, aliases] = await Promise.all([
+      getModels(
+        effectiveUpstreamIdsFromContext(c),
+        fetcherForUpstream,
+        backgroundSchedulerFromContext(c),
+      ),
+      getRepo().modelAliases.list(),
+    ]);
+    const aliasEntries = synthesizeListedAliases({ aliases, realModels: models });
+    const aliasIds = new Set(aliasEntries.map(entry => entry.id));
+    const data: ControlPlaneModel[] = [
+      ...models.filter(model => !aliasIds.has(model.id)).map(toControlPlaneModel),
+      ...aliasEntries.map(entry => ({ ...entry, upstreams: [] })),
+    ];
     const response: ControlPlaneModelsResponse = {
       object: 'list',
       has_more: false,
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
new file mode 100644
index 000000000..70d97fdb3
--- /dev/null
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -0,0 +1,201 @@
+// Synthesizes the alias entries that join the real-model catalog inside
+// `/v1/models`. One PublicModel per visible alias — hidden aliases
+// (visible_in_models_list = false) are dropped from the listing while
+// remaining routable. The synthesized entry carries an `aliasedFrom` block
+// so an alias-aware UI (today: the dashboard) can render the alias-of
+// relationship without a second round trip.
+//
+// Capability metadata is the safe lower bound for the inbound request:
+//   • exactly one available target → that target's metadata, narrowed by
+//     its `rules` (a fixed reasoning effort collapses the reported effort
+//     set to that one value, a fixed budget collapses the reported range
+//     to a single point).
+//   • multi-target → the intersection across every currently-available
+//     target. A capability survives only when every target backing the
+//     alias declares it; whichever target gets picked at request time is
+//     then guaranteed to support whatever the catalog reported.
+//
+// "Available target" for intersection purposes means a target whose
+// `target_model_id` appears in `realModels` AND whose entry's `kind`
+// matches the alias's `kind`. Unavailable targets are excluded from the
+// intersection but still appear in `aliasedFrom.targets` so the dashboard
+// can show the full configuration.
+//
+// Collision: when an alias's `name` exactly equals a real model id, the
+// alias entry replaces the real entry in the final catalog. Two entries
+// with the same `id` would break OpenAI client deduplication; collapsing
+// to the alias entry preserves the operator's intent (the alias is the
+// reason both rows would have been present). The dashboard surfaces this
+// via a shadow warning in the alias editor; here it is purely a wire-shape
+// concern. The real entry is removed at the `loadModels` merge step.
+
+import type { ModelAliasRecord } from '../../repo/types.ts';
+import { composeAliasDisplayName } from '@floway-dev/protocols/common';
+import type { AliasTarget, ChatAliasRules, ChatModelInfo, PublicModel, PublicModelAliasedFrom } from '@floway-dev/protocols/common';
+import type { InternalModel } from '@floway-dev/provider';
+
+export interface ListedAliasInputs {
+  readonly aliases: readonly ModelAliasRecord[]; // all alias records; hidden ones are filtered out during synthesis
+  readonly realModels: readonly InternalModel[]; // live catalog used to decide which targets count as available
+}
+
+// Reads a target's rules as `ChatAliasRules`. This is a compile-time cast
+// with no runtime check: it leans on the repo guaranteeing that rule shape
+// matches the row's `kind` (chat rows carry `ChatAliasRules`; embedding /
+// image rows carry the empty record).
+const chatRules = (target: AliasTarget): ChatAliasRules => target.rules as ChatAliasRules;
+
+const intersectArrays = <T>(arrays: readonly (readonly T[])[]): T[] => {
+  // Keeps the first array's order (and its duplicates); no input arrays yields [].
+  const [first, ...rest] = arrays;
+  return first === undefined ? [] : first.filter(item => rest.every(list => list.includes(item)));
+};
+
+const intersectChat = (chats: readonly ChatModelInfo[]): ChatModelInfo | undefined => { // field-wise intersection; undefined when nothing survives
+  if (chats.length === 0) return undefined;
+  const result: ChatModelInfo = {};
+
+  const modalityChats = chats.filter(c => c.modalities !== undefined);
+  if (modalityChats.length === chats.length) { // modalities are reported only when every entry declares them
+    const input = intersectArrays(modalityChats.map(c => c.modalities!.input));
+    const output = intersectArrays(modalityChats.map(c => c.modalities!.output));
+    if (input.length > 0 || output.length > 0) result.modalities = { input, output };
+  }
+
+  const reasoningChats = chats.filter(c => c.reasoning !== undefined);
+  if (reasoningChats.length === chats.length) { // same all-or-nothing gate for the reasoning block
+    const reasoning: NonNullable<ChatModelInfo['reasoning']> = {};
+
+    const effortChats = reasoningChats.filter(c => c.reasoning!.effort !== undefined);
+    if (effortChats.length === reasoningChats.length) {
+      const supported = intersectArrays(effortChats.map(c => c.reasoning!.effort!.supported));
+      const defaults = new Set(effortChats.map(c => c.reasoning!.effort!.default));
+      // Intersection's `default` is the agreed value when every target
+      // names the same one and that value still survives the supported
+      // intersection; otherwise it falls back to `supported[0]`.
+      if (supported.length > 0) {
+        const agreedDefault = defaults.size === 1 ? [...defaults][0] : undefined;
+        reasoning.effort = agreedDefault !== undefined && supported.includes(agreedDefault)
+          ? { supported, default: agreedDefault }
+          : { supported, default: supported[0] };
+      }
+    }
+
+    const budgetChats = reasoningChats.filter(c => c.reasoning!.budget_tokens !== undefined);
+    if (budgetChats.length === reasoningChats.length) {
+      const mins = budgetChats.map(c => c.reasoning!.budget_tokens!.min).filter((v): v is number => v !== undefined);
+      const maxes = budgetChats.map(c => c.reasoning!.budget_tokens!.max).filter((v): v is number => v !== undefined);
+      const min = mins.length === budgetChats.length ? Math.max(...mins) : undefined; // tightest lower bound, kept only if every target declares `min`
+      const max = maxes.length === budgetChats.length ? Math.min(...maxes) : undefined; // NOTE(review): one target omitting a bound drops it for all — confirm undefined means "unbounded"
+      // Drop the budget block entirely when the intersected window is
+      // empty (every caller would otherwise see a contradictory range).
+      if (!(min !== undefined && max !== undefined && min > max)) {
+        const budget: NonNullable<NonNullable<ChatModelInfo['reasoning']>['budget_tokens']> = {};
+        if (min !== undefined) budget.min = min;
+        if (max !== undefined) budget.max = max;
+        if (min !== undefined || max !== undefined) reasoning.budget_tokens = budget;
+      }
+    }
+
+    const adaptiveAgreed = new Set(reasoningChats.map(c => c.reasoning!.adaptive)); // flag survives only on unanimous, defined agreement
+    if (adaptiveAgreed.size === 1) {
+      const value = [...adaptiveAgreed][0];
+      if (value !== undefined) reasoning.adaptive = value;
+    }
+    const mandatoryAgreed = new Set(reasoningChats.map(c => c.reasoning!.mandatory)); // same unanimity rule as `adaptive`
+    if (mandatoryAgreed.size === 1) {
+      const value = [...mandatoryAgreed][0];
+      if (value !== undefined) reasoning.mandatory = value;
+    }
+
+    if (Object.keys(reasoning).length > 0) result.reasoning = reasoning;
+  }
+
+  return Object.keys(result).length > 0 ? result : undefined; // an empty intersection reports no chat block at all
+};
+
+// Applies an alias target's reasoning rules to the chat metadata inherited
+// from that target. Everything the rules leave alone is carried over as-is.
+const narrowChatByRules = (chat: ChatModelInfo | undefined, target: AliasTarget): ChatModelInfo | undefined => {
+  const overlay = chat === undefined ? undefined : chatRules(target).reasoning;
+  // No metadata to narrow, or no reasoning rule: hand back the input itself.
+  if (chat === undefined || overlay === undefined) return chat;
+  // Rules are set but the model declares no reasoning block: plain copy,
+  // nothing is synthesized.
+  if (chat.reasoning === undefined) return { ...chat };
+  const { effort, budget_tokens: budget } = overlay;
+  return {
+    ...chat,
+    reasoning: {
+      ...chat.reasoning,
+      // A pinned effort becomes the only supported value and the default.
+      ...(effort !== undefined ? { effort: { supported: [effort], default: effort } } : {}),
+      // A pinned budget collapses the window to a single point.
+      ...(budget !== undefined ? { budget_tokens: { min: budget, max: budget } } : {}),
+    },
+  };
+};
+
+const deriveDisplayName = ({ displayName, name, targets }: ModelAliasRecord): string => {
+  if (displayName !== null) return displayName; // an operator-set name always wins
+  const [only, ...others] = targets; // composed name applies to single-target aliases only
+  return only !== undefined && others.length === 0 ? composeAliasDisplayName(only.target_model_id, only.rules) : name;
+};
+
+// Projects an alias record onto the `aliasedFrom` block. Targets pass through
+// unfiltered (unroutable ones included) so a consumer sees the full configuration.
+const buildAliasedFrom = ({ name, kind, selection, targets }: ModelAliasRecord): PublicModelAliasedFrom => ({
+  name,
+  kind,
+  selection,
+  targets,
+});
+
+// Builds the listing row for one alias. `realById` indexes the live catalog by
+// id; a target is available when its id is present and the model's kind matches.
+const synthesizeOne = (alias: ModelAliasRecord, realById: ReadonlyMap<InternalModel['id'], InternalModel>): PublicModel => {
+  const availableTargets = alias.targets
+    .map(target => ({ target, real: realById.get(target.target_model_id) }))
+    .filter((entry): entry is { target: AliasTarget; real: InternalModel } => entry.real !== undefined && entry.real.kind === alias.kind);
+
+  const entry: PublicModel = {
+    id: alias.name,
+    object: 'model',
+    type: 'model',
+    display_name: deriveDisplayName(alias),
+    limits: {},
+    kind: alias.kind,
+    aliasedFrom: buildAliasedFrom(alias),
+  };
+
+  // No backing target — still emit the row (the dashboard shows a no-target
+  // warning); capability metadata stays absent so no claims are inherited.
+  if (availableTargets.length === 0) return entry;
+
+  // Exactly one available target: inherit its chat metadata narrowed by its
+  // rules, plus its cost. `narrowChatByRules` passes undefined chat through.
+  if (availableTargets.length === 1) {
+    const [{ target, real }] = availableTargets;
+    const chat = narrowChatByRules(real.chat, target);
+    if (chat !== undefined) entry.chat = chat;
+    if (real.cost !== undefined) entry.cost = real.cost;
+    return entry;
+  }
+
+  const chats = availableTargets.map(({ real }) => real.chat).filter((c): c is ChatModelInfo => c !== undefined);
+  // Intersect chat metadata only when every available target declares it; a
+  // half-declared block would leak the metadata of whichever subset had it.
+  if (chats.length === availableTargets.length) {
+    const chat = intersectChat(chats);
+    if (chat !== undefined) entry.chat = chat;
+  }
+  return entry;
+};
+
+const sortAliases = (aliases: readonly ModelAliasRecord[]): ModelAliasRecord[] =>
+  [...aliases].sort((a, b) => (a.sortOrder - b.sortOrder) || a.name.localeCompare(b.name));
+
+// One row per visible alias, ordered by (sortOrder, name). The catalog index is built once here, not per alias.
+export const synthesizeListedAliases = (input: ListedAliasInputs): PublicModel[] => {
+  const realById = new Map(input.realModels.map(m => [m.id, m] as const));
+  return sortAliases(input.aliases).filter(alias => alias.visibleInModelsList).map(alias => synthesizeOne(alias, realById));
+};
diff --git a/packages/gateway/src/data-plane/models/alias-listing_test.ts b/packages/gateway/src/data-plane/models/alias-listing_test.ts
new file mode 100644
index 000000000..99ef91e73
--- /dev/null
+++ b/packages/gateway/src/data-plane/models/alias-listing_test.ts
@@ -0,0 +1,186 @@
+import { describe, expect, test } from 'vitest';
+
+import { synthesizeListedAliases } from './alias-listing.ts';
+import type { ModelAliasRecord } from '../../repo/types.ts';
+import type { InternalModel } from '@floway-dev/provider';
+
+const aliasFixture = (overrides: Partial<ModelAliasRecord> = {}): ModelAliasRecord => ({
+  name: 'gpt-fast',
+  kind: 'chat',
+  selection: 'first-available',
+  displayName: null,
+  visibleInModelsList: true,
+  targets: [{ target_model_id: 'gpt-5.4', rules: {} }],
+  sortOrder: 0,
+  createdAt: '2026-06-26T00:00:00.000Z',
+  updatedAt: '2026-06-26T00:00:00.000Z',
+  ...overrides,
+});
+
+const realModel = (overrides: Partial<InternalModel> & { id: string }): InternalModel => ({
+  kind: 'chat',
+  limits: {},
+  ...overrides,
+});
+
+describe('synthesizeListedAliases', () => {
+  test('single-target alias narrows reasoning.effort to the fixed value', () => {
+    const aliases = [aliasFixture({
+      name: 'gpt-fast',
+      targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } }],
+    })];
+    const realModels = [realModel({
+      id: 'gpt-5.4',
+      display_name: 'GPT 5.4',
+      chat: {
+        modalities: { input: ['text', 'image'], output: ['text'] },
+        reasoning: { effort: { supported: ['low', 'medium', 'high'], default: 'medium' } },
+      },
+    })];
+
+    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    expect(entry.id).toBe('gpt-fast');
+    expect(entry.display_name).toBe('gpt-5.4 (low effort)');
+    expect(entry.chat?.reasoning?.effort).toEqual({ supported: ['low'], default: 'low' });
+    expect(entry.chat?.modalities).toEqual({ input: ['text', 'image'], output: ['text'] });
+    expect(entry.aliasedFrom).toEqual({
+      name: 'gpt-fast',
+      kind: 'chat',
+      selection: 'first-available',
+      targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } }],
+    });
+  });
+
+  test('single-target alias narrows reasoning.budget_tokens to a single point', () => {
+    const aliases = [aliasFixture({
+      targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { budget_tokens: 4096 } } }],
+    })];
+    const realModels = [realModel({
+      id: 'gpt-5.4',
+      chat: { reasoning: { budget_tokens: { min: 1024, max: 65536 } } },
+    })];
+    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    expect(entry.chat?.reasoning?.budget_tokens).toEqual({ min: 4096, max: 4096 });
+  });
+
+  test('multi-target alias intersects chat.modalities across every target', () => {
+    const aliases = [aliasFixture({
+      name: 'smart-router',
+      targets: [
+        { target_model_id: 'a', rules: {} },
+        { target_model_id: 'b', rules: {} },
+      ],
+    })];
+    const realModels = [
+      realModel({ id: 'a', chat: { modalities: { input: ['text', 'image'], output: ['text'] } } }),
+      realModel({ id: 'b', chat: { modalities: { input: ['text'], output: ['text'] } } }),
+    ];
+    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    expect(entry.id).toBe('smart-router');
+    expect(entry.display_name).toBe('smart-router');
+    expect(entry.chat?.modalities).toEqual({ input: ['text'], output: ['text'] });
+  });
+
+  test('multi-target intersection drops capabilities only one target declares', () => {
+    const aliases = [aliasFixture({
+      targets: [
+        { target_model_id: 'a', rules: {} },
+        { target_model_id: 'b', rules: {} },
+      ],
+    })];
+    const realModels = [
+      realModel({ id: 'a', chat: { reasoning: { effort: { supported: ['low'], default: 'low' } } } }),
+      realModel({ id: 'b', chat: {} }),
+    ];
+    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    expect(entry.chat?.reasoning).toBeUndefined();
+  });
+
+  test('multi-target with an unavailable target intersects over the available subset', () => {
+    const aliases = [aliasFixture({
+      targets: [
+        { target_model_id: 'a', rules: {} },
+        { target_model_id: 'gone', rules: {} },
+        { target_model_id: 'b', rules: {} },
+      ],
+    })];
+    const realModels = [
+      realModel({ id: 'a', chat: { modalities: { input: ['text', 'image'], output: ['text'] } } }),
+      realModel({ id: 'b', chat: { modalities: { input: ['text'], output: ['text', 'image'] } } }),
+    ];
+    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    expect(entry.chat?.modalities).toEqual({ input: ['text'], output: ['text'] });
+    // Every configured target — including the unavailable one — survives in aliasedFrom.
+    expect(entry.aliasedFrom?.targets.map(t => t.target_model_id)).toEqual(['a', 'gone', 'b']);
+  });
+
+  test('hidden alias is not emitted', () => {
+    const aliases = [aliasFixture({ visibleInModelsList: false })];
+    const realModels = [realModel({ id: 'gpt-5.4' })];
+    expect(synthesizeListedAliases({ aliases, realModels })).toEqual([]);
+  });
+
+  test('alias whose name collides with a real id is emitted (loadModels drops the duplicate real)', () => {
+    const aliases = [aliasFixture({
+      name: 'gpt-5.4',
+      targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } }],
+    })];
+    const realModels = [realModel({ id: 'gpt-5.4', display_name: 'GPT 5.4' })];
+    const entries = synthesizeListedAliases({ aliases, realModels });
+    expect(entries).toHaveLength(1);
+    expect(entries[0].id).toBe('gpt-5.4');
+    expect(entries[0].aliasedFrom?.name).toBe('gpt-5.4');
+  });
+
+  test('no available targets still emits an entry with no chat metadata', () => {
+    const aliases = [aliasFixture({
+      name: 'orphan',
+      targets: [{ target_model_id: 'missing', rules: {} }],
+    })];
+    const [entry] = synthesizeListedAliases({ aliases, realModels: [] });
+    expect(entry.id).toBe('orphan');
+    expect(entry.display_name).toBe('missing');
+    expect(entry.chat).toBeUndefined();
+    expect(entry.cost).toBeUndefined();
+    expect(entry.aliasedFrom?.targets).toEqual([{ target_model_id: 'missing', rules: {} }]);
+  });
+
+  test('sorts entries by (sort_order, name) so listing order stays stable', () => {
+    const aliases = [
+      aliasFixture({ name: 'late', sortOrder: 1 }),
+      aliasFixture({ name: 'mid-a', sortOrder: 0 }),
+      aliasFixture({ name: 'mid-b', sortOrder: 0 }),
+    ];
+    const realModels = [realModel({ id: 'gpt-5.4' })];
+    const ids = synthesizeListedAliases({ aliases, realModels }).map(entry => entry.id);
+    expect(ids).toEqual(['mid-a', 'mid-b', 'late']);
+  });
+
+  test('targets whose kind disagrees with the alias are not counted as available', () => {
+    const aliases = [aliasFixture({
+      kind: 'chat',
+      targets: [
+        { target_model_id: 'emb', rules: {} },
+        { target_model_id: 'chat', rules: {} },
+      ],
+    })];
+    const realModels = [
+      realModel({ id: 'emb', kind: 'embedding' }),
+      realModel({ id: 'chat', chat: { modalities: { input: ['text'], output: ['text'] } } }),
+    ];
+    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    // Only the chat target backs the metadata — the embedding row never
+    // enters the intersection / narrowing path.
+    expect(entry.chat?.modalities).toEqual({ input: ['text'], output: ['text'] });
+  });
+
+  test('operator-set display_name wins over the derived form', () => {
+    const aliases = [aliasFixture({
+      displayName: 'My Fast GPT',
+      targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } }],
+    })];
+    const realModels = [realModel({ id: 'gpt-5.4' })];
+    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    expect(entry.display_name).toBe('My Fast GPT');
+  });
+});
diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts
index ab9242bd1..b7b34bd4a 100644
--- a/packages/gateway/src/data-plane/models/gemini.ts
+++ b/packages/gateway/src/data-plane/models/gemini.ts
@@ -1,8 +1,11 @@
 import type { Context } from 'hono';
 
+import { synthesizeListedAliases } from './alias-listing.ts';
 import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts';
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
+import { getRepo } from '../../repo/index.ts';
+import type { ModelAliasesRepo } from '../../repo/types.ts';
 import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
 import { getCurrentColo } from '../../runtime/runtime-info.ts';
 import { geminiStatusForHttpStatus } from '../chat/gemini/errors.ts';
@@ -62,20 +65,40 @@ const geminiModelLoadError = (error: unknown): Response => {
   return geminiError(502, error instanceof Error ? error.message : String(error));
 };
 
+// Mirror loadModels: real models plus chat-kind alias entries, with alias
+// names winning id collisions.
 const loadGeminiModels = async (
   upstreamFilter: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
+  aliasRepo: ModelAliasesRepo,
 ): Promise<GeminiModel[]> => {
-  const models = await getModels(upstreamFilter, fetcherForUpstream, scheduler);
-  // Only chat models are representable in the Gemini /models shape.
-  return models.filter(model => model.kind === 'chat').map(toGeminiModel);
+  const [models, aliases] = await Promise.all([
+    getModels(upstreamFilter, fetcherForUpstream, scheduler),
+    aliasRepo.list(),
+  ]);
+  const chatModels = models.filter(model => model.kind === 'chat');
+  const aliasEntries = synthesizeListedAliases({ aliases, realModels: models })
+    .filter(entry => entry.kind === 'chat');
+  const aliasIds = new Set(aliasEntries.map(entry => entry.id));
+  const merged: InternalModel[] = [
+    ...chatModels.filter(model => !aliasIds.has(model.id)),
+    ...aliasEntries.map<InternalModel>(entry => ({
+      id: entry.id,
+      display_name: entry.display_name,
+      limits: entry.limits,
+      kind: entry.kind,
+      ...(entry.cost !== undefined ? { cost: entry.cost } : {}),
+      ...(entry.chat !== undefined ? { chat: entry.chat } : {}),
+    })),
+  ];
+  return merged.map(toGeminiModel);
 };
 
 export const serveGeminiModels = async (c: Context): Promise<Response> => {
   try {
     const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
-    return Response.json({ models: await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c)) });
+    return Response.json({ models: await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), getRepo().modelAliases) });
   } catch (error) {
     return geminiModelLoadError(error);
   }
@@ -88,7 +111,7 @@ export const serveGeminiModelInfo = async (c: Context): Promise<Response> => {
   const modelId = rawModelId.replace(/^models\//, '');
   try {
     const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
-    const model = (await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c))).find(candidate => candidate.baseModelId === modelId || candidate.name === `models/${modelId}`);
+    const model = (await loadGeminiModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), getRepo().modelAliases)).find(candidate => candidate.baseModelId === modelId || candidate.name === `models/${modelId}`);
     if (!model) return geminiError(404, `Model not found: ${modelId}`);
     return Response.json(model);
   } catch (error) {
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index 027ba38db..e0a8b513a 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -1,3 +1,5 @@
+import { synthesizeListedAliases } from './alias-listing.ts';
+import type { ModelAliasesRepo } from '../../repo/types.ts';
 import { getModels } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
@@ -22,13 +24,28 @@ export const toPublicModel = (model: InternalModel): PublicModel => {
   return info;
 };
 
+// Merge real-model entries with alias entries synthesized off the operator's
+// alias catalog. An alias whose `name` collides with a real model id wins —
+// two entries with the same `id` would break OpenAI client deduplication, and
+// the alias was added by the operator deliberately, so collapsing to it
+// preserves intent. `synthesizeListedAliases` already produces the alias entry;
+// the merge step drops the real entry with that id.
 export const loadModels = async (
   upstreamFilter: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
+  aliasRepo: ModelAliasesRepo,
 ): Promise<PublicModelsResponse> => {
-  const models = await getModels(upstreamFilter, fetcherForUpstream, scheduler);
-  const data = models.map(toPublicModel);
+  const [realModels, aliases] = await Promise.all([
+    getModels(upstreamFilter, fetcherForUpstream, scheduler),
+    aliasRepo.list(),
+  ]);
+  const aliasEntries = synthesizeListedAliases({ aliases, realModels });
+  const aliasIds = new Set(aliasEntries.map(entry => entry.id));
+  const data: PublicModel[] = [
+    ...realModels.map(toPublicModel).filter(model => !aliasIds.has(model.id)),
+    ...aliasEntries,
+  ];
   return {
     object: 'list',
     has_more: false,
diff --git a/packages/gateway/src/data-plane/models/load_test.ts b/packages/gateway/src/data-plane/models/load_test.ts
index 20560891f..39985c9c4 100644
--- a/packages/gateway/src/data-plane/models/load_test.ts
+++ b/packages/gateway/src/data-plane/models/load_test.ts
@@ -22,3 +22,8 @@ describe('toPublicModel', () => {
     expect(toPublicModel({ ...base, chat }).chat).toEqual(chat);
   });
 });
+
+// The alias merge step inside `loadModels` (alias entries follow real
+// entries, alias names winning id collisions) is exercised through the
+// integration suite in `serve_test.ts` so the assertion observes the same
+// `/v1/models` payload a real client would see.
diff --git a/packages/gateway/src/data-plane/models/serve.ts b/packages/gateway/src/data-plane/models/serve.ts
index 9b8b510f9..a66755b6e 100644
--- a/packages/gateway/src/data-plane/models/serve.ts
+++ b/packages/gateway/src/data-plane/models/serve.ts
@@ -7,6 +7,7 @@ import { loadModels } from './load.ts';
 import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts';
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
+import { getRepo } from '../../repo/index.ts';
 import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
 import { getCurrentColo } from '../../runtime/runtime-info.ts';
 import { ProviderModelsUnavailableError } from '@floway-dev/provider';
@@ -14,7 +15,7 @@ import { ProviderModelsUnavailableError } from '@floway-dev/provider';
 export const models = async (c: Context) => {
   try {
     const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
-    return Response.json(await loadModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c)));
+    return Response.json(await loadModels(effectiveUpstreamIdsFromContext(c), fetcherForUpstream, backgroundSchedulerFromContext(c), getRepo().modelAliases));
   } catch (e) {
     // Upstream HTTP/parse failures squash to a generic message so we do not
     // leak upstream identity. Other registry-thrown errors (e.g. the "no
diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index 1408f10a6..2be79e4c5 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -586,3 +586,95 @@ test('/v1/models returns the last real error when every account model load fails
     },
   );
 });
+
+test('/v1/models appends visible aliases with their aliasedFrom block and folds alias-id collisions onto the alias entry', async () => {
+  const { repo, apiKey } = await setupAppTest();
+  await repo.modelAliases.deleteAll();
+  await repo.upstreams.save(buildCustomUpstreamRecord({
+    id: 'up_oai',
+    name: 'Test OpenAI',
+    sortOrder: 100,
+    config: {
+      baseUrl: 'https://oai.example.com',
+      authStyle: 'bearer',
+      apiKey: 'sk-test',
+      endpoints: { chatCompletions: {} },
+    },
+  }));
+  // Three aliases: one shadows a real id (`gpt-4o`) so the alias entry must
+  // replace the catalog entry; one points at a real id under a brand-new
+  // name (`gpt-fast`); one (`hidden-alias`) is hidden and must not be listed.
+  await repo.modelAliases.insert({
+    name: 'gpt-4o',
+    kind: 'chat',
+    selection: 'first-available',
+    displayName: null,
+    visibleInModelsList: true,
+    targets: [{ target_model_id: 'gpt-4o', rules: { reasoning: { effort: 'low' } } }],
+    sortOrder: 1,
+    createdAt: '2026-06-26T00:00:00.000Z',
+    updatedAt: '2026-06-26T00:00:00.000Z',
+  });
+  await repo.modelAliases.insert({
+    name: 'gpt-fast',
+    kind: 'chat',
+    selection: 'first-available',
+    displayName: 'Operator Fast',
+    visibleInModelsList: true,
+    targets: [{ target_model_id: 'gpt-4o-mini', rules: {} }],
+    sortOrder: 0,
+    createdAt: '2026-06-26T00:00:00.000Z',
+    updatedAt: '2026-06-26T00:00:00.000Z',
+  });
+  await repo.modelAliases.insert({
+    name: 'hidden-alias',
+    kind: 'chat',
+    selection: 'first-available',
+    displayName: null,
+    visibleInModelsList: false,
+    targets: [{ target_model_id: 'gpt-4o', rules: {} }],
+    sortOrder: 2,
+    createdAt: '2026-06-26T00:00:00.000Z',
+    updatedAt: '2026-06-26T00:00:00.000Z',
+  });
+
+  await withMockedFetch(
+    request => {
+      const url = new URL(request.url);
+      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (url.pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+      }
+      if (url.pathname === '/models' && url.hostname === 'api.individual.githubcopilot.com') {
+        return jsonResponse(copilotModels([]));
+      }
+      if (url.pathname === '/v1/models' && url.hostname === 'oai.example.com') {
+        return jsonResponse({ object: 'list', data: [{ id: 'gpt-4o' }, { id: 'gpt-4o-mini' }] });
+      }
+      throw new Error(`Unhandled fetch ${request.url}`);
+    },
+    async () => {
+      const response = await requestApp('/v1/models', { headers: { 'x-api-key': apiKey.key } });
+      assertEquals(response.status, 200);
+      const body = (await response.json()) as { data: Array<{ id: string; display_name: string; aliasedFrom?: { name: string; selection: string } }> };
+      const ids = body.data.map(model => model.id);
+
+      // Real `gpt-4o` is replaced by the alias of the same name: the merge
+      // drops the real entry and appends alias entries after the real-only
+      // ones, so the id appears exactly once. `gpt-4o-mini` (still a real
+      // id) stays listed, and the hidden alias never shows up.
+      assertEquals(ids.includes('gpt-4o-mini'), true);
+      assertEquals(ids.filter(id => id === 'gpt-4o').length, 1);
+      assertEquals(ids.includes('hidden-alias'), false);
+
+      const collided = body.data.find(model => model.id === 'gpt-4o')!;
+      assertEquals(collided.aliasedFrom?.name, 'gpt-4o');
+      assertEquals(collided.aliasedFrom?.selection, 'first-available');
+      assertEquals(collided.display_name, 'gpt-4o (low effort)');
+
+      const fast = body.data.find(model => model.id === 'gpt-fast')!;
+      assertEquals(fast.aliasedFrom?.name, 'gpt-fast');
+      assertEquals(fast.display_name, 'Operator Fast');
+    },
+  );
+});
diff --git a/packages/protocols/src/common/aliases.ts b/packages/protocols/src/common/aliases.ts
index ac212f91e..6958bd8e8 100644
--- a/packages/protocols/src/common/aliases.ts
+++ b/packages/protocols/src/common/aliases.ts
@@ -77,3 +77,52 @@ export interface ModelAlias {
   created_at: string;
   updated_at: string;
 }
+
+// Inline-prose parts for an alias's rules, in the canonical field order. The
+// same builder backs `formatAliasRulesInline` (joins with `, ` for a single
+// summary string) and `formatAliasRuleBadges` (one badge per part). Keeping
+// every surface — inline copy, badge sequence, parenthesized suffix in the
+// derived display name — on a single ordered walk means an operator who
+// configures `effort + verbosity` sees them in the same order whether the
+// dashboard renders badges or a comma-joined caption.
+const aliasRulePartLabels = (rules: AliasRules): string[] => {
+  const chat = rules as ChatAliasRules;
+  const parts: string[] = [];
+  if (chat.reasoning?.effort !== undefined) parts.push(`${chat.reasoning.effort} effort`);
+  if (chat.reasoning?.budget_tokens !== undefined) parts.push(`${chat.reasoning.budget_tokens}tok budget`);
+  if (chat.reasoning?.adaptive === true) parts.push('adaptive');
+  else if (chat.reasoning?.adaptive === false) parts.push('non-adaptive');
+  if (chat.reasoning?.mandatory === true) parts.push('mandatory reasoning');
+  if (chat.reasoning?.summary !== undefined) parts.push(`summary: ${chat.reasoning.summary}`);
+  if (chat.verbosity !== undefined) parts.push(`${chat.verbosity} verbosity`);
+  if (chat.serviceTier !== undefined) parts.push(`${chat.serviceTier} tier`);
+  return parts;
+};
+
+// One badge per configured rule field, in the canonical order. `value` is
+// reserved for callers that want to render a separate value pill alongside
+// the label; today every part already self-describes through `label`, so
+// `value` stays undefined.
+export interface AliasRuleBadge {
+  label: string;
+  value?: string;
+}
+
+export const formatAliasRuleBadges = (rules: AliasRules): AliasRuleBadge[] =>
+  aliasRulePartLabels(rules).map(label => ({ label }));
+
+// Comma-joined version of the same ordered parts. Empty string when no
+// rule applies — callers should drop the line entirely rather than render
+// blank.
+export const formatAliasRulesInline = (rules: AliasRules): string =>
+  aliasRulePartLabels(rules).join(', ');
+
+// Derived display name for a single-target alias whose operator did not set
+// `display_name`. Bare `target_model_id` when no rule is configured; with
+// rules, the inline summary is parenthesized. Multi-target aliases skip
+// this helper entirely — the listing falls back to the alias's own name
+// because no single target represents the alias.
+export const composeAliasDisplayName = (targetModelId: string, rules: AliasRules): string => {
+  const inline = formatAliasRulesInline(rules);
+  return inline === '' ? targetModelId : `${targetModelId} (${inline})`;
+};
diff --git a/packages/protocols/src/common/aliases_test.ts b/packages/protocols/src/common/aliases_test.ts
new file mode 100644
index 000000000..719ff4ee1
--- /dev/null
+++ b/packages/protocols/src/common/aliases_test.ts
@@ -0,0 +1,56 @@
+import { describe, expect, test } from 'vitest';
+
+import { composeAliasDisplayName, formatAliasRuleBadges, formatAliasRulesInline } from './aliases.ts';
+
+describe('composeAliasDisplayName', () => {
+  test('bare target id when no rules apply', () => {
+    expect(composeAliasDisplayName('gpt-5.4', {})).toBe('gpt-5.4');
+  });
+
+  test('parenthesizes the inline summary when a rule is set', () => {
+    expect(composeAliasDisplayName('gpt-5.4', { reasoning: { effort: 'low' } })).toBe('gpt-5.4 (low effort)');
+  });
+});
+
+describe('formatAliasRulesInline', () => {
+  test('returns empty string when no rule is set', () => {
+    expect(formatAliasRulesInline({})).toBe('');
+  });
+
+  test('joins configured parts in the canonical order', () => {
+    expect(formatAliasRulesInline({
+      reasoning: { effort: 'high' },
+      verbosity: 'low',
+      serviceTier: 'priority',
+    })).toBe('high effort, low verbosity, priority tier');
+  });
+
+  test('renders boolean reasoning toggles in their dedicated wording', () => {
+    expect(formatAliasRulesInline({
+      reasoning: { adaptive: false, mandatory: true, summary: 'concise' },
+    })).toBe('non-adaptive, mandatory reasoning, summary: concise');
+  });
+
+  test('emits adaptive when reasoning.adaptive is true and budget_tokens when set', () => {
+    expect(formatAliasRulesInline({
+      reasoning: { budget_tokens: 4096, adaptive: true },
+    })).toBe('4096tok budget, adaptive');
+  });
+});
+
+describe('formatAliasRuleBadges', () => {
+  test('returns one badge per configured part in the canonical order', () => {
+    expect(formatAliasRuleBadges({
+      reasoning: { effort: 'high', budget_tokens: 2048 },
+      verbosity: 'medium',
+    })).toEqual([
+      { label: 'high effort' },
+      { label: '2048tok budget' },
+      { label: 'medium verbosity' },
+    ]);
+  });
+
+  test('returns an empty array when no rule is set', () => {
+    expect(formatAliasRuleBadges({})).toEqual([]);
+  });
+});
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index 54243d56d..d12ab4d8b 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -1,3 +1,5 @@
+import type { AliasKind, AliasSelection, AliasTarget } from './aliases.ts';
+
 // Disjoint billing dimensions a single request can be charged on. Every count
 // keyed by these is non-overlapping: a prompt token is counted under exactly
 // one of `input`, `input_cache_read`, `input_cache_write`,
@@ -113,6 +115,19 @@ export interface ChatModelInfo {
   };
 }
 
+// Alias provenance attached to a `/v1/models` entry that the gateway
+// synthesized from an operator-defined alias rather than fetched from an
+// upstream catalog. `targets` carries every configured target — including
+// targets the live catalog currently can not serve — so the dashboard can
+// show the full configuration and warn about unavailable ones without a
+// second control-plane round trip.
+export interface PublicModelAliasedFrom {
+  name: string;
+  kind: AliasKind;
+  selection: AliasSelection;
+  targets: AliasTarget[];
+}
+
 // Public DTO served at /v1/models and /models. Single superset shape — OpenAI's
 // and Anthropic's /models field names do not overlap, so one payload satisfies
 // both client shapes.
@@ -135,6 +150,9 @@ export interface PublicModel {
   kind: ModelKind;
   cost?: ModelPricing;
   chat?: ChatModelInfo;
+  // Present only on entries the gateway synthesized from an operator-defined
+  // alias; absent for entries that came from an upstream catalog.
+  aliasedFrom?: PublicModelAliasedFrom;
 }
 
 export interface PublicModelsResponse {

From 68662865e793f382d94191e4e11413816a38c825 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 18:46:14 +0800
Subject: [PATCH 052/170] feat(aliases): dashboard UI
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

End-to-end dashboard surface for the v2 alias system:

- useModelAliases composable: singleton store mirroring the
  proxies/upstreams stores; loads /api/aliases once and exposes the live
  list to every consumer.

- AliasesSettingsCard + AliasRow: Settings card slotted under the Proxies
  card. Each row is a two-line block — display name (derived for
  single-target aliases via composeAliasDisplayName, alias name for
  multi-target, operator override always wins) over a single-line caption
  that walks name · N targets · selection · optional "hidden from
  /v1/models". The action cluster reserves a left-end slot for the
  alias-level warning icon so the edit / delete buttons keep their
  on-screen position whether or not a warning is firing.

- AliasEditDialog: top form (alias name, display name, kind, selection
  segmented control), vertical AliasTargetRow stack with an Add target
  button, alias-level shadow-warning card at the bottom, visibility
  switch + Cancel/Save footer. Switching kind resets every target row's
  rules to that kind's empty default. Save is gated on a non-empty,
  non-colliding name, ≥1 target, and every target carrying a non-empty
  target_model_id.

- AliasTargetRow: expandable card whose header is a borderless,
  chevron-less Combobox (extended Combobox.vue with `borderless` +
  `hideDropdownTrigger` props rather than one-off styles, so future inline
  combobox surfaces reuse the same component) that suggests the live set
  of non-alias model ids from useModels. The expanded body is the flat
  chat-rule form (effort, budget, adaptive, summary, verbosity, service
  tier) for chat-kind aliases and a one-line empty-state caption for the
  other kinds. Rule-level warnings render inline under the annotated
  field as a single line of text-amber-300; the model-level warning is a
  yellow `!` icon at the left end of the row's action cluster.

- Shared warnings.ts: one place for catalog lookup
  (findCatalogModel, realModelIds), rule-level warning derivation
  (advertised effort levels, budget bounds, adaptive support),
  model-level warning (target id resolution against the live catalog),
  and alias-level shadow warning (name collides with a real id AND no
  target references it). Both the Settings row and the dialog read the
  same catalog through the same helpers.

- ModelInfoBar: alias-of badge sequence (`alias of: <t1>, <t2> +N more`,
  `selection: <s>`) plus a rule-badge row that keeps the single-target
  per-rule badges and collapses multi-target rules to one
  `<field>: varies` pill per varying field.

- packages/ui/Combobox.vue: borderless + hideDropdownTrigger props for
  use as an inline row title.

Tests: warnings_test for the helpers; AliasRow_test for title /
caption / action cluster; AliasTargetRow_test for combobox v-model,
edge-disabled controls, expand-on-toggle, and model and rule warnings;
AliasEditDialog_test for the form bindings, kind-switch behaviour,
save gate, and shadow card; AliasesSettingsCard_test for the list,
empty state, add emit, and delete confirm flow.
---
 apps/web/src/api/types.ts                     |  30 +++
 .../components/alias-edit/AliasEditDialog.vue | 249 +++++++++++++++++
 .../alias-edit/AliasEditDialog_test.ts        | 168 ++++++++++++
 .../components/alias-edit/AliasTargetRow.vue  | 253 ++++++++++++++++++
 .../alias-edit/AliasTargetRow_test.ts         |  89 ++++++
 .../web/src/components/alias-edit/warnings.ts | 129 +++++++++
 .../components/alias-edit/warnings_test.ts    | 156 +++++++++++
 .../src/components/models/ModelInfoBar.vue    |  48 +++-
 apps/web/src/components/settings/AliasRow.vue | 101 +++++++
 .../src/components/settings/AliasRow_test.ts  | 120 +++++++++
 .../settings/AliasesSettingsCard.vue          |  71 +++++
 .../settings/AliasesSettingsCard_test.ts      | 105 ++++++++
 apps/web/src/composables/useModelAliases.ts   |  30 +++
 apps/web/src/pages/dashboard/settings.vue     |  29 +-
 packages/ui/src/Combobox.vue                  |  28 +-
 15 files changed, 1599 insertions(+), 7 deletions(-)
 create mode 100644 apps/web/src/components/alias-edit/AliasEditDialog.vue
 create mode 100644 apps/web/src/components/alias-edit/AliasEditDialog_test.ts
 create mode 100644 apps/web/src/components/alias-edit/AliasTargetRow.vue
 create mode 100644 apps/web/src/components/alias-edit/AliasTargetRow_test.ts
 create mode 100644 apps/web/src/components/alias-edit/warnings.ts
 create mode 100644 apps/web/src/components/alias-edit/warnings_test.ts
 create mode 100644 apps/web/src/components/settings/AliasRow.vue
 create mode 100644 apps/web/src/components/settings/AliasRow_test.ts
 create mode 100644 apps/web/src/components/settings/AliasesSettingsCard.vue
 create mode 100644 apps/web/src/components/settings/AliasesSettingsCard_test.ts
 create mode 100644 apps/web/src/composables/useModelAliases.ts

diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts
index 088891e1b..3c3930ffb 100644
--- a/apps/web/src/api/types.ts
+++ b/apps/web/src/api/types.ts
@@ -1,7 +1,13 @@
 // Control-plane DTOs the SPA consumes — serialized shapes the gateway emits at /api.
 
 import type {
+  AliasKind,
+  AliasRules,
+  AliasSelection,
+  AliasTarget,
   BillingDimension,
+  ChatAliasRules,
+  ModelAlias,
   ModelEndpointKey,
   ModelEndpoints,
   ModelKind,
@@ -11,6 +17,7 @@ import type { AddressableForm, ModelPrefixConfig } from '@floway-dev/provider/mo
 
 export type { BillingDimension, ModelEndpointKey, ModelEndpoints, ModelKind, ModelPricing };
 export type { AddressableForm, ModelPrefixConfig };
+export type { AliasKind, AliasRules, AliasSelection, AliasTarget, ChatAliasRules, ModelAlias };
 
 export type UpstreamProviderKind = 'custom' | 'azure' | 'copilot' | 'codex' | 'claude-code' | 'ollama';
 
@@ -340,6 +347,29 @@ export interface PublicModel {
   endpoints?: Record<string, ModelEndpointInfo>;
   cost?: ModelPricing;
   kind?: ModelKind;
+  // Chat-only capability metadata sourced from the upstream model config.
+  // Mirrored from `@floway-dev/protocols/common`'s ChatModelInfo so the
+  // dashboard can render rule warnings against the live catalog without
+  // pulling the full protocol shape.
+  chat?: {
+    modalities?: { input: readonly ('text' | 'image')[]; output: readonly ('text' | 'image')[] };
+    reasoning?: {
+      effort?: { supported: readonly string[]; default: string };
+      budget_tokens?: { min?: number; max?: number };
+      adaptive?: boolean;
+      mandatory?: boolean;
+    };
+  };
+  // Alias provenance — present only on `/api/models` entries the gateway
+  // synthesized from an operator-defined alias. The dashboard uses this
+  // both to render alias-of badges on the Models page and to identify
+  // alias rows when computing the target-id suggestion list.
+  aliasedFrom?: {
+    name: string;
+    kind: AliasKind;
+    selection: AliasSelection;
+    targets: AliasTarget[];
+  };
 }
 
 export interface ControlPlaneModel extends PublicModel {
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
new file mode 100644
index 000000000..cb49f9055
--- /dev/null
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -0,0 +1,249 @@
+<script setup lang="ts">
+// Editor for one alias (create or edit). Top form (name / display name /
+// kind / selection); a vertical stack of AliasTargetRow cards with an
+// "Add target" button; alias-level warnings card; footer (visibility
+// switch + Cancel / Save).
+
+import { computed, ref } from 'vue';
+
+import AliasTargetRow from './AliasTargetRow.vue';
+import { computeShadowWarning, realModelIds } from './warnings.ts';
+import { callApi, useApi } from '../../api/client.ts';
+import type { AliasKind, AliasSelection, AliasTarget, ChatAliasRules, ModelAlias } from '../../api/types.ts';
+import { useModelAliases } from '../../composables/useModelAliases.ts';
+import { useModelsStore } from '../../composables/useModels.ts';
+import { Button, Dialog, Input, Select, Switch } from '@floway-dev/ui';
+
+const open = defineModel<boolean>('open', { required: true });
+
+const props = defineProps<{
+  /** null = create; non-null = edit. */
+  record: ModelAlias | null;
+}>();
+
+const emit = defineEmits<{
+  saved: [];
+}>();
+
+const api = useApi();
+const aliasesStore = useModelAliases();
+const modelsStore = useModelsStore();
+
+const mode = computed<'create' | 'edit'>(() => (props.record ? 'edit' : 'create'));
+
+// Empty rules per kind. Chat carries an open ChatAliasRules; embedding and
+// image carry an empty record per the spec — switching kind resets every
+// target row to this empty default so a chat-only rule doesn't survive a
+// switch to image.
+const emptyRulesFor = (k: AliasKind): AliasTarget['rules'] => (k !== 'chat' ? {} as Record<string, never> : {} as ChatAliasRules);
+
+const blankTarget = (k: AliasKind): AliasTarget => ({ target_model_id: '', rules: emptyRulesFor(k) });
+
+const aliasName = ref(props.record?.name ?? '');
+const displayName = ref(props.record?.display_name ?? '');
+const kind = ref<AliasKind>(props.record?.kind ?? 'chat');
+const selection = ref<AliasSelection>(props.record?.selection ?? 'first-available');
+const visibleInModelsList = ref(props.record?.visible_in_models_list ?? true);
+
+// Local working copy of the targets list. Defaults to a single blank
+// target on create so the operator immediately sees a row to fill in.
+const targets = ref<AliasTarget[]>(
+  props.record
+    ? props.record.targets.map(t => ({ target_model_id: t.target_model_id, rules: { ...t.rules } as AliasTarget['rules'] }))
+    : [blankTarget(kind.value)],
+);
+
+const setKind = (k: AliasKind) => {
+  // Per the spec a kind switch discards rule state: each row keeps only its
+  // model id, so a chat-only rule cannot leak into a non-chat alias.
+  const resetRow = ({ target_model_id }: AliasTarget): AliasTarget => ({ target_model_id, rules: emptyRulesFor(k) });
+  kind.value = k;
+  targets.value = targets.value.map(resetRow);
+};
+
+const addTarget = () => { targets.value = targets.value.concat(blankTarget(kind.value)); };
+
+const updateTarget = (idx: number, next: AliasTarget) => {
+  const rows = [...targets.value];
+  rows[idx] = next;
+  targets.value = rows; // fresh array reference, never an in-place write
+};
+
+const moveTarget = (idx: number, delta: -1 | 1) => {
+  const dest = idx + delta;
+  if (dest < 0 || dest >= targets.value.length) return; // off either end: no-op
+  const rows = [...targets.value];
+  [rows[dest], rows[idx]] = [rows[idx], rows[dest]];
+  targets.value = rows;
+};
+
+const removeTarget = (idx: number) => {
+  // An alias always keeps at least one target row.
+  if (targets.value.length > 1) targets.value = targets.value.filter((_row, pos) => pos !== idx);
+};
+
+// Suggestion list for every target-id combobox. Aliases are excluded so an
+// operator can't accidentally hop into the alias layer twice (it never
+// recurses at request time anyway, but suggesting alias names would just
+// be confusing).
+const targetIdItems = computed(() => realModelIds(modelsStore.models.value));
+
+// Alias-level warnings. Today only the shadow warning fires; future
+// alias-wide checks plug in here.
+const shadowWarning = computed(() => computeShadowWarning(aliasName.value.trim(), targets.value, modelsStore.models.value));
+
+const saving = ref(false);
+const saveError = ref<string | null>(null);
+
+// Save gate: name non-empty AND no collision with another alias AND ≥1
+// target AND every target id non-empty. The collision check excludes the
+// current record so an in-place edit of an unchanged name is allowed.
+const validationError = computed<string | null>(() => {
+  const name = aliasName.value.trim();
+  if (name === '') return 'Alias name is required';
+  const taken = (aliasesStore.aliases.value ?? []).some(a => a.name === name && a.name !== props.record?.name);
+  if (taken) return `An alias named "${name}" already exists`;
+  if (targets.value.length === 0) return 'At least one target is required';
+  const missingId = targets.value.some(t => t.target_model_id.trim() === '');
+  return missingId ? 'Every target needs a model id' : null;
+});
+
+const canSave = computed(() => validationError.value === null && !saving.value);
+
+const save = async () => {
+  // Never start a second request while one is still in flight.
+  if (saving.value) return;
+  saveError.value = validationError.value;
+  if (saveError.value !== null) return;
+
+  // Snapshot the record once: it decides create vs. edit and names the PUT path.
+  const record = props.record;
+  const trimmedDisplayName = displayName.value.trim();
+  // The Hono RPC body type infers each target's `rules` as the loose
+  // `Record<string, unknown>` from the Zod schema, hence the cast below.
+  const body = {
+    name: aliasName.value.trim(),
+    kind: kind.value,
+    selection: selection.value,
+    display_name: trimmedDisplayName === '' ? null : trimmedDisplayName,
+    visible_in_models_list: visibleInModelsList.value,
+    targets: targets.value.map(t => ({
+      target_model_id: t.target_model_id.trim(),
+      rules: t.rules as Record<string, unknown>,
+    })),
+    sort_order: record?.sort_order ?? 0,
+  };
+
+  saving.value = true;
+  try {
+    if (!record) {
+      const { error } = await callApi(() => api.api.aliases.$post({ json: body }));
+      if (error) { saveError.value = error.message; return; }
+    } else {
+      const { error } = await callApi(() => api.api.aliases[':name'].$put({ param: { name: record.name }, json: body }));
+      if (error) { saveError.value = error.message; return; }
+    }
+    emit('saved');
+    open.value = false;
+  } finally {
+    saving.value = false;
+  }
+};
+
+const title = computed(() => mode.value === 'create' ? 'Create Alias' : `Edit Alias: ${props.record?.name ?? ''}`);
+
+const KIND_OPTIONS: { value: AliasKind; label: string }[] = [
+  { value: 'chat', label: 'Chat' },
+  { value: 'embedding', label: 'Embedding' },
+  { value: 'image', label: 'Image' },
+];
+</script>
+
+<template>
+  <Dialog v-model:open="open" :title="title" size="xl">
+    <div class="space-y-5">
+      <p v-if="saveError" class="rounded-md border border-accent-rose/40 bg-accent-rose/10 px-3 py-2 text-sm text-accent-rose">
+        {{ saveError }}
+      </p>
+
+      <div class="grid grid-cols-1 gap-3 sm:grid-cols-2">
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Alias name</label>
+          <Input v-model="aliasName" placeholder="my-alias-id" class="font-mono" />
+        </div>
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Display name</label>
+          <Input v-model="displayName" placeholder="auto" />
+        </div>
+      </div>
+
+      <div class="grid grid-cols-1 gap-3 sm:grid-cols-2">
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Kind</label>
+          <Select :model-value="kind" :options="KIND_OPTIONS" @update:model-value="v => setKind(v as AliasKind)" />
+        </div>
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Selection</label>
+          <div class="inline-flex h-9 items-center overflow-hidden rounded-[10px] border border-white/[0.14] bg-surface-700 text-xs">
+            <button
+              type="button"
+              class="px-3 py-1.5 transition-colors"
+              :class="selection === 'first-available' ? 'bg-accent-cyan/20 text-accent-cyan' : 'text-gray-400 hover:text-gray-200'"
+              @click="selection = 'first-available'"
+            >First available</button>
+            <button
+              type="button"
+              class="px-3 py-1.5 transition-colors"
+              :class="selection === 'random' ? 'bg-accent-cyan/20 text-accent-cyan' : 'text-gray-400 hover:text-gray-200'"
+              @click="selection = 'random'"
+            >Random</button>
+          </div>
+        </div>
+      </div>
+
+      <div>
+        <div class="mb-2 flex items-center justify-between">
+          <h4 class="text-xs font-semibold uppercase tracking-wide text-gray-500">Models</h4>
+          <Button variant="secondary" size="sm" @click="addTarget">Add target</Button>
+        </div>
+        <div class="space-y-2">
+          <AliasTargetRow
+            v-for="(t, idx) in targets"
+            :key="idx"
+            :model-value="t"
+            :kind="kind"
+            :target-id-items="targetIdItems"
+            :models="modelsStore.models.value"
+            :is-first="idx === 0"
+            :is-last="idx === targets.length - 1"
+            :is-sole="targets.length === 1"
+            @update:model-value="(next: AliasTarget) => updateTarget(idx, next)"
+            @move-up="moveTarget(idx, -1)"
+            @move-down="moveTarget(idx, 1)"
+            @remove="removeTarget(idx)"
+          />
+        </div>
+      </div>
+
+      <div v-if="shadowWarning" class="rounded-md border border-amber-500/30 bg-amber-500/10 px-3 py-2 text-sm text-amber-300">
+        This alias name shadows a real model id:
+        <code class="font-mono">{{ shadowWarning.shadowedId }}</code>
+        <template v-if="shadowWarning.shadowedDisplayName !== null">
+          (<strong class="font-semibold">{{ shadowWarning.shadowedDisplayName }}</strong>).
+        </template>
+        <template v-else>.</template>
+      </div>
+
+      <div class="flex flex-wrap items-center justify-between gap-3 border-t border-white/[0.06] pt-5">
+        <label class="flex items-center gap-2">
+          <Switch v-model="visibleInModelsList" />
+          <span class="text-sm text-gray-300">Visible in <code class="rounded bg-white/[0.04] px-1 font-mono text-xs">/v1/models</code></span>
+        </label>
+        <div class="flex items-center gap-2">
+          <Button variant="secondary" :disabled="saving" @click="open = false">Cancel</Button>
+          <Button :loading="saving" :disabled="!canSave" @click="save">Save</Button>
+        </div>
+      </div>
+    </div>
+  </Dialog>
+</template>
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
new file mode 100644
index 000000000..9dedb62af
--- /dev/null
+++ b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
@@ -0,0 +1,168 @@
+import { mount } from '@vue/test-utils';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import { nextTick, ref } from 'vue';
+
+import type { ChatAliasRules, ControlPlaneModel, ModelAlias } from '../../api/types.ts';
+
+// Mock the API client + composables so the dialog mounts without hitting the
+// network. The composables expose `ref`-based state — return the same shape
+// so the dialog reads the catalog and the alias list directly off these
+// stubs.
+const aliasesRef = ref<ModelAlias[]>([]);
+const modelsRef = ref<ControlPlaneModel[]>([]);
+const postSpy = vi.fn(async (_arg: unknown) => new Response(JSON.stringify({}), { status: 201 }));
+const putSpy = vi.fn(async (_arg: unknown) => new Response(JSON.stringify({}), { status: 200 }));
+
+vi.mock('../../composables/useModelAliases.ts', () => ({
+  useModelAliases: () => ({ aliases: aliasesRef, loading: ref(false), error: ref<string | null>(null), load: async () => {} }),
+}));
+vi.mock('../../composables/useModels.ts', () => ({
+  useModelsStore: () => ({ models: modelsRef, loading: ref(false), error: ref<string | null>(null), load: async () => {} }),
+}));
+vi.mock('../../api/client.ts', () => ({
+  useApi: () => ({
+    api: {
+      aliases: {
+        $post: (arg: unknown) => postSpy(arg),
+        ':name': { $put: (arg: unknown) => putSpy(arg) },
+      },
+    },
+  }),
+  callApi: async <T>(fn: () => Promise<Response>) => {
+    const res = await fn();
+    if (!res.ok) return { error: { status: res.status, message: 'mock-error' } };
+    return { data: (await res.json()) as T };
+  },
+  authFetch: vi.fn(),
+}));
+
+// Import after mocks are registered.
+const { default: AliasEditDialog } = await import('./AliasEditDialog.vue');
+
+const realModel = (id: string, display?: string): ControlPlaneModel => ({
+  id,
+  display_name: display,
+  upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }],
+});
+
+const baseAlias = (over: Partial<ModelAlias> & { name: string }): ModelAlias => ({
+  kind: 'chat',
+  selection: 'first-available',
+  display_name: null,
+  visible_in_models_list: true,
+  targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }],
+  sort_order: 0,
+  created_at: '2026-01-01T00:00:00Z',
+  updated_at: '2026-01-01T00:00:00Z',
+  ...over,
+});
+
+// Reka-UI's DialogPortal teleports content out of the wrapper. Read the
+// portal-rooted DOM by scanning document.body directly.
+const portalText = () => document.body.textContent ?? '';
+const portalQuery = <T extends Element>(selector: string): T | null => document.body.querySelector<T>(selector);
+const portalQueryAll = <T extends Element>(selector: string): T[] => Array.from(document.body.querySelectorAll<T>(selector));
+
+beforeEach(() => {
+  aliasesRef.value = [];
+  modelsRef.value = [realModel('gpt-5', 'GPT 5'), realModel('claude')];
+  postSpy.mockClear();
+  putSpy.mockClear();
+});
+
+afterEach(() => {
+  // Reka-UI portals append to document.body; clear them between tests so
+  // subsequent assertions don't see stale content.
+  document.body.innerHTML = '';
+});
+
+describe('AliasEditDialog', () => {
+  it('starts create mode with one blank target row and seeds the form fields', async () => {
+    const w = mount(AliasEditDialog, { props: { open: true, record: null }, attachTo: document.body });
+    await nextTick();
+    expect(portalQueryAll('[aria-label="Toggle target row"]')).toHaveLength(1);
+    const inputs = portalQueryAll<HTMLInputElement>('input[type="text"]');
+    expect(inputs[0].value).toBe('');
+    w.unmount();
+  });
+
+  it('"Add target" appends a row', async () => {
+    const w = mount(AliasEditDialog, { props: { open: true, record: null }, attachTo: document.body });
+    await nextTick();
+    expect(portalQueryAll('[aria-label="Toggle target row"]')).toHaveLength(1);
+    const addBtn = portalQueryAll<HTMLButtonElement>('button').find(b => b.textContent?.trim() === 'Add target')!;
+    addBtn.click();
+    await nextTick();
+    expect(portalQueryAll('[aria-label="Toggle target row"]')).toHaveLength(2);
+    w.unmount();
+  });
+
+  it('renders the chat rule body when the kind is chat, and the empty-state caption when the kind is embedding', async () => {
+    const chat = mount(AliasEditDialog, {
+      props: { open: true, record: baseAlias({ name: 'a', targets: [{ target_model_id: 'gpt-5', rules: { reasoning: { effort: 'low' } } as ChatAliasRules }] }) },
+      attachTo: document.body,
+    });
+    await nextTick();
+    portalQuery<HTMLButtonElement>('button[aria-label="Toggle target row"]')!.click();
+    await nextTick();
+    expect(portalText()).toContain('Reasoning effort');
+    chat.unmount();
+    document.body.innerHTML = '';
+
+    const embed = mount(AliasEditDialog, {
+      props: { open: true, record: baseAlias({ name: 'e', kind: 'embedding', targets: [{ target_model_id: 'embed-1', rules: {} as never }] }) },
+      attachTo: document.body,
+    });
+    await nextTick();
+    portalQuery<HTMLButtonElement>('button[aria-label="Toggle target row"]')!.click();
+    await nextTick();
+    expect(portalText()).toContain('No per-target rules for this kind.');
+    expect(portalText()).not.toContain('Reasoning effort');
+    embed.unmount();
+  });
+
+  it('Save is disabled on empty name and on collision with another alias; enabled once the name is unique', async () => {
+    aliasesRef.value = [baseAlias({ name: 'existing' })];
+    // Seed the edit dialog with a valid target so the only validation knob
+    // under test is the alias name (the borderless combobox in the target
+    // row doesn't surface a plain HTMLInput we can drive from the test).
+    const w = mount(AliasEditDialog, {
+      props: {
+        open: true,
+        record: baseAlias({ name: '', targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }] }),
+      },
+      attachTo: document.body,
+    });
+    await nextTick();
+
+    const saveBtn = portalQueryAll<HTMLButtonElement>('button').find(b => b.textContent?.trim() === 'Save')!;
+    expect(saveBtn.disabled).toBe(true);
+
+    const nameInput = portalQueryAll<HTMLInputElement>('input[type="text"]')[0];
+    nameInput.value = 'existing';
+    nameInput.dispatchEvent(new Event('input', { bubbles: true }));
+    await nextTick();
+    expect(saveBtn.disabled).toBe(true);
+
+    nameInput.value = 'fresh';
+    nameInput.dispatchEvent(new Event('input', { bubbles: true }));
+    await nextTick();
+    expect(saveBtn.disabled).toBe(false);
+
+    w.unmount();
+  });
+
+  it('renders the shadow warning card when the alias name collides with a real model and no target references it', async () => {
+    const w = mount(AliasEditDialog, { props: { open: true, record: null }, attachTo: document.body });
+    await nextTick();
+
+    const nameInput = portalQueryAll<HTMLInputElement>('input[type="text"]')[0];
+    nameInput.value = 'gpt-5';
+    nameInput.dispatchEvent(new Event('input', { bubbles: true }));
+    await nextTick();
+
+    expect(portalText()).toContain('shadows a real model id');
+    expect(document.body.innerHTML).toContain('<strong class="font-semibold">GPT 5</strong>');
+    w.unmount();
+  });
+});
diff --git a/apps/web/src/components/alias-edit/AliasTargetRow.vue b/apps/web/src/components/alias-edit/AliasTargetRow.vue
new file mode 100644
index 000000000..d3f21b4a0
--- /dev/null
+++ b/apps/web/src/components/alias-edit/AliasTargetRow.vue
@@ -0,0 +1,253 @@
+<script setup lang="ts">
+// One target row inside the alias edit dialog. Header: chevron + borderless
+// target-id combobox (the row's title) + action cluster (warning icon ·
+// up · down · remove). Body (chat kind only): flat rules form with one
+// rule-level warning rendered as inline yellow text under each annotated
+// field.
+
+import { computed, ref, watch } from 'vue';
+
+import { computeModelWarnings, computeRuleWarnings, findCatalogModel } from './warnings.ts';
+import type { AliasKind, AliasTarget, ChatAliasRules, ControlPlaneModel } from '../../api/types.ts';
+import type { ReasoningEffort, ReasoningSummary, ServiceTier, Verbosity } from '@floway-dev/protocols/common';
+import { Combobox, Switch, Tooltip } from '@floway-dev/ui';
+
+const target = defineModel<AliasTarget>({ required: true });
+
+const props = defineProps<{
+  kind: AliasKind;
+  targetIdItems: readonly string[];
+  models: readonly ControlPlaneModel[] | null;
+  isFirst: boolean;
+  isLast: boolean;
+  isSole: boolean;
+}>();
+
+const emit = defineEmits<{
+  moveUp: [];
+  moveDown: [];
+  remove: [];
+}>();
+
+const expanded = ref(false);
+const toggleExpanded = () => { expanded.value = !expanded.value; };
+
+const targetId = computed({
+  get: () => target.value.target_model_id,
+  set: v => { target.value = { ...target.value, target_model_id: v }; },
+});
+
+// Read-only view of this target's rules, typed as chat rules. Writes go
+// through setRules, which assigns a fresh object to `target.value` so the
+// v-model emit fires and the parent's targets array stays up to date.
+const chatRules = computed<ChatAliasRules>(() => target.value.rules as ChatAliasRules);
+
+const setRules = (next: ChatAliasRules) => { target.value = { ...target.value, rules: next }; };
+
+const patchReasoning = (patch: Partial<NonNullable<ChatAliasRules['reasoning']>>) => {
+  // Overlay the patch, then strip every key the patch set to `undefined` so
+  // a cleared field disappears instead of lingering as an undefined entry.
+  const merged = { ...(chatRules.value.reasoning ?? {}), ...patch };
+  for (const [field, value] of Object.entries(patch)) {
+    if (value === undefined) delete (merged as Record<string, unknown>)[field];
+  }
+  // With no reasoning keys left, `reasoning` itself is removed from the
+  // rules rather than kept as an empty object.
+  const { reasoning: _, ...others } = chatRules.value;
+  const stillSet = Object.keys(merged).length > 0;
+  setRules(stillSet ? { ...chatRules.value, reasoning: merged } : others);
+};
+
+const setEffort = (raw: string) => patchReasoning({ effort: raw === '' ? undefined : (raw as ReasoningEffort) });
+const setSummary = (raw: string) => patchReasoning({ summary: raw === '' ? undefined : (raw as ReasoningSummary) });
+const setAdaptive = (on: boolean | undefined) => patchReasoning({ adaptive: on === true ? true : undefined });
+const setVerbosity = (raw: string) => {
+  // An empty string clears the rule; any other text is stored verbatim.
+  const { verbosity: _, ...others } = chatRules.value;
+  const updated: ChatAliasRules = raw === '' ? others : { ...chatRules.value, verbosity: raw as Verbosity };
+  setRules(updated);
+};
+const setServiceTier = (raw: string) => {
+  // An empty string clears the rule; any other text is stored verbatim.
+  const { serviceTier: _, ...others } = chatRules.value;
+  const updated: ChatAliasRules = raw === '' ? others : { ...chatRules.value, serviceTier: raw as ServiceTier };
+  setRules(updated);
+};
+
+// Text view of the integer budget, kept verbatim so a half-typed "1024foo"
+// never clobbers the rule; `pushed` is the last value this row wrote itself.
+let pushed = chatRules.value.reasoning?.budget_tokens;
+const budgetText = ref(pushed === undefined ? '' : String(pushed));
+watch(() => chatRules.value.reasoning?.budget_tokens, v => { // model changed under this row (reorder, kind reset)
+  if (v !== pushed) { pushed = v; budgetText.value = v === undefined ? '' : String(v); }
+});
+const onBudgetChange = (raw: string) => {
+  budgetText.value = raw;
+  const text = raw.trim();
+  const parsed = text === '' ? undefined : Number(text);
+  if (parsed !== undefined && !(/^\d+$/.test(text) && Number.isSafeInteger(parsed))) return; // digits only, exact integer
+  pushed = parsed;
+  patchReasoning({ budget_tokens: parsed });
+};
+
+// Suggestion lists for chat-rule comboboxes. The operator can still type
+// any value verbatim; the gateway forwards rule values without enum-gating
+// them so a brand-new upstream tier flows through without a frontend
+// release.
+const EFFORT_ITEMS = ['none', 'low', 'medium', 'high', 'xhigh'];
+const SUMMARY_ITEMS = ['auto', 'concise', 'detailed', 'none'];
+const VERBOSITY_ITEMS = ['low', 'medium', 'high'];
+const SERVICE_TIER_ITEMS = ['default', 'flex', 'priority', 'scale', 'fast'];
+
+const catalog = computed(() => findCatalogModel(props.models, target.value.target_model_id));
+const modelWarnings = computed(() => computeModelWarnings(target.value.target_model_id, catalog.value));
+const ruleWarnings = computed(() => computeRuleWarnings(chatRules.value, catalog.value));
+const warningFor = (field: string) => ruleWarnings.value.find(w => w.field === field)?.message;
+const modelWarningTooltip = computed(() => modelWarnings.value.map(w => w.message).join('\n'));
+</script>
+
+<template>
+  <div class="overflow-hidden rounded-lg border border-white/[0.06] bg-surface-800/40">
+    <header class="flex items-center gap-2 px-3 py-2">
+      <button
+        type="button"
+        class="grid size-6 shrink-0 place-items-center rounded text-gray-500 transition-colors hover:bg-white/5 hover:text-gray-200"
+        :aria-expanded="expanded"
+        aria-label="Toggle target row"
+        @click="toggleExpanded"
+      >
+        <svg
+          class="size-4 transition-transform"
+          :class="expanded && 'rotate-180'"
+          viewBox="0 0 24 24"
+          fill="none"
+          stroke="currentColor"
+          stroke-width="2"
+        >
+          <path d="m6 9 6 6 6-6" />
+        </svg>
+      </button>
+
+      <div class="min-w-0 flex-1">
+        <Combobox
+          v-model="targetId"
+          :items="targetIdItems"
+          placeholder="target model id"
+          input-class="font-mono"
+          borderless
+          hide-dropdown-trigger
+        />
+      </div>
+
+      <div class="flex shrink-0 items-center gap-1">
+        <Tooltip v-if="modelWarnings.length > 0" :content="modelWarningTooltip">
+          <span class="inline-flex h-7 w-7 items-center justify-center rounded-md text-amber-400" aria-label="Model warning">
+            <i class="i-lucide-alert-triangle size-4" />
+          </span>
+        </Tooltip>
+        <button
+          type="button"
+          class="grid size-7 place-items-center rounded text-gray-500 transition-colors hover:bg-white/5 hover:text-gray-200 disabled:opacity-30 disabled:hover:bg-transparent disabled:hover:text-gray-500"
+          :disabled="isFirst"
+          aria-label="Move target up"
+          @click="emit('moveUp')"
+        >
+          <i class="i-lucide-arrow-up size-4" />
+        </button>
+        <button
+          type="button"
+          class="grid size-7 place-items-center rounded text-gray-500 transition-colors hover:bg-white/5 hover:text-gray-200 disabled:opacity-30 disabled:hover:bg-transparent disabled:hover:text-gray-500"
+          :disabled="isLast"
+          aria-label="Move target down"
+          @click="emit('moveDown')"
+        >
+          <i class="i-lucide-arrow-down size-4" />
+        </button>
+        <button
+          type="button"
+          class="grid size-7 place-items-center rounded text-gray-500 transition-colors hover:bg-white/5 hover:text-accent-rose disabled:opacity-30 disabled:hover:bg-transparent disabled:hover:text-gray-500"
+          :disabled="isSole"
+          aria-label="Remove target"
+          @click="emit('remove')"
+        >
+          <i class="i-lucide-x size-4" />
+        </button>
+      </div>
+    </header>
+
+    <div v-if="expanded" class="space-y-3 border-t border-white/[0.06] p-3">
+      <template v-if="kind === 'chat'">
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Reasoning effort</label>
+          <Combobox
+            :model-value="chatRules.reasoning?.effort ?? ''"
+            :items="EFFORT_ITEMS"
+            placeholder="none / low / medium / high / xhigh"
+            @update:model-value="setEffort"
+          />
+          <p v-if="warningFor('reasoning.effort')" class="mt-1 text-xs text-amber-300">{{ warningFor('reasoning.effort') }}</p>
+        </div>
+
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Reasoning budget tokens</label>
+          <input
+            type="text"
+            inputmode="numeric"
+            placeholder="4096"
+            class="h-9 w-full rounded-[10px] border border-white/[0.14] bg-surface-700 px-3 text-sm text-white placeholder:text-gray-600 focus:border-accent-cyan/50 focus:outline-none focus:ring-1 focus:ring-accent-cyan/30 font-mono"
+            :value="budgetText"
+            @input="(e: Event) => onBudgetChange((e.target as HTMLInputElement).value)"
+          >
+          <p v-if="warningFor('reasoning.budget_tokens')" class="mt-1 text-xs text-amber-300">{{ warningFor('reasoning.budget_tokens') }}</p>
+        </div>
+
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Adaptive reasoning</label>
+          <div class="flex h-9 items-center gap-2">
+            <Switch
+              :model-value="chatRules.reasoning?.adaptive === true"
+              @update:model-value="setAdaptive"
+            />
+            <span class="text-sm text-gray-300">Enable</span>
+          </div>
+          <p v-if="warningFor('reasoning.adaptive')" class="mt-1 text-xs text-amber-300">{{ warningFor('reasoning.adaptive') }}</p>
+        </div>
+
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Reasoning summary</label>
+          <Combobox
+            :model-value="chatRules.reasoning?.summary ?? ''"
+            :items="SUMMARY_ITEMS"
+            placeholder="auto"
+            @update:model-value="setSummary"
+          />
+          <p v-if="warningFor('reasoning.summary')" class="mt-1 text-xs text-amber-300">{{ warningFor('reasoning.summary') }}</p>
+        </div>
+
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Verbosity</label>
+          <Combobox
+            :model-value="chatRules.verbosity ?? ''"
+            :items="VERBOSITY_ITEMS"
+            placeholder="medium"
+            @update:model-value="setVerbosity"
+          />
+          <p v-if="warningFor('verbosity')" class="mt-1 text-xs text-amber-300">{{ warningFor('verbosity') }}</p>
+        </div>
+
+        <div>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Service tier</label>
+          <Combobox
+            :model-value="chatRules.serviceTier ?? ''"
+            :items="SERVICE_TIER_ITEMS"
+            placeholder="default"
+            @update:model-value="setServiceTier"
+          />
+          <p v-if="warningFor('serviceTier')" class="mt-1 text-xs text-amber-300">{{ warningFor('serviceTier') }}</p>
+        </div>
+      </template>
+
+      <p v-else class="text-xs text-gray-500">No per-target rules for this kind.</p>
+    </div>
+  </div>
+</template>
diff --git a/apps/web/src/components/alias-edit/AliasTargetRow_test.ts b/apps/web/src/components/alias-edit/AliasTargetRow_test.ts
new file mode 100644
index 000000000..abe3398c2
--- /dev/null
+++ b/apps/web/src/components/alias-edit/AliasTargetRow_test.ts
@@ -0,0 +1,89 @@
+import { mount } from '@vue/test-utils';
+import { describe, expect, it } from 'vitest';
+import { nextTick } from 'vue';
+
+import AliasTargetRow from './AliasTargetRow.vue';
+import type { AliasTarget, ChatAliasRules, ControlPlaneModel } from '../../api/types.ts';
+
+const target = (over: Partial<AliasTarget> = {}): AliasTarget => {
+  // Defaults are spread first so any caller-supplied field overrides them.
+  const defaults: AliasTarget = { target_model_id: 'gpt-5', rules: {} as ChatAliasRules };
+  return { ...defaults, ...over };
+};
+
+const realModel = (id: string, chat?: ControlPlaneModel['chat']): ControlPlaneModel => {
+  const base: ControlPlaneModel = { id, upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }] };
+  // Omit the `chat` key entirely (rather than setting it to undefined) when no capability is given.
+  return chat ? { ...base, chat } : base;
+};
+
+const mountRow = (props: Partial<InstanceType<typeof AliasTargetRow>['$props']>) => mount(AliasTargetRow, {
+  props: {
+    modelValue: target(),
+    kind: 'chat',
+    targetIdItems: ['gpt-5', 'claude-sonnet'],
+    models: [realModel('gpt-5')],
+    isFirst: false,
+    isLast: false,
+    isSole: false,
+    ...props,
+  },
+});
+
+describe('AliasTargetRow', () => {
+  it('disables Move Up on the first row, Move Down on the last, and Remove when it is the sole row', () => {
+    const first = mountRow({ isFirst: true });
+    expect((first.find('button[aria-label="Move target up"]').element as HTMLButtonElement).disabled).toBe(true);
+    expect((first.find('button[aria-label="Move target down"]').element as HTMLButtonElement).disabled).toBe(false);
+
+    const last = mountRow({ isLast: true });
+    expect((last.find('button[aria-label="Move target up"]').element as HTMLButtonElement).disabled).toBe(false);
+    expect((last.find('button[aria-label="Move target down"]').element as HTMLButtonElement).disabled).toBe(true);
+
+    const sole = mountRow({ isSole: true });
+    expect((sole.find('button[aria-label="Remove target"]').element as HTMLButtonElement).disabled).toBe(true);
+  });
+
+  it('emits move-up / move-down / remove', async () => {
+    const w = mountRow({});
+    await w.find('button[aria-label="Move target up"]').trigger('click');
+    await w.find('button[aria-label="Move target down"]').trigger('click');
+    await w.find('button[aria-label="Remove target"]').trigger('click');
+    expect(w.emitted('moveUp')).toHaveLength(1);
+    expect(w.emitted('moveDown')).toHaveLength(1);
+    expect(w.emitted('remove')).toHaveLength(1);
+  });
+
+  it('renders the expanded chat body only when expanded AND kind is chat; renders the empty-state caption for other kinds', async () => {
+    const chatRow = mountRow({});
+    expect(chatRow.text()).not.toContain('Reasoning effort');
+    await chatRow.find('button[aria-label="Toggle target row"]').trigger('click');
+    expect(chatRow.text()).toContain('Reasoning effort');
+    expect(chatRow.text()).toContain('Verbosity');
+
+    const embedRow = mountRow({ kind: 'embedding', modelValue: { target_model_id: 'e1', rules: {} } });
+    await embedRow.find('button[aria-label="Toggle target row"]').trigger('click');
+    expect(embedRow.text()).toContain('No per-target rules for this kind.');
+    expect(embedRow.text()).not.toContain('Reasoning effort');
+  });
+
+  it('shows the model-level warning icon when the target id does not resolve to any catalog entry', () => {
+    const known = mountRow({ modelValue: target({ target_model_id: 'gpt-5' }), models: [realModel('gpt-5')] });
+    expect(known.find('span[aria-label="Model warning"]').exists()).toBe(false);
+
+    const unknown = mountRow({ modelValue: target({ target_model_id: 'mystery' }), models: [realModel('gpt-5')] });
+    expect(unknown.find('span[aria-label="Model warning"]').exists()).toBe(true);
+  });
+
+  it('renders a rule-level warning under reasoning.effort when the target does not advertise it', async () => {
+    const w = mountRow({
+      modelValue: { target_model_id: 'gpt-5', rules: { reasoning: { effort: 'xhigh' } } as ChatAliasRules },
+      models: [realModel('gpt-5', { reasoning: { effort: { supported: ['low', 'medium'], default: 'medium' } } })],
+    });
+    await w.find('button[aria-label="Toggle target row"]').trigger('click');
+    await nextTick();
+    const html = w.html();
+    expect(html).toContain('text-amber-300');
+    expect(html).toContain('low, medium');
+  });
+});
diff --git a/apps/web/src/components/alias-edit/warnings.ts b/apps/web/src/components/alias-edit/warnings.ts
new file mode 100644
index 000000000..f9d9a9147
--- /dev/null
+++ b/apps/web/src/components/alias-edit/warnings.ts
@@ -0,0 +1,129 @@
+// Shared catalog lookups + warning computation for the alias dashboard
+// surfaces (Settings row, edit dialog, target row). Centralising these
+// helpers means the Settings card and the dialog read the same view of the
+// live /api/models catalog — both the "is this id a real (non-alias) model"
+// and "does this target's chat capability advertise <X>" checks resolve
+// against one source of truth instead of two parallel implementations.
+
+import type { ChatAliasRules, ControlPlaneModel, ModelAlias } from '../../api/types.ts';
+
+// A target id matches a catalog model when either form (prefixed or
+// unprefixed) resolves to that public id. The /api/models catalog already
+// emits the public id directly, so equality is enough; future prefix-form
+// surfaces can wire their normalisation in here without touching the
+// callers.
+export const findCatalogModel = (
+  models: readonly ControlPlaneModel[] | null | undefined,
+  targetModelId: string,
+): ControlPlaneModel | undefined =>
+  models?.find(entry => entry.id === targetModelId); // a null/undefined catalog yields undefined
+
+// Real (non-alias) model ids the operator can route to. Used by the
+// target-id combobox suggestion list and by the shadow-warning check.
+export const realModelIds = (models: readonly ControlPlaneModel[] | null | undefined): string[] =>
+  (models ?? []).flatMap(entry => (entry.aliasedFrom === undefined ? [entry.id] : [])); // alias rows contribute nothing
+
+// One warning attached to a specific chat rule field. The field key matches
+// the form's `data-field` attribute so the dialog can render the warning
+// directly under the input it annotates.
+export interface AliasRuleWarning {
+  field: 'reasoning.effort' | 'reasoning.budget_tokens' | 'reasoning.adaptive' | 'reasoning.summary' | 'reasoning.mandatory' | 'verbosity' | 'serviceTier';
+  message: string;
+}
+
+// Rule-level warnings: a configured rule field whose target's chat
+// capability metadata does not advertise the feature. The gateway still
+// forwards the value verbatim; the warning just tells the operator the
+// upstream may ignore it.
+export const computeRuleWarnings = (
+  rules: ChatAliasRules,
+  catalog: ControlPlaneModel | undefined,
+): AliasRuleWarning[] => {
+  const warnings: AliasRuleWarning[] = [];
+  const warn = (field: AliasRuleWarning['field'], message: string): void => { warnings.push({ field, message }); };
+  const advertised = catalog?.chat?.reasoning; // what the target's catalog entry claims to support
+  const wanted = rules.reasoning; // what the alias rule asks for
+
+  const effort = wanted?.effort;
+  if (effort !== undefined) {
+    const levels = advertised?.effort?.supported;
+    if (levels === undefined) {
+      warn('reasoning.effort', 'Target does not advertise reasoning effort.');
+    } else if (!levels.includes(effort)) {
+      warn('reasoning.effort', `Target advertises effort levels: ${levels.join(', ')}.`);
+    }
+  }
+
+  const budget = wanted?.budget_tokens;
+  if (budget !== undefined) {
+    const bounds = advertised?.budget_tokens;
+    if (bounds === undefined) {
+      warn('reasoning.budget_tokens', 'Target does not advertise a reasoning budget.');
+    } else {
+      if (bounds.min !== undefined && budget < bounds.min) warn('reasoning.budget_tokens', `Below target minimum (${bounds.min}).`);
+      if (bounds.max !== undefined && budget > bounds.max) warn('reasoning.budget_tokens', `Above target maximum (${bounds.max}).`);
+    }
+  }
+
+  if (wanted?.adaptive === true && advertised?.adaptive !== true) {
+    warn('reasoning.adaptive', 'Target does not advertise adaptive reasoning.');
+  }
+  if (wanted?.mandatory === true && advertised?.mandatory !== true) {
+    warn('reasoning.mandatory', 'Target does not advertise mandatory reasoning.');
+  }
+
+  // Summary, verbosity, and serviceTier have no advertised catalog metadata
+  // today; their values are forwarded verbatim and never warn here.
+  return warnings;
+};
+
+// One model-level warning attached to one target row. Today the only
+// trigger is the target id failing to resolve to any catalog model — the
+// dashboard surfaces it via a yellow `!` icon in the row's action cluster
+// with a tooltip listing every warning on that row.
+export interface AliasModelWarning {
+  message: string;
+}
+
+export const computeModelWarnings = (
+  targetModelId: string,
+  catalog: ControlPlaneModel | undefined,
+): AliasModelWarning[] => {
+  // An empty id produces no warning, and neither does an id that resolved
+  // to a catalog entry; only a non-empty, unresolved id is reported.
+  const unresolved = targetModelId !== '' && catalog === undefined;
+  if (!unresolved) return [];
+  return [{ message: `"${targetModelId}" does not currently resolve to any enabled upstream binding.` }];
+};
+
+// Alias-level shadow warning. Fires iff the alias name matches a real
+// (non-alias) catalog model id AND no target inside the alias references
+// that real id (the seed pattern — alias name = first target's id —
+// deliberately suppresses the warning).
+export interface AliasShadowWarning {
+  shadowedId: string;
+  shadowedDisplayName: string | null;
+}
+
+export const computeShadowWarning = (
+  aliasName: string,
+  targets: readonly { target_model_id: string }[],
+  models: readonly ControlPlaneModel[] | null | undefined,
+): AliasShadowWarning | null => {
+  // An empty alias name can never shadow anything.
+  if (aliasName === '') return null;
+  const real = (models ?? []).find(entry => entry.aliasedFrom === undefined && entry.id === aliasName);
+  // No real model under that id, or the alias itself routes to it (seed pattern).
+  if (real === undefined || targets.some(entry => entry.target_model_id === aliasName)) return null;
+  const label = real.display_name ?? null;
+  // A display name that merely repeats the id carries no extra information.
+  const distinctLabel = label === null || label === real.id ? null : label;
+  return { shadowedId: real.id, shadowedDisplayName: distinctLabel };
+};
+
+// Boolean form of computeShadowWarning: true iff the alias's name and
+// targets produce a shadow warning against the given catalog.
+export const aliasHasShadowWarning = (
+  alias: ModelAlias,
+  models: readonly ControlPlaneModel[] | null | undefined,
+): boolean => computeShadowWarning(alias.name, alias.targets, models) !== null;
diff --git a/apps/web/src/components/alias-edit/warnings_test.ts b/apps/web/src/components/alias-edit/warnings_test.ts
new file mode 100644
index 000000000..b9088173a
--- /dev/null
+++ b/apps/web/src/components/alias-edit/warnings_test.ts
@@ -0,0 +1,156 @@
+import { describe, expect, it } from 'vitest';
+
+import { aliasHasShadowWarning, computeModelWarnings, computeRuleWarnings, computeShadowWarning, findCatalogModel, realModelIds } from './warnings.ts';
+import type { ChatAliasRules, ControlPlaneModel, ModelAlias } from '../../api/types.ts';
+
+const realModel = (over: Partial<ControlPlaneModel> & { id: string }): ControlPlaneModel => ({
+  upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }],
+  ...over,
+});
+
+const aliasModel = (over: Partial<ControlPlaneModel> & { id: string }): ControlPlaneModel => ({
+  upstreams: [],
+  aliasedFrom: { name: over.id, kind: 'chat', selection: 'first-available', targets: [] },
+  ...over,
+});
+
+const alias = (over: Partial<ModelAlias> & { name: string }): ModelAlias => ({
+  kind: 'chat',
+  selection: 'first-available',
+  display_name: null,
+  visible_in_models_list: true,
+  targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }],
+  sort_order: 0,
+  created_at: '2026-01-01T00:00:00Z',
+  updated_at: '2026-01-01T00:00:00Z',
+  ...over,
+});
+
+describe('realModelIds', () => {
+  it('excludes alias entries and returns the remaining ids in catalog order', () => {
+    const catalog: ControlPlaneModel[] = [
+      realModel({ id: 'gpt-5' }),
+      aliasModel({ id: 'auto-review' }),
+      realModel({ id: 'claude-sonnet' }),
+    ];
+    expect(realModelIds(catalog)).toEqual(['gpt-5', 'claude-sonnet']);
+  });
+
+  it('returns an empty array for a null or missing catalog', () => {
+    expect(realModelIds(null)).toEqual([]);
+    expect(realModelIds(undefined)).toEqual([]);
+  });
+});
+
+describe('findCatalogModel', () => {
+  it('looks up the catalog row by id', () => {
+    const catalog: ControlPlaneModel[] = [realModel({ id: 'gpt-5' }), realModel({ id: 'claude' })];
+    expect(findCatalogModel(catalog, 'claude')?.id).toBe('claude');
+    expect(findCatalogModel(catalog, 'unknown')).toBeUndefined();
+  });
+});
+
+describe('computeModelWarnings', () => {
+  it('returns no warning when the target resolves to a catalog entry', () => {
+    const catalog = realModel({ id: 'gpt-5' });
+    expect(computeModelWarnings('gpt-5', catalog)).toEqual([]);
+  });
+
+  it('returns a "does not resolve" warning when the target is unknown', () => {
+    const w = computeModelWarnings('mystery-model', undefined);
+    expect(w).toHaveLength(1);
+    expect(w[0].message).toContain('mystery-model');
+    expect(w[0].message).toContain('does not currently resolve');
+  });
+
+  it('emits no warning for an empty id (the row is mid-edit)', () => {
+    expect(computeModelWarnings('', undefined)).toEqual([]);
+  });
+});
+
+describe('computeRuleWarnings', () => {
+  const catalogWithReasoning = realModel({
+    id: 'gpt-5',
+    chat: {
+      reasoning: {
+        effort: { supported: ['low', 'medium'], default: 'medium' },
+        budget_tokens: { min: 100, max: 1000 },
+      },
+    },
+  });
+
+  it('flags effort values not in the advertised supported list', () => {
+    const w = computeRuleWarnings({ reasoning: { effort: 'xhigh' } }, catalogWithReasoning);
+    expect(w).toHaveLength(1);
+    expect(w[0].field).toBe('reasoning.effort');
+    expect(w[0].message).toContain('low, medium');
+  });
+
+  it('does not flag effort values that are advertised', () => {
+    const w = computeRuleWarnings({ reasoning: { effort: 'low' } }, catalogWithReasoning);
+    expect(w).toEqual([]);
+  });
+
+  it('flags budgets outside the advertised range', () => {
+    const tooHigh = computeRuleWarnings({ reasoning: { budget_tokens: 5000 } }, catalogWithReasoning);
+    expect(tooHigh[0].field).toBe('reasoning.budget_tokens');
+    expect(tooHigh[0].message).toContain('1000');
+    const tooLow = computeRuleWarnings({ reasoning: { budget_tokens: 10 } }, catalogWithReasoning);
+    expect(tooLow[0].field).toBe('reasoning.budget_tokens');
+    expect(tooLow[0].message).toContain('100');
+  });
+
+  it('flags adaptive=true when the target does not advertise adaptive', () => {
+    const w = computeRuleWarnings({ reasoning: { adaptive: true } }, catalogWithReasoning);
+    expect(w).toHaveLength(1);
+    expect(w[0].field).toBe('reasoning.adaptive');
+  });
+
+  it('flags reasoning at all when the target lacks reasoning metadata', () => {
+    const noReasoning = realModel({ id: 'gpt-5', chat: {} });
+    const w = computeRuleWarnings({ reasoning: { effort: 'low' } }, noReasoning);
+    expect(w[0].field).toBe('reasoning.effort');
+    expect(w[0].message).toContain('does not advertise');
+  });
+});
+
+describe('computeShadowWarning', () => {
+  const catalog: ControlPlaneModel[] = [
+    realModel({ id: 'gpt-5', display_name: 'GPT 5' }),
+    realModel({ id: 'plain' }),
+    aliasModel({ id: 'auto-review' }),
+  ];
+
+  it('returns null when the alias name does not match any real model id', () => {
+    expect(computeShadowWarning('not-a-real-id', [{ target_model_id: 'gpt-5' }], catalog)).toBeNull();
+  });
+
+  it('returns null when the alias name matches another alias (not a real model)', () => {
+    expect(computeShadowWarning('auto-review', [{ target_model_id: 'gpt-5' }], catalog)).toBeNull();
+  });
+
+  it('returns null when one of the targets references the shadowed id (seed pattern)', () => {
+    expect(computeShadowWarning('gpt-5', [{ target_model_id: 'gpt-5' }, { target_model_id: 'plain' }], catalog)).toBeNull();
+  });
+
+  it('returns the shadowed id with display_name only when display_name differs from id', () => {
+    const w1 = computeShadowWarning('gpt-5', [{ target_model_id: 'plain' }], catalog);
+    expect(w1).toEqual({ shadowedId: 'gpt-5', shadowedDisplayName: 'GPT 5' });
+    const w2 = computeShadowWarning('plain', [{ target_model_id: 'gpt-5' }], catalog);
+    expect(w2).toEqual({ shadowedId: 'plain', shadowedDisplayName: null });
+  });
+
+  it('returns null on an empty alias name (mid-edit)', () => {
+    expect(computeShadowWarning('', [{ target_model_id: 'gpt-5' }], catalog)).toBeNull();
+  });
+});
+
+describe('aliasHasShadowWarning', () => {
+  const catalog: ControlPlaneModel[] = [realModel({ id: 'gpt-5' }), realModel({ id: 'plain' })];
+
+  it('mirrors computeShadowWarning', () => {
+    expect(aliasHasShadowWarning(alias({ name: 'gpt-5', targets: [{ target_model_id: 'plain', rules: {} as ChatAliasRules }] }), catalog)).toBe(true);
+    expect(aliasHasShadowWarning(alias({ name: 'gpt-5', targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }] }), catalog)).toBe(false);
+    expect(aliasHasShadowWarning(alias({ name: 'free-name' }), catalog)).toBe(false);
+  });
+});
diff --git a/apps/web/src/components/models/ModelInfoBar.vue b/apps/web/src/components/models/ModelInfoBar.vue
index f8bf98b6e..00e0165ef 100644
--- a/apps/web/src/components/models/ModelInfoBar.vue
+++ b/apps/web/src/components/models/ModelInfoBar.vue
@@ -1,8 +1,11 @@
 <script setup lang="ts">
+import { computed } from 'vue';
+
 import type { ControlPlaneModel } from '../../api/types.ts';
 import { providerBadgeClass, providerMeta } from '../upstreams/provider-meta.ts';
+import { formatAliasRuleBadges } from '@floway-dev/protocols/common';
 
-defineProps<{
+const props = defineProps<{
   model: ControlPlaneModel;
 }>();
 
@@ -13,6 +16,42 @@ const formatTokenLimit = (n: number) => {
   if (n >= 1_000) return `${(n / 1_000).toFixed(n % 1_000 === 0 ? 0 : 1)}k`;
   return n.toString();
 };
+
+// Alias-of badge: truncate the target list to the first three with a
+// "+N more" tail when needed. Keeps the badge readable for aliases that
+// fan out to a long fallback chain.
+const aliasOfLabel = computed<string | null>(() => {
+  const source = props.model.aliasedFrom;
+  if (!source) return null;
+  const ids = source.targets.map(entry => entry.target_model_id);
+  const shown = ids.slice(0, 3).join(', '); // at most the first three target ids
+  return ids.length > 3 ? `alias of: ${shown} +${ids.length - 3} more` : `alias of: ${shown}`;
+});
+
+// Rule badge sequence. Single-target aliases show that target's badges
+// unchanged; multi-target aliases emit one pill per field — the badge
+// itself if only one distinct label was seen, else "<field>: varies".
+const ruleBadges = computed<{ label: string }[]>(() => {
+  const a = props.model.aliasedFrom;
+  if (!a) return [];
+  if (a.targets.length === 1) return formatAliasRuleBadges(a.targets[0].rules);
+  // Bucket every target's badge labels by field. The field is the text
+  // before ":" when the label has one ("summary: auto" -> "summary"),
+  // otherwise everything after the first word ("low effort" -> "effort"),
+  // falling back to the whole label for single-word badges.
+  const byField = new Map<string, Set<string>>();
+  for (const t of a.targets) {
+    for (const badge of formatAliasRuleBadges(t.rules)) {
+      const field = badge.label.includes(':') ? badge.label.split(':')[0].trim() : badge.label.split(' ').slice(1).join(' ').trim() || badge.label;
+      const set = byField.get(field) ?? new Set<string>();
+      set.add(badge.label);
+      byField.set(field, set);
+    }
+  }
+  return Array.from(byField.entries()).map(([field, set]) => ({
+    label: set.size === 1 ? [...set][0] : `${field}: varies`,
+  }));
+});
 </script>
 
 <template>
@@ -43,6 +82,13 @@ const formatTokenLimit = (n: number) => {
           <span v-if="model.limits?.max_output_tokens" class="text-[10px] font-mono px-2 py-0.5 rounded-full bg-surface-600 text-gray-400">
             output: {{ formatTokenLimit(model.limits.max_output_tokens) }}
           </span>
+          <span v-if="aliasOfLabel" class="text-[10px] font-mono px-2 py-0.5 rounded-full border border-white/15 text-gray-400">{{ aliasOfLabel }}</span>
+          <span v-if="model.aliasedFrom" class="text-[10px] font-mono px-2 py-0.5 rounded-full border border-white/15 text-gray-400">selection: {{ model.aliasedFrom.selection }}</span>
+          <span
+            v-for="badge in ruleBadges"
+            :key="badge.label"
+            class="text-[10px] font-mono px-2 py-0.5 rounded-full border border-white/15 text-gray-400"
+          >{{ badge.label }}</span>
         </div>
       </div>
       <button class="btn-ghost text-[11px] flex shrink-0 items-center gap-1" @click="$emit('clear')">
diff --git a/apps/web/src/components/settings/AliasRow.vue b/apps/web/src/components/settings/AliasRow.vue
new file mode 100644
index 000000000..0f3d1a879
--- /dev/null
+++ b/apps/web/src/components/settings/AliasRow.vue
@@ -0,0 +1,101 @@
+<script setup lang="ts">
+// One alias rendered as a two-line block in the Settings card. The action
+// cluster sits right-aligned and reserves the leftmost slot for the
+// alias-level warning icon — when no warning is firing the slot collapses
+// to zero width, so the edit and delete buttons keep the same on-screen
+// position whether or not a warning is present.
+
+import { computed } from 'vue';
+
+import type { ControlPlaneModel, ModelAlias } from '../../api/types.ts';
+import { computeShadowWarning } from '../alias-edit/warnings.ts';
+import { composeAliasDisplayName } from '@floway-dev/protocols/common';
+import { Tooltip } from '@floway-dev/ui';
+
+const props = defineProps<{
+  alias: ModelAlias;
+  models: readonly ControlPlaneModel[] | null;
+}>();
+
+defineEmits<{
+  edit: [];
+  delete: [];
+}>();
+
+// Title resolution mirrors the spec's derivation rule: an operator-set
+// `display_name` always wins; falling back to the single-target compose
+// helper or to the alias `name` when multi-target.
+const title = computed(() => {
+  if (props.alias.display_name !== null) return props.alias.display_name;
+  if (props.alias.targets.length === 1) {
+    const t = props.alias.targets[0];
+    return composeAliasDisplayName(t.target_model_id, t.rules);
+  }
+  return props.alias.name;
+});
+
+// Caption: name · target count · selection, plus a marker when the alias is hidden.
+const caption = computed(() => {
+  const { name, targets, selection, visible_in_models_list: visible } = props.alias;
+  const parts: string[] = [name, `${targets.length} target${targets.length === 1 ? '' : 's'}`, selection];
+  if (!visible) parts.push('hidden from /v1/models');
+  return parts.join(' · ');
+});
+
+// The shadow check runs once; the icon's visibility and the tooltip text
+// both derive from that single result, so they cannot disagree.
+const shadowWarning = computed(() => computeShadowWarning(props.alias.name, props.alias.targets, props.models));
+const hasShadow = computed(() => shadowWarning.value !== null);
+const shadowTooltip = computed(() => {
+  const w = shadowWarning.value;
+  if (!w) return '';
+  const label = w.shadowedDisplayName !== null ? `${w.shadowedId} (${w.shadowedDisplayName})` : w.shadowedId;
+  return `Alias name shadows a real model id: ${label}`;
+});
+</script>
+
+<template>
+  <div class="rounded-lg border border-white/5 bg-surface-800/80 px-3 py-2.5">
+    <div class="flex items-start gap-3">
+      <div class="min-w-0 flex-1">
+        <h4 class="truncate text-sm font-semibold text-white">{{ title }}</h4>
+        <p class="mt-0.5 truncate font-mono text-xs text-gray-500">{{ caption }}</p>
+      </div>
+
+      <div class="flex shrink-0 items-center gap-1">
+        <Tooltip v-if="hasShadow" :content="shadowTooltip">
+          <span
+            class="inline-flex h-8 w-8 items-center justify-center rounded-md text-amber-400"
+            aria-label="Alias warning"
+          >
+            <i class="i-lucide-alert-triangle size-4" />
+          </span>
+        </Tooltip>
+        <button
+          type="button"
+          class="inline-flex h-8 w-8 items-center justify-center rounded-md p-1 text-gray-600 transition-colors hover:bg-white/[0.04] hover:text-accent-cyan"
+          aria-label="Edit alias"
+          title="Edit"
+          @click="$emit('edit')"
+        >
+          <svg class="h-4 w-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
+            <path d="M17 3a2.83 2.83 0 1 1 4 4L7.5 20.5 2 22l1.5-5.5Z" />
+            <path d="m15 5 4 4" />
+          </svg>
+        </button>
+        <button
+          type="button"
+          class="inline-flex h-8 w-8 items-center justify-center rounded-md p-1 text-gray-600 transition-colors hover:bg-white/[0.04] hover:text-accent-rose"
+          aria-label="Delete alias"
+          title="Delete"
+          @click="$emit('delete')"
+        >
+          <svg class="h-4 w-4" viewBox="0 0 24 24" fill="none" stroke="currentColor" stroke-width="2">
+            <polyline points="3 6 5 6 21 6" />
+            <path d="M19 6v14a2 2 0 0 1-2 2H7a2 2 0 0 1-2-2V6m3 0V4a2 2 0 0 1 2-2h4a2 2 0 0 1 2 2v2" />
+          </svg>
+        </button>
+      </div>
+    </div>
+  </div>
+</template>
diff --git a/apps/web/src/components/settings/AliasRow_test.ts b/apps/web/src/components/settings/AliasRow_test.ts
new file mode 100644
index 000000000..f30ed71ef
--- /dev/null
+++ b/apps/web/src/components/settings/AliasRow_test.ts
@@ -0,0 +1,120 @@
+import { mount } from '@vue/test-utils';
+import { describe, expect, it } from 'vitest';
+
+import AliasRow from './AliasRow.vue';
+import type { ChatAliasRules, ControlPlaneModel, ModelAlias } from '../../api/types.ts';
+
+const alias = (over: Partial<ModelAlias> & { name: string }): ModelAlias => ({
+  kind: 'chat',
+  selection: 'first-available',
+  display_name: null,
+  visible_in_models_list: true,
+  targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }],
+  sort_order: 0,
+  created_at: '2026-01-01T00:00:00Z',
+  updated_at: '2026-01-01T00:00:00Z',
+  ...over,
+});
+
+const realModel = (id: string, display?: string): ControlPlaneModel => ({
+  id,
+  display_name: display,
+  upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }],
+});
+
+const aliasModel = (id: string): ControlPlaneModel => ({
+  id,
+  upstreams: [],
+  aliasedFrom: { name: id, kind: 'chat', selection: 'first-available', targets: [] },
+});
+
+describe('AliasRow', () => {
+  it('renders display_name when set; falls back to compose helper for single-target; alias name for multi-target', () => {
+    const withDisplay = mount(AliasRow, { props: { alias: alias({ name: 'a', display_name: 'My Friendly Name' }), models: [] } });
+    expect(withDisplay.find('h4').text()).toBe('My Friendly Name');
+
+    const single = mount(AliasRow, {
+      props: {
+        alias: alias({ name: 'a', display_name: null, targets: [{ target_model_id: 'gpt-5', rules: { reasoning: { effort: 'low' } } as ChatAliasRules }] }),
+        models: [],
+      },
+    });
+    expect(single.find('h4').text()).toBe('gpt-5 (low effort)');
+
+    const multi = mount(AliasRow, {
+      props: {
+        alias: alias({
+          name: 'gizmo',
+          display_name: null,
+          targets: [
+            { target_model_id: 'gpt-5', rules: {} as ChatAliasRules },
+            { target_model_id: 'claude', rules: {} as ChatAliasRules },
+          ],
+        }),
+        models: [],
+      },
+    });
+    expect(multi.find('h4').text()).toBe('gizmo');
+  });
+
+  it('formats the caption: name · N targets · selection (and optional hidden suffix)', () => {
+    const w = mount(AliasRow, {
+      props: {
+        alias: alias({
+          name: 'auto-review',
+          selection: 'random',
+          visible_in_models_list: false,
+          targets: [
+            { target_model_id: 'a', rules: {} as ChatAliasRules },
+            { target_model_id: 'b', rules: {} as ChatAliasRules },
+          ],
+        }),
+        models: [],
+      },
+    });
+    expect(w.find('p').text()).toBe('auto-review · 2 targets · random · hidden from /v1/models');
+
+    const sole = mount(AliasRow, {
+      props: { alias: alias({ name: 'one' }), models: [] },
+    });
+    expect(sole.find('p').text()).toBe('one · 1 target · first-available');
+  });
+
+  it('emits edit on the pencil button and delete on the trash button', async () => {
+    const w = mount(AliasRow, { props: { alias: alias({ name: 'a' }), models: [] } });
+    const edit = w.find('button[aria-label="Edit alias"]');
+    const del = w.find('button[aria-label="Delete alias"]');
+    await edit.trigger('click');
+    await del.trigger('click');
+    expect(w.emitted('edit')).toHaveLength(1);
+    expect(w.emitted('delete')).toHaveLength(1);
+  });
+
+  it('renders the alias-level warning icon only when the shadow warning fires', () => {
+    const catalog = [realModel('gpt-5'), realModel('plain')];
+
+    const noShadow = mount(AliasRow, { props: { alias: alias({ name: 'unique', targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }] }), models: catalog } });
+    expect(noShadow.find('span[aria-label="Alias warning"]').exists()).toBe(false);
+
+    const shadow = mount(AliasRow, { props: { alias: alias({ name: 'gpt-5', targets: [{ target_model_id: 'plain', rules: {} as ChatAliasRules }] }), models: catalog } });
+    expect(shadow.find('span[aria-label="Alias warning"]').exists()).toBe(true);
+
+    // Seed pattern (target references shadowed id) suppresses the warning.
+    const seeded = mount(AliasRow, {
+      props: {
+        alias: alias({ name: 'gpt-5', targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }, { target_model_id: 'plain', rules: {} as ChatAliasRules }] }),
+        models: catalog,
+      },
+    });
+    expect(seeded.find('span[aria-label="Alias warning"]').exists()).toBe(false);
+
+    // An alias-name collision against another alias doesn't shadow (only real-model collisions do).
+    const aliasCollision = mount(AliasRow, {
+      props: {
+        alias: alias({ name: 'auto-review', targets: [{ target_model_id: 'plain', rules: {} as ChatAliasRules }] }),
+        models: [aliasModel('auto-review'), realModel('plain')],
+      },
+    });
+    expect(aliasCollision.find('span[aria-label="Alias warning"]').exists()).toBe(false);
+  });
+});
diff --git a/apps/web/src/components/settings/AliasesSettingsCard.vue b/apps/web/src/components/settings/AliasesSettingsCard.vue
new file mode 100644
index 000000000..7741d5509
--- /dev/null
+++ b/apps/web/src/components/settings/AliasesSettingsCard.vue
@@ -0,0 +1,71 @@
+<script setup lang="ts">
+// Settings card listing every alias the operator has configured. Mirrors
+// the Proxies card's chrome: section title, "Add alias" button, empty
+// state, list body, surfaced load-error banner.
+
+import { computed } from 'vue';
+
+import AliasRow from './AliasRow.vue';
+import { callApi, useApi } from '../../api/client.ts';
+import type { ModelAlias } from '../../api/types.ts';
+import { useModelAliases } from '../../composables/useModelAliases.ts';
+import { useModelsStore } from '../../composables/useModels.ts';
+import { Spinner } from '@floway-dev/ui';
+
+const emit = defineEmits<{
+  'add': [];
+  'edit': [record: ModelAlias];
+  'changed': [];
+}>();
+
+const api = useApi();
+const aliasesStore = useModelAliases();
+const modelsStore = useModelsStore();
+
+const aliases = computed<ModelAlias[]>(() => aliasesStore.aliases.value ?? []);
+
+const deleteAlias = async (record: ModelAlias) => {
+  const confirmed = window.confirm(`Delete alias "${record.name}"?`);
+  if (!confirmed) return;
+  const request = () => api.api.aliases[':name'].$delete({ param: { name: record.name } });
+  const { error } = await callApi(request);
+  // A failed delete is reported to the operator; only success emits 'changed'.
+  if (error) window.alert(`Delete failed: ${error.message}`);
+  else emit('changed');
+};
+</script>
+
+<template>
+  <div class="glass-card p-5 sm:p-6 animate-in delay-2">
+    <div class="mb-4 flex flex-col gap-3 sm:flex-row sm:items-start sm:justify-between">
+      <div class="min-w-0">
+        <h3 class="text-white font-semibold mb-1">Aliases</h3>
+        <p class="text-sm text-gray-400">
+          Named virtual model ids that resolve to one of N target models, with optional per-target rule overlays.
+        </p>
+      </div>
+      <button class="btn-primary !py-2.5 !px-3 text-xs whitespace-nowrap" @click="emit('add')">Add Alias</button>
+    </div>
+
+    <p v-if="aliasesStore.error.value" class="mb-3 rounded-md border border-accent-rose/40 bg-accent-rose/10 px-3 py-2 text-sm text-accent-rose">
+      Failed to load aliases: {{ aliasesStore.error.value }}
+    </p>
+
+    <p v-if="!aliasesStore.error.value && aliases.length === 0" class="text-sm text-gray-500">
+      No aliases configured. Add one to expose a virtual model id that routes across multiple targets with locked rules.
+    </p>
+
+    <div v-else-if="aliases.length > 0" class="space-y-2">
+      <AliasRow
+        v-for="alias in aliases"
+        :key="alias.name"
+        :alias="alias"
+        :models="modelsStore.models.value"
+        @edit="emit('edit', alias)"
+        @delete="deleteAlias(alias)"
+      />
+    </div>
+
+    <Spinner v-if="aliasesStore.loading.value && aliases.length > 0" class="mt-3 h-4 w-4 text-gray-500" />
+  </div>
+</template>
diff --git a/apps/web/src/components/settings/AliasesSettingsCard_test.ts b/apps/web/src/components/settings/AliasesSettingsCard_test.ts
new file mode 100644
index 000000000..0cfb22fa9
--- /dev/null
+++ b/apps/web/src/components/settings/AliasesSettingsCard_test.ts
@@ -0,0 +1,105 @@
+import { mount } from '@vue/test-utils';
+import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
+import { nextTick, ref } from 'vue';
+
+import type { ChatAliasRules, ControlPlaneModel, ModelAlias } from '../../api/types.ts';
+
+const aliasesRef = ref<ModelAlias[]>([]);
+const modelsRef = ref<ControlPlaneModel[]>([]);
+const aliasErrorRef = ref<string | null>(null);
+const deleteSpy = vi.fn(async (_arg: unknown) => new Response(null, { status: 204 }));
+
+vi.mock('../../composables/useModelAliases.ts', () => ({
+  useModelAliases: () => ({ aliases: aliasesRef, loading: ref(false), error: aliasErrorRef, load: async () => {} }),
+}));
+vi.mock('../../composables/useModels.ts', () => ({
+  useModelsStore: () => ({ models: modelsRef, loading: ref(false), error: ref<string | null>(null), load: async () => {} }),
+}));
+vi.mock('../../api/client.ts', () => ({
+  useApi: () => ({
+    api: {
+      aliases: {
+        ':name': { $delete: (arg: unknown) => deleteSpy(arg) },
+      },
+    },
+  }),
+  callApi: async <T>(fn: () => Promise<Response>) => {
+    const res = await fn();
+    if (!res.ok && res.status !== 204) return { error: { status: res.status, message: 'mock-error' } };
+    return { data: undefined as T };
+  },
+  authFetch: vi.fn(),
+}));
+
+const { default: AliasesSettingsCard } = await import('./AliasesSettingsCard.vue');
+
+const baseAlias = (over: Partial<ModelAlias> & { name: string }): ModelAlias => ({
+  kind: 'chat',
+  selection: 'first-available',
+  display_name: null,
+  visible_in_models_list: true,
+  targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }],
+  sort_order: 0,
+  created_at: '2026-01-01T00:00:00Z',
+  updated_at: '2026-01-01T00:00:00Z',
+  ...over,
+});
+
+beforeEach(() => {
+  aliasesRef.value = [];
+  modelsRef.value = [];
+  aliasErrorRef.value = null;
+  deleteSpy.mockClear();
+  vi.restoreAllMocks();
+  // happy-dom does not stub window.confirm / window.alert by default; install
+  // defaults (confirm accepts, alert is a no-op) that a test may reassign.
+  window.confirm = () => true;
+  window.alert = () => {};
+});
+
+afterEach(() => {
+  document.body.innerHTML = '';
+});
+
+describe('AliasesSettingsCard', () => {
+  it('renders the empty-state copy when no aliases are configured', () => {
+    const w = mount(AliasesSettingsCard);
+    expect(w.text()).toContain('No aliases configured');
+  });
+
+  it('renders one row per alias', () => {
+    aliasesRef.value = [
+      baseAlias({ name: 'a' }),
+      baseAlias({ name: 'b' }),
+    ];
+    const w = mount(AliasesSettingsCard);
+    expect(w.findAll('button[aria-label="Edit alias"]')).toHaveLength(2);
+  });
+
+  it('emits add when the "Add Alias" button is clicked', async () => {
+    const w = mount(AliasesSettingsCard);
+    const addBtn = w.findAll('button').find(b => b.text() === 'Add Alias')!;
+    await addBtn.trigger('click');
+    expect(w.emitted('add')).toHaveLength(1);
+  });
+
+  it('prompts confirm and calls DELETE, then emits changed when the trash button fires and the user confirms', async () => {
+    aliasesRef.value = [baseAlias({ name: 'doomed' })];
+    window.confirm = () => true;
+    const w = mount(AliasesSettingsCard);
+    await w.find('button[aria-label="Delete alias"]').trigger('click');
+    await nextTick();
+    expect(deleteSpy).toHaveBeenCalledTimes(1);
+    expect(w.emitted('changed')).toHaveLength(1);
+  });
+
+  it('skips the DELETE call when the user cancels the confirm prompt', async () => {
+    aliasesRef.value = [baseAlias({ name: 'doomed' })];
+    window.confirm = () => false;
+    const w = mount(AliasesSettingsCard);
+    await w.find('button[aria-label="Delete alias"]').trigger('click');
+    await nextTick();
+    expect(deleteSpy).not.toHaveBeenCalled();
+    expect(w.emitted('changed')).toBeUndefined();
+  });
+});
diff --git a/apps/web/src/composables/useModelAliases.ts b/apps/web/src/composables/useModelAliases.ts
new file mode 100644
index 000000000..94d045767
--- /dev/null
+++ b/apps/web/src/composables/useModelAliases.ts
@@ -0,0 +1,30 @@
+import { ref, shallowRef } from 'vue';
+
+import { callApi, useApi } from '../api/client.ts';
+import type { ModelAlias } from '../api/types.ts';
+
+// Module-scoped state so every caller reads the same refs — mirrors the
+// proxies/upstreams stores. Components share the result of the latest `load()`
+// (each `load()` call still issues its own request; nothing is deduped), and
+// edits in the Settings card reflect on the Models page without a page reload.
+const aliases = shallowRef<ModelAlias[] | null>(null);
+const loading = ref(false);
+const error = ref<string | null>(null);
+
+export const useModelAliases = () => {
+  const api = useApi();
+
+  const load = async () => {
+    loading.value = true;
+    error.value = null;
+    const { data, error: err } = await callApi<ModelAlias[]>(() => api.api.aliases.$get());
+    loading.value = false;
+    if (err) {
+      error.value = err.message;
+      return;
+    }
+    aliases.value = data;
+  };
+
+  return { aliases, loading, error, load };
+};
diff --git a/apps/web/src/pages/dashboard/settings.vue b/apps/web/src/pages/dashboard/settings.vue
index b8e23dc3d..1bc16e0f9 100644
--- a/apps/web/src/pages/dashboard/settings.vue
+++ b/apps/web/src/pages/dashboard/settings.vue
@@ -4,14 +4,17 @@ import { ref, watch } from 'vue';
 import { useRouter } from 'vue-router';
 
 import { callApi, useApi } from '../../api/client.ts';
-import type { ProxyRecord, SearchConfig, UpstreamProviderKind, UpstreamRecord } from '../../api/types.ts';
+import type { ModelAlias, ProxyRecord, SearchConfig, UpstreamProviderKind, UpstreamRecord } from '../../api/types.ts';
+import AliasEditDialog from '../../components/alias-edit/AliasEditDialog.vue';
 import ProxyEditDialog from '../../components/proxy-edit/ProxyEditDialog.vue';
+import AliasesSettingsCard from '../../components/settings/AliasesSettingsCard.vue';
 import ApiEndpointsSection from '../../components/settings/ApiEndpointsSection.vue';
 import ExportSection from '../../components/settings/ExportSection.vue';
 import ImportSection from '../../components/settings/ImportSection.vue';
 import ProxiesSettingsCard from '../../components/settings/ProxiesSettingsCard.vue';
 import SearchConfigSection from '../../components/settings/SearchConfigSection.vue';
 import UpstreamsSettingsCard from '../../components/settings/UpstreamsSettingsCard.vue';
+import { useModelAliases } from '../../composables/useModelAliases.ts';
 import { useModelsStore } from '../../composables/useModels.ts';
 import { useProxiesStore } from '../../composables/useProxies.ts';
 import { useRuntimeInfo } from '../../composables/useRuntimeInfo.ts';
@@ -31,6 +34,7 @@ export const useSettingsPageData = defineBasicLoader(async () => {
     useUpstreamsStore().load(),
     useModelsStore().load(),
     useProxiesStore().load(),
+    useModelAliases().load(),
     useRuntimeInfo().load(),
   ]);
   return {
@@ -48,7 +52,9 @@ const router = useRouter();
 const { upstreams, loading: storeLoading, load } = useUpstreamsStore();
 const modelsStore = useModelsStore();
 const proxiesStore = useProxiesStore();
+const aliasesStore = useModelAliases();
 const { load: loadProxies } = proxiesStore;
+const { load: loadAliases } = aliasesStore;
 const settingsData = useSettingsPageData();
 
 // Local working copy the child reorders via v-model:ordered; reloadAll
@@ -59,7 +65,7 @@ watch(upstreams, list => {
 }, { immediate: true });
 
 const reloadAll = async () => {
-  await Promise.all([load(), modelsStore.load(), loadProxies()]);
+  await Promise.all([load(), modelsStore.load(), loadProxies(), loadAliases()]);
 };
 
 // Proxy editor is hosted as a modal — v-if drives the unmount on close so the
@@ -70,6 +76,13 @@ const openProxyDialog = (record: ProxyRecord | null): void => {
   proxyDialogRecord.value = record;
   proxyDialogOpen.value = true;
 };
+
+const aliasDialogOpen = ref(false);
+const aliasDialogRecord = ref<ModelAlias | null>(null);
+const openAliasDialog = (record: ModelAlias | null): void => {
+  aliasDialogRecord.value = record;
+  aliasDialogOpen.value = true;
+};
 </script>
 
 <template>
@@ -89,6 +102,11 @@ const openProxyDialog = (record: ProxyRecord | null): void => {
           @edit="(record: ProxyRecord) => openProxyDialog(record)"
           @changed="reloadAll"
         />
+        <AliasesSettingsCard
+          @add="() => openAliasDialog(null)"
+          @edit="(record: ModelAlias) => openAliasDialog(record)"
+          @changed="reloadAll"
+        />
         <SearchConfigSection
           :initial-config="settingsData.data.value.searchConfig"
           :initial-error="settingsData.data.value.searchConfigError"
@@ -111,5 +129,12 @@ const openProxyDialog = (record: ProxyRecord | null): void => {
       :record="proxyDialogRecord"
       @saved="reloadAll"
     />
+
+    <AliasEditDialog
+      v-if="aliasDialogOpen"
+      v-model:open="aliasDialogOpen"
+      :record="aliasDialogRecord"
+      @saved="reloadAll"
+    />
   </div>
 </template>
diff --git a/packages/ui/src/Combobox.vue b/packages/ui/src/Combobox.vue
index 006d9e4f3..8bb6a1555 100644
--- a/packages/ui/src/Combobox.vue
+++ b/packages/ui/src/Combobox.vue
@@ -34,6 +34,19 @@ const props = withDefaults(defineProps<{
   inputClass?: string;
   /** Override the default "no matches" copy shown when the typed value already matches nothing. */
   emptyText?: string;
+  /**
+   * Drop the bordered surface-700 shell so the input blends into its parent
+   * row — used by the alias-target row, where the combobox is the row title
+   * inside an already-bordered Card and a second border would double up.
+   */
+  borderless?: boolean;
+  /**
+   * Hide the right-edge chevron that toggles the dropdown. The popover still
+   * opens on focus / click because the input itself owns `open-on-focus`;
+   * removing the chevron just keeps the title visually clean when the row
+   * already carries its own action cluster on the right.
+   */
+  hideDropdownTrigger?: boolean;
 }>(), {
   emptyText: 'No matches',
 });
@@ -111,15 +124,22 @@ const commitTyped = async () => {
           :disabled="disabled"
           :inputmode="inputmode"
           :class="[
-            'h-9 w-full rounded-[10px] border border-white/[0.14] bg-surface-700 pl-3 pr-9 text-sm text-white',
-            'transition-colors hover:border-white/25',
-            'focus:outline-none focus:border-accent-cyan/50 focus:ring-1 focus:ring-accent-cyan/30',
-            'placeholder:text-gray-600',
+            borderless
+              ? 'h-9 w-full bg-transparent text-sm text-white border-0 focus:outline-none focus:ring-0 placeholder:text-gray-600'
+              : [
+                'h-9 w-full rounded-[10px] border border-white/[0.14] bg-surface-700 pl-3 text-sm text-white',
+                'transition-colors hover:border-white/25',
+                'focus:outline-none focus:border-accent-cyan/50 focus:ring-1 focus:ring-accent-cyan/30',
+                'placeholder:text-gray-600',
+              ],
+            !borderless && (hideDropdownTrigger ? 'pr-3' : 'pr-9'),
+            borderless && 'px-0',
             'disabled:opacity-50 disabled:cursor-not-allowed',
             inputClass,
           ]"
         />
         <ComboboxTrigger
+          v-if="!hideDropdownTrigger"
           class="absolute inset-y-0 right-0 grid w-9 place-items-center text-gray-400 hover:text-gray-200"
           tabindex="-1"
         >

From 5ab0190b90d3357304ba0901086beb4eee855a5d Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 18:57:20 +0800
Subject: [PATCH 053/170] perf(aliases): hoist registry calls out of per-target
 availability loop
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`candidateIsRoutable` ran `createPerRequestFetcher(currentColo)` and
`listModelProviders(upstreamIds)` once per target. Their results depend
only on (upstreamIds, currentColo), not on the target id, so an alias
with N targets paid an N× upstream-list + proxy-factory cost where
1× was correct. Pre-build both at the pool-construction boundary and
pass them into the per-target check.
---
 .../src/data-plane/model-aliases/resolve.ts   | 20 ++++++++++++++-----
 1 file changed, 15 insertions(+), 5 deletions(-)

diff --git a/packages/gateway/src/data-plane/model-aliases/resolve.ts b/packages/gateway/src/data-plane/model-aliases/resolve.ts
index 6ea5c73c0..0aab3821b 100644
--- a/packages/gateway/src/data-plane/model-aliases/resolve.ts
+++ b/packages/gateway/src/data-plane/model-aliases/resolve.ts
@@ -7,9 +7,11 @@
 
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import type { ModelAliasesRepo, ModelAliasRecord } from '../../repo/types.ts';
+import type { ModelProviderInstance } from '../providers/registry.ts';
 import { collectInterpretationOutcomes, enumerateModelInterpretations, listModelProviders } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { AliasKind, AliasRules, ModelEndpointKey } from '@floway-dev/protocols/common';
+import type { Fetcher } from '@floway-dev/provider';
 
 // Endpoint family the inbound request belongs to. Mirrors `AliasKind` but
 // named in the data-plane vocabulary so the resolver argument site reads as
@@ -79,15 +81,17 @@ interface ResolveAliasArgs {
 // `endpointKind` cares about. Mirrors the resolution path
 // `enumerateProviderCandidates` takes, narrowed to a yes/no answer so we
 // can pre-filter the alias target list once.
+//
+// `fetcherForUpstream` and `providers` are passed in (not derived here) so a
+// caller filtering N targets hits the underlying repo / dial factories once,
+// not N times.
 const candidateIsRoutable = async (
   targetModelId: string,
   endpointKind: AliasEndpointKind,
-  upstreamIds: readonly string[] | null,
+  providers: readonly ModelProviderInstance[],
+  fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
-  currentColo: string,
 ): Promise<boolean> => {
-  const fetcherForUpstream = await createPerRequestFetcher(currentColo);
-  const providers = await listModelProviders(upstreamIds);
   if (providers.length === 0) return false;
   const interpretations = enumerateModelInterpretations(targetModelId, providers);
   const { resolutions } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
@@ -106,8 +110,14 @@ const buildAvailablePool = async (
   scheduler: BackgroundScheduler,
   currentColo: string,
 ): Promise<ModelAliasRecord['targets']> => {
+  // Hoist both registry calls out of the per-target loop: their results
+  // depend only on (upstreamIds, currentColo), not on the target id, so the
+  // upstreams-list + proxy-factory cost is paid once per alias instead of
+  // once per target row.
+  const fetcherForUpstream = await createPerRequestFetcher(currentColo);
+  const providers = await listModelProviders(upstreamIds);
   const availability = await Promise.all(record.targets.map(target =>
-    candidateIsRoutable(target.target_model_id, endpointKind, upstreamIds, scheduler, currentColo)));
+    candidateIsRoutable(target.target_model_id, endpointKind, providers, fetcherForUpstream, scheduler)));
   return record.targets.filter((_, index) => availability[index]);
 };
 

From 2b4374e4e6a8e068896eedc926e6e31c46215f57 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 18:57:54 +0800
Subject: [PATCH 054/170] fix(aliases/web): exclude alias entries from the
 rule-warning catalog lookup
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`findCatalogModel` matched any /api/models entry by id, including alias
entries surfaced through the v1/models layer. The function's purpose is
to answer "what does the catalog say about this real model" — at runtime
target ids never re-enter the alias layer, so the lookup must compare
against the same "real, non-alias" surface the suggestion list
(`realModelIds`) and the shadow-detection helper already use. Without
this filter, typing an alias name into a target_model_id silently
matched the alias's intersected metadata and produced misleading rule
warnings instead of the "no real model resolves this id" treatment that
`computeModelWarnings` correctly emits.
---
 apps/web/src/components/alias-edit/warnings.ts | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/apps/web/src/components/alias-edit/warnings.ts b/apps/web/src/components/alias-edit/warnings.ts
index f9d9a9147..3455b9f41 100644
--- a/apps/web/src/components/alias-edit/warnings.ts
+++ b/apps/web/src/components/alias-edit/warnings.ts
@@ -11,12 +11,15 @@ import type { ChatAliasRules, ControlPlaneModel, ModelAlias } from '../../api/ty
 // unprefixed) resolves to that public id. The /api/models catalog already
 // emits the public id directly, so equality is enough; future prefix-form
 // surfaces can wire their normalisation in here without touching the
-// callers.
+// callers. Alias entries are excluded — at runtime target ids never
+// re-enter the alias layer, so the rule-warning lookup must compare against
+// the same "real model" surface the suggestion list and shadow-detection
+// helpers already use.
 export const findCatalogModel = (
   models: readonly ControlPlaneModel[] | null | undefined,
   targetModelId: string,
 ): ControlPlaneModel | undefined =>
-  (models ?? []).find(m => m.id === targetModelId);
+  (models ?? []).find(m => m.id === targetModelId && m.aliasedFrom === undefined);
 
 // Real (non-alias) model ids the operator can route to. Used by the
 // target-id combobox suggestion list and by the shadow-warning check.

From 191ac32cc4208c4752e88e5f6d7a9fd7c0498f97 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 18:58:31 +0800
Subject: [PATCH 055/170] test(aliases): pin Messages-overlay silent drop for
 reasoning.summary
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Anthropic Messages IR has no `reasoning_summary` slot, so the
overlay silently drops the field. The other three protocols already
have positive summary assertions, but messages had none — add one that
asserts the drop, so a later contributor doesn't accidentally wire
summary onto thinking/output_config/metadata.
---
 .../src/data-plane/model-aliases/apply_test.ts       | 12 ++++++++++++
 1 file changed, 12 insertions(+)

diff --git a/packages/gateway/src/data-plane/model-aliases/apply_test.ts b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
index 72d4f6b35..9dc0732d5 100644
--- a/packages/gateway/src/data-plane/model-aliases/apply_test.ts
+++ b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
@@ -133,6 +133,18 @@ test('messages: effort lands on output_config, budget+adaptive land on thinking'
   assertEquals(body.verbosity, 'low');
 });
 
+test('messages: summary has no Anthropic-shaped slot — silently dropped', () => {
+  // The Messages IR has no reasoning_summary field; the helper must NOT
+  // surface the value onto thinking/output_config/metadata. Pinning the
+  // drop here so a later contributor doesn't wire summary onto a
+  // protocol that can't carry it.
+  const body = msgPayload();
+  applyChatRulesToMessages(body, { reasoning: { summary: 'detailed' } });
+  assertEquals(body.thinking, undefined);
+  assertEquals(body.output_config, undefined);
+  assertEquals(body.metadata, undefined);
+});
+
 test('messages: adaptive=true sets thinking.type=adaptive and ignores budget_tokens', () => {
   const body = msgPayload();
   applyChatRulesToMessages(body, { reasoning: { adaptive: true, budget_tokens: 4096 } });

From 89187732e0cbd990e9466f7ac9ecdd430a42209a Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 18:59:01 +0800
Subject: [PATCH 056/170] docs(aliases): note that intersectArrays preserves
 head-order
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The downstream reasoning-effort intersection relies on this — when no
agreed-default exists across all targets, the fallback default is
`supported[0]`, so the first input's order is load-bearing rather than
incidental. Comment it next to the helper so a future cleanup pass
doesn't reach for Array.from(new Set(...)) and silently change the
fallback.
---
 packages/gateway/src/data-plane/models/alias-listing.ts | 4 ++++
 1 file changed, 4 insertions(+)

diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index 70d97fdb3..667a0e0f7 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -44,6 +44,10 @@ export interface ListedAliasInputs {
 // chat-row target can be read as ChatAliasRules without a runtime check.
 const chatRules = (target: AliasTarget): ChatAliasRules => target.rules as ChatAliasRules;
 
+// Result preserves the order of `arrays[0]`. Matters for callers like the
+// reasoning-effort intersection below: when no agreed-default exists, the
+// fallback default is `supported[0]`, so the first input's relative order
+// determines which level wins as the listing's `default`.
 const intersectArrays = <T>(arrays: readonly (readonly T[])[]): T[] => {
   if (arrays.length === 0) return [];
   const [head, ...tail] = arrays;

From 2774bc2dbbcbeda496935517a243c0c2d4d30ab2 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 19:17:50 +0800
Subject: [PATCH 057/170] cleanup(aliases): drop unused interpretation field
 and unionEndpoints export

`collectInterpretationOutcomes` carried each resolution's source
`ModelInterpretation` on the result, but no caller read it. Revert the
shape to `{ provider, resolved }` so the type matches what consumers
actually use.

`unionEndpoints` has a single in-file caller; drop the export.
---
 .../src/data-plane/providers/registry.ts      | 19 +++++++++----------
 1 file changed, 9 insertions(+), 10 deletions(-)

diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index 49be73da7..79a4f1c5a 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -82,7 +82,7 @@ export const listModelProviders = async (
 // Merge two capability maps: a key present in either side is present in the
 // result, and its sub-capability flags are OR-ed so a sub-cap advertised by
 // either provider survives.
-export const unionEndpoints = (a: ModelEndpoints, b: ModelEndpoints): ModelEndpoints => {
+const unionEndpoints = (a: ModelEndpoints, b: ModelEndpoints): ModelEndpoints => {
   const result: ModelEndpoints = { ...a };
   for (const key of Object.keys(b) as ModelEndpointKey[]) {
     const merged = { ...result[key], ...b[key] };
@@ -326,16 +326,16 @@ export const collectInterpretationOutcomes = async (
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
 ): Promise<{
-  resolutions: Array<{ interpretation: ModelInterpretation; provider: ModelProviderInstance; resolved: ProviderModelResolution }>;
+  resolutions: Array<{ provider: ModelProviderInstance; resolved: ProviderModelResolution }>;
   failedUpstreams: string[];
 }> => {
-  const settled = await Promise.allSettled(interpretations.map(interpretation =>
-    resolveModelForProvider(interpretation.provider, interpretation.lookupId, fetcherForUpstream(interpretation.provider.upstream), scheduler)
-      .then(resolved => ({ interpretation, resolved }))));
+  const settled = await Promise.allSettled(interpretations.map(({ provider, lookupId }) =>
+    resolveModelForProvider(provider, lookupId, fetcherForUpstream(provider.upstream), scheduler)
+      .then(resolved => ({ provider, resolved }))));
 
   const failedUpstreams: string[] = [];
   const failedSeen = new Set<string>();
-  const resolutions: Array<{ interpretation: ModelInterpretation; provider: ModelProviderInstance; resolved: ProviderModelResolution }> = [];
+  const resolutions: Array<{ provider: ModelProviderInstance; resolved: ProviderModelResolution }> = [];
 
   for (const [index, result] of settled.entries()) {
     if (result.status === 'rejected') {
@@ -350,9 +350,9 @@ export const collectInterpretationOutcomes = async (
       }
       continue;
     }
-    const { interpretation, resolved } = result.value;
+    const { provider, resolved } = result.value;
     if (!resolved) continue;
-    resolutions.push({ interpretation, provider: interpretation.provider, resolved });
+    resolutions.push({ provider, resolved });
   }
 
   return { resolutions, failedUpstreams };
@@ -371,8 +371,7 @@ export const resolveModelForRequest = async (
 
   const interpretations = enumerateModelInterpretations(modelId, providers);
   const { resolutions, failedUpstreams } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
-  const matches: ProviderModelResolution[] = resolutions.map(r => r.resolved);
-  return { matches, failedUpstreams };
+  return { matches: resolutions.map(r => r.resolved), failedUpstreams };
 };
 
 export const resolveModelForProvider = async (

From fa6c8e67c356ad09a2b82c5809af3a0a0e194266 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 19:17:56 +0800
Subject: [PATCH 058/170] cleanup(aliases): trim verbose comments in gateway
 alias surface

Each touched comment restated what the adjacent code shows. Keep the
load-bearing decision notes (head-order intersection, collision merge,
PK-rename atomicity) and drop the prose that paraphrases the code.
---
 .../src/data-plane/model-aliases/apply.ts     | 19 +++-------
 .../src/data-plane/model-aliases/resolve.ts   | 13 +++----
 .../model-aliases/serve-integration.ts        | 28 ++++++---------
 .../src/data-plane/models/alias-listing.ts    | 35 ++++++-------------
 .../gateway/src/data-plane/models/gemini.ts   |  4 +--
 packages/gateway/src/repo/memory.ts           |  5 ++-
 packages/gateway/src/repo/sql.ts              | 10 +++---
 7 files changed, 37 insertions(+), 77 deletions(-)

diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts
index 9208d1492..5d73c4448 100644
--- a/packages/gateway/src/data-plane/model-aliases/apply.ts
+++ b/packages/gateway/src/data-plane/model-aliases/apply.ts
@@ -1,9 +1,5 @@
-// Per-protocol rule overlay. Given a resolved alias's ChatAliasRules,
-// stamp the rule values onto the inbound IR. Alias rules are authoritative
-// — an existing IR field is OVERWRITTEN by a rule that names it. Rules the
-// target IR cannot express are silently dropped; the runtime never tries
-// to enum-gate a value against a model's advertised capabilities. The
-// catalog-warning surface lives in the dashboard.
+// Per-protocol rule overlay. Alias rules overwrite IR fields they name;
+// fields the target IR cannot express are silently dropped.
 
 import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
 import type { ChatAliasRules } from '@floway-dev/protocols/common';
@@ -11,8 +7,6 @@ import type { GeminiPayload } from '@floway-dev/protocols/gemini';
 import type { MessagesPayload } from '@floway-dev/protocols/messages';
 import type { ResponsesPayload } from '@floway-dev/protocols/responses';
 
-// Type guard — `reasoning` is optional. Helpers below assume rules are
-// non-null but each sub-key may still be absent.
 const hasReasoning = (rules: ChatAliasRules): rules is ChatAliasRules & { reasoning: NonNullable<ChatAliasRules['reasoning']> } =>
   rules.reasoning !== undefined;
 
@@ -80,10 +74,8 @@ export const applyChatRulesToMessages = (body: MessagesPayload, rules: ChatAlias
   }
 };
 
-// Map the discrete `ReasoningEffort` presets onto Gemini's `thinkingLevel`
-// enum, which carries the same five tiers under different names. Anything
-// outside the closed set is dropped — Gemini's wire reads from a fixed
-// enum and an unknown tier would just be rejected upstream.
+// Discrete effort presets map onto Gemini's `thinkingLevel` enum; unknown
+// values are dropped because Gemini rejects out-of-enum tiers upstream.
 const GEMINI_THINKING_LEVEL_BY_EFFORT: Record<string, 'minimal' | 'low' | 'medium' | 'high' | 'xhigh'> = {
   none: 'minimal',
   low: 'low',
@@ -97,8 +89,7 @@ export const applyChatRulesToGemini = (body: GeminiPayload, rules: ChatAliasRule
     const { effort, budget_tokens, adaptive } = rules.reasoning;
     // Gemini collapses the three reasoning controls onto one `thinkingConfig`
     // sub-object. Adaptive wins by encoding budget=-1 (Gemini's adaptive
-    // sentinel); an explicit budget pins the count; effort sets the level
-    // preset. All three can coexist on the same object.
+    // sentinel); an explicit budget pins the count; effort sets the level.
     const thinkingConfig = { ...body.generationConfig?.thinkingConfig };
     if (adaptive === true) {
       thinkingConfig.thinkingBudget = -1;
diff --git a/packages/gateway/src/data-plane/model-aliases/resolve.ts b/packages/gateway/src/data-plane/model-aliases/resolve.ts
index 0aab3821b..9f888f7da 100644
--- a/packages/gateway/src/data-plane/model-aliases/resolve.ts
+++ b/packages/gateway/src/data-plane/model-aliases/resolve.ts
@@ -53,9 +53,8 @@ export class AliasNoTargetAvailableError extends Error {
   }
 }
 
-// Lift `AliasNoTargetAvailableError` into a `ChatServeFailure` so a chat
-// serve can route it through its existing failure renderer without
-// special-casing the error type.
+// Lift `AliasNoTargetAvailableError` into a `ChatServeFailure` so the
+// existing failure renderer can surface it without special-casing.
 export const aliasFailureFromError = (error: AliasNoTargetAvailableError): { kind: 'alias-no-target-available'; aliasName: string; targetCount: number } => ({
   kind: 'alias-no-target-available',
   aliasName: error.aliasName,
@@ -76,12 +75,8 @@ interface ResolveAliasArgs {
   readonly repo: ModelAliasesRepo;
 }
 
-// Reports true when the given target_model_id resolves to at least one
-// enabled upstream binding that exposes any endpoint the inbound
-// `endpointKind` cares about. Mirrors the resolution path
-// `enumerateProviderCandidates` takes, narrowed to a yes/no answer so we
-// can pre-filter the alias target list once.
-//
+// Reports true when the target id resolves to at least one enabled upstream
+// binding exposing an endpoint the inbound `endpointKind` cares about.
 // `fetcherForUpstream` and `providers` are passed in (not derived here) so a
 // caller filtering N targets hits the underlying repo / dial factories once,
 // not N times.
diff --git a/packages/gateway/src/data-plane/model-aliases/serve-integration.ts b/packages/gateway/src/data-plane/model-aliases/serve-integration.ts
index f14308403..764dff7e5 100644
--- a/packages/gateway/src/data-plane/model-aliases/serve-integration.ts
+++ b/packages/gateway/src/data-plane/model-aliases/serve-integration.ts
@@ -1,18 +1,12 @@
 // Per-protocol alias preamble helpers. Each protocol's serve calls
-// `resolveAndApply<Protocol>` immediately after parsing the inbound payload
-// and before `enumerateProviderCandidates`. The helper:
-//   1. looks up the inbound model name in the alias repo,
-//   2. on a hit whose kind matches `chat`, picks one target row,
-//   3. stamps the row's rules onto the IR (overwriting any matching field),
-//   4. stages the `x-floway-alias` response header on the gateway ctx, and
-//   5. returns the resolved target_model_id so the caller substitutes it
-//      for `payload.model` before candidate enumeration runs.
+// `resolveAndApply<Protocol>` after parsing the inbound payload and before
+// `enumerateProviderCandidates`. The helper looks up the inbound model name,
+// stamps a target's rules onto the IR, stages the `x-floway-alias` response
+// header, and returns the resolved target id for substitution.
 //
-// Returns `null` when the inbound name is not an alias of kind=chat;
-// callers continue with the literal name and the catalog's miss surface
-// renders if nothing matches. Throws `AliasNoTargetAvailableError` when
-// the alias exists but every target is currently unroutable — caught at
-// the serve seam and rendered via the protocol's failure renderer.
+// Returns `null` when the inbound name is not an alias of kind=chat. Throws
+// `AliasNoTargetAvailableError` when the alias exists but every target is
+// currently unroutable — caught at the serve seam.
 
 import { applyChatRulesToChatCompletions, applyChatRulesToGemini, applyChatRulesToMessages, applyChatRulesToResponses } from './apply.ts';
 import { resolveAlias, type AliasResolution } from './resolve.ts';
@@ -44,10 +38,8 @@ const resolveChatAlias = async (modelName: string, ctx: GatewayCtx): Promise<Ali
   return resolution;
 };
 
-// All four `resolveAndApplyAliasFor*` helpers narrow the rule shape to
-// `ChatAliasRules` before calling the per-protocol overlay. Today every
-// chat-kind alias target carries `ChatAliasRules` per the wire schema; the
-// cast is the unavoidable narrowing from the generic `AliasRules` union.
+// Every chat-kind alias target carries `ChatAliasRules` per the wire
+// schema; the cast is the unavoidable narrowing from the generic union.
 const asChatRules = (rules: AliasResolution['rules']): ChatAliasRules => rules as ChatAliasRules;
 
 export const resolveAndApplyAliasForChatCompletions = async (payload: ChatCompletionsPayload, ctx: GatewayCtx): Promise<void> => {
@@ -96,7 +88,7 @@ export const resolveAliasForPassthrough = async (model: string, endpointKind: 'e
     currentColo: ctx.currentColo,
     repo: getRepo().modelAliases,
   });
-  if (resolution === null) return model;
+  if (!resolution) return model;
   ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, resolution.aliasName);
   return resolution.targetModelId;
 };
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index 667a0e0f7..5bb46e712 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -1,33 +1,20 @@
 // Synthesizes the alias entries that join the real-model catalog inside
-// `/v1/models`. One PublicModel per visible alias — hidden aliases
-// (visible_in_models_list = false) are dropped from the listing while
-// remaining routable. The synthesized entry carries an `aliasedFrom` block
-// so an alias-aware UI (today: the dashboard) can render the alias-of
-// relationship without a second round trip.
+// `/v1/models`. One PublicModel per visible alias. The synthesized entry
+// carries an `aliasedFrom` block so an alias-aware UI can render the
+// alias-of relationship without a second round trip.
 //
 // Capability metadata is the safe lower bound for the inbound request:
-//   • single-target → the sole target's metadata, narrowed by the alias's
-//     `rules` (a fixed reasoning effort collapses the reported effort set
-//     to that one value, a fixed budget collapses the reported range to
-//     a single point).
-//   • multi-target → the intersection across every currently-available
-//     target. A capability survives only when every target backing the
-//     alias declares it; whichever target gets picked at request time is
-//     then guaranteed to support whatever the catalog reported.
-//
-// "Available target" for intersection purposes means a target whose
-// `target_model_id` appears in `realModels` AND whose entry's `kind`
-// matches the alias's `kind`. Unavailable targets are excluded from the
-// intersection but still appear in `aliasedFrom.targets` so the dashboard
-// can show the full configuration.
+// single-target reports the sole target's metadata narrowed by the
+// alias's rules; multi-target reports the intersection across every
+// currently-available target so whichever target gets picked at request
+// time supports whatever the catalog reported.
 //
 // Collision: when an alias's `name` exactly equals a real model id, the
 // alias entry replaces the real entry in the final catalog. Two entries
 // with the same `id` would break OpenAI client deduplication; collapsing
 // to the alias entry preserves the operator's intent (the alias is the
-// reason both rows would have been present). The dashboard surfaces this
-// via a shadow warning in the alias editor; here it is purely a wire-shape
-// concern. The real entry is removed at the `loadModels` merge step.
+// reason both rows would have been present). The real entry is removed
+// at the `loadModels` merge step.
 
 import type { ModelAliasRecord } from '../../repo/types.ts';
 import { composeAliasDisplayName } from '@floway-dev/protocols/common';
@@ -55,7 +42,6 @@ const intersectArrays = <T>(arrays: readonly (readonly T[])[]): T[] => {
 };
 
 const intersectChat = (chats: readonly ChatModelInfo[]): ChatModelInfo | undefined => {
-  if (chats.length === 0) return undefined;
   const result: ChatModelInfo = {};
 
   const modalityChats = chats.filter(c => c.modalities !== undefined);
@@ -171,8 +157,7 @@ const synthesizeOne = (alias: ModelAliasRecord, realModels: readonly InternalMod
   };
 
   // No backing target — still emit the row so the dashboard can show the
-  // alias with a no-target warning. Capability metadata stays absent so
-  // clients see no inherited claims.
+  // alias with a no-target warning.
   if (availableTargets.length === 0) return entry;
 
   if (availableTargets.length === 1) {
diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts
index b7b34bd4a..e8cb7a409 100644
--- a/packages/gateway/src/data-plane/models/gemini.ts
+++ b/packages/gateway/src/data-plane/models/gemini.ts
@@ -65,8 +65,8 @@ const geminiModelLoadError = (error: unknown): Response => {
   return geminiError(502, error instanceof Error ? error.message : String(error));
 };
 
-// Mirror loadModels: real models plus chat-kind alias entries, with alias
-// names winning id collisions.
+// Real chat models plus chat-kind alias entries; see `loadModels` for the
+// collision rationale.
 const loadGeminiModels = async (
   upstreamFilter: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,
diff --git a/packages/gateway/src/repo/memory.ts b/packages/gateway/src/repo/memory.ts
index 49e30c5ef..edbecb390 100644
--- a/packages/gateway/src/repo/memory.ts
+++ b/packages/gateway/src/repo/memory.ts
@@ -886,9 +886,8 @@ const cloneBackoffRow = (row: BackoffRow): BackoffRow => ({ ...row });
 
 const cloneModelAliasRecord = (record: ModelAliasRecord): ModelAliasRecord => ({
   ...record,
-  // Deep-clone the JSON payload so a caller's mutation of the returned record
-  // never leaks back into the store. Targets and their inner rule objects are
-  // plain JSON, so structuredClone is the cheapest faithful copy.
+  // Deep-clone so a caller's mutation of the returned record never leaks
+  // back into the store.
   targets: structuredClone(record.targets),
 });
 
diff --git a/packages/gateway/src/repo/sql.ts b/packages/gateway/src/repo/sql.ts
index 16645772e..b9cfd93fc 100644
--- a/packages/gateway/src/repo/sql.ts
+++ b/packages/gateway/src/repo/sql.ts
@@ -1663,7 +1663,6 @@ class SqlModelAliasesRepo implements ModelAliasesRepo {
 
   async update(oldName: string, record: ModelAliasRecord): Promise<void> {
     if (oldName === record.name) {
-      // Plain in-place update — the PK is unchanged, no rename to coordinate.
       const result = await this.db
         .prepare(
           `UPDATE model_aliases SET
@@ -1693,11 +1692,10 @@ class SqlModelAliasesRepo implements ModelAliasesRepo {
       return;
     }
 
-    // Rename. Verify the source row exists first so a missing oldName fails
-    // before any write hits the table. Then INSERT(new) + DELETE(old) atomically
-    // through the batch primitive — a PK collision against `record.name`
-    // bubbles up from the INSERT, which is exactly the "rename collides" signal
-    // the route layer translates to 409.
+    // Rename. Verify the source row exists first, then INSERT(new) +
+    // DELETE(old) atomically through the batch primitive — a PK collision
+    // against `record.name` bubbles up from the INSERT, which the route
+    // layer translates to 409.
     const existing = await this.getByName(oldName);
     if (!existing) throw new Error(`alias ${oldName} not found`);
 

From 1f116ad98680940264fdc98c1552dbb149d9606d Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 19:21:51 +0800
Subject: [PATCH 059/170] cleanup(aliases): drop unused GatewayCtx.c field and
 stubAuthedContext

The branch added a Hono `AuthedContext` reference (the `c` field) to
`GatewayCtx`, but no production code path reads it; consumers that need
the Hono `c` already receive it as a sibling parameter. Drop the field,
the `stubAuthedContext` test helper, and the corresponding `c:` stub
lines across the test files.
---
 .../chat/chat-completions/attempt_test.ts       |  2 --
 .../demote-developer-to-system_test.ts          |  2 --
 .../demote-interleaved-system-to-user_test.ts   |  2 --
 ...able-reasoning-on-forced-tool-choice_test.ts |  2 --
 .../include-usage-stream-options_test.ts        |  2 --
 .../interceptors/normalize-usage_test.ts        |  2 --
 .../vendor-deepseek-normalize_test.ts           |  2 --
 .../interceptors/vendor-kimi-normalize_test.ts  |  2 --
 .../interceptors/vendor-qwen-normalize_test.ts  |  2 --
 .../chat/chat-completions/serve_test.ts         |  2 --
 .../src/data-plane/chat/gemini/attempt_test.ts  |  2 --
 .../interceptors/strip-safety-settings_test.ts  |  2 --
 .../strip-unsupported-part-fields_test.ts       |  2 --
 .../strip-unsupported-tools_test.ts             |  2 --
 .../interceptors/suppress-thought-parts_test.ts |  2 --
 .../src/data-plane/chat/gemini/respond_test.ts  |  2 --
 .../src/data-plane/chat/gemini/serve_test.ts    |  2 --
 .../data-plane/chat/messages/attempt_test.ts    |  2 --
 .../demote-interleaved-system-to-user_test.ts   |  2 --
 ...able-reasoning-on-forced-tool-choice_test.ts |  2 --
 .../strip-billing-attribution_test.ts           |  2 --
 .../interceptors/web-search-shim_test.ts        |  2 --
 .../data-plane/chat/messages/respond_test.ts    |  2 --
 .../src/data-plane/chat/messages/serve_test.ts  |  2 --
 .../data-plane/chat/responses/attempt_test.ts   |  2 --
 .../canonicalize-encrypted-content_test.ts      |  2 --
 .../demote-developer-to-system_test.ts          |  2 --
 .../demote-interleaved-system-to-user_test.ts   |  2 --
 ...able-reasoning-on-forced-tool-choice_test.ts |  2 --
 .../interceptors/retry-cyber-policy_test.ts     |  2 --
 .../interceptors/server-tool-shim_test.ts       |  3 ---
 .../image-generation-integration_test.ts        |  2 --
 .../server-tools/image-generation_test.ts       |  2 --
 .../vendor-deepseek-normalize_test.ts           |  2 --
 .../interceptors/vendor-qwen-normalize_test.ts  |  2 --
 .../src/data-plane/chat/responses/serve_test.ts |  2 --
 .../src/data-plane/chat/shared/gateway-ctx.ts   | 17 +++++------------
 .../src/data-plane/chat/shared/respond_test.ts  |  2 --
 .../chat/shared/upstream-telemetry_test.ts      |  2 --
 .../data-plane/model-aliases/resolve_test.ts    |  5 -----
 .../gateway/src/test-helpers/gateway-ctx.ts     |  9 ---------
 41 files changed, 5 insertions(+), 103 deletions(-)

diff --git a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
index bf3e3ef3c..89438a3cb 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
@@ -3,7 +3,6 @@ import { test, vi } from 'vitest';
 import { chatCompletionsAttempt } from './attempt.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
-import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
 import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -25,7 +24,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
index e4edd7951..8b9c9f8c5 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-developer-to-system_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import { withDemoteDeveloperToSystem } from './demote-developer-to-system.ts';
 import type { ChatCompletionsInvocation } from './types.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
 import { eventResult } from '@floway-dev/provider';
@@ -17,7 +16,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
index 40cc5b48c..eee0403d9 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/demote-interleaved-system-to-user_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import { withInterleavedSystemDemotedToUser } from './demote-interleaved-system-to-user.ts';
 import type { ChatCompletionsInvocation } from './types.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
 import { eventResult } from '@floway-dev/provider';
@@ -17,7 +16,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index 928b18b72..bb715b2d0 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import { withReasoningDisabledOnForcedToolChoice } from './disable-reasoning-on-forced-tool-choice.ts';
 import type { ChatCompletionsInvocation } from './types.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
 import { eventResult } from '@floway-dev/provider';
@@ -17,7 +16,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
index fb02a7314..90e27b8fa 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/include-usage-stream-options_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import { withUsageStreamOptionsIncluded } from './include-usage-stream-options.ts';
 import type { ChatCompletionsInvocation } from './types.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
 import { eventResult } from '@floway-dev/provider';
@@ -17,7 +16,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
index 497a49cd8..746ff2e83 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/normalize-usage_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import { withUsageNormalized } from './normalize-usage.ts';
 import type { ChatCompletionsInvocation } from './types.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -18,7 +17,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
index 963125131..dd5bc69ce 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-deepseek-normalize_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import type { ChatCompletionsInvocation } from './types.ts';
 import { withVendorDeepseekChatCompletionsNormalize } from './vendor-deepseek-normalize.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -22,7 +21,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
index 3a6ee205f..e598b6f14 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-kimi-normalize_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import type { ChatCompletionsInvocation } from './types.ts';
 import { withVendorKimiChatCompletionsNormalize } from './vendor-kimi-normalize.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import { eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -18,7 +17,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
index f1c536f5a..189545eec 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/interceptors/vendor-qwen-normalize_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import type { ChatCompletionsInvocation } from './types.ts';
 import { withVendorQwenChatCompletionsNormalize } from './vendor-qwen-normalize.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
 import { eventResult } from '@floway-dev/provider';
@@ -17,7 +16,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
index 9117e2f80..cf1ef6c1e 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
@@ -2,7 +2,6 @@ import { test, vi } from 'vitest';
 
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
-import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
 import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -69,7 +68,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
index 0e562021e..8bb93cf7d 100644
--- a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
@@ -3,7 +3,6 @@ import { test, vi } from 'vitest';
 import { geminiAttempt } from './attempt.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
-import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
 import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -26,7 +25,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
index 225c690da..fa4475ad4 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-safety-settings_test.ts
@@ -1,7 +1,6 @@
 import { test } from 'vitest';
 
 import { stripSafetySettings } from './strip-safety-settings.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
@@ -17,7 +16,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
index f7f0beb55..9e0306e13 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-part-fields_test.ts
@@ -1,7 +1,6 @@
 import { test } from 'vitest';
 
 import { stripUnsupportedPartFields } from './strip-unsupported-part-fields.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
@@ -17,7 +16,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
index b3ca33289..2d818f4ca 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/strip-unsupported-tools_test.ts
@@ -1,7 +1,6 @@
 import { test } from 'vitest';
 
 import { stripUnsupportedTools } from './strip-unsupported-tools.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
@@ -17,7 +16,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
index a9978fa9f..2659a8833 100644
--- a/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/interceptors/suppress-thought-parts_test.ts
@@ -1,7 +1,6 @@
 import { test } from 'vitest';
 
 import { suppressThoughtParts } from './suppress-thought-parts.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import { eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
 import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
@@ -17,7 +16,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
index 11629b1a9..192dfdc98 100644
--- a/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/respond_test.ts
@@ -2,7 +2,6 @@ import { Hono } from 'hono';
 import { test } from 'vitest';
 
 import { respondGemini } from './respond.ts';
-import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import { eventFrame } from '@floway-dev/protocols/common';
@@ -28,7 +27,6 @@ const ctx = (): GatewayCtx => ({
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
index 43f2bd256..771c26c7e 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
@@ -2,7 +2,6 @@ import { test, vi } from 'vitest';
 
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
-import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
 import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -68,7 +67,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
index 1495df93b..845a0f3c0 100644
--- a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
@@ -3,7 +3,6 @@ import { test, vi } from 'vitest';
 import { messagesAttempt } from './attempt.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
-import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
 import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -25,7 +24,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
index da3a72d81..2980c1653 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/demote-interleaved-system-to-user_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import { demoteInterleavedSystemToUser } from './demote-interleaved-system-to-user.ts';
 import type { MessagesInvocation } from './types.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
@@ -18,7 +17,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index bd317db35..3a62c0111 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import { withReasoningDisabledOnForcedToolChoice } from './disable-reasoning-on-forced-tool-choice.ts';
 import type { MessagesInvocation } from './types.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
@@ -18,7 +17,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
index 90dfa9a3e..a853330ba 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/strip-billing-attribution_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import { stripBillingAttribution } from './strip-billing-attribution.ts';
 import type { MessagesInvocation } from './types.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
@@ -18,7 +17,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
index 396efd981..eb20ac927 100644
--- a/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/interceptors/web-search-shim_test.ts
@@ -13,7 +13,6 @@ import {
 } from './web-search-shim.ts';
 import { initRepo } from '../../../../repo/index.ts';
 import { InMemoryRepo } from '../../../../repo/memory.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import { DEFAULT_SEARCH_CONFIG } from '../../../tools/web-search/search-config.ts';
 import type { WebSearchProvider, WebSearchProviderResult } from '../../../tools/web-search/types.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
@@ -60,7 +59,6 @@ const gatewayCtx = (apiKeyId: string = 'test-key'): GatewayCtx => ({
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/messages/respond_test.ts b/packages/gateway/src/data-plane/chat/messages/respond_test.ts
index fba59f4b2..fc945bc6c 100644
--- a/packages/gateway/src/data-plane/chat/messages/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/respond_test.ts
@@ -4,7 +4,6 @@ import { test } from 'vitest';
 import { createMessagesStreamUsageState, respondMessages, tokenUsageFromMessagesFrame } from './respond.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
-import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesStreamEvent } from '@floway-dev/protocols/messages';
@@ -535,7 +534,6 @@ const makeRespondCtx = (): GatewayCtx => ({
   wantsStream: false,
   runtimeLocation: 'TEST',
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
   currentColo: 'TEST',
   dump: null,
diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
index 8b07302f9..36c0ccda9 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
@@ -2,7 +2,6 @@ import { test, vi } from 'vitest';
 
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
-import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
 import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -66,7 +65,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
index 0bf10177a..2e52ba1eb 100644
--- a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
@@ -7,7 +7,6 @@ import { createResponsesHttpStore } from './items/store.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import type { StoredResponsesItem } from '../../../repo/types.ts';
-import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -27,7 +26,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
index a1db87741..cdc9b64cb 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/canonicalize-encrypted-content_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import { withReasoningEncryptedContentCanonicalized } from './canonicalize-encrypted-content.ts';
 import type { ResponsesInvocation } from './types.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
 import { eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -19,7 +18,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
index 3dc2ad972..cf75111bf 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-developer-to-system_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import { withDemoteDeveloperToSystem } from './demote-developer-to-system.ts';
 import type { ResponsesInvocation } from './types.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
 import { doneFrame } from '@floway-dev/protocols/common';
@@ -19,7 +18,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
index b5b7bed38..752e1d946 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/demote-interleaved-system-to-user_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import { withInterleavedSystemDemotedToUser } from './demote-interleaved-system-to-user.ts';
 import type { ResponsesInvocation } from './types.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
 import { doneFrame } from '@floway-dev/protocols/common';
@@ -19,7 +18,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
index b9bc85ed8..e70adef49 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/disable-reasoning-on-forced-tool-choice_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import { withReasoningDisabledOnForcedToolChoice } from './disable-reasoning-on-forced-tool-choice.ts';
 import type { ResponsesInvocation } from './types.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
 import { doneFrame } from '@floway-dev/protocols/common';
@@ -19,7 +18,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
index 1505ea5bb..619a223a6 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/retry-cyber-policy_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import { withCyberPolicyRetried } from './retry-cyber-policy.ts';
 import type { ResponsesInvocation } from './types.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
 import { eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
@@ -47,7 +46,6 @@ const stubCtx = (overrides: { abortSignal?: AbortSignal } = {}): GatewayCtx => (
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
   ...overrides,
 });
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
index 1842ff08b..25aa48de9 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tool-shim_test.ts
@@ -16,7 +16,6 @@ import { SHIM_TOOL_NAME, webSearchServerTool } from './server-tools/web-search.t
 import type { ResponsesInterceptor, ResponsesInvocation } from './types.ts';
 import { initRepo } from '../../../../repo/index.ts';
 import { InMemoryRepo } from '../../../../repo/memory.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import { resolveConfiguredWebSearchProvider } from '../../../tools/web-search/provider.ts';
 import type {
   ConfiguredWebSearchProvider,
@@ -350,7 +349,6 @@ const makeGatewayCtx = (apiKeyId: string = 'k1'): GatewayCtx => ({
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 });
 
@@ -4500,7 +4498,6 @@ test('downstream AbortSignal threads through to provider search / fetchPage and
     dump: null,
     responseHeaders: new Headers(),
     backgroundScheduler: () => {},
-    c: stubAuthedContext(),
     requestStartedAt: 0,
     abortSignal: controller.signal,
   };
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
index 94859eb9c..7a8530d60 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
@@ -2,7 +2,6 @@ import { beforeEach, test, vi } from 'vitest';
 
 import { initRepo } from '../../../../../repo/index.ts';
 import { InMemoryRepo } from '../../../../../repo/memory.ts';
-import { stubAuthedContext } from '../../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../../shared/gateway-ctx.ts';
 import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../../items/store.ts';
 import type { ResponsesInvocation } from '../types.ts';
@@ -146,7 +145,6 @@ const gatewayCtx = (): GatewayCtx => ({
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
index f2513a078..4f9a9c724 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation_test.ts
@@ -19,7 +19,6 @@ import {
 } from './image-generation.ts';
 import { initRepo } from '../../../../../repo/index.ts';
 import { InMemoryRepo } from '../../../../../repo/memory.ts';
-import { stubAuthedContext } from '../../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../../shared/gateway-ctx.ts';
 import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../../items/store.ts';
 import type { ResponsesInvocation } from '../types.ts';
@@ -58,7 +57,6 @@ const gatewayCtx = (): GatewayCtx => ({
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
index 31624e32a..baa0c9f19 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-deepseek-normalize_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import type { ResponsesInvocation } from './types.ts';
 import { withVendorDeepseekResponsesNormalize } from './vendor-deepseek-normalize.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
 import { doneFrame } from '@floway-dev/protocols/common';
@@ -19,7 +18,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
index 06e39fa79..9e7e1cab6 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/vendor-qwen-normalize_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import type { ResponsesInvocation } from './types.ts';
 import { withVendorQwenResponsesNormalize } from './vendor-qwen-normalize.ts';
-import { stubAuthedContext } from '../../../../test-helpers/gateway-ctx.ts';
 import type { GatewayCtx } from '../../shared/gateway-ctx.ts';
 import { MemoryStatefulResponsesBacking, LayeredStatefulResponsesStore } from '../items/store.ts';
 import { doneFrame } from '@floway-dev/protocols/common';
@@ -19,7 +18,6 @@ const stubCtx: GatewayCtx = {
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 };
 
diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
index 548c49b8b..f9cf44eac 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
@@ -5,7 +5,6 @@ import { createResponsesHttpStore, MemoryStatefulResponsesBacking, LayeredStatef
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import type { StoredResponsesItem, StoredResponsesSnapshot } from '../../../repo/types.ts';
-import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
@@ -77,7 +76,6 @@ const makeGatewayCtx = (): GatewayCtx => ({
   dump: null,
   responseHeaders: new Headers(),
   backgroundScheduler: () => {},
-  c: stubAuthedContext(),
   requestStartedAt: 0,
 });
 
diff --git a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
index 4ef208a42..55770a854 100644
--- a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
+++ b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
@@ -6,8 +6,6 @@ import { getCurrentColo } from '../../../runtime/runtime-info.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 
 export interface GatewayCtx {
-  // The inbound Hono context.
-  readonly c: AuthedContext;
   readonly apiKeyId: string;
   readonly upstreamIds: readonly string[] | null;
   readonly abortSignal?: AbortSignal;
@@ -27,12 +25,9 @@ export interface GatewayCtx {
   // `finalizeGatewayResponse` short-circuits the dump tee and returns the
   // response untouched.
   readonly dump: DumpAccumulator | null;
-  // Headers staged by the gateway during request processing (e.g. the
-  // `x-floway-alias` header the alias resolver stamps when it picked a
-  // target). `finalizeGatewayResponse` writes each entry onto the outbound
-  // response just before returning it, so the headers ride along regardless
-  // of whether the responder built the Response via Hono's streamSSE,
-  // `Response.json`, or a raw `new Response(...)`.
+  // Headers staged during request processing (today: `x-floway-alias`)
+  // and written onto the outbound response by `finalizeGatewayResponse`,
+  // regardless of how the responder built the body.
   readonly responseHeaders: Headers;
 }
 
@@ -69,7 +64,6 @@ export const createGatewayCtxFromHono = (c: AuthedContext, opts: CreateGatewayCt
   if (opts.model !== undefined) dump?.requestedModel(opts.model);
   const colo = getCurrentColo(c.req.raw);
   return {
-    c,
     apiKeyId: apiKey.id,
     upstreamIds,
     abortSignal: controller?.signal,
@@ -87,9 +81,8 @@ export const createGatewayCtxFromHono = (c: AuthedContext, opts: CreateGatewayCt
 // Run the dump-accumulator's finalize tee on the outgoing Response. Every
 // inbound HTTP wrapper returns its response through this seam so the dump
 // pipeline applies uniformly across happy-path, error, and passthrough paths.
-// Gateway-staged response headers (today: `x-floway-alias`) are written onto
-// the response here so they ride along regardless of how the responder
-// built the body.
+// Gateway-staged response headers (today: `x-floway-alias`) ride along here
+// regardless of how the responder built the body.
 export const finalizeGatewayResponse = (ctx: GatewayCtx, response: Response): Response => {
   for (const [name, value] of ctx.responseHeaders) response.headers.set(name, value);
   return ctx.dump?.finalize(response) ?? response;
diff --git a/packages/gateway/src/data-plane/chat/shared/respond_test.ts b/packages/gateway/src/data-plane/chat/shared/respond_test.ts
index 58c8c454a..328da2ee5 100644
--- a/packages/gateway/src/data-plane/chat/shared/respond_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/respond_test.ts
@@ -4,7 +4,6 @@ import type { GatewayCtx } from './gateway-ctx.ts';
 import { SourceStreamState, recordPerformance, recordUsage } from './respond.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
-import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import type { PerformanceTelemetryContext, TelemetryModelIdentity } from '@floway-dev/provider';
 import { assertEquals } from '@floway-dev/test-utils';
 
@@ -47,7 +46,6 @@ const setup = (): Harness => {
       responseHeaders: new Headers(),
       backgroundScheduler: promise => { background.push(promise); },
       requestStartedAt,
-      c: stubAuthedContext(),
     }),
   };
 };
diff --git a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
index 756dccda8..1af801e10 100644
--- a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry_test.ts
@@ -4,7 +4,6 @@ import type { GatewayCtx } from './gateway-ctx.ts';
 import { withUpstreamTelemetry } from './upstream-telemetry.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
-import { stubAuthedContext } from '../../../test-helpers/gateway-ctx.ts';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesStreamEvent } from '@floway-dev/protocols/messages';
 import type { PerformanceTelemetryContext } from '@floway-dev/provider';
@@ -25,7 +24,6 @@ const baseCtx = (overrides: Partial<GatewayCtx> = {}): GatewayCtx => {
     apiKeyId: 'key_1',
     upstreamIds: null,
     wantsStream: true,
-    c: stubAuthedContext(),
     requestStartedAt: 0,
     runtimeLocation: 'TEST',
     currentColo: 'TEST',
diff --git a/packages/gateway/src/data-plane/model-aliases/resolve_test.ts b/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
index 428b339d1..70d6c60da 100644
--- a/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
+++ b/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
@@ -7,7 +7,6 @@
 import { test, vi } from 'vitest';
 
 import type { ModelAliasRecord, ModelAliasesRepo } from '../../repo/types.ts';
-import { stubAuthedContext } from '../../test-helpers/gateway-ctx.ts';
 import type { ModelInterpretation, ProviderModelResolution } from '../providers/registry.ts';
 import { directFetcher } from '@floway-dev/provider';
 import { assert, assertEquals, assertRejects } from '@floway-dev/test-utils';
@@ -27,7 +26,6 @@ vi.mock('../providers/registry.ts', () => ({
     resolutions: interpretations
       .filter(i => routableModels.has(i.lookupId))
       .map(i => ({
-        interpretation: i,
         provider: i.provider,
         resolved: {
           id: i.lookupId,
@@ -79,9 +77,6 @@ const setRoutable = (...ids: string[]): void => {
   for (const id of ids) routableModels.set(id, { endpoints: ALWAYS_ROUTABLE_ENDPOINTS });
 };
 
-// Silence the unused-ctx warning helpers
-void stubAuthedContext;
-
 test('returns null when no alias matches the inbound name', async () => {
   setRoutable('gpt-5.4');
   const result = await resolveAlias({
diff --git a/packages/gateway/src/test-helpers/gateway-ctx.ts b/packages/gateway/src/test-helpers/gateway-ctx.ts
index 782d87370..3ebf1f474 100644
--- a/packages/gateway/src/test-helpers/gateway-ctx.ts
+++ b/packages/gateway/src/test-helpers/gateway-ctx.ts
@@ -1,12 +1,4 @@
 import type { GatewayCtx } from '../data-plane/chat/shared/gateway-ctx.ts';
-import type { AuthedContext } from '../middleware/auth.ts';
-
-// Minimal stub for the Hono `c` carried on `GatewayCtx`. Unit tests rarely
-// touch any methods on it; integration tests that need real Hono behavior
-// build the ctx via `createGatewayCtxFromHono` against a real `makeApp()`
-// request rather than going through this stub.
-export const stubAuthedContext = (): AuthedContext =>
-  ({ header: () => {} } as unknown as AuthedContext);
 
 // Shared minimal GatewayCtx for tests that exercise serve / respond /
 // interceptor code in isolation. Defaults satisfy every required field; pass
@@ -15,7 +7,6 @@ export const stubAuthedContext = (): AuthedContext =>
 // construct one and spread `{ abortSignal: controller.signal,
 // downstreamAbortController: controller }` into the overrides.
 export const mockGatewayCtx = (overrides: Partial<GatewayCtx> = {}): GatewayCtx => ({
-  c: stubAuthedContext(),
   apiKeyId: 'key_test',
   upstreamIds: null,
   wantsStream: false,

From 0e9024331b08e494cdb6d6c5f2c8c56c25f00208 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 19:22:02 +0800
Subject: [PATCH 060/170] cleanup(aliases): hoist alias-no-target message into
 shared helper

The same 'alias <name> has N target(s); none currently map to...'
string was hand-rolled in all four chat protocol renderers. Move the
canonical string into `shared/errors.ts` so future wording changes
land in one place.
---
 .../src/data-plane/chat/chat-completions/errors.ts   |  4 ++--
 .../gateway/src/data-plane/chat/gemini/errors.ts     |  4 ++--
 .../gateway/src/data-plane/chat/messages/errors.ts   |  4 ++--
 .../gateway/src/data-plane/chat/responses/errors.ts  |  4 ++--
 .../gateway/src/data-plane/chat/shared/errors.ts     | 12 ++++++------
 5 files changed, 14 insertions(+), 14 deletions(-)

diff --git a/packages/gateway/src/data-plane/chat/chat-completions/errors.ts b/packages/gateway/src/data-plane/chat/chat-completions/errors.ts
index ea5b3673c..8f293bae6 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/errors.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/errors.ts
@@ -1,5 +1,5 @@
 import { appendFailedUpstreams } from '../../shared/failed-upstreams.ts';
-import type { ChatServeFailure } from '../shared/errors.ts';
+import { aliasNoTargetMessage, type ChatServeFailure } from '../shared/errors.ts';
 import type { ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { ExecuteResult } from '@floway-dev/provider';
@@ -35,6 +35,6 @@ export const renderChatCompletionsFailure = (
   case 'model-unsupported':
     return openAiErrorResult(400, appendFailedUpstreams(`Model ${failure.model} does not support the /chat/completions endpoint.`, failure.failedUpstreams));
   case 'alias-no-target-available':
-    return openAiErrorResult(404, `alias '${failure.aliasName}' has ${failure.targetCount} target(s); none currently map to an enabled upstream binding`);
+    return openAiErrorResult(404, aliasNoTargetMessage(failure));
   }
 };
diff --git a/packages/gateway/src/data-plane/chat/gemini/errors.ts b/packages/gateway/src/data-plane/chat/gemini/errors.ts
index f1f231d31..258047f8f 100644
--- a/packages/gateway/src/data-plane/chat/gemini/errors.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/errors.ts
@@ -1,5 +1,5 @@
 import { appendFailedUpstreams } from '../../shared/failed-upstreams.ts';
-import type { ChatServeFailure } from '../shared/errors.ts';
+import { aliasNoTargetMessage, type ChatServeFailure } from '../shared/errors.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { GeminiStreamEvent } from '@floway-dev/protocols/gemini';
 import type { ExecuteResult } from '@floway-dev/provider';
@@ -54,6 +54,6 @@ export const renderGeminiFailure = (
   case 'model-unsupported':
     return geminiRpcErrorResult(400, appendFailedUpstreams(`Model ${failure.model} does not support ${endpoint === 'countTokens' ? 'countTokens' : 'the Gemini generateContent endpoint'}.`, failure.failedUpstreams));
   case 'alias-no-target-available':
-    return geminiRpcErrorResult(404, `alias '${failure.aliasName}' has ${failure.targetCount} target(s); none currently map to an enabled upstream binding`);
+    return geminiRpcErrorResult(404, aliasNoTargetMessage(failure));
   }
 };
diff --git a/packages/gateway/src/data-plane/chat/messages/errors.ts b/packages/gateway/src/data-plane/chat/messages/errors.ts
index 9c4d5f976..07536746a 100644
--- a/packages/gateway/src/data-plane/chat/messages/errors.ts
+++ b/packages/gateway/src/data-plane/chat/messages/errors.ts
@@ -1,5 +1,5 @@
 import { appendFailedUpstreams } from '../../shared/failed-upstreams.ts';
-import type { ChatServeFailure } from '../shared/errors.ts';
+import { aliasNoTargetMessage, type ChatServeFailure } from '../shared/errors.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesStreamEvent } from '@floway-dev/protocols/messages';
 import type { ExecuteResult } from '@floway-dev/provider';
@@ -39,6 +39,6 @@ export const renderMessagesFailure = (
   case 'model-unsupported':
     return anthropicErrorResult(400, 'invalid_request_error', appendFailedUpstreams(`Model ${failure.model} does not support the ${endpointPath} endpoint.`, failure.failedUpstreams));
   case 'alias-no-target-available':
-    return anthropicErrorResult(404, 'not_found_error', `alias '${failure.aliasName}' has ${failure.targetCount} target(s); none currently map to an enabled upstream binding`);
+    return anthropicErrorResult(404, 'not_found_error', aliasNoTargetMessage(failure));
   }
 };
diff --git a/packages/gateway/src/data-plane/chat/responses/errors.ts b/packages/gateway/src/data-plane/chat/responses/errors.ts
index 1af6c42d0..2325b8fce 100644
--- a/packages/gateway/src/data-plane/chat/responses/errors.ts
+++ b/packages/gateway/src/data-plane/chat/responses/errors.ts
@@ -1,5 +1,5 @@
 import { appendFailedUpstreams } from '../../shared/failed-upstreams.ts';
-import type { ChatServeFailure } from '../shared/errors.ts';
+import { aliasNoTargetMessage, type ChatServeFailure } from '../shared/errors.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { ResponsesStreamEvent } from '@floway-dev/protocols/responses';
 import type { ExecuteResult } from '@floway-dev/provider';
@@ -35,6 +35,6 @@ export const renderResponsesFailure = (
   case 'model-unsupported':
     return openAiErrorResult(400, appendFailedUpstreams(`Model ${failure.model} does not support the /responses endpoint.`, failure.failedUpstreams));
   case 'alias-no-target-available':
-    return openAiErrorResult(404, `alias '${failure.aliasName}' has ${failure.targetCount} target(s); none currently map to an enabled upstream binding`);
+    return openAiErrorResult(404, aliasNoTargetMessage(failure));
   }
 };
diff --git a/packages/gateway/src/data-plane/chat/shared/errors.ts b/packages/gateway/src/data-plane/chat/shared/errors.ts
index 896ff1a65..cd98509bd 100644
--- a/packages/gateway/src/data-plane/chat/shared/errors.ts
+++ b/packages/gateway/src/data-plane/chat/shared/errors.ts
@@ -10,14 +10,14 @@ export type ChatServeFailure =
   | { readonly kind: 'model-unsupported'; readonly model: string; readonly failedUpstreams?: readonly string[] }
   | { readonly kind: 'item-not-found'; readonly itemId: string }
   | { readonly kind: 'routing-unavailable'; readonly message: string }
-  // Alias name resolved, but no entry in its targets list currently maps to
-  // an enabled upstream binding that exposes the inbound endpoint group.
-  // Rendered as a 404 carrying the canonical
-  // `alias '<name>' has N target(s); none currently map to an enabled
-  // upstream binding` message — every protocol's renderer treats this as a
-  // model-not-found surface.
+  // Alias name resolved, but no entry in its targets list currently maps
+  // to an enabled upstream binding that exposes the inbound endpoint.
   | { readonly kind: 'alias-no-target-available'; readonly aliasName: string; readonly targetCount: number };
 
+// Canonical message every protocol renders for `alias-no-target-available`.
+export const aliasNoTargetMessage = (failure: { aliasName: string; targetCount: number }): string =>
+  `alias '${failure.aliasName}' has ${failure.targetCount} target(s); none currently map to an enabled upstream binding`;
+
 class ChatServeFailureError extends Error {
   readonly failure: ChatServeFailure;
 

From adaefc23d22cf157d12eee58835303b81badc09a Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 19:23:55 +0800
Subject: [PATCH 061/170] cleanup(aliases): strip in-branch-history and spec
 references from translate comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove the 'Goal 2' uncommitted-spec reference, the 'legacy/pre-existing'
in-branch-history phrasing on responses-via-messages, and the mention of a
gateway-side rule-apply pass that the chat-completions-via-messages
comment promised but no code path delivers. Drop the
`mapSummaryToAnthropicDisplay` export — only one in-file consumer.
---
 .../src/chat-completions-via-messages/request.ts      |  4 +---
 .../translate/src/responses-via-messages/request.ts   | 11 +++++------
 packages/translate/src/shared/gemini-via/gemini.ts    |  6 +++---
 .../src/shared/via-messages/anthropic-extensions.ts   |  5 ++---
 4 files changed, 11 insertions(+), 15 deletions(-)

diff --git a/packages/translate/src/chat-completions-via-messages/request.ts b/packages/translate/src/chat-completions-via-messages/request.ts
index d85c8e301..e9e6b6d6e 100644
--- a/packages/translate/src/chat-completions-via-messages/request.ts
+++ b/packages/translate/src/chat-completions-via-messages/request.ts
@@ -191,9 +191,7 @@ export const translateChatCompletionsToMessages = async (payload: ChatCompletion
 
   // Materialize the Floway extension fields onto their Messages-natural
   // slots. `anthropic_beta` is body-side residue that the per-upstream
-  // sanitizer strips after translation; the gateway-side rule-apply pass owns
-  // moving its value onto the outbound `anthropic-beta` header before the
-  // upstream call.
+  // sanitizer strips after translation.
   const thinking = buildMessagesThinkingFromExtensions({
     thinkingBudget: payload.thinking_budget,
     adaptiveThinking: payload.adaptive_thinking,
diff --git a/packages/translate/src/responses-via-messages/request.ts b/packages/translate/src/responses-via-messages/request.ts
index a1896a1f3..61b5edd57 100644
--- a/packages/translate/src/responses-via-messages/request.ts
+++ b/packages/translate/src/responses-via-messages/request.ts
@@ -332,12 +332,11 @@ export const translateResponsesToMessages = async (payload: ResponsesPayload, op
   if (formatSchema) outputConfig.format = { type: 'json_schema', schema: formatSchema };
   const hasOutputConfig = Object.keys(outputConfig).length > 0;
 
-  // Extension-driven thinking (`thinking_budget`, `adaptive_thinking`) wins
-  // over the native `effort === 'none'` disable, so the alias write-side
-  // facets that target the structured thinking slot survive the legacy
-  // disable shortcut. Native `reasoning.summary` does not surface onto
-  // Messages — the Responses-native vocabulary keeps its pre-existing
-  // translation contract and rides the upstream sanitizer.
+  // Extension-driven thinking (`thinking_budget`, `adaptive_thinking`)
+  // takes precedence over the `effort === 'none'` disable shortcut, so the
+  // alias overlay's structured thinking write survives. Native
+  // `reasoning.summary` does not surface onto Messages — the Responses IR
+  // keeps its native translation contract and rides the upstream sanitizer.
   const extensionThinking = buildMessagesThinkingFromExtensions({
     thinkingBudget: payload.thinking_budget,
     adaptiveThinking: payload.adaptive_thinking,
diff --git a/packages/translate/src/shared/gemini-via/gemini.ts b/packages/translate/src/shared/gemini-via/gemini.ts
index 0c98c09e3..024a92a46 100644
--- a/packages/translate/src/shared/gemini-via/gemini.ts
+++ b/packages/translate/src/shared/gemini-via/gemini.ts
@@ -117,9 +117,9 @@ export const geminiFunctionResponsePart = (part: GeminiPart, ids: GeminiToolCall
   return { response, id: unmatched?.shift() ?? id };
 };
 
-// Reasoning effort is freeform on the inbound IRs (per Goal 2: never gate
-// operator-typed values), but the gateway publishes a canonical closed set so
-// translate-side mappers can normalize without rewriting unknown values.
+// Reasoning effort is freeform on the inbound IRs, but the gateway
+// publishes a canonical closed set so translate-side mappers can normalize
+// without rewriting unknown values.
 export type ReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' | 'max';
 
 export const geminiThinkingLevelEffort = (thinkingConfig?: GeminiThinkingConfig): ReasoningEffort | undefined => {
diff --git a/packages/translate/src/shared/via-messages/anthropic-extensions.ts b/packages/translate/src/shared/via-messages/anthropic-extensions.ts
index 513db32ce..a23b05472 100644
--- a/packages/translate/src/shared/via-messages/anthropic-extensions.ts
+++ b/packages/translate/src/shared/via-messages/anthropic-extensions.ts
@@ -8,9 +8,8 @@ import type { MessagesPayload, MessagesThinkingDisplay } from '@floway-dev/proto
 // `omitted` is the canonical hide-everything spelling, and `auto` returns
 // `undefined` so Anthropic's account-default takes over. Operator-typed
 // values that match neither vocabulary pass through verbatim — Anthropic
-// rejects unknown values at the wire, which is the explicit-failure path we
-// want per the alias design's no-enum-gating contract.
-export const mapSummaryToAnthropicDisplay = (summary: string): MessagesThinkingDisplay | string | undefined => {
+// rejects unknown values at the wire, which is the explicit-failure path.
+const mapSummaryToAnthropicDisplay = (summary: string): MessagesThinkingDisplay | string | undefined => {
   switch (summary) {
   case 'concise':
   case 'detailed':

From 6ac44b2c90bf516d1a85ae307bd5d316f337d316 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 19:23:55 +0800
Subject: [PATCH 062/170] cleanup(aliases): tighten Combobox normalized-item
 type

After `normalizedItems` runs, every item carries a non-optional
`label`. Switch the post-normalize collections to a tighter
`NormalizedItem` shape so the template and ordering helpers read
`item.label` directly instead of falling back via `label ?? value`.
---
 packages/ui/src/Combobox.vue | 17 ++++++++++++-----
 1 file changed, 12 insertions(+), 5 deletions(-)

diff --git a/packages/ui/src/Combobox.vue b/packages/ui/src/Combobox.vue
index 8bb6a1555..d111f5f52 100644
--- a/packages/ui/src/Combobox.vue
+++ b/packages/ui/src/Combobox.vue
@@ -22,6 +22,13 @@ interface Item {
   label?: string;
 }
 
+// Post-normalize shape: `label` is always set (defaults to `value`), so the
+// template can read `item.label` directly instead of falling back per row.
+interface NormalizedItem {
+  value: string;
+  label: string;
+}
+
 const value = defineModel<string>({ required: true });
 
 const props = withDefaults(defineProps<{
@@ -55,7 +62,7 @@ const { contains } = useFilter({ sensitivity: 'base' });
 
 // Normalize the items list to a single shape so the template only deals
 // with `{ value, label }`. Strings collapse to `{ value: s, label: s }`.
-const normalizedItems = computed<Item[]>(() => props.items.map(it =>
+const normalizedItems = computed<NormalizedItem[]>(() => props.items.map(it =>
   typeof it === 'string' ? { value: it, label: it } : { value: it.value, label: it.label ?? it.value }));
 
 // query mirrors value so the input always shows the committed string. Reka's
@@ -77,12 +84,12 @@ watch(query, q => { value.value = q; });
 // group. Empty query keeps the configured order untouched. The operator
 // always sees the full set of presets — typing narrows attention to the
 // top of the list without hiding the alternatives.
-const orderedItems = computed<Item[]>(() => {
+const orderedItems = computed<NormalizedItem[]>(() => {
   if (query.value === '') return normalizedItems.value;
-  const matches: Item[] = [];
-  const rest: Item[] = [];
+  const matches: NormalizedItem[] = [];
+  const rest: NormalizedItem[] = [];
   for (const item of normalizedItems.value) {
-    if (contains(item.label ?? item.value, query.value) || contains(item.value, query.value)) {
+    if (contains(item.label, query.value) || contains(item.value, query.value)) {
       matches.push(item);
     } else {
       rest.push(item);

From 8952cb36c6b91b87998aa4a303d0ea29357dcfb6 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 19:30:11 +0800
Subject: [PATCH 063/170] cleanup(aliases/web): inline aliasHasShadowWarning at
 its single call site

`aliasHasShadowWarning` was a one-line wrapper around
`computeShadowWarning(...) !== null` consumed only by `AliasRow.vue`,
which already imported `computeShadowWarning` for its tooltip. Inline
the check at the call site, drop the wrapper plus its test, and remove
the now-redundant `hasShadow` ref from the row.
---
 .../web/src/components/alias-edit/warnings.ts | 37 +++++--------------
 .../components/alias-edit/warnings_test.ts    | 26 +------------
 apps/web/src/components/settings/AliasRow.vue | 16 +++-----
 3 files changed, 18 insertions(+), 61 deletions(-)

diff --git a/apps/web/src/components/alias-edit/warnings.ts b/apps/web/src/components/alias-edit/warnings.ts
index 3455b9f41..9d7801717 100644
--- a/apps/web/src/components/alias-edit/warnings.ts
+++ b/apps/web/src/components/alias-edit/warnings.ts
@@ -1,20 +1,13 @@
 // Shared catalog lookups + warning computation for the alias dashboard
 // surfaces (Settings row, edit dialog, target row). Centralising these
-// helpers means the Settings card and the dialog read the same view of the
-// live /api/models catalog — both the "is this id a real (non-alias) model"
-// and "does this target's chat capability advertise <X>" checks resolve
-// against one source of truth instead of two parallel implementations.
+// helpers keeps the Settings card and the dialog reading the same view of
+// the live /api/models catalog.
 
-import type { ChatAliasRules, ControlPlaneModel, ModelAlias } from '../../api/types.ts';
+import type { ChatAliasRules, ControlPlaneModel } from '../../api/types.ts';
 
-// A target id matches a catalog model when either form (prefixed or
-// unprefixed) resolves to that public id. The /api/models catalog already
-// emits the public id directly, so equality is enough; future prefix-form
-// surfaces can wire their normalisation in here without touching the
-// callers. Alias entries are excluded — at runtime target ids never
-// re-enter the alias layer, so the rule-warning lookup must compare against
-// the same "real model" surface the suggestion list and shadow-detection
-// helpers already use.
+// Excludes alias rows — target ids never re-enter the alias layer, so the
+// rule-warning lookup must compare against the same real-model surface that
+// `realModelIds` and `computeShadowWarning` use.
 export const findCatalogModel = (
   models: readonly ControlPlaneModel[] | null | undefined,
   targetModelId: string,
@@ -74,16 +67,14 @@ export const computeRuleWarnings = (
     out.push({ field: 'reasoning.mandatory', message: 'Target does not advertise mandatory reasoning.' });
   }
 
-  // Summary, verbosity, and serviceTier have no advertised catalog metadata
-  // today; their values are forwarded verbatim and never warn here.
+  // Summary, verbosity, and serviceTier carry no catalog metadata; their
+  // values forward verbatim and never warn here.
 
   return out;
 };
 
 // One model-level warning attached to one target row. Today the only
-// trigger is the target id failing to resolve to any catalog model — the
-// dashboard surfaces it via a yellow `!` icon in the row's action cluster
-// with a tooltip listing every warning on that row.
+// trigger is the target id failing to resolve to any catalog model.
 export interface AliasModelWarning {
   message: string;
 }
@@ -101,8 +92,7 @@ export const computeModelWarnings = (
 
 // Alias-level shadow warning. Fires iff the alias name matches a real
 // (non-alias) catalog model id AND no target inside the alias references
-// that real id (the seed pattern — alias name = first target's id —
-// deliberately suppresses the warning).
+// that real id — a target referencing the shadowed id suppresses the warning.
 export interface AliasShadowWarning {
   shadowedId: string;
   shadowedDisplayName: string | null;
@@ -123,10 +113,3 @@ export const computeShadowWarning = (
     shadowedDisplayName: displayName !== null && displayName !== shadowed.id ? displayName : null,
   };
 };
-
-// Convenience wrapper for the Settings row: derive whether this alias
-// trips the shadow warning against the live catalog.
-export const aliasHasShadowWarning = (
-  alias: ModelAlias,
-  models: readonly ControlPlaneModel[] | null | undefined,
-): boolean => computeShadowWarning(alias.name, alias.targets, models) !== null;
diff --git a/apps/web/src/components/alias-edit/warnings_test.ts b/apps/web/src/components/alias-edit/warnings_test.ts
index b9088173a..d8c4eefa3 100644
--- a/apps/web/src/components/alias-edit/warnings_test.ts
+++ b/apps/web/src/components/alias-edit/warnings_test.ts
@@ -1,7 +1,7 @@
 import { describe, expect, it } from 'vitest';
 
-import { aliasHasShadowWarning, computeModelWarnings, computeRuleWarnings, computeShadowWarning, findCatalogModel, realModelIds } from './warnings.ts';
-import type { ChatAliasRules, ControlPlaneModel, ModelAlias } from '../../api/types.ts';
+import { computeModelWarnings, computeRuleWarnings, computeShadowWarning, findCatalogModel, realModelIds } from './warnings.ts';
+import type { ChatAliasRules, ControlPlaneModel } from '../../api/types.ts';
 
 const realModel = (over: Partial<ControlPlaneModel> & { id: string }): ControlPlaneModel => ({
   upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }],
@@ -14,18 +14,6 @@ const aliasModel = (over: Partial<ControlPlaneModel> & { id: string }): ControlP
   ...over,
 });
 
-const alias = (over: Partial<ModelAlias> & { name: string }): ModelAlias => ({
-  kind: 'chat',
-  selection: 'first-available',
-  display_name: null,
-  visible_in_models_list: true,
-  targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }],
-  sort_order: 0,
-  created_at: '2026-01-01T00:00:00Z',
-  updated_at: '2026-01-01T00:00:00Z',
-  ...over,
-});
-
 describe('realModelIds', () => {
   it('excludes alias entries and returns the remaining ids in catalog order', () => {
     const catalog: ControlPlaneModel[] = [
@@ -144,13 +132,3 @@ describe('computeShadowWarning', () => {
     expect(computeShadowWarning('', [{ target_model_id: 'gpt-5' }], catalog)).toBeNull();
   });
 });
-
-describe('aliasHasShadowWarning', () => {
-  const catalog: ControlPlaneModel[] = [realModel({ id: 'gpt-5' }), realModel({ id: 'plain' })];
-
-  it('mirrors computeShadowWarning', () => {
-    expect(aliasHasShadowWarning(alias({ name: 'gpt-5', targets: [{ target_model_id: 'plain', rules: {} as ChatAliasRules }] }), catalog)).toBe(true);
-    expect(aliasHasShadowWarning(alias({ name: 'gpt-5', targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }] }), catalog)).toBe(false);
-    expect(aliasHasShadowWarning(alias({ name: 'free-name' }), catalog)).toBe(false);
-  });
-});
diff --git a/apps/web/src/components/settings/AliasRow.vue b/apps/web/src/components/settings/AliasRow.vue
index 0f3d1a879..2b1bb015f 100644
--- a/apps/web/src/components/settings/AliasRow.vue
+++ b/apps/web/src/components/settings/AliasRow.vue
@@ -1,14 +1,12 @@
 <script setup lang="ts">
 // One alias rendered as a two-line block in the Settings card. The action
-// cluster sits right-aligned and reserves the leftmost slot for the
-// alias-level warning icon — when no warning is firing the slot collapses
-// to zero width, so the edit and delete buttons keep the same on-screen
-// position whether or not a warning is present.
+// cluster sits right-aligned; the leftmost slot is reserved for the
+// alias-level warning icon when one fires.
 
 import { computed } from 'vue';
 
 import type { ControlPlaneModel, ModelAlias } from '../../api/types.ts';
-import { aliasHasShadowWarning, computeShadowWarning } from '../alias-edit/warnings.ts';
+import { computeShadowWarning } from '../alias-edit/warnings.ts';
 import { composeAliasDisplayName } from '@floway-dev/protocols/common';
 import { Tooltip } from '@floway-dev/ui';
 
@@ -22,9 +20,8 @@ defineEmits<{
   delete: [];
 }>();
 
-// Title resolution mirrors the spec's derivation rule: an operator-set
-// `display_name` always wins; falling back to the single-target compose
-// helper or to the alias `name` when multi-target.
+// Operator-set `display_name` wins; single-target aliases fall through to
+// the compose helper; multi-target falls back to `name`.
 const title = computed(() => {
   if (props.alias.display_name !== null) return props.alias.display_name;
   if (props.alias.targets.length === 1) {
@@ -45,7 +42,6 @@ const caption = computed(() => {
 });
 
 const shadowWarning = computed(() => computeShadowWarning(props.alias.name, props.alias.targets, props.models));
-const hasShadow = computed(() => aliasHasShadowWarning(props.alias, props.models));
 const shadowTooltip = computed(() => {
   const w = shadowWarning.value;
   if (!w) return '';
@@ -63,7 +59,7 @@ const shadowTooltip = computed(() => {
       </div>
 
       <div class="flex shrink-0 items-center gap-1">
-        <Tooltip v-if="hasShadow" :content="shadowTooltip">
+        <Tooltip v-if="shadowWarning" :content="shadowTooltip">
           <span
             class="inline-flex h-8 w-8 items-center justify-center rounded-md text-amber-400"
             aria-label="Alias warning"

From 3d3d08d63d3270193e9bc78fb8a1e4d962fe8719 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 19:30:20 +0800
Subject: [PATCH 064/170] cleanup(aliases): strip spec references and verbose
 comments
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Remove 'the spec calls for' / 'mirrors the X store' / in-branch-history
phrasing from the alias dashboard surface and the control-plane schema
comments. Drop the `?? ''` fallback on the route's `name` param —
Hono guarantees the path segment is present, so the previous fallback
masked a routing bug rather than surfacing one. Drop the non-null
assertion in `props.record!.name`, which the surrounding
`else if (props.record)` check already makes redundant. Also hoist the
`displayName.trim()` call so the save payload computes it once.
---
 .../components/alias-edit/AliasEditDialog.vue | 28 +++++++------------
 .../components/alias-edit/AliasTargetRow.vue  | 25 ++++++-----------
 .../src/components/models/ModelInfoBar.vue    | 10 +++----
 .../settings/AliasesSettingsCard.vue          |  4 +--
 apps/web/src/composables/useModelAliases.ts   |  6 ++--
 .../src/control-plane/model-aliases/routes.ts | 17 ++++++-----
 .../control-plane/model-aliases/serialize.ts  | 15 +++-------
 packages/gateway/src/control-plane/schemas.ts | 22 +++++++--------
 8 files changed, 48 insertions(+), 79 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index cb49f9055..ecac5409c 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -31,10 +31,8 @@ const modelsStore = useModelsStore();
 
 const mode = computed<'create' | 'edit'>(() => (props.record ? 'edit' : 'create'));
 
-// Empty rules per kind. Chat carries an open ChatAliasRules; embedding and
-// image carry an empty record per the spec — switching kind resets every
-// target row to this empty default so a chat-only rule doesn't survive a
-// switch to image.
+// Switching kind discards rule state — a chat-only rule must not survive a
+// switch into embedding/image.
 const emptyRulesFor = (k: AliasKind): AliasTarget['rules'] => (k === 'chat' ? {} as ChatAliasRules : {} as Record<string, never>);
 
 const blankTarget = (k: AliasKind): AliasTarget => ({ target_model_id: '', rules: emptyRulesFor(k) });
@@ -45,8 +43,8 @@ const kind = ref<AliasKind>(props.record?.kind ?? 'chat');
 const selection = ref<AliasSelection>(props.record?.selection ?? 'first-available');
 const visibleInModelsList = ref(props.record?.visible_in_models_list ?? true);
 
-// Local working copy of the targets list. Defaults to a single blank
-// target on create so the operator immediately sees a row to fill in.
+// Create mode starts with one blank target so the operator immediately sees
+// a row to fill in.
 const targets = ref<AliasTarget[]>(
   props.record
     ? props.record.targets.map(t => ({ target_model_id: t.target_model_id, rules: { ...t.rules } as AliasTarget['rules'] }))
@@ -55,9 +53,6 @@ const targets = ref<AliasTarget[]>(
 
 const setKind = (k: AliasKind) => {
   kind.value = k;
-  // Reset every target's rules to the new kind's empty default. The spec
-  // is explicit: switching kind discards rule state so a chat-only rule
-  // doesn't survive a switch into image.
   targets.value = targets.value.map(t => ({ target_model_id: t.target_model_id, rules: emptyRulesFor(k) }));
 };
 
@@ -83,13 +78,9 @@ const removeTarget = (idx: number) => {
 };
 
 // Suggestion list for every target-id combobox. Aliases are excluded so an
-// operator can't accidentally hop into the alias layer twice (it never
-// recurses at request time anyway, but suggesting alias names would just
-// be confusing).
+// operator can't accidentally hop into the alias layer twice.
 const targetIdItems = computed(() => realModelIds(modelsStore.models.value));
 
-// Alias-level warnings. Today only the shadow warning fires; future
-// alias-wide checks plug in here.
 const shadowWarning = computed(() => computeShadowWarning(aliasName.value.trim(), targets.value, modelsStore.models.value));
 
 const saving = ref(false);
@@ -115,14 +106,15 @@ const save = async () => {
   if (saveError.value !== null) return;
 
   const trimmedName = aliasName.value.trim();
+  const trimmedDisplay = displayName.value.trim();
   // The Hono RPC body type infers each target's `rules` as the loose
-  // `Record<string, unknown>` from the Zod schema. Build the payload with
-  // that loose shape so the typed save call doesn't need an `as`.
+  // `Record<string, unknown>` from the Zod schema, so build the payload
+  // with that loose shape and cast each target's rules to match.
   const body = {
     name: trimmedName,
     kind: kind.value,
     selection: selection.value,
-    display_name: displayName.value.trim() === '' ? null : displayName.value.trim(),
+    display_name: trimmedDisplay === '' ? null : trimmedDisplay,
     visible_in_models_list: visibleInModelsList.value,
     targets: targets.value.map(t => ({
       target_model_id: t.target_model_id.trim(),
@@ -138,7 +130,7 @@ const save = async () => {
       if (error) { saveError.value = error.message; return; }
     } else if (props.record) {
       const { error } = await callApi(() => api.api.aliases[':name'].$put({
-        param: { name: props.record!.name },
+        param: { name: props.record.name },
         json: body,
       }));
       if (error) { saveError.value = error.message; return; }
diff --git a/apps/web/src/components/alias-edit/AliasTargetRow.vue b/apps/web/src/components/alias-edit/AliasTargetRow.vue
index d3f21b4a0..9a5277579 100644
--- a/apps/web/src/components/alias-edit/AliasTargetRow.vue
+++ b/apps/web/src/components/alias-edit/AliasTargetRow.vue
@@ -1,9 +1,5 @@
 <script setup lang="ts">
-// One target row inside the alias edit dialog. Header: chevron + borderless
-// target-id combobox (the row's title) + action cluster (warning icon ·
-// up · down · remove). Body (chat kind only): flat rules form with one
-// rule-level warning rendered as inline yellow text under each annotated
-// field.
+// One target row inside the alias edit dialog.
 
 import { computed, ref } from 'vue';
 
@@ -37,9 +33,9 @@ const targetId = computed({
   set: v => { target.value = { ...target.value, target_model_id: v }; },
 });
 
-// Mutable mirror of the chat rules. Every field setter clones the rules so
-// the v-model emit fires and the parent's targets array stays referentially
-// up to date.
+// Read-only view of the rules as `ChatAliasRules`. The template gates this
+// to the chat branch; setters always clone the rules so the v-model emit
+// fires and the parent's targets array stays referentially up to date.
 const chatRules = computed<ChatAliasRules>(() => target.value.rules as ChatAliasRules);
 
 const setRules = (next: ChatAliasRules) => { target.value = { ...target.value, rules: next }; };
@@ -74,10 +70,10 @@ const setServiceTier = (raw: string) => {
   setRules(next);
 };
 
-// String-bound view of the integer budget. The form keeps the typed string
-// for round-trip stability (so an in-progress "" or "1024foo" doesn't
-// clobber the underlying numeric value mid-keystroke) and writes back to
-// the rules object only when the parsed number is a finite integer.
+// String-bound view of the integer budget. Keeping the typed string in
+// state means an in-progress "" or "1024foo" doesn't clobber the
+// underlying numeric value mid-keystroke; the rules object only updates
+// when the parsed number is a finite integer.
 const budgetText = ref(chatRules.value.reasoning?.budget_tokens === undefined ? '' : String(chatRules.value.reasoning.budget_tokens));
 const onBudgetChange = (raw: string) => {
   budgetText.value = raw;
@@ -90,10 +86,7 @@ const onBudgetChange = (raw: string) => {
   patchReasoning({ budget_tokens: Number(trimmed) });
 };
 
-// Suggestion lists for chat-rule comboboxes. The operator can still type
-// any value verbatim; the gateway forwards rule values without enum-gating
-// them so a brand-new upstream tier flows through without a frontend
-// release.
+// Suggestion lists for chat-rule comboboxes.
 const EFFORT_ITEMS = ['none', 'low', 'medium', 'high', 'xhigh'];
 const SUMMARY_ITEMS = ['auto', 'concise', 'detailed', 'none'];
 const VERBOSITY_ITEMS = ['low', 'medium', 'high'];
diff --git a/apps/web/src/components/models/ModelInfoBar.vue b/apps/web/src/components/models/ModelInfoBar.vue
index 00e0165ef..3c7a78b07 100644
--- a/apps/web/src/components/models/ModelInfoBar.vue
+++ b/apps/web/src/components/models/ModelInfoBar.vue
@@ -17,9 +17,8 @@ const formatTokenLimit = (n: number) => {
   return n.toString();
 };
 
-// Alias-of badge: truncate the target list to the first three with a
-// "+N more" tail when needed. Keeps the badge readable for aliases that
-// fan out to a long fallback chain.
+// Truncate to the first three with a "+N more" tail to keep the badge
+// readable for aliases that fan out to a long fallback chain.
 const aliasOfLabel = computed<string | null>(() => {
   const a = props.model.aliasedFrom;
   if (!a) return null;
@@ -28,9 +27,8 @@ const aliasOfLabel = computed<string | null>(() => {
   return `alias of: ${ids.slice(0, 3).join(', ')} +${ids.length - 3} more`;
 });
 
-// Rule badge sequence. Single-target aliases keep the existing
-// per-rule badges; multi-target aliases collapse the rule set into one
-// "<field>: varies" pill per field configured on any target.
+// Single-target aliases render one badge per rule; multi-target aliases
+// collapse each field to "<field>: varies" when its values differ.
 const ruleBadges = computed<{ label: string }[]>(() => {
   const a = props.model.aliasedFrom;
   if (!a) return [];
diff --git a/apps/web/src/components/settings/AliasesSettingsCard.vue b/apps/web/src/components/settings/AliasesSettingsCard.vue
index 7741d5509..9ac26f29f 100644
--- a/apps/web/src/components/settings/AliasesSettingsCard.vue
+++ b/apps/web/src/components/settings/AliasesSettingsCard.vue
@@ -1,7 +1,5 @@
 <script setup lang="ts">
-// Settings card listing every alias the operator has configured. Mirrors
-// the Proxies card's chrome: section title, "Add alias" button, empty
-// state, list body, surfaced load-error banner.
+// Settings card listing every alias the operator has configured.
 
 import { computed } from 'vue';
 
diff --git a/apps/web/src/composables/useModelAliases.ts b/apps/web/src/composables/useModelAliases.ts
index 94d045767..020f226de 100644
--- a/apps/web/src/composables/useModelAliases.ts
+++ b/apps/web/src/composables/useModelAliases.ts
@@ -3,10 +3,8 @@ import { ref, shallowRef } from 'vue';
 import { callApi, useApi } from '../api/client.ts';
 import type { ModelAlias } from '../api/types.ts';
 
-// Module-scoped cache so concurrent callers share one fetch — mirrors the
-// proxies/upstreams stores. Settings tabs that mount in parallel reuse a
-// single in-flight request instead of fan-out per-component, and edits in
-// the Settings card reflect on the Models page without a page reload.
+// Module-scoped cache so concurrent callers share one fetch and edits in
+// the Settings card reflect on the Models page without a reload.
 const aliases = shallowRef<ModelAlias[] | null>(null);
 const loading = ref(false);
 const error = ref<string | null>(null);
diff --git a/packages/gateway/src/control-plane/model-aliases/routes.ts b/packages/gateway/src/control-plane/model-aliases/routes.ts
index 49956533c..b235ef8c5 100644
--- a/packages/gateway/src/control-plane/model-aliases/routes.ts
+++ b/packages/gateway/src/control-plane/model-aliases/routes.ts
@@ -1,6 +1,6 @@
-// Admin-only CRUD for model aliases. Wire shape (snake_case) is documented in
-// `@floway-dev/protocols/common`; this layer translates between the wire DTO
-// and the camelCase `ModelAliasRecord` the repo stores.
+// Admin-only CRUD for model aliases. Wire shape (snake_case) is in
+// `@floway-dev/protocols/common`; this layer maps to the camelCase
+// `ModelAliasRecord` the repo stores.
 
 import type { Context } from 'hono';
 
@@ -10,8 +10,7 @@ import { getRepo } from '../../repo/index.ts';
 import type { ModelAliasRecord } from '../../repo/types.ts';
 import type { createAliasBody, updateAliasBody } from '../schemas.ts';
 
-// Place a new alias at the end of the sort order by default. Empty list → 0
-// so the very first alias starts the sequence at the same origin as upstreams.
+// New alias goes at the end of the sort order by default. Empty list → 0.
 const nextSortOrder = (existing: readonly ModelAliasRecord[]): number =>
   existing.reduce((acc, record) => Math.max(acc, record.sortOrder), -1) + 1;
 
@@ -41,7 +40,7 @@ export const createAlias = async (c: CtxWithJson<typeof createAliasBody>) => {
 };
 
 export const updateAlias = async (c: CtxWithJson<typeof updateAliasBody>) => {
-  const oldName = c.req.param('name') ?? '';
+  const oldName = c.req.param('name')!;
   const body = c.req.valid('json');
   const repo = getRepo();
 
@@ -65,9 +64,9 @@ export const updateAlias = async (c: CtxWithJson<typeof updateAliasBody>) => {
 };
 
 export const deleteAlias = async (c: Context) => {
-  const name = c.req.param('name') ?? '';
-  // Idempotent: the spec calls for a successful response whether or not a row
-  // existed. 204 keeps the verb-shape parity with DELETE /api/proxies/:id.
+  const name = c.req.param('name')!;
+  // Idempotent — success whether or not a row existed. 204 keeps verb-shape
+  // parity with DELETE /api/proxies/:id.
   await getRepo().modelAliases.delete(name);
   return c.body(null, 204);
 };
diff --git a/packages/gateway/src/control-plane/model-aliases/serialize.ts b/packages/gateway/src/control-plane/model-aliases/serialize.ts
index a3ca87ec3..df22548b4 100644
--- a/packages/gateway/src/control-plane/model-aliases/serialize.ts
+++ b/packages/gateway/src/control-plane/model-aliases/serialize.ts
@@ -1,7 +1,5 @@
-// Snake_case wire <-> camelCase record conversion for model aliases. The wire
-// shape (`ModelAlias`) lives in `@floway-dev/protocols/common` so the
-// dashboard and the control plane share one source of truth; this file is
-// the only place those two shapes meet.
+// Snake_case wire ↔ camelCase record conversion for model aliases. The wire
+// shape (`ModelAlias`) lives in `@floway-dev/protocols/common`.
 
 import type { ModelAliasRecord } from '../../repo/types.ts';
 import type { AliasKind, AliasSelection, AliasTarget, ModelAlias } from '@floway-dev/protocols/common';
@@ -18,9 +16,8 @@ export const recordToWire = (record: ModelAliasRecord): ModelAlias => ({
   updated_at: record.updatedAt,
 });
 
-// Wire payload accepted by the create / update body schemas. Every field
-// except `sort_order` is required at this layer; the route owns how the
-// sort order and timestamps are produced before calling wireToRecord.
+// Wire payload accepted by the create / update body schemas. `sort_order` is
+// optional; everything else is required.
 export interface ModelAliasWireInput {
   name: string;
   kind: AliasKind;
@@ -31,10 +28,6 @@ export interface ModelAliasWireInput {
   sort_order?: number;
 }
 
-// Build a record from a validated wire payload. The caller supplies the
-// fields the wire shape doesn't carry — `sortOrder` (computed via
-// nextSortOrder, or copied from the existing row on update), `createdAt`
-// (now for create, preserved on update), and `updatedAt` (always now).
 export const wireToRecord = (
   wire: ModelAliasWireInput,
   meta: { sortOrder: number; createdAt: string; updatedAt: string },
diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
index 0fce131b4..b42b39bfa 100644
--- a/packages/gateway/src/control-plane/schemas.ts
+++ b/packages/gateway/src/control-plane/schemas.ts
@@ -595,11 +595,9 @@ export const searchConfigSchema = z.object({
 
 // --- model aliases ---
 
-// Per-target chat rules. Field names mirror the IR slot each value overlays —
-// `reasoning.effort` / `verbosity` / `serviceTier` flow verbatim onto the
-// outbound request, so the schema does not narrow them against the target's
-// advertised capability metadata (the spec calls for verbatim forwarding so
-// the operator can drive a feature the catalog doesn't yet advertise).
+// Per-target chat rules. Field names mirror the IR slot each value overlays.
+// Values forward verbatim — no capability narrowing here, so an operator
+// can drive a feature the catalog hasn't advertised yet.
 const chatAliasReasoningSchema = z.object({
   effort: z.enum(['none', 'low', 'medium', 'high', 'xhigh']).optional(),
   budget_tokens: z.number().int().nonnegative().optional(),
@@ -614,10 +612,10 @@ const chatAliasRulesSchema = z.object({
   serviceTier: z.enum(['default', 'flex', 'priority', 'scale', 'fast']).optional(),
 }).strict();
 
-// Rules are validated against the alias-level kind in a superRefine pass on
-// the body schema below — chat-kind aliases accept ChatAliasRules; other kinds
-// require an empty object. Each target_model_id is opaque (no `/` semantics
-// inside the alias layer), so the only structural check is non-emptiness.
+// Rules are validated against the alias-level kind in the superRefine pass
+// below — chat-kind aliases accept ChatAliasRules; other kinds require an
+// empty object. Each target_model_id is opaque (no `/` semantics in the
+// alias layer), so the only structural check is non-emptiness.
 const aliasTargetSchema = z.object({
   target_model_id: z.string().min(1),
   rules: z.record(z.string(), z.unknown()),
@@ -636,9 +634,9 @@ const aliasBaseShape = {
 const aliasBodyCore = z.object(aliasBaseShape);
 
 // superRefine cross-validates each target's `rules` against the alias-level
-// kind. For chat: parse through chatAliasRulesSchema and surface the inner
-// issue verbatim. For embedding / image: today there are no per-target rules,
-// so the slot must be `{}` — populating it later just needs a fresh schema.
+// kind. Chat: parse through `chatAliasRulesSchema` and surface the inner
+// issue verbatim. Embedding / image: the slot must be `{}` until a future
+// schema lands.
 const aliasBodyRulesRefinement = (
   value: z.infer<typeof aliasBodyCore>,
   ctx: z.core.$RefinementCtx,

From 6c851385b03e3a20c0cf3a7ad9de682d377398ef Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 19:33:00 +0800
Subject: [PATCH 065/170] cleanup(aliases): fix-up typecheck post-cleanup

- Import ModelProviderInstance directly from @floway-dev/provider in
  resolve.ts; the registry module re-uses the type but does not export
  it, so the transitive import broke once unrelated edits perturbed it.
- Bind `props.record.name` to a local in AliasEditDialog before the
  async callback so the surrounding narrowing survives the await
  without a non-null assertion.
- Drop the now-unused ChatAliasRules import in warnings_test.
---
 apps/web/src/components/alias-edit/AliasEditDialog.vue   | 3 ++-
 apps/web/src/components/alias-edit/warnings_test.ts      | 2 +-
 packages/gateway/src/data-plane/model-aliases/resolve.ts | 3 +--
 3 files changed, 4 insertions(+), 4 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index ecac5409c..86756bde4 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -129,8 +129,9 @@ const save = async () => {
       const { error } = await callApi(() => api.api.aliases.$post({ json: body }));
       if (error) { saveError.value = error.message; return; }
     } else if (props.record) {
+      const oldName = props.record.name;
       const { error } = await callApi(() => api.api.aliases[':name'].$put({
-        param: { name: props.record.name },
+        param: { name: oldName },
         json: body,
       }));
       if (error) { saveError.value = error.message; return; }
diff --git a/apps/web/src/components/alias-edit/warnings_test.ts b/apps/web/src/components/alias-edit/warnings_test.ts
index d8c4eefa3..8dfe9da6f 100644
--- a/apps/web/src/components/alias-edit/warnings_test.ts
+++ b/apps/web/src/components/alias-edit/warnings_test.ts
@@ -1,7 +1,7 @@
 import { describe, expect, it } from 'vitest';
 
 import { computeModelWarnings, computeRuleWarnings, computeShadowWarning, findCatalogModel, realModelIds } from './warnings.ts';
-import type { ChatAliasRules, ControlPlaneModel } from '../../api/types.ts';
+import type { ControlPlaneModel } from '../../api/types.ts';
 
 const realModel = (over: Partial<ControlPlaneModel> & { id: string }): ControlPlaneModel => ({
   upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }],
diff --git a/packages/gateway/src/data-plane/model-aliases/resolve.ts b/packages/gateway/src/data-plane/model-aliases/resolve.ts
index 9f888f7da..0f2f84275 100644
--- a/packages/gateway/src/data-plane/model-aliases/resolve.ts
+++ b/packages/gateway/src/data-plane/model-aliases/resolve.ts
@@ -7,11 +7,10 @@
 
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import type { ModelAliasesRepo, ModelAliasRecord } from '../../repo/types.ts';
-import type { ModelProviderInstance } from '../providers/registry.ts';
 import { collectInterpretationOutcomes, enumerateModelInterpretations, listModelProviders } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { AliasKind, AliasRules, ModelEndpointKey } from '@floway-dev/protocols/common';
-import type { Fetcher } from '@floway-dev/provider';
+import type { Fetcher, ModelProviderInstance } from '@floway-dev/provider';
 
 // Endpoint family the inbound request belongs to. Mirrors `AliasKind` but
 // named in the data-plane vocabulary so the resolver argument site reads as

From 0bf6498e6f5b3f8670f438bd8bf90e4e2e140425 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 19:41:24 +0800
Subject: [PATCH 066/170] cleanup(aliases): single-source the alias-no-target
 message string
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Round-1 cleanup hoisted the wording into `aliasNoTargetMessage` but the
identical template literal stayed in the `AliasNoTargetAvailableError`
constructor — and passthrough callers (embeddings, images) thread
`error.message` straight onto their 404 bodies while chat surfaces go
through the helper, so a future wording change would diverge silently
between the two paths. Move the helper to live next to the Error class
and have the constructor call it; re-export from `chat/shared/errors.ts`
so existing chat-protocol renderers keep their import path.
---
 packages/gateway/src/data-plane/chat/shared/errors.ts    | 4 +---
 packages/gateway/src/data-plane/model-aliases/resolve.ts | 9 ++++++++-
 2 files changed, 9 insertions(+), 4 deletions(-)

diff --git a/packages/gateway/src/data-plane/chat/shared/errors.ts b/packages/gateway/src/data-plane/chat/shared/errors.ts
index cd98509bd..8e708729e 100644
--- a/packages/gateway/src/data-plane/chat/shared/errors.ts
+++ b/packages/gateway/src/data-plane/chat/shared/errors.ts
@@ -14,9 +14,7 @@ export type ChatServeFailure =
   // to an enabled upstream binding that exposes the inbound endpoint.
   | { readonly kind: 'alias-no-target-available'; readonly aliasName: string; readonly targetCount: number };
 
-// Canonical message every protocol renders for `alias-no-target-available`.
-export const aliasNoTargetMessage = (failure: { aliasName: string; targetCount: number }): string =>
-  `alias '${failure.aliasName}' has ${failure.targetCount} target(s); none currently map to an enabled upstream binding`;
+export { aliasNoTargetMessage } from '../../model-aliases/resolve.ts';
 
 class ChatServeFailureError extends Error {
   readonly failure: ChatServeFailure;
diff --git a/packages/gateway/src/data-plane/model-aliases/resolve.ts b/packages/gateway/src/data-plane/model-aliases/resolve.ts
index 0f2f84275..dfb3d287b 100644
--- a/packages/gateway/src/data-plane/model-aliases/resolve.ts
+++ b/packages/gateway/src/data-plane/model-aliases/resolve.ts
@@ -36,6 +36,13 @@ export interface AliasResolution {
   readonly aliasName: string;
 }
 
+// Canonical wording for the alias-no-target-available 404. The Error class
+// and every protocol-shaped renderer (chat/{*}/errors.ts, embeddings/serve,
+// images/serve) read the same string from here so wording changes land in
+// one place.
+export const aliasNoTargetMessage = (params: { aliasName: string; targetCount: number }): string =>
+  `alias '${params.aliasName}' has ${params.targetCount} target(s); none currently map to an enabled upstream binding`;
+
 // Thrown when the alias name was found but no target currently resolves to
 // an enabled upstream binding that exposes the inbound endpoint. Caught at
 // each protocol's serve seam and surfaced as a 404 in the protocol-specific
@@ -45,7 +52,7 @@ export class AliasNoTargetAvailableError extends Error {
   readonly targetCount: number;
 
   constructor(aliasName: string, targetCount: number) {
-    super(`alias '${aliasName}' has ${targetCount} target(s); none currently map to an enabled upstream binding`);
+    super(aliasNoTargetMessage({ aliasName, targetCount }));
     this.name = 'AliasNoTargetAvailableError';
     this.aliasName = aliasName;
     this.targetCount = targetCount;

From ffb3374936379675f24be1f73c36125590414dc3 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 19:42:01 +0800
Subject: [PATCH 067/170] test(aliases/web): pin findCatalogModel against
 alias-row shadowing a target id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Round-1 fix `2b4374e4` excluded alias entries from findCatalogModel but
the existing test only covered the real-model happy path — it would have
passed against the pre-fix impl too. Add the regression case: alias row
sharing the target id must not surface as the catalog row.
---
 apps/web/src/components/alias-edit/warnings_test.ts | 10 ++++++++++
 1 file changed, 10 insertions(+)

diff --git a/apps/web/src/components/alias-edit/warnings_test.ts b/apps/web/src/components/alias-edit/warnings_test.ts
index 8dfe9da6f..9d0165974 100644
--- a/apps/web/src/components/alias-edit/warnings_test.ts
+++ b/apps/web/src/components/alias-edit/warnings_test.ts
@@ -36,6 +36,16 @@ describe('findCatalogModel', () => {
     expect(findCatalogModel(catalog, 'claude')?.id).toBe('claude');
     expect(findCatalogModel(catalog, 'unknown')).toBeUndefined();
   });
+
+  it('skips alias rows that share an id with a target — they never re-enter the alias layer at runtime', () => {
+    // The catalog holds a single row: an alias entry with id 'auto-review'
+    // (the alias name shadows nothing real). findCatalogModel must not
+    // return the alias entry — its capability metadata is the wrong
+    // source for a real-model rule warning. computeModelWarnings should
+    // treat the id as unknown instead.
+    const catalog: ControlPlaneModel[] = [aliasModel({ id: 'auto-review' })];
+    expect(findCatalogModel(catalog, 'auto-review')).toBeUndefined();
+  });
 });
 
 describe('computeModelWarnings', () => {

From f065e2da35ba29a3f694c22d6256052d9ae79817 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Fri, 26 Jun 2026 19:46:17 +0800
Subject: [PATCH 068/170] cleanup(aliases): move aliasFailureFromError to
 chat/shared/errors.ts

Co-located with ChatServeFailure where every caller already lives.
Tightens the return type to Extract<ChatServeFailure, ...> so the
inline literal can't drift from the union variant it lifts into.
---
 .../src/data-plane/chat/chat-completions/serve.ts      |  3 ++-
 packages/gateway/src/data-plane/chat/gemini/serve.ts   |  3 ++-
 packages/gateway/src/data-plane/chat/messages/serve.ts |  3 ++-
 .../src/data-plane/chat/responses/serve-prep.ts        |  3 ++-
 packages/gateway/src/data-plane/chat/shared/errors.ts  | 10 ++++++++++
 .../gateway/src/data-plane/model-aliases/resolve.ts    |  8 --------
 6 files changed, 18 insertions(+), 12 deletions(-)

diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
index 3f04c8b8b..8fc9f06bf 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
@@ -1,10 +1,11 @@
 import { chatCompletionsAttempt } from './attempt.ts';
 import { renderChatCompletionsFailure } from './errors.ts';
 import { planChatCompletionsRouting } from './routing.ts';
-import { AliasNoTargetAvailableError, aliasFailureFromError } from '../../model-aliases/resolve.ts';
+import { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
 import { resolveAndApplyAliasForChatCompletions } from '../../model-aliases/serve-integration.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
+import { aliasFailureFromError } from '../shared/errors.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts
index 091143618..f12feee71 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts
@@ -1,10 +1,11 @@
 import { geminiAttempt } from './attempt.ts';
 import { renderGeminiFailure } from './errors.ts';
 import { planGeminiRouting } from './routing.ts';
-import { AliasNoTargetAvailableError, aliasFailureFromError } from '../../model-aliases/resolve.ts';
+import { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
 import { resolveAndApplyAliasForGemini } from '../../model-aliases/serve-integration.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
+import { aliasFailureFromError } from '../shared/errors.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts
index 1a7b22372..fa1b966e2 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve.ts
@@ -1,10 +1,11 @@
 import { messagesAttempt } from './attempt.ts';
 import { renderMessagesFailure } from './errors.ts';
 import { planMessagesRouting } from './routing.ts';
-import { AliasNoTargetAvailableError, aliasFailureFromError } from '../../model-aliases/resolve.ts';
+import { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
 import { resolveAndApplyAliasForMessages } from '../../model-aliases/serve-integration.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
+import { aliasFailureFromError } from '../shared/errors.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
diff --git a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
index 1d13c81cf..294916922 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
@@ -1,9 +1,10 @@
 import { renderResponsesFailure } from './errors.ts';
 import type { StatefulResponsesStore } from './items/store.ts';
 import { planResponsesRouting } from './routing.ts';
-import { AliasNoTargetAvailableError, aliasFailureFromError } from '../../model-aliases/resolve.ts';
+import { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
 import { resolveAndApplyAliasForResponses } from '../../model-aliases/serve-integration.ts';
 import { enumerateProviderCandidates, type ChatCandidate } from '../shared/candidates.ts';
+import { aliasFailureFromError } from '../shared/errors.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ModelEndpoints, ProtocolFrame } from '@floway-dev/protocols/common';
 import type { ResponsesInputItem, ResponsesPayload, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
diff --git a/packages/gateway/src/data-plane/chat/shared/errors.ts b/packages/gateway/src/data-plane/chat/shared/errors.ts
index 8e708729e..599b529b9 100644
--- a/packages/gateway/src/data-plane/chat/shared/errors.ts
+++ b/packages/gateway/src/data-plane/chat/shared/errors.ts
@@ -1,3 +1,5 @@
+import type { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
+
 // Failures a protocol can render before reaching an upstream; unexpected
 // throws bubble as-is. `failedUpstreams` on model-{missing,unsupported}
 // carries the upstream names whose catalog fetch threw during this
@@ -16,6 +18,14 @@ export type ChatServeFailure =
 
 export { aliasNoTargetMessage } from '../../model-aliases/resolve.ts';
 
+// Lift `AliasNoTargetAvailableError` into a `ChatServeFailure` so the
+// existing failure renderer can surface it without special-casing.
+export const aliasFailureFromError = (error: AliasNoTargetAvailableError): Extract<ChatServeFailure, { kind: 'alias-no-target-available' }> => ({
+  kind: 'alias-no-target-available',
+  aliasName: error.aliasName,
+  targetCount: error.targetCount,
+});
+
 class ChatServeFailureError extends Error {
   readonly failure: ChatServeFailure;
 
diff --git a/packages/gateway/src/data-plane/model-aliases/resolve.ts b/packages/gateway/src/data-plane/model-aliases/resolve.ts
index dfb3d287b..ac2bb9213 100644
--- a/packages/gateway/src/data-plane/model-aliases/resolve.ts
+++ b/packages/gateway/src/data-plane/model-aliases/resolve.ts
@@ -59,14 +59,6 @@ export class AliasNoTargetAvailableError extends Error {
   }
 }
 
-// Lift `AliasNoTargetAvailableError` into a `ChatServeFailure` so the
-// existing failure renderer can surface it without special-casing.
-export const aliasFailureFromError = (error: AliasNoTargetAvailableError): { kind: 'alias-no-target-available'; aliasName: string; targetCount: number } => ({
-  kind: 'alias-no-target-available',
-  aliasName: error.aliasName,
-  targetCount: error.targetCount,
-});
-
 interface ResolveAliasArgs {
   readonly modelName: string;
   readonly endpointKind: AliasEndpointKind;

From a12986d68f5f3ad75f7218935f6cb828bae4adcd Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 00:46:25 +0800
Subject: [PATCH 069/170] refactor(aliases): unify alias resolution across
 every data-plane endpoint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Alias resolution becomes a single hook inside `resolveModelForRequest`
(passthrough endpoints — embeddings/images/completions) and
`enumerateProviderCandidates` (chat endpoints — chat-completions,
messages, responses, gemini). Every data-plane endpoint now consults the
alias surface through the exact same code path; the per-endpoint preamble
helpers in `serve-integration.ts` are deleted.

The resolver is now endpoint-blind: alias names are opaque global
mappings, and the routability filter only checks whether a target id
resolves to any enabled binding. Endpoint suitability is the prefix-
routing layer's job — a kind-mismatched call (chat alias hit from
/embeddings) gets the resolved target id back and then 404s naturally at
prefix routing when the target doesn't expose the inbound endpoint.

Concrete shape changes:
- `resolveModelForRequest` and `enumerateProviderCandidates` return an
  optional `aliasResolution` field; chat serves overlay rules and stage
  the `x-floway-alias` response header from it; passthroughServe stamps
  the header and centralises the AliasNoTargetAvailable 404 path.
- `resolveAlias` takes the surrounding model resolver's `providers` and
  `fetcherForUpstream` directly so the per-request catalog stack is paid
  once, not twice. Endpoint-kind and the `ENDPOINTS_FOR_KIND` filter are
  removed.
- `applyChatRulesTo<Protocol>` accepts the resolver's wide `AliasRules`
  union and narrows internally so the chat serves can pass
  `aliasResolution.rules` without an upstream cast.

Spec updated to reflect endpoint-blind resolution and the shared-helper
invocation model.
---
 .../data-plane/chat/chat-completions/serve.ts | 32 ++++---
 .../chat/chat-completions/serve_test.ts       | 19 +++-
 .../src/data-plane/chat/gemini/serve.ts       | 58 +++++++-----
 .../src/data-plane/chat/gemini/serve_test.ts  | 19 +++-
 .../src/data-plane/chat/messages/serve.ts     | 54 ++++++-----
 .../data-plane/chat/messages/serve_test.ts    | 19 +++-
 .../data-plane/chat/responses/serve-prep.ts   | 33 ++++---
 .../data-plane/chat/responses/serve_test.ts   | 19 +++-
 .../src/data-plane/chat/shared/candidates.ts  | 37 +++++++-
 .../src/data-plane/embeddings/serve.ts        | 14 +--
 .../gateway/src/data-plane/images/serve.ts    | 26 +----
 .../src/data-plane/model-aliases/apply.ts     | 68 ++++++++------
 .../src/data-plane/model-aliases/resolve.ts   | 84 ++++++-----------
 .../data-plane/model-aliases/resolve_test.ts  | 81 ++++------------
 .../model-aliases/serve-integration.ts        | 94 -------------------
 .../src/data-plane/providers/registry.ts      | 29 +++++-
 .../data-plane/shared/passthrough-serve.ts    | 21 ++++-
 17 files changed, 340 insertions(+), 367 deletions(-)
 delete mode 100644 packages/gateway/src/data-plane/model-aliases/serve-integration.ts

diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
index 8fc9f06bf..65251e277 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
@@ -1,8 +1,8 @@
 import { chatCompletionsAttempt } from './attempt.ts';
 import { renderChatCompletionsFailure } from './errors.ts';
 import { planChatCompletionsRouting } from './routing.ts';
+import { ALIAS_RESPONSE_HEADER, applyChatRulesToChatCompletions } from '../../model-aliases/apply.ts';
 import { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
-import { resolveAndApplyAliasForChatCompletions } from '../../model-aliases/serve-integration.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import { aliasFailureFromError } from '../shared/errors.ts';
@@ -21,23 +21,29 @@ export interface ChatCompletionsServeGenerateArgs {
 export const chatCompletionsServe = {
   generate: async (args: ChatCompletionsServeGenerateArgs): Promise<ExecuteResult<ProtocolFrame<ChatCompletionsStreamEvent>>> => {
     const { payload, ctx, store, headers } = args;
+    let enumerated;
     try {
-      await resolveAndApplyAliasForChatCompletions(payload, ctx);
+      enumerated = await enumerateProviderCandidates({
+        upstreamIds: ctx.upstreamIds,
+        model: payload.model,
+        pickTarget: endpoints =>
+          endpoints.chatCompletions ? 'chat-completions'
+            : endpoints.messages ? 'messages'
+              : endpoints.responses ? 'responses'
+                : null,
+        scheduler: ctx.backgroundScheduler,
+        currentColo: ctx.currentColo,
+      });
     } catch (error) {
       if (error instanceof AliasNoTargetAvailableError) return renderChatCompletionsFailure(aliasFailureFromError(error));
       throw error;
     }
-    const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
-      upstreamIds: ctx.upstreamIds,
-      model: payload.model,
-      pickTarget: endpoints =>
-        endpoints.chatCompletions ? 'chat-completions'
-          : endpoints.messages ? 'messages'
-            : endpoints.responses ? 'responses'
-              : null,
-      scheduler: ctx.backgroundScheduler,
-      currentColo: ctx.currentColo,
-    });
+    const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
+    if (aliasResolution) {
+      payload.model = aliasResolution.targetModelId;
+      applyChatRulesToChatCompletions(payload, aliasResolution.rules);
+      ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
+    }
     const decision = await planChatCompletionsRouting({ payload, candidates, store });
     if (decision.kind === 'failure') return renderChatCompletionsFailure(decision.failure);
 
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
index cf1ef6c1e..02d6079cc 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
@@ -18,13 +18,26 @@ const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; read
 const lastCandidatesCall: { model?: string } = {};
 vi.mock('../shared/candidates.ts', async importOriginal => {
   const original = await importOriginal<typeof import('../shared/candidates.ts')>();
+  const { resolveAlias } = await import('../../model-aliases/resolve.ts');
   return {
     ...original,
-    enumerateProviderCandidates: vi.fn(async (args: { model: string }) => {
-      lastCandidatesCall.model = args.model;
+    enumerateProviderCandidates: vi.fn(async (args: { model: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
+      // Mirror the real entry's alias resolution so the rule-overlay test
+      // sees the resolved target id reach the candidates layer and the
+      // serve overlays rules from the returned `aliasResolution`. Tests
+      // queue the resolution via `aliasResolutionQueue`.
+      const aliasResolution = await resolveAlias({
+        modelName: args.model,
+        providers: [],
+        fetcherForUpstream: () => directFetcher,
+        scheduler: args.scheduler,
+        repo: { getByName: () => Promise.resolve(null) } as never,
+      });
+      const effectiveModel = aliasResolution?.targetModelId ?? args.model;
+      lastCandidatesCall.model = effectiveModel;
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('serve_test: no candidates enqueued');
-      return next;
+      return { ...next, failedUpstreams: [], ...(aliasResolution !== null ? { aliasResolution } : {}) };
     }),
   };
 });
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts
index f12feee71..3a949ec27 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts
@@ -1,8 +1,8 @@
 import { geminiAttempt } from './attempt.ts';
 import { renderGeminiFailure } from './errors.ts';
 import { planGeminiRouting } from './routing.ts';
+import { ALIAS_RESPONSE_HEADER, applyChatRulesToGemini } from '../../model-aliases/apply.ts';
 import { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
-import { resolveAndApplyAliasForGemini } from '../../model-aliases/serve-integration.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import { aliasFailureFromError } from '../shared/errors.ts';
@@ -33,22 +33,27 @@ export interface GeminiServeCountTokensArgs {
 export const geminiServe = {
   generate: async (args: GeminiServeGenerateArgs): Promise<ExecuteResult<ProtocolFrame<GeminiStreamEvent>>> => {
     const { payload, ctx, store, headers } = args;
-    let model: string;
+    let enumerated;
     try {
-      model = await resolveAndApplyAliasForGemini(args.model, payload, ctx);
+      enumerated = await enumerateProviderCandidates({
+        upstreamIds: ctx.upstreamIds,
+        model: args.model,
+        // Gemini has no native upstream target in the provider API; prefer
+        // Chat Completions, then Messages, then Responses.
+        pickTarget: endpoints => endpoints.chatCompletions ? 'chat-completions' : endpoints.messages ? 'messages' : endpoints.responses ? 'responses' : null,
+        scheduler: ctx.backgroundScheduler,
+        currentColo: ctx.currentColo,
+      });
     } catch (error) {
       if (error instanceof AliasNoTargetAvailableError) return renderGeminiFailure(aliasFailureFromError(error), 'generate');
       throw error;
     }
-    const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
-      upstreamIds: ctx.upstreamIds,
-      model,
-      // Gemini has no native upstream target in the provider API; prefer
-      // Chat Completions, then Messages, then Responses.
-      pickTarget: endpoints => endpoints.chatCompletions ? 'chat-completions' : endpoints.messages ? 'messages' : endpoints.responses ? 'responses' : null,
-      scheduler: ctx.backgroundScheduler,
-      currentColo: ctx.currentColo,
-    });
+    const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
+    const model = aliasResolution?.targetModelId ?? args.model;
+    if (aliasResolution) {
+      applyChatRulesToGemini(payload, aliasResolution.rules);
+      ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
+    }
     const decision = await planGeminiRouting({ payload, candidates, store });
     if (decision.kind === 'failure') return renderGeminiFailure(decision.failure, 'generate');
 
@@ -70,23 +75,28 @@ export const geminiServe = {
 
   countTokens: async (args: GeminiServeCountTokensArgs): Promise<ExecuteResult<ProtocolFrame<GeminiStreamEvent>> | PlainResult> => {
     const { payload, ctx, store, headers } = args;
-    let model: string;
+    let enumerated;
     try {
-      model = await resolveAndApplyAliasForGemini(args.model, payload, ctx);
+      enumerated = await enumerateProviderCandidates({
+        upstreamIds: ctx.upstreamIds,
+        model: args.model,
+        // Gemini countTokens has no native upstream support; only providers
+        // exposing the Messages endpoint qualify because we translate Gemini
+        // → Messages and call Messages count_tokens upstream.
+        pickTarget: endpoints => endpoints.messages ? 'messages' : null,
+        scheduler: ctx.backgroundScheduler,
+        currentColo: ctx.currentColo,
+      });
     } catch (error) {
       if (error instanceof AliasNoTargetAvailableError) return renderGeminiFailure(aliasFailureFromError(error), 'countTokens');
       throw error;
     }
-    const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
-      upstreamIds: ctx.upstreamIds,
-      model,
-      // Gemini countTokens has no native upstream support; only providers
-      // exposing the Messages endpoint qualify because we translate Gemini
-      // → Messages and call Messages count_tokens upstream.
-      pickTarget: endpoints => endpoints.messages ? 'messages' : null,
-      scheduler: ctx.backgroundScheduler,
-      currentColo: ctx.currentColo,
-    });
+    const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
+    const model = aliasResolution?.targetModelId ?? args.model;
+    if (aliasResolution) {
+      applyChatRulesToGemini(payload, aliasResolution.rules);
+      ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
+    }
     const decision = await planGeminiRouting({ payload, candidates, store });
     if (decision.kind === 'failure') return renderGeminiFailure(decision.failure, 'countTokens');
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
index 771c26c7e..e1a535ef3 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
@@ -17,13 +17,26 @@ const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; read
 const lastCandidatesCall: { model?: string } = {};
 vi.mock('../shared/candidates.ts', async importOriginal => {
   const original = await importOriginal<typeof import('../shared/candidates.ts')>();
+  const { resolveAlias } = await import('../../model-aliases/resolve.ts');
   return {
     ...original,
-    enumerateProviderCandidates: vi.fn(async (args: { model: string }) => {
-      lastCandidatesCall.model = args.model;
+    enumerateProviderCandidates: vi.fn(async (args: { model: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
+      // Mirror the real entry's alias resolution so the rule-overlay test
+      // sees the resolved target id reach the candidates layer and the
+      // serve overlays rules from the returned `aliasResolution`. Tests
+      // queue the resolution via `aliasResolutionQueue`.
+      const aliasResolution = await resolveAlias({
+        modelName: args.model,
+        providers: [],
+        fetcherForUpstream: () => directFetcher,
+        scheduler: args.scheduler,
+        repo: { getByName: () => Promise.resolve(null) } as never,
+      });
+      const effectiveModel = aliasResolution?.targetModelId ?? args.model;
+      lastCandidatesCall.model = effectiveModel;
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('serve_test: no candidates enqueued');
-      return next;
+      return { ...next, failedUpstreams: [], ...(aliasResolution !== null ? { aliasResolution } : {}) };
     }),
   };
 });
diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts
index fa1b966e2..3c99f718d 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve.ts
@@ -1,8 +1,8 @@
 import { messagesAttempt } from './attempt.ts';
 import { renderMessagesFailure } from './errors.ts';
 import { planMessagesRouting } from './routing.ts';
+import { ALIAS_RESPONSE_HEADER, applyChatRulesToMessages } from '../../model-aliases/apply.ts';
 import { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
-import { resolveAndApplyAliasForMessages } from '../../model-aliases/serve-integration.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import { aliasFailureFromError } from '../shared/errors.ts';
@@ -28,23 +28,29 @@ export interface MessagesServeCountTokensArgs {
 export const messagesServe = {
   generate: async (args: MessagesServeGenerateArgs): Promise<ExecuteResult<ProtocolFrame<MessagesStreamEvent>>> => {
     const { payload, ctx, store, headers } = args;
+    let enumerated;
     try {
-      await resolveAndApplyAliasForMessages(payload, ctx);
+      enumerated = await enumerateProviderCandidates({
+        upstreamIds: ctx.upstreamIds,
+        model: payload.model,
+        pickTarget: endpoints =>
+          endpoints.messages ? 'messages'
+            : endpoints.responses ? 'responses'
+              : endpoints.chatCompletions ? 'chat-completions'
+                : null,
+        scheduler: ctx.backgroundScheduler,
+        currentColo: ctx.currentColo,
+      });
     } catch (error) {
       if (error instanceof AliasNoTargetAvailableError) return renderMessagesFailure(aliasFailureFromError(error), 'generate');
       throw error;
     }
-    const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
-      upstreamIds: ctx.upstreamIds,
-      model: payload.model,
-      pickTarget: endpoints =>
-        endpoints.messages ? 'messages'
-          : endpoints.responses ? 'responses'
-            : endpoints.chatCompletions ? 'chat-completions'
-              : null,
-      scheduler: ctx.backgroundScheduler,
-      currentColo: ctx.currentColo,
-    });
+    const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
+    if (aliasResolution) {
+      payload.model = aliasResolution.targetModelId;
+      applyChatRulesToMessages(payload, aliasResolution.rules);
+      ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
+    }
     const decision = await planMessagesRouting({ payload, candidates, store });
     if (decision.kind === 'failure') return renderMessagesFailure(decision.failure, 'generate');
 
@@ -66,19 +72,25 @@ export const messagesServe = {
 
   countTokens: async (args: MessagesServeCountTokensArgs): Promise<ExecuteResult<ProtocolFrame<MessagesStreamEvent>> | PlainResult> => {
     const { payload, ctx, store, headers } = args;
+    let enumerated;
     try {
-      await resolveAndApplyAliasForMessages(payload, ctx);
+      enumerated = await enumerateProviderCandidates({
+        upstreamIds: ctx.upstreamIds,
+        model: payload.model,
+        pickTarget: endpoints => endpoints.messages ? 'messages' : null,
+        scheduler: ctx.backgroundScheduler,
+        currentColo: ctx.currentColo,
+      });
     } catch (error) {
       if (error instanceof AliasNoTargetAvailableError) return renderMessagesFailure(aliasFailureFromError(error), 'countTokens');
       throw error;
     }
-    const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
-      upstreamIds: ctx.upstreamIds,
-      model: payload.model,
-      pickTarget: endpoints => endpoints.messages ? 'messages' : null,
-      scheduler: ctx.backgroundScheduler,
-      currentColo: ctx.currentColo,
-    });
+    const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
+    if (aliasResolution) {
+      payload.model = aliasResolution.targetModelId;
+      applyChatRulesToMessages(payload, aliasResolution.rules);
+      ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
+    }
     const decision = await planMessagesRouting({ payload, candidates, store });
     if (decision.kind === 'failure') return renderMessagesFailure(decision.failure, 'countTokens');
 
diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
index 36c0ccda9..6fcf560c0 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
@@ -15,13 +15,26 @@ const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; read
 const lastCandidatesCall: { model?: string } = {};
 vi.mock('../shared/candidates.ts', async importOriginal => {
   const original = await importOriginal<typeof import('../shared/candidates.ts')>();
+  const { resolveAlias } = await import('../../model-aliases/resolve.ts');
   return {
     ...original,
-    enumerateProviderCandidates: vi.fn(async (args: { model: string }) => {
-      lastCandidatesCall.model = args.model;
+    enumerateProviderCandidates: vi.fn(async (args: { model: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
+      // Mirror the real entry's alias resolution so the rule-overlay test
+      // sees the resolved target id reach the candidates layer and the
+      // serve overlays rules from the returned `aliasResolution`. Tests
+      // queue the resolution via `aliasResolutionQueue`.
+      const aliasResolution = await resolveAlias({
+        modelName: args.model,
+        providers: [],
+        fetcherForUpstream: () => directFetcher,
+        scheduler: args.scheduler,
+        repo: { getByName: () => Promise.resolve(null) } as never,
+      });
+      const effectiveModel = aliasResolution?.targetModelId ?? args.model;
+      lastCandidatesCall.model = effectiveModel;
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('serve_test: no candidates enqueued');
-      return next;
+      return { ...next, failedUpstreams: [], ...(aliasResolution !== null ? { aliasResolution } : {}) };
     }),
   };
 });
diff --git a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
index 294916922..5339a585c 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
@@ -1,8 +1,8 @@
 import { renderResponsesFailure } from './errors.ts';
 import type { StatefulResponsesStore } from './items/store.ts';
 import { planResponsesRouting } from './routing.ts';
+import { ALIAS_RESPONSE_HEADER, applyChatRulesToResponses } from '../../model-aliases/apply.ts';
 import { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
-import { resolveAndApplyAliasForResponses } from '../../model-aliases/serve-integration.ts';
 import { enumerateProviderCandidates, type ChatCandidate } from '../shared/candidates.ts';
 import { aliasFailureFromError } from '../shared/errors.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
@@ -79,10 +79,11 @@ export type ResponsesServePlan =
 
 // Runs the shared serve-side prep both `responsesServe.generate` and
 // `responsesServe.compact` need before dispatching to `responsesAttempt`:
-// expand any `previous_response_id`, enumerate candidates, plan routing,
-// stage the user input, and pick the first candidate. Returns a rendered
-// failure result when no candidate is viable so the caller can surface it
-// directly without re-deriving the model-error branch.
+// expand any `previous_response_id`, enumerate candidates (which internally
+// runs alias resolution), overlay alias rules + stage the response header,
+// plan routing, stage the user input, and pick the first candidate. Returns
+// a rendered failure result when no candidate is viable so the caller can
+// surface it directly without re-deriving the model-error branch.
 export const prepareResponsesServePlan = async (args: {
   readonly payload: ResponsesPayload;
   readonly ctx: GatewayCtx;
@@ -91,19 +92,25 @@ export const prepareResponsesServePlan = async (args: {
 }): Promise<ResponsesServePlan> => {
   const { payload, ctx, store, pickTarget } = args;
   const prepared = await expandPreviousResponseId(payload, store);
+  let enumerated;
   try {
-    await resolveAndApplyAliasForResponses(prepared, ctx);
+    enumerated = await enumerateProviderCandidates({
+      upstreamIds: ctx.upstreamIds,
+      model: prepared.model,
+      pickTarget,
+      scheduler: ctx.backgroundScheduler,
+      currentColo: ctx.currentColo,
+    });
   } catch (error) {
     if (error instanceof AliasNoTargetAvailableError) return { kind: 'failure', result: renderResponsesFailure(aliasFailureFromError(error)) };
     throw error;
   }
-  const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
-    upstreamIds: ctx.upstreamIds,
-    model: prepared.model,
-    pickTarget,
-    scheduler: ctx.backgroundScheduler,
-    currentColo: ctx.currentColo,
-  });
+  const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
+  if (aliasResolution) {
+    prepared.model = aliasResolution.targetModelId;
+    applyChatRulesToResponses(prepared, aliasResolution.rules);
+    ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
+  }
   const decision = await planResponsesRouting({ payload: prepared, candidates, store });
   if (decision.kind === 'failure') return { kind: 'failure', result: renderResponsesFailure(decision.failure) };
   // Stage the user-supplied input from the original payload — not the
diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
index f9cf44eac..c6a210543 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
@@ -25,13 +25,26 @@ const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; read
 const lastCandidatesCall: { model?: string } = {};
 vi.mock('../shared/candidates.ts', async importOriginal => {
   const original = await importOriginal<typeof import('../shared/candidates.ts')>();
+  const { resolveAlias } = await import('../../model-aliases/resolve.ts');
   return {
     ...original,
-    enumerateProviderCandidates: vi.fn(async (args: { model: string }) => {
-      lastCandidatesCall.model = args.model;
+    enumerateProviderCandidates: vi.fn(async (args: { model: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
+      // Mirror the real entry's alias resolution so the rule-overlay test
+      // sees the resolved target id reach the candidates layer and the
+      // serve overlays rules from the returned `aliasResolution`. Tests
+      // queue the resolution via `aliasResolutionQueue`.
+      const aliasResolution = await resolveAlias({
+        modelName: args.model,
+        providers: [],
+        fetcherForUpstream: () => directFetcher,
+        scheduler: args.scheduler,
+        repo: { getByName: () => Promise.resolve(null) } as never,
+      });
+      const effectiveModel = aliasResolution?.targetModelId ?? args.model;
+      lastCandidatesCall.model = effectiveModel;
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('serve_test: no candidates enqueued');
-      return next;
+      return { ...next, failedUpstreams: [], ...(aliasResolution !== null ? { aliasResolution } : {}) };
     }),
   };
 });
diff --git a/packages/gateway/src/data-plane/chat/shared/candidates.ts b/packages/gateway/src/data-plane/chat/shared/candidates.ts
index 591f44f55..229284edb 100644
--- a/packages/gateway/src/data-plane/chat/shared/candidates.ts
+++ b/packages/gateway/src/data-plane/chat/shared/candidates.ts
@@ -1,4 +1,6 @@
 import { createPerRequestFetcher } from '../../../dial/per-request.ts';
+import { getRepo } from '../../../repo/index.ts';
+import { type AliasResolution, resolveAlias } from '../../model-aliases/resolve.ts';
 import { collectInterpretationOutcomes, enumerateModelInterpretations, listModelProviders } from '../../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { ModelEndpoints } from '@floway-dev/protocols/common';
@@ -13,7 +15,10 @@ export type ChatCandidate = ProviderCandidate;
 // "model is missing entirely" failure from "model exists but does not
 // expose the endpoint this source needs", plus the names of upstreams
 // whose catalog fetch rejected this round so the caller's failure
-// renderer can surface them parenthetically.
+// renderer can surface them parenthetically. Alias resolution runs inside
+// this entry — when the inbound id resolves through an alias, the resolution
+// is returned on `aliasResolution` (the key is omitted otherwise) so the
+// caller can overlay rules and stage the `x-floway-alias` response header.
 export const enumerateProviderCandidates = async ({
   upstreamIds, model, pickTarget, scheduler, currentColo,
 }: {
@@ -29,10 +34,29 @@ export const enumerateProviderCandidates = async ({
   // into the per-request fetcher so colo-scoped fallback entries can be
   // honoured at dial time.
   currentColo: string;
-}): Promise<{ readonly candidates: readonly ChatCandidate[]; readonly sawModel: boolean; readonly failedUpstreams: readonly string[] }> => {
+}): Promise<{
+  readonly candidates: readonly ChatCandidate[];
+  readonly sawModel: boolean;
+  readonly failedUpstreams: readonly string[];
+  readonly aliasResolution?: AliasResolution;
+}> => {
   const fetcherForUpstream = await createPerRequestFetcher(currentColo);
   const providers = await listModelProviders(upstreamIds);
 
+  // Alias resolution runs above prefix routing so every data-plane endpoint
+  // sees the same alias surface. The target id is fed verbatim into prefix
+  // routing; alias names never re-enter the alias layer.
+  // `AliasNoTargetAvailableError` propagates so the chat serve's catch maps
+  // it to its protocol-native 404.
+  const aliasResolution = await resolveAlias({
+    modelName: model,
+    providers,
+    fetcherForUpstream,
+    scheduler,
+    repo: getRepo().modelAliases,
+  });
+  const effectiveModel = aliasResolution?.targetModelId ?? model;
+
   // Each (provider, lookupId) interpretation describes one way the inbound
   // id can address an upstream — bare form for `[unprefixed]`-addressable
   // upstreams, stripped form for `[prefixed]`-addressable upstreams when the
@@ -41,7 +65,7 @@ export const enumerateProviderCandidates = async ({
   // `resolveModelForRequest`; first-viable-wins ordering follows configured
   // sort_order across upstreams, with the unprefixed interpretation pushed
   // before the prefixed one within a single upstream.
-  const interpretations = enumerateModelInterpretations(model, providers);
+  const interpretations = enumerateModelInterpretations(effectiveModel, providers);
   const { resolutions, failedUpstreams } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
 
   const candidates: ChatCandidate[] = [];
@@ -54,5 +78,10 @@ export const enumerateProviderCandidates = async ({
     candidates.push({ provider, binding: resolved.binding, targetApi, fetcher: fetcherForUpstream(provider.upstream) });
   }
 
-  return { candidates, sawModel, failedUpstreams };
+  return {
+    candidates,
+    sawModel,
+    failedUpstreams,
+    ...(aliasResolution !== null ? { aliasResolution } : {}),
+  };
 };
diff --git a/packages/gateway/src/data-plane/embeddings/serve.ts b/packages/gateway/src/data-plane/embeddings/serve.ts
index 3d83cd1e9..9c33e6736 100644
--- a/packages/gateway/src/data-plane/embeddings/serve.ts
+++ b/packages/gateway/src/data-plane/embeddings/serve.ts
@@ -5,8 +5,6 @@ import type { Context } from 'hono';
 
 import { createGatewayCtxFromHono, finalizeGatewayResponse } from '../chat/shared/gateway-ctx.ts';
 import { readRequestBody } from '../chat/shared/request-body.ts';
-import { AliasNoTargetAvailableError } from '../model-aliases/resolve.ts';
-import { resolveAliasForPassthrough } from '../model-aliases/serve-integration.ts';
 import { passthroughApiError, passthroughServe } from '../shared/passthrough-serve.ts';
 import { tokenUsageFromEmbeddingsBody } from '../shared/telemetry/usage.ts';
 
@@ -55,21 +53,11 @@ export const embeddings = async (c: Context): Promise<Response> => {
   }
 
   ctx.dump?.requestedModel(request.model);
-  let resolvedModel: string;
-  try {
-    resolvedModel = await resolveAliasForPassthrough(request.model, 'embedding', ctx);
-  } catch (error) {
-    if (error instanceof AliasNoTargetAvailableError) {
-      ctx.dump?.error('gateway');
-      return finalizeGatewayResponse(ctx, passthroughApiError(c, error.message, 404));
-    }
-    throw error;
-  }
   const response = await passthroughServe({
     c,
     ctx,
     sourceApi: '/embeddings',
-    model: resolvedModel,
+    model: request.model,
     bindingServesEndpoint: binding => binding.upstreamModel.endpoints.embeddings !== undefined,
     call: async (binding, opts) => {
       const { model: _model, ...body } = request.body;
diff --git a/packages/gateway/src/data-plane/images/serve.ts b/packages/gateway/src/data-plane/images/serve.ts
index ed3c7891d..58f8a7a25 100644
--- a/packages/gateway/src/data-plane/images/serve.ts
+++ b/packages/gateway/src/data-plane/images/serve.ts
@@ -12,8 +12,6 @@ import type { Context } from 'hono';
 
 import { createGatewayCtxFromHono, finalizeGatewayResponse } from '../chat/shared/gateway-ctx.ts';
 import { readRequestBody } from '../chat/shared/request-body.ts';
-import { AliasNoTargetAvailableError } from '../model-aliases/resolve.ts';
-import { resolveAliasForPassthrough } from '../model-aliases/serve-integration.ts';
 import { passthroughApiError, passthroughServe } from '../shared/passthrough-serve.ts';
 import { tokenUsageFromImagesBody } from '../shared/telemetry/usage.ts';
 
@@ -54,21 +52,11 @@ export const imagesGenerations = async (c: Context): Promise<Response> => {
   }
 
   ctx.dump?.requestedModel(request.model);
-  let resolvedModel: string;
-  try {
-    resolvedModel = await resolveAliasForPassthrough(request.model, 'image', ctx);
-  } catch (error) {
-    if (error instanceof AliasNoTargetAvailableError) {
-      ctx.dump?.error('gateway');
-      return finalizeGatewayResponse(ctx, passthroughApiError(c, error.message, 404));
-    }
-    throw error;
-  }
   const response = await passthroughServe({
     c,
     ctx,
     sourceApi: '/images/generations',
-    model: resolvedModel,
+    model: request.model,
     bindingServesEndpoint: binding => binding.upstreamModel.endpoints.imagesGenerations !== undefined,
     call: (binding, opts) => {
       const { model: _model, ...body } = request.body;
@@ -103,21 +91,11 @@ export const imagesEdits = async (c: Context): Promise<Response> => {
   }
 
   ctx.dump?.requestedModel(modelRaw);
-  let resolvedModel: string;
-  try {
-    resolvedModel = await resolveAliasForPassthrough(modelRaw, 'image', ctx);
-  } catch (error) {
-    if (error instanceof AliasNoTargetAvailableError) {
-      ctx.dump?.error('gateway');
-      return finalizeGatewayResponse(ctx, passthroughApiError(c, error.message, 404));
-    }
-    throw error;
-  }
   const response = await passthroughServe({
     c,
     ctx,
     sourceApi: '/images/edits',
-    model: resolvedModel,
+    model: modelRaw,
     bindingServesEndpoint: binding => binding.upstreamModel.endpoints.imagesEdits !== undefined,
     call: (binding, opts) => {
       // ModelProvider.callImagesEdits takes ownership of the FormData and
diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts
index 5d73c4448..6f89d822d 100644
--- a/packages/gateway/src/data-plane/model-aliases/apply.ts
+++ b/packages/gateway/src/data-plane/model-aliases/apply.ts
@@ -1,30 +1,42 @@
 // Per-protocol rule overlay. Alias rules overwrite IR fields they name;
-// fields the target IR cannot express are silently dropped.
+// fields the target IR cannot express are silently dropped. The functions
+// accept the resolver's wide `AliasRules` union and cast it to
+// `ChatAliasRules` without a runtime check — non-chat aliases carry an empty
+// rules object, so every chat-only field is undefined and the overlay is a no-op.
 
 import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
-import type { ChatAliasRules } from '@floway-dev/protocols/common';
+import type { AliasRules, ChatAliasRules } from '@floway-dev/protocols/common';
 import type { GeminiPayload } from '@floway-dev/protocols/gemini';
 import type { MessagesPayload } from '@floway-dev/protocols/messages';
 import type { ResponsesPayload } from '@floway-dev/protocols/responses';
 
+// Per-request response header that names the alias the inbound id resolved
+// through. Downstream observability ties together "client asked for X" /
+// "upstream saw Y" via this header.
+export const ALIAS_RESPONSE_HEADER = 'x-floway-alias';
+
+const asChat = (rules: AliasRules): ChatAliasRules => rules as ChatAliasRules;
+
 const hasReasoning = (rules: ChatAliasRules): rules is ChatAliasRules & { reasoning: NonNullable<ChatAliasRules['reasoning']> } =>
   rules.reasoning !== undefined;
 
-export const applyChatRulesToChatCompletions = (body: ChatCompletionsPayload, rules: ChatAliasRules): void => {
-  if (hasReasoning(rules)) {
-    const { effort, budget_tokens, adaptive, summary } = rules.reasoning;
+export const applyChatRulesToChatCompletions = (body: ChatCompletionsPayload, rules: AliasRules): void => {
+  const chat = asChat(rules);
+  if (hasReasoning(chat)) {
+    const { effort, budget_tokens, adaptive, summary } = chat.reasoning;
     if (effort !== undefined) body.reasoning_effort = effort;
     if (budget_tokens !== undefined) body.thinking_budget = budget_tokens;
     if (adaptive !== undefined) body.adaptive_thinking = adaptive;
     if (summary !== undefined) body.reasoning_summary = summary;
   }
-  if (rules.verbosity !== undefined) body.verbosity = rules.verbosity;
-  if (rules.serviceTier !== undefined) body.service_tier = rules.serviceTier;
+  if (chat.verbosity !== undefined) body.verbosity = chat.verbosity;
+  if (chat.serviceTier !== undefined) body.service_tier = chat.serviceTier;
 };
 
-export const applyChatRulesToResponses = (body: ResponsesPayload, rules: ChatAliasRules): void => {
-  if (hasReasoning(rules)) {
-    const { effort, budget_tokens, adaptive, summary } = rules.reasoning;
+export const applyChatRulesToResponses = (body: ResponsesPayload, rules: AliasRules): void => {
+  const chat = asChat(rules);
+  if (hasReasoning(chat)) {
+    const { effort, budget_tokens, adaptive, summary } = chat.reasoning;
     if (effort !== undefined || summary !== undefined) {
       const existing = body.reasoning ?? {};
       body.reasoning = {
@@ -36,15 +48,16 @@ export const applyChatRulesToResponses = (body: ResponsesPayload, rules: ChatAli
     if (budget_tokens !== undefined) body.thinking_budget = budget_tokens;
     if (adaptive !== undefined) body.adaptive_thinking = adaptive;
   }
-  if (rules.verbosity !== undefined) {
-    body.text = { ...body.text, verbosity: rules.verbosity };
+  if (chat.verbosity !== undefined) {
+    body.text = { ...body.text, verbosity: chat.verbosity };
   }
-  if (rules.serviceTier !== undefined) body.service_tier = rules.serviceTier;
+  if (chat.serviceTier !== undefined) body.service_tier = chat.serviceTier;
 };
 
-export const applyChatRulesToMessages = (body: MessagesPayload, rules: ChatAliasRules): void => {
-  if (hasReasoning(rules)) {
-    const { effort, budget_tokens, adaptive } = rules.reasoning;
+export const applyChatRulesToMessages = (body: MessagesPayload, rules: AliasRules): void => {
+  const chat = asChat(rules);
+  if (hasReasoning(chat)) {
+    const { effort, budget_tokens, adaptive } = chat.reasoning;
     // Anthropic stores explicit effort in `output_config.effort`; budget /
     // adaptive ride on `thinking.*`. Splitting them so both can be set in
     // the same overlay (effort fixed + budget pinned, e.g.) without one
@@ -58,18 +71,18 @@ export const applyChatRulesToMessages = (body: MessagesPayload, rules: ChatAlias
       body.thinking = { ...body.thinking, type: 'enabled', budget_tokens };
     }
   }
-  if (rules.verbosity !== undefined) body.verbosity = rules.verbosity;
-  if (rules.serviceTier !== undefined) {
+  if (chat.verbosity !== undefined) body.verbosity = chat.verbosity;
+  if (chat.serviceTier !== undefined) {
     // The cross-protocol bridge in translate maps `speed: 'fast'` ↔
     // `service_tier: 'fast'`; on a native Messages target the alias rule
     // `serviceTier: 'fast'` lands on `speed` so the upstream sees Fast Mode
     // through its native field. Other tier values pass through on
     // `service_tier` since Messages's native enum (`auto`/`standard_only`)
     // doesn't model them.
-    if (rules.serviceTier === 'fast') {
+    if (chat.serviceTier === 'fast') {
       body.speed = 'fast';
     } else {
-      body.service_tier = rules.serviceTier;
+      body.service_tier = chat.serviceTier;
     }
   }
 };
@@ -84,9 +97,10 @@ const GEMINI_THINKING_LEVEL_BY_EFFORT: Record<string, 'minimal' | 'low' | 'mediu
   xhigh: 'xhigh',
 };
 
-export const applyChatRulesToGemini = (body: GeminiPayload, rules: ChatAliasRules): void => {
-  if (hasReasoning(rules)) {
-    const { effort, budget_tokens, adaptive } = rules.reasoning;
+export const applyChatRulesToGemini = (body: GeminiPayload, rules: AliasRules): void => {
+  const chat = asChat(rules);
+  if (hasReasoning(chat)) {
+    const { effort, budget_tokens, adaptive } = chat.reasoning;
     // Gemini collapses the three reasoning controls onto one `thinkingConfig`
     // sub-object. Adaptive wins by encoding budget=-1 (Gemini's adaptive
     // sentinel); an explicit budget pins the count; effort sets the level.
@@ -104,10 +118,10 @@ export const applyChatRulesToGemini = (body: GeminiPayload, rules: ChatAliasRule
       body.generationConfig = { ...body.generationConfig, thinkingConfig };
     }
   }
-  if (rules.verbosity !== undefined) {
-    body.generationConfig = { ...body.generationConfig, verbosity: rules.verbosity };
+  if (chat.verbosity !== undefined) {
+    body.generationConfig = { ...body.generationConfig, verbosity: chat.verbosity };
   }
-  if (rules.serviceTier !== undefined) {
-    body.generationConfig = { ...body.generationConfig, serviceTier: rules.serviceTier };
+  if (chat.serviceTier !== undefined) {
+    body.generationConfig = { ...body.generationConfig, serviceTier: chat.serviceTier };
   }
 };
diff --git a/packages/gateway/src/data-plane/model-aliases/resolve.ts b/packages/gateway/src/data-plane/model-aliases/resolve.ts
index ac2bb9213..90f06cd43 100644
--- a/packages/gateway/src/data-plane/model-aliases/resolve.ts
+++ b/packages/gateway/src/data-plane/model-aliases/resolve.ts
@@ -4,30 +4,21 @@
 // never re-enter the alias layer, so recursion is impossible by
 // construction and the shadow-the-real-model pattern (an alias whose first
 // target is its own name) Just Works.
+//
+// The resolver is endpoint-blind: alias names are opaque global mappings
+// and the routability filter only checks whether a target id resolves to
+// any enabled upstream binding. A kind-mismatched call (e.g. a chat alias
+// hit from /embeddings) gets the resolved target id back; if that target
+// does not expose the inbound endpoint, prefix routing surfaces the natural
+// "endpoint not supported" 404. The `AliasKind` on the row only governs UI
+// rule forms and the `/v1/models` listing block.
 
-import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import type { ModelAliasesRepo, ModelAliasRecord } from '../../repo/types.ts';
-import { collectInterpretationOutcomes, enumerateModelInterpretations, listModelProviders } from '../providers/registry.ts';
+import { collectInterpretationOutcomes, enumerateModelInterpretations } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
-import type { AliasKind, AliasRules, ModelEndpointKey } from '@floway-dev/protocols/common';
+import type { AliasRules } from '@floway-dev/protocols/common';
 import type { Fetcher, ModelProviderInstance } from '@floway-dev/provider';
 
-// Endpoint family the inbound request belongs to. Mirrors `AliasKind` but
-// named in the data-plane vocabulary so the resolver argument site reads as
-// "this is the request's endpoint group", not "this is some alias".
-export type AliasEndpointKind = AliasKind;
-
-// The endpoints (`ModelEndpoints` keys) an inbound `AliasEndpointKind` will
-// accept. A target row is considered routable when it resolves to a binding
-// whose `endpoints` map contains any one of these keys. Chat aliases accept
-// any chat surface — the source serve will pick the actual upstream target
-// API when it runs.
-const ENDPOINTS_FOR_KIND: Record<AliasEndpointKind, readonly ModelEndpointKey[]> = {
-  chat: ['chatCompletions', 'responses', 'messages'],
-  embedding: ['embeddings'],
-  image: ['imagesGenerations', 'imagesEdits'],
-};
-
 export interface AliasResolution {
   readonly targetModelId: string;
   readonly rules: AliasRules;
@@ -37,16 +28,14 @@ export interface AliasResolution {
 }
 
 // Canonical wording for the alias-no-target-available 404. The Error class
-// and every protocol-shaped renderer (chat/{*}/errors.ts, embeddings/serve,
-// images/serve) read the same string from here so wording changes land in
-// one place.
+// and every protocol-shaped renderer (chat/{*}/errors.ts, passthroughServe)
+// read the same string from here so wording changes land in one place.
 export const aliasNoTargetMessage = (params: { aliasName: string; targetCount: number }): string =>
   `alias '${params.aliasName}' has ${params.targetCount} target(s); none currently map to an enabled upstream binding`;
 
 // Thrown when the alias name was found but no target currently resolves to
-// an enabled upstream binding that exposes the inbound endpoint. Caught at
-// each protocol's serve seam and surfaced as a 404 in the protocol-specific
-// error envelope.
+// an enabled upstream binding. Caught at each protocol's serve seam and
+// surfaced as a 404 in the protocol-specific error envelope.
 export class AliasNoTargetAvailableError extends Error {
   readonly aliasName: string;
   readonly targetCount: number;
@@ -61,26 +50,23 @@ export class AliasNoTargetAvailableError extends Error {
 
 interface ResolveAliasArgs {
   readonly modelName: string;
-  readonly endpointKind: AliasEndpointKind;
-  // Upstream cap intersected from the per-user + per-api-key whitelists.
-  // null means unrestricted; matches the same parameter on
-  // `enumerateProviderCandidates` / `listModelProviders`.
-  readonly upstreamIds: readonly string[] | null;
   readonly scheduler: BackgroundScheduler;
-  readonly currentColo: string;
+  // The same per-request fetcher and provider list the surrounding model
+  // resolver already built. Sharing them keeps the upstream-list + proxy-
+  // factory cost paid once per request rather than twice.
+  readonly providers: readonly ModelProviderInstance[];
+  readonly fetcherForUpstream: (upstreamId: string) => Fetcher;
   // Injected so tests can hand in a stub; the per-request ctx already owns
   // a concrete one via `getRepo().modelAliases`.
   readonly repo: ModelAliasesRepo;
 }
 
 // Reports true when the target id resolves to at least one enabled upstream
-// binding exposing an endpoint the inbound `endpointKind` cares about.
-// `fetcherForUpstream` and `providers` are passed in (not derived here) so a
-// caller filtering N targets hits the underlying repo / dial factories once,
-// not N times.
+// binding, irrespective of which endpoint that binding exposes. Endpoint
+// suitability is the prefix-routing layer's job; the resolver only proves
+// the target is reachable somewhere in the catalog.
 const candidateIsRoutable = async (
   targetModelId: string,
-  endpointKind: AliasEndpointKind,
   providers: readonly ModelProviderInstance[],
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
@@ -88,9 +74,7 @@ const candidateIsRoutable = async (
   if (providers.length === 0) return false;
   const interpretations = enumerateModelInterpretations(targetModelId, providers);
   const { resolutions } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
-  const accepted = ENDPOINTS_FOR_KIND[endpointKind];
-  return resolutions.some(({ resolved }) =>
-    accepted.some(key => resolved.binding.upstreamModel.endpoints[key] !== undefined));
+  return resolutions.length > 0;
 };
 
 // Pre-pick the available pool ONCE. Order is preserved so
@@ -98,33 +82,21 @@ const candidateIsRoutable = async (
 // uniformly within whatever subset survived availability filtering.
 const buildAvailablePool = async (
   record: ModelAliasRecord,
-  endpointKind: AliasEndpointKind,
-  upstreamIds: readonly string[] | null,
+  providers: readonly ModelProviderInstance[],
+  fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
-  currentColo: string,
 ): Promise<ModelAliasRecord['targets']> => {
-  // Hoist both registry calls out of the per-target loop: their results
-  // depend only on (upstreamIds, currentColo), not on the target id, so the
-  // upstreams-list + proxy-factory cost is paid once per alias instead of
-  // once per target row.
-  const fetcherForUpstream = await createPerRequestFetcher(currentColo);
-  const providers = await listModelProviders(upstreamIds);
   const availability = await Promise.all(record.targets.map(target =>
-    candidateIsRoutable(target.target_model_id, endpointKind, providers, fetcherForUpstream, scheduler)));
+    candidateIsRoutable(target.target_model_id, providers, fetcherForUpstream, scheduler)));
   return record.targets.filter((_, index) => availability[index]);
 };
 
 export const resolveAlias = async (args: ResolveAliasArgs): Promise<AliasResolution | null> => {
-  const { modelName, endpointKind, upstreamIds, scheduler, currentColo, repo } = args;
+  const { modelName, providers, fetcherForUpstream, scheduler, repo } = args;
   const record = await repo.getByName(modelName);
   if (!record) return null;
 
-  // Kind-mismatch is silent: the literal string falls through to prefix
-  // routing, which will 404 on its own if nothing in the catalog matches.
-  // Mirrors the "unknown model" surface a plain id would produce.
-  if (record.kind !== endpointKind) return null;
-
-  const pool = await buildAvailablePool(record, endpointKind, upstreamIds, scheduler, currentColo);
+  const pool = await buildAvailablePool(record, providers, fetcherForUpstream, scheduler);
   if (pool.length === 0) throw new AliasNoTargetAvailableError(record.name, record.targets.length);
 
   const picked = record.selection === 'first-available'
diff --git a/packages/gateway/src/data-plane/model-aliases/resolve_test.ts b/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
index 70d6c60da..cc081660a 100644
--- a/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
+++ b/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
@@ -1,25 +1,22 @@
-// Behavioral coverage for the alias resolver. Mocks `providers/registry.ts`
-// + the per-request fetcher so each test can hand-script which target
-// model ids look routable; the resolver itself runs unmocked, so its
-// filter logic (kind match, availability, selection strategy) is the
-// thing under test.
+// Behavioral coverage for the alias resolver. Mocks the lower-layer
+// catalog seam (`enumerateModelInterpretations` + `collectInterpretationOutcomes`
+// out of `providers/registry.ts`) so each test can hand-script which
+// target model ids look routable; the resolver itself runs unmocked, so
+// its filter logic (availability, selection strategy) is the thing under
+// test. The resolver is endpoint-blind — a target is routable iff it
+// resolves to ANY enabled binding — so the mock no longer differentiates
+// endpoints.
 
 import { test, vi } from 'vitest';
 
 import type { ModelAliasRecord, ModelAliasesRepo } from '../../repo/types.ts';
 import type { ModelInterpretation, ProviderModelResolution } from '../providers/registry.ts';
-import { directFetcher } from '@floway-dev/provider';
+import { directFetcher, type Fetcher } from '@floway-dev/provider';
 import { assert, assertEquals, assertRejects } from '@floway-dev/test-utils';
 
-// Avoid the real `listModelProviders` (which reads the global repo) and the
-// real `collectInterpretationOutcomes` (which goes through the per-request
-// fetcher cache). The mocks let each test stamp which target_model_ids are
-// "routable" right now and which endpoints they expose.
-const routableModels = new Map<string, { endpoints: Record<string, unknown> }>();
-const ALWAYS_ROUTABLE_ENDPOINTS = { chatCompletions: {}, responses: {}, messages: {} };
+const routableModels = new Set<string>();
 
 vi.mock('../providers/registry.ts', () => ({
-  listModelProviders: vi.fn(async () => [{ upstream: 'u_test', name: 'u_test', modelPrefix: null }]),
   enumerateModelInterpretations: vi.fn((modelId: string, providers: readonly { upstream: string }[]): ModelInterpretation[] =>
     providers.map(p => ({ provider: p, lookupId: modelId } as unknown as ModelInterpretation))),
   collectInterpretationOutcomes: vi.fn(async (interpretations: readonly { provider: { upstream: string }; lookupId: string }[]) => ({
@@ -29,18 +26,14 @@ vi.mock('../providers/registry.ts', () => ({
         provider: i.provider,
         resolved: {
           id: i.lookupId,
-          model: { id: i.lookupId, endpoints: routableModels.get(i.lookupId)!.endpoints },
-          binding: { upstream: i.provider.upstream, upstreamModel: { id: i.lookupId, endpoints: routableModels.get(i.lookupId)!.endpoints } },
+          model: { id: i.lookupId, endpoints: {} },
+          binding: { upstream: i.provider.upstream, upstreamModel: { id: i.lookupId, endpoints: {} } },
         } as unknown as ProviderModelResolution,
       })),
     failedUpstreams: [],
   })),
 }));
 
-vi.mock('../../dial/per-request.ts', () => ({
-  createPerRequestFetcher: vi.fn(async () => () => directFetcher),
-}));
-
 const { resolveAlias, AliasNoTargetAvailableError } = await import('./resolve.ts');
 
 const stubRepoFor = (record: ModelAliasRecord | null): ModelAliasesRepo => ({
@@ -65,16 +58,18 @@ const aliasRecord = (overrides: Partial<ModelAliasRecord> = {}): ModelAliasRecor
   ...overrides,
 });
 
+const fetcherForUpstream: (upstreamId: string) => Fetcher = () => directFetcher;
+const providers = [{ upstream: 'u_test', name: 'u_test', modelPrefix: null }] as unknown as Parameters<typeof resolveAlias>[0]['providers'];
+
 const RESOLVE_DEFAULTS = {
-  endpointKind: 'chat' as const,
-  upstreamIds: null,
+  providers,
+  fetcherForUpstream,
   scheduler: () => {},
-  currentColo: 'TEST',
 };
 
 const setRoutable = (...ids: string[]): void => {
   routableModels.clear();
-  for (const id of ids) routableModels.set(id, { endpoints: ALWAYS_ROUTABLE_ENDPOINTS });
+  for (const id of ids) routableModels.add(id);
 };
 
 test('returns null when no alias matches the inbound name', async () => {
@@ -87,7 +82,7 @@ test('returns null when no alias matches the inbound name', async () => {
   assertEquals(result, null);
 });
 
-test('returns the target and rules when kind matches and a single target is available', async () => {
+test('returns the target and rules when a single target is available', async () => {
   setRoutable('gpt-5.4');
   const result = await resolveAlias({
     ...RESOLVE_DEFAULTS,
@@ -100,17 +95,6 @@ test('returns the target and rules when kind matches and a single target is avai
   assertEquals(result.rules, { reasoning: { effort: 'low' } });
 });
 
-test('returns null when the alias kind does not match the inbound endpoint group', async () => {
-  setRoutable('gpt-5.4');
-  const result = await resolveAlias({
-    ...RESOLVE_DEFAULTS,
-    endpointKind: 'embedding',
-    modelName: 'gpt-fast',
-    repo: stubRepoFor(aliasRecord()),
-  });
-  assertEquals(result, null);
-});
-
 test('throws AliasNoTargetAvailableError when the alias exists but no target is currently routable', async () => {
   setRoutable(); // catalog empty
   await assertRejects(
@@ -207,30 +191,3 @@ test('shadow pattern: alias falls back to the second target when the real model
   assertEquals(result.targetModelId, 'gpt-5.4');
   assertEquals(result.rules, { reasoning: { effort: 'low' } });
 });
-
-test('embedding-kind alias accepts only embedding endpoints', async () => {
-  routableModels.clear();
-  routableModels.set('text-embedding-3', { endpoints: { embeddings: {} } });
-  routableModels.set('gpt-5.4', { endpoints: ALWAYS_ROUTABLE_ENDPOINTS });
-
-  const okResult = await resolveAlias({
-    ...RESOLVE_DEFAULTS,
-    endpointKind: 'embedding',
-    modelName: 'embed-fast',
-    repo: stubRepoFor(aliasRecord({ name: 'embed-fast', kind: 'embedding', targets: [{ target_model_id: 'text-embedding-3', rules: {} }] })),
-  });
-  assert(okResult !== null);
-  assertEquals(okResult.targetModelId, 'text-embedding-3');
-
-  await assertRejects(
-    () => resolveAlias({
-      ...RESOLVE_DEFAULTS,
-      endpointKind: 'embedding',
-      modelName: 'embed-fast',
-      // gpt-5.4 is in the catalog but only exposes chat endpoints, so it
-      // cannot satisfy an embedding-kind alias.
-      repo: stubRepoFor(aliasRecord({ name: 'embed-fast', kind: 'embedding', targets: [{ target_model_id: 'gpt-5.4', rules: {} }] })),
-    }),
-    AliasNoTargetAvailableError,
-  );
-});
diff --git a/packages/gateway/src/data-plane/model-aliases/serve-integration.ts b/packages/gateway/src/data-plane/model-aliases/serve-integration.ts
deleted file mode 100644
index 764dff7e5..000000000
--- a/packages/gateway/src/data-plane/model-aliases/serve-integration.ts
+++ /dev/null
@@ -1,94 +0,0 @@
-// Per-protocol alias preamble helpers. Each protocol's serve calls
-// `resolveAndApply<Protocol>` after parsing the inbound payload and before
-// `enumerateProviderCandidates`. The helper looks up the inbound model name,
-// stamps a target's rules onto the IR, stages the `x-floway-alias` response
-// header, and returns the resolved target id for substitution.
-//
-// Returns `null` when the inbound name is not an alias of kind=chat. Throws
-// `AliasNoTargetAvailableError` when the alias exists but every target is
-// currently unroutable — caught at the serve seam.
-
-import { applyChatRulesToChatCompletions, applyChatRulesToGemini, applyChatRulesToMessages, applyChatRulesToResponses } from './apply.ts';
-import { resolveAlias, type AliasResolution } from './resolve.ts';
-import { getRepo } from '../../repo/index.ts';
-import type { GatewayCtx } from '../chat/shared/gateway-ctx.ts';
-import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
-import type { ChatAliasRules } from '@floway-dev/protocols/common';
-import type { GeminiPayload } from '@floway-dev/protocols/gemini';
-import type { MessagesPayload } from '@floway-dev/protocols/messages';
-import type { ResponsesPayload } from '@floway-dev/protocols/responses';
-
-const ALIAS_RESPONSE_HEADER = 'x-floway-alias';
-
-// Common preamble: resolve the alias against the request's chat endpoint
-// group and stage the response header. Returns the resolution (or null) so
-// the caller can apply rules through its protocol's overlay helper. The
-// chat-kind check lives inside the resolver — a kind mismatch silently
-// returns null here.
-const resolveChatAlias = async (modelName: string, ctx: GatewayCtx): Promise<AliasResolution | null> => {
-  const resolution = await resolveAlias({
-    modelName,
-    endpointKind: 'chat',
-    upstreamIds: ctx.upstreamIds,
-    scheduler: ctx.backgroundScheduler,
-    currentColo: ctx.currentColo,
-    repo: getRepo().modelAliases,
-  });
-  if (resolution !== null) ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, resolution.aliasName);
-  return resolution;
-};
-
-// Every chat-kind alias target carries `ChatAliasRules` per the wire
-// schema; the cast is the unavoidable narrowing from the generic union.
-const asChatRules = (rules: AliasResolution['rules']): ChatAliasRules => rules as ChatAliasRules;
-
-export const resolveAndApplyAliasForChatCompletions = async (payload: ChatCompletionsPayload, ctx: GatewayCtx): Promise<void> => {
-  const resolution = await resolveChatAlias(payload.model, ctx);
-  if (!resolution) return;
-  payload.model = resolution.targetModelId;
-  applyChatRulesToChatCompletions(payload, asChatRules(resolution.rules));
-};
-
-export const resolveAndApplyAliasForResponses = async (payload: ResponsesPayload, ctx: GatewayCtx): Promise<void> => {
-  const resolution = await resolveChatAlias(payload.model, ctx);
-  if (!resolution) return;
-  payload.model = resolution.targetModelId;
-  applyChatRulesToResponses(payload, asChatRules(resolution.rules));
-};
-
-export const resolveAndApplyAliasForMessages = async (payload: MessagesPayload, ctx: GatewayCtx): Promise<void> => {
-  const resolution = await resolveChatAlias(payload.model, ctx);
-  if (!resolution) return;
-  payload.model = resolution.targetModelId;
-  applyChatRulesToMessages(payload, asChatRules(resolution.rules));
-};
-
-// Gemini's model id is carried on the URL path, not the body — the caller
-// passes it in alongside the payload and gets the resolved id back so it
-// can substitute into the candidate-enumeration call. The payload is still
-// mutated in place to overlay rules.
-export const resolveAndApplyAliasForGemini = async (model: string, payload: GeminiPayload, ctx: GatewayCtx): Promise<string> => {
-  const resolution = await resolveChatAlias(model, ctx);
-  if (!resolution) return model;
-  applyChatRulesToGemini(payload, asChatRules(resolution.rules));
-  return resolution.targetModelId;
-};
-
-// Passthrough endpoints (embeddings, images) don't carry rules today; the
-// resolver still runs to substitute the target id and stage the response
-// header. Returns the resolved target_model_id (or the original name on
-// miss). Throws `AliasNoTargetAvailableError` on the all-unroutable case
-// like the chat helpers do.
-export const resolveAliasForPassthrough = async (model: string, endpointKind: 'embedding' | 'image', ctx: GatewayCtx): Promise<string> => {
-  const resolution = await resolveAlias({
-    modelName: model,
-    endpointKind,
-    upstreamIds: ctx.upstreamIds,
-    scheduler: ctx.backgroundScheduler,
-    currentColo: ctx.currentColo,
-    repo: getRepo().modelAliases,
-  });
-  if (!resolution) return model;
-  ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, resolution.aliasName);
-  return resolution.targetModelId;
-};
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index 79a4f1c5a..98eddc4c7 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -1,5 +1,6 @@
 import { fetchUpstreamModelsCached } from './models-cache.ts';
 import { getRepo } from '../../repo/index.ts';
+import { type AliasResolution, resolveAlias } from '../model-aliases/resolve.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import { type ModelEndpointKey, type ModelEndpoints, kindForEndpoints } from '@floway-dev/protocols/common';
 import type { InternalModel, ModelProviderInstance, ProviderModelRecord, ResolvedModel, Fetcher, UpstreamModel, UpstreamProviderKind, UpstreamRecord } from '@floway-dev/provider';
@@ -277,6 +278,12 @@ interface ModelResolution {
   // surfaces, but inlined into the per-request 404/400 so a client sees
   // why their model might be temporarily missing.
   failedUpstreams: readonly string[];
+  // Set when the inbound id resolved through the alias layer. Callers
+  // stage the `x-floway-alias` response header from this and ignore it
+  // otherwise. `AliasNoTargetAvailableError` is thrown out of
+  // `resolveModelForRequest` itself when the alias exists but has no
+  // routable target, and is caught at each protocol's serve seam.
+  aliasResolution?: AliasResolution;
 }
 
 export interface ProviderModelResolution {
@@ -369,9 +376,27 @@ export const resolveModelForRequest = async (
     throw new Error(NO_UPSTREAM_CONFIGURED_MESSAGE);
   }
 
-  const interpretations = enumerateModelInterpretations(modelId, providers);
+  // Alias resolution runs above prefix routing so every data-plane endpoint
+  // sees the same alias surface. The target id is then fed verbatim back
+  // into prefix routing; alias names never re-enter the alias layer.
+  // `AliasNoTargetAvailableError` propagates so the protocol's catch maps
+  // it to its native 404.
+  const aliasResolution = await resolveAlias({
+    modelName: modelId,
+    providers,
+    fetcherForUpstream,
+    scheduler,
+    repo: getRepo().modelAliases,
+  });
+  const effectiveModelId = aliasResolution?.targetModelId ?? modelId;
+
+  const interpretations = enumerateModelInterpretations(effectiveModelId, providers);
   const { resolutions, failedUpstreams } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
-  return { matches: resolutions.map(r => r.resolved), failedUpstreams };
+  return {
+    matches: resolutions.map(r => r.resolved),
+    failedUpstreams,
+    ...(aliasResolution !== null ? { aliasResolution } : {}),
+  };
 };
 
 export const resolveModelForProvider = async (
diff --git a/packages/gateway/src/data-plane/shared/passthrough-serve.ts b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
index 1add1a115..0604e51d1 100644
--- a/packages/gateway/src/data-plane/shared/passthrough-serve.ts
+++ b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
@@ -24,6 +24,8 @@ import type { AuthedContext } from '../../middleware/auth.ts';
 import type { TokenUsage } from '../../repo/types.ts';
 import type { GatewayCtx } from '../chat/shared/gateway-ctx.ts';
 import { type StreamCompletion, writeSSEFrames } from '../chat/shared/stream/sse.ts';
+import { ALIAS_RESPONSE_HEADER } from '../model-aliases/apply.ts';
+import { AliasNoTargetAvailableError } from '../model-aliases/resolve.ts';
 import { resolveModelForRequest } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import { doneFrame, eventFrame, parseSSEStream, parseTargetStreamFrames, type ProtocolFrame, sseCommentFrame, sseFrame } from '@floway-dev/protocols/common';
@@ -128,8 +130,23 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
     // the inbound public id. Iteration order follows configured sort_order
     // across upstreams, with the unprefixed interpretation pushed before the
     // prefixed one within a single upstream. The first match whose binding
-    // satisfies the endpoint capability wins.
-    const { matches, failedUpstreams } = await resolveModelForRequest(model, ctx.upstreamIds, fetcherForUpstream, ctx.backgroundScheduler);
+    // satisfies the endpoint capability wins. `resolveModelForRequest` also
+    // owns alias resolution: when the inbound id is an alias, the returned
+    // `aliasResolution` carries the original alias name (for the response
+    // header) and the targets feeding `matches` are the alias's resolved
+    // target id; `AliasNoTargetAvailableError` propagates as the 404 below.
+    let resolution;
+    try {
+      resolution = await resolveModelForRequest(model, ctx.upstreamIds, fetcherForUpstream, ctx.backgroundScheduler);
+    } catch (e) {
+      if (e instanceof AliasNoTargetAvailableError) {
+        ctx.dump?.error('gateway');
+        return passthroughApiError(c, e.message, 404);
+      }
+      throw e;
+    }
+    const { matches, failedUpstreams, aliasResolution } = resolution;
+    if (aliasResolution) ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
     if (matches.length === 0) {
       ctx.dump?.error('gateway');
       return passthroughApiError(c, appendFailedUpstreams(`Model ${model} is not available on any configured upstream.`, failedUpstreams), 404);

From 5dd407a43db5e7e8caf815e73111b7ed75dff292 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 00:49:28 +0800
Subject: [PATCH 070/170] refactor(aliases): make
 ReasoningEffort/Summary/Verbosity/ServiceTier free-form strings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The gateway forwards alias rule values verbatim and never enum-gates
them at the wire boundary, so the protocol-level types and the control-
plane zod schema accept any string. The dashboard's combobox inputs
still pin the canonical presets ('none', 'low', etc.) as type-ahead
suggestions, but operators can type unknown values when an upstream
exposes a preset the gateway has not yet enumerated.

Translate's gemini-local `ReasoningEffort` type (which previously
included `minimal` and `max` outside the protocols enum) collapses to
plain `string` for the same reason — translate never canonicalises
unknown effort values, so the explicit closed set was an artificial
narrowing.
---
 .../components/alias-edit/AliasTargetRow.vue  | 13 +++++-----
 packages/gateway/src/control-plane/schemas.ts | 13 ++++++----
 packages/protocols/src/common/aliases.ts      | 26 ++++++++++++-------
 .../translate/src/shared/gemini-via/gemini.ts | 12 ++++-----
 4 files changed, 38 insertions(+), 26 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasTargetRow.vue b/apps/web/src/components/alias-edit/AliasTargetRow.vue
index 9a5277579..e8aa46aac 100644
--- a/apps/web/src/components/alias-edit/AliasTargetRow.vue
+++ b/apps/web/src/components/alias-edit/AliasTargetRow.vue
@@ -5,7 +5,6 @@ import { computed, ref } from 'vue';
 
 import { computeModelWarnings, computeRuleWarnings, findCatalogModel } from './warnings.ts';
 import type { AliasKind, AliasTarget, ChatAliasRules, ControlPlaneModel } from '../../api/types.ts';
-import type { ReasoningEffort, ReasoningSummary, ServiceTier, Verbosity } from '@floway-dev/protocols/common';
 import { Combobox, Switch, Tooltip } from '@floway-dev/ui';
 
 const target = defineModel<AliasTarget>({ required: true });
@@ -54,19 +53,19 @@ const patchReasoning = (patch: Partial<NonNullable<ChatAliasRules['reasoning']>>
   }
 };
 
-const setEffort = (raw: string) => patchReasoning({ effort: raw === '' ? undefined : (raw as ReasoningEffort) });
-const setSummary = (raw: string) => patchReasoning({ summary: raw === '' ? undefined : (raw as ReasoningSummary) });
+const setEffort = (raw: string) => patchReasoning({ effort: raw === '' ? undefined : raw });
+const setSummary = (raw: string) => patchReasoning({ summary: raw === '' ? undefined : raw });
 const setAdaptive = (on: boolean | undefined) => patchReasoning({ adaptive: on === true ? true : undefined });
 const setVerbosity = (raw: string) => {
   const next = { ...chatRules.value };
   if (raw === '') delete next.verbosity;
-  else next.verbosity = raw as Verbosity;
+  else next.verbosity = raw;
   setRules(next);
 };
 const setServiceTier = (raw: string) => {
   const next = { ...chatRules.value };
   if (raw === '') delete next.serviceTier;
-  else next.serviceTier = raw as ServiceTier;
+  else next.serviceTier = raw;
   setRules(next);
 };
 
@@ -86,7 +85,9 @@ const onBudgetChange = (raw: string) => {
   patchReasoning({ budget_tokens: Number(trimmed) });
 };
 
-// Suggestion lists for chat-rule comboboxes.
+// Suggestion lists for chat-rule comboboxes. Combobox accepts free-form
+// values; these arrays are the canonical presets the dashboard pins as
+// type-ahead hints.
 const EFFORT_ITEMS = ['none', 'low', 'medium', 'high', 'xhigh'];
 const SUMMARY_ITEMS = ['auto', 'concise', 'detailed', 'none'];
 const VERBOSITY_ITEMS = ['low', 'medium', 'high'];
diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
index b42b39bfa..5bb25c538 100644
--- a/packages/gateway/src/control-plane/schemas.ts
+++ b/packages/gateway/src/control-plane/schemas.ts
@@ -597,19 +597,22 @@ export const searchConfigSchema = z.object({
 
 // Per-target chat rules. Field names mirror the IR slot each value overlays.
 // Values forward verbatim — no capability narrowing here, so an operator
-// can drive a feature the catalog hasn't advertised yet.
+// can drive a feature the catalog hasn't advertised yet. The four value
+// fields below (`effort`, `summary`, `verbosity`, `serviceTier`) accept
+// any string for the same reason; the dashboard pins canonical presets as
+// combobox suggestions.
 const chatAliasReasoningSchema = z.object({
-  effort: z.enum(['none', 'low', 'medium', 'high', 'xhigh']).optional(),
+  effort: z.string().min(1).optional(),
   budget_tokens: z.number().int().nonnegative().optional(),
   adaptive: z.boolean().optional(),
-  summary: z.enum(['auto', 'concise', 'detailed', 'none']).optional(),
+  summary: z.string().min(1).optional(),
   mandatory: z.boolean().optional(),
 }).strict();
 
 const chatAliasRulesSchema = z.object({
   reasoning: chatAliasReasoningSchema.optional(),
-  verbosity: z.enum(['low', 'medium', 'high']).optional(),
-  serviceTier: z.enum(['default', 'flex', 'priority', 'scale', 'fast']).optional(),
+  verbosity: z.string().min(1).optional(),
+  serviceTier: z.string().min(1).optional(),
 }).strict();
 
 // Rules are validated against the alias-level kind in the superRefine pass
diff --git a/packages/protocols/src/common/aliases.ts b/packages/protocols/src/common/aliases.ts
index 6958bd8e8..dd12ece40 100644
--- a/packages/protocols/src/common/aliases.ts
+++ b/packages/protocols/src/common/aliases.ts
@@ -22,19 +22,27 @@ export type AliasKind = 'chat' | 'embedding' | 'image';
 // When the pool is empty both strategies surface the same 404 to the caller.
 export type AliasSelection = 'random' | 'first-available';
 
-// Discrete reasoning-effort presets understood across upstreams. `xhigh`
-// matches the wire value Anthropic / OpenAI use for the highest tier.
-export type ReasoningEffort = 'none' | 'low' | 'medium' | 'high' | 'xhigh';
+// Reasoning-effort value forwarded to upstreams. Typed as `string`
+// because the gateway forwards rule values verbatim and never
+// enum-gates them at the wire boundary; the dashboard pins the canonical
+// presets ('none' | 'low' | 'medium' | 'high' | 'xhigh') as combobox
+// suggestions so operators see the typical choices.
+export type ReasoningEffort = string;
 
 // Reasoning-summary verbosity hint emitted on the Responses / Chat surface.
-export type ReasoningSummary = 'auto' | 'concise' | 'detailed' | 'none';
+// String for the same forward-verbatim reason as `ReasoningEffort`;
+// canonical presets are 'auto' | 'concise' | 'detailed' | 'none'.
+export type ReasoningSummary = string;
 
-// Output verbosity hint (OpenAI Responses `verbosity`).
-export type Verbosity = 'low' | 'medium' | 'high';
+// Output verbosity hint (OpenAI Responses `verbosity`). String for the same
+// forward-verbatim reason as `ReasoningEffort`; canonical presets are
+// 'low' | 'medium' | 'high'.
+export type Verbosity = string;
 
-// Per-request service tier the upstream advertises (Anthropic `fast`,
-// OpenAI `priority` / `flex` / `scale`, default tier).
-export type ServiceTier = 'default' | 'flex' | 'priority' | 'scale' | 'fast';
+// Per-request service tier the upstream advertises. String for the same
+// forward-verbatim reason as `ReasoningEffort`; canonical presets are
+// 'default' | 'flex' | 'priority' | 'scale' | 'fast'.
+export type ServiceTier = string;
 
 // Rule overlay applied to a chat-kind alias target. Every field is optional;
 // an absent field leaves the inbound request value untouched. Rule values
diff --git a/packages/translate/src/shared/gemini-via/gemini.ts b/packages/translate/src/shared/gemini-via/gemini.ts
index 024a92a46..636e7f9b6 100644
--- a/packages/translate/src/shared/gemini-via/gemini.ts
+++ b/packages/translate/src/shared/gemini-via/gemini.ts
@@ -117,12 +117,12 @@ export const geminiFunctionResponsePart = (part: GeminiPart, ids: GeminiToolCall
   return { response, id: unmatched?.shift() ?? id };
 };
 
-// Reasoning effort is freeform on the inbound IRs, but the gateway
-// publishes a canonical closed set so translate-side mappers can normalize
-// without rewriting unknown values.
-export type ReasoningEffort = 'none' | 'minimal' | 'low' | 'medium' | 'high' | 'xhigh' | 'max';
+// Reasoning effort is freeform on the inbound IRs — the gateway never
+// enum-gates these values at the wire boundary — so the translate-side
+// mappers below are typed as plain `string`: the effort label for
+// Gemini's `thinkingLevel`, or one derived from `thinkingBudget`.
 
-export const geminiThinkingLevelEffort = (thinkingConfig?: GeminiThinkingConfig): ReasoningEffort | undefined => {
+export const geminiThinkingLevelEffort = (thinkingConfig?: GeminiThinkingConfig): string | undefined => {
   switch (thinkingConfig?.thinkingLevel) {
   case 'minimal':
     return 'minimal';
@@ -141,7 +141,7 @@ export const geminiThinkingLevelEffort = (thinkingConfig?: GeminiThinkingConfig)
   }
 };
 
-export const geminiReasoningEffort = (thinkingConfig?: GeminiThinkingConfig): ReasoningEffort | null => {
+export const geminiReasoningEffort = (thinkingConfig?: GeminiThinkingConfig): string | null => {
   if (!thinkingConfig) return null;
 
   if (thinkingConfig.thinkingBudget !== undefined) {

From 012442b622faedd82db7a3b3077a5b471f90fe24 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 00:51:53 +0800
Subject: [PATCH 071/170] cleanup(aliases): drop the unused
 ChatAliasRules.reasoning.mandatory field

The field was reserved on the rules schema but never had a UI input,
never reached the apply-rules overlay, and produced no observable
behaviour at the wire. The dashboard's `mandatory` toggle lives on the
model capability metadata (ChatModelInfo.reasoning.mandatory), which is
a separate concept and stays.

Spec already aligned in commit A.
---
 apps/web/src/components/alias-edit/warnings.ts | 6 +-----
 packages/gateway/src/control-plane/schemas.ts  | 1 -
 packages/protocols/src/common/aliases.ts       | 2 --
 packages/protocols/src/common/aliases_test.ts  | 4 ++--
 4 files changed, 3 insertions(+), 10 deletions(-)

diff --git a/apps/web/src/components/alias-edit/warnings.ts b/apps/web/src/components/alias-edit/warnings.ts
index 9d7801717..355758388 100644
--- a/apps/web/src/components/alias-edit/warnings.ts
+++ b/apps/web/src/components/alias-edit/warnings.ts
@@ -23,7 +23,7 @@ export const realModelIds = (models: readonly ControlPlaneModel[] | null | undef
 // the form's `data-field` attribute so the dialog can render the warning
 // directly under the input it annotates.
 export interface AliasRuleWarning {
-  field: 'reasoning.effort' | 'reasoning.budget_tokens' | 'reasoning.adaptive' | 'reasoning.summary' | 'reasoning.mandatory' | 'verbosity' | 'serviceTier';
+  field: 'reasoning.effort' | 'reasoning.budget_tokens' | 'reasoning.adaptive' | 'reasoning.summary' | 'verbosity' | 'serviceTier';
   message: string;
 }
 
@@ -63,10 +63,6 @@ export const computeRuleWarnings = (
     out.push({ field: 'reasoning.adaptive', message: 'Target does not advertise adaptive reasoning.' });
   }
 
-  if (rules.reasoning?.mandatory === true && reasoning?.mandatory !== true) {
-    out.push({ field: 'reasoning.mandatory', message: 'Target does not advertise mandatory reasoning.' });
-  }
-
   // Summary, verbosity, and serviceTier carry no catalog metadata; their
   // values forward verbatim and never warn here.
 
diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
index 5bb25c538..99f73bdb4 100644
--- a/packages/gateway/src/control-plane/schemas.ts
+++ b/packages/gateway/src/control-plane/schemas.ts
@@ -606,7 +606,6 @@ const chatAliasReasoningSchema = z.object({
   budget_tokens: z.number().int().nonnegative().optional(),
   adaptive: z.boolean().optional(),
   summary: z.string().min(1).optional(),
-  mandatory: z.boolean().optional(),
 }).strict();
 
 const chatAliasRulesSchema = z.object({
diff --git a/packages/protocols/src/common/aliases.ts b/packages/protocols/src/common/aliases.ts
index dd12ece40..7c1183aec 100644
--- a/packages/protocols/src/common/aliases.ts
+++ b/packages/protocols/src/common/aliases.ts
@@ -54,7 +54,6 @@ export interface ChatAliasRules {
     budget_tokens?: number;
     adaptive?: boolean;
     summary?: ReasoningSummary;
-    mandatory?: boolean;
   };
   verbosity?: Verbosity;
   serviceTier?: ServiceTier;
@@ -100,7 +99,6 @@ const aliasRulePartLabels = (rules: AliasRules): string[] => {
   if (chat.reasoning?.budget_tokens !== undefined) parts.push(`${chat.reasoning.budget_tokens}tok budget`);
   if (chat.reasoning?.adaptive === true) parts.push('adaptive');
   else if (chat.reasoning?.adaptive === false) parts.push('non-adaptive');
-  if (chat.reasoning?.mandatory === true) parts.push('mandatory reasoning');
   if (chat.reasoning?.summary !== undefined) parts.push(`summary: ${chat.reasoning.summary}`);
   if (chat.verbosity !== undefined) parts.push(`${chat.verbosity} verbosity`);
   if (chat.serviceTier !== undefined) parts.push(`${chat.serviceTier} tier`);
diff --git a/packages/protocols/src/common/aliases_test.ts b/packages/protocols/src/common/aliases_test.ts
index 719ff4ee1..558c19fa5 100644
--- a/packages/protocols/src/common/aliases_test.ts
+++ b/packages/protocols/src/common/aliases_test.ts
@@ -27,8 +27,8 @@ describe('formatAliasRulesInline', () => {
 
   test('renders boolean reasoning toggles in their dedicated wording', () => {
     expect(formatAliasRulesInline({
-      reasoning: { adaptive: false, mandatory: true, summary: 'concise' },
-    })).toBe('non-adaptive, mandatory reasoning, summary: concise');
+      reasoning: { adaptive: false, summary: 'concise' },
+    })).toBe('non-adaptive, summary: concise');
   });
 
   test('emits adaptive when reasoning.adaptive is true and budget_tokens when set', () => {

From b7bd3616715ef25bf22381ca045c0428444535ba Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 00:58:24 +0800
Subject: [PATCH 072/170] cleanup(aliases): drop the unused anthropic_beta IR
 extension and Gemini sanitizer
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two pieces of v1-rubble from the extension/sanitizer infrastructure:

1. The `anthropic_beta` Floway extension field on the chat-completions /
   responses / gemini IRs. No production caller emitted it onto an inbound
   request and no rule-apply pass materialized it onto the outbound
   `anthropic-beta` header, so the field, its sanitizer manifest entry,
   and the associated `applyAnthropicBetaToHeaders` /
   `mergeAnthropicBetaTokens` helpers were dead weight. The Messages-body
   validator in `messages/http.ts` continues to reject inbound
   `anthropic_beta` / `betas` keys because the Anthropic protocol carries
   them in a header, not in the body — that path is unrelated to the
   deleted Floway extension and stays.

2. `sanitizeForGeminiUpstream` and `FLOWAY_EXTENSION_FIELDS.gemini`.
   Gemini is never an upstream target — every Gemini-bound request goes
   through translation to chat-completions / responses / messages — so
   the Gemini-specific sanitizer had zero production callers.

`mapSummaryToAnthropicDisplay` was renamed to `mapSummary` and moved below
its sole caller `buildMessagesThinkingFromExtensions` in the same file.
---
 .../src/data-plane/chat/shared/sanitize.ts    |  8 --
 .../data-plane/chat/shared/sanitize_test.ts   | 21 +----
 .../protocols/src/chat-completions/index.ts   |  2 -
 packages/protocols/src/extensions/index.ts    |  8 +-
 packages/protocols/src/gemini/index.ts        |  2 -
 packages/protocols/src/responses/index.ts     |  2 -
 .../chat-completions-via-messages/request.ts  |  3 +-
 .../request_test.ts                           | 14 ---
 .../request_test.ts                           |  2 -
 .../request_test.ts                           | 13 ---
 .../src/gemini-via-messages/request_test.ts   | 17 ----
 .../src/gemini-via-responses/request_test.ts  | 12 ---
 .../messages-via-responses/request_test.ts    |  1 -
 .../request_test.ts                           |  2 -
 .../responses-via-messages/request_test.ts    |  5 -
 .../via-messages/anthropic-extensions.ts      | 94 ++++++-------------
 16 files changed, 34 insertions(+), 172 deletions(-)

diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize.ts b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
index 64f404944..877e5757a 100644
--- a/packages/gateway/src/data-plane/chat/shared/sanitize.ts
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
@@ -35,11 +35,3 @@ export const sanitizeForResponsesUpstream = (body: Record<string, unknown>, trac
 export const sanitizeForMessagesUpstream = (body: Record<string, unknown>, trace?: SanitizeTraceCtx): void => {
   stripKeys(body, FLOWAY_EXTENSION_FIELDS.messages, 'messages', trace);
 };
-
-export const sanitizeForGeminiUpstream = (body: Record<string, unknown>, trace?: SanitizeTraceCtx): void => {
-  stripKeys(body, FLOWAY_EXTENSION_FIELDS.gemini.topLevel, 'gemini', trace);
-  const generationConfig = body.generationConfig;
-  if (generationConfig && typeof generationConfig === 'object') {
-    stripKeys(generationConfig as Record<string, unknown>, FLOWAY_EXTENSION_FIELDS.gemini.generationConfig, 'gemini', trace, 'generationConfig.');
-  }
-};
diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
index 0dd52330d..8c27fc864 100644
--- a/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
@@ -2,7 +2,6 @@ import { test } from 'vitest';
 
 import {
   sanitizeForChatCompletionsUpstream,
-  sanitizeForGeminiUpstream,
   sanitizeForMessagesUpstream,
   sanitizeForResponsesUpstream,
   type SanitizeTraceCtx,
@@ -32,7 +31,7 @@ test('sanitizeForMessagesUpstream strips verbosity and emits one trace line', ()
 test('sanitizeForChatCompletionsUpstream strips Floway extensions and leaves native fields', () => {
   const body: Record<string, unknown> = {
     thinking_budget: 4096,
-    anthropic_beta: ['ctx-1m'],
+    reasoning_summary: 'concise',
     reasoning_effort: 'high',
     model: 'x',
   };
@@ -42,29 +41,15 @@ test('sanitizeForChatCompletionsUpstream strips Floway extensions and leaves nat
   assertEquals(lines.length, 2);
   assertEquals(lines.every(l => l.targetProtocol === 'chat-completions'), true);
   const droppedFields = lines.map(l => l.field).sort();
-  assertEquals(droppedFields, ['anthropic_beta', 'thinking_budget']);
+  assertEquals(droppedFields, ['reasoning_summary', 'thinking_budget']);
 });
 
 test('sanitizeForResponsesUpstream strips extensions without a trace context', () => {
-  const body: Record<string, unknown> = { adaptive_thinking: true, anthropic_beta: ['ctx-1m'] };
+  const body: Record<string, unknown> = { adaptive_thinking: true, thinking_budget: 4096 };
   sanitizeForResponsesUpstream(body);
   assertEquals(body, {});
 });
 
-test('sanitizeForGeminiUpstream walks top-level and generationConfig', () => {
-  const body: Record<string, unknown> = {
-    generationConfig: { verbosity: 'low', thinkingConfig: { thinkingBudget: 100 } },
-    anthropicBeta: ['ctx-1m'],
-  };
-  const { ctx, lines } = makeTrace();
-  sanitizeForGeminiUpstream(body, ctx);
-  assertEquals(body, { generationConfig: { thinkingConfig: { thinkingBudget: 100 } } });
-  assertEquals(lines.length, 2);
-  const droppedFields = lines.map(l => l.field).sort();
-  assertEquals(droppedFields, ['anthropicBeta', 'generationConfig.verbosity']);
-  assertEquals(lines.every(l => l.targetProtocol === 'gemini'), true);
-});
-
 test('sanitizer is idempotent — a second run emits no additional traces', () => {
   const body: Record<string, unknown> = { verbosity: 'low', model: 'x' };
   const { ctx, lines } = makeTrace();
diff --git a/packages/protocols/src/chat-completions/index.ts b/packages/protocols/src/chat-completions/index.ts
index fc8a76d7f..07a502ff5 100644
--- a/packages/protocols/src/chat-completions/index.ts
+++ b/packages/protocols/src/chat-completions/index.ts
@@ -36,8 +36,6 @@ export interface ChatCompletionsPayload {
   adaptive_thinking?: boolean;
   /** Floway protocol extension. Translated to OpenAI Responses `reasoning.summary` / Anthropic `thinking.display` / Gemini `thinkingConfig.includeThoughts` when routed to those upstreams; dropped on OpenAI Chat targets. */
   reasoning_summary?: string;
-  /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. */
-  anthropic_beta?: readonly string[];
 }
 
 export interface ChatCompletionsTool {
diff --git a/packages/protocols/src/extensions/index.ts b/packages/protocols/src/extensions/index.ts
index 00edf50f1..5d64f0e46 100644
--- a/packages/protocols/src/extensions/index.ts
+++ b/packages/protocols/src/extensions/index.ts
@@ -5,11 +5,7 @@
  * extension residue before the upstream HTTP call.
  */
 export const FLOWAY_EXTENSION_FIELDS = {
-  chatCompletions: ['thinking_budget', 'adaptive_thinking', 'reasoning_summary', 'anthropic_beta'] as const,
-  responses: ['thinking_budget', 'adaptive_thinking', 'anthropic_beta'] as const,
+  chatCompletions: ['thinking_budget', 'adaptive_thinking', 'reasoning_summary'] as const,
+  responses: ['thinking_budget', 'adaptive_thinking'] as const,
   messages: ['verbosity'] as const,
-  gemini: {
-    topLevel: ['anthropicBeta'] as const,
-    generationConfig: ['verbosity', 'serviceTier'] as const,
-  },
 } as const;
diff --git a/packages/protocols/src/gemini/index.ts b/packages/protocols/src/gemini/index.ts
index f8878eda2..c102e3668 100644
--- a/packages/protocols/src/gemini/index.ts
+++ b/packages/protocols/src/gemini/index.ts
@@ -6,8 +6,6 @@ export interface GeminiPayload {
   generationConfig?: GeminiGenerationConfig;
   safetySettings?: GeminiSafetySetting[];
   cachedContent?: string;
-  /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. */
-  anthropicBeta?: readonly string[];
 }
 
 export interface GeminiContent {
diff --git a/packages/protocols/src/responses/index.ts b/packages/protocols/src/responses/index.ts
index 53cf084af..315fa1850 100644
--- a/packages/protocols/src/responses/index.ts
+++ b/packages/protocols/src/responses/index.ts
@@ -41,8 +41,6 @@ export interface ResponsesPayload {
   thinking_budget?: number;
   /** Floway protocol extension. Translated to Anthropic `thinking.type: "adaptive"` / Gemini `thinkingConfig.thinkingBudget: -1` when routed to those upstreams; dropped on OpenAI Chat/Responses targets. */
   adaptive_thinking?: boolean;
-  /** Floway protocol extension. Translated to the Anthropic `anthropic-beta` header (list-merged, deduped) when routed to a Messages upstream; dropped elsewhere. */
-  anthropic_beta?: readonly string[];
 }
 
 // Narrower payload for `/responses/compact`. The official endpoint accepts a
diff --git a/packages/translate/src/chat-completions-via-messages/request.ts b/packages/translate/src/chat-completions-via-messages/request.ts
index e9e6b6d6e..91118ff4a 100644
--- a/packages/translate/src/chat-completions-via-messages/request.ts
+++ b/packages/translate/src/chat-completions-via-messages/request.ts
@@ -190,8 +190,7 @@ export const translateChatCompletionsToMessages = async (payload: ChatCompletion
   const hasOutputConfig = Object.keys(outputConfig).length > 0;
 
   // Materialize the Floway extension fields onto their Messages-natural
-  // slots. `anthropic_beta` is body-side residue that the per-upstream
-  // sanitizer strips after translation.
+  // slots.
   const thinking = buildMessagesThinkingFromExtensions({
     thinkingBudget: payload.thinking_budget,
     adaptiveThinking: payload.adaptive_thinking,
diff --git a/packages/translate/src/chat-completions-via-messages/request_test.ts b/packages/translate/src/chat-completions-via-messages/request_test.ts
index 9dede4c21..7f47b100d 100644
--- a/packages/translate/src/chat-completions-via-messages/request_test.ts
+++ b/packages/translate/src/chat-completions-via-messages/request_test.ts
@@ -1283,17 +1283,3 @@ test('translateChatCompletionsToMessages does not emit Messages-protocol fields
   assertEquals(result.speed, undefined);
   assertEquals(result.service_tier, undefined);
 });
-
-test('translateChatCompletionsToMessages leaves anthropic_beta as inbound residue (header injection is the gateway-side rule-apply step)', async () => {
-  const result = await translateChatCompletionsToMessages(
-    mkPayload({
-      messages: [{ role: 'user', content: 'hi' }],
-      anthropic_beta: ['fast-mode-2026-02-01', 'context-1m-2025-08-07'],
-    }),
-  );
-
-  // The translated body must not echo the OpenAI-family `anthropic_beta`
-  // field; the per-upstream sanitizer is responsible for stripping any
-  // residue, and the rule-apply pass handles the outbound header.
-  assertEquals('anthropic_beta' in result, false);
-});
diff --git a/packages/translate/src/chat-completions-via-responses/request_test.ts b/packages/translate/src/chat-completions-via-responses/request_test.ts
index 70af2527d..00a78203c 100644
--- a/packages/translate/src/chat-completions-via-responses/request_test.ts
+++ b/packages/translate/src/chat-completions-via-responses/request_test.ts
@@ -461,14 +461,12 @@ test('translateChatCompletionsToResponses leaves Messages-only extensions as inb
     messages: [{ role: 'user', content: 'hi' }],
     thinking_budget: 4096,
     adaptive_thinking: true,
-    anthropic_beta: ['fast-mode-2026-02-01'],
   });
 
   // Responses has no slot for any of these; the sanitizer strips the
   // residue. Translate must not invent a target field.
   assertEquals('thinking_budget' in result, false);
   assertEquals('adaptive_thinking' in result, false);
-  assertEquals('anthropic_beta' in result, false);
 });
 
 test('translateChatCompletionsToResponses passes a fully extension-free payload through unchanged from prior behavior', () => {
diff --git a/packages/translate/src/gemini-via-chat-completions/request_test.ts b/packages/translate/src/gemini-via-chat-completions/request_test.ts
index debd6b707..7fecec34c 100644
--- a/packages/translate/src/gemini-via-chat-completions/request_test.ts
+++ b/packages/translate/src/gemini-via-chat-completions/request_test.ts
@@ -498,19 +498,6 @@ test('buildTargetRequest emits generationConfig.serviceTier onto Chat service_ti
   assertEquals(result.service_tier, 'priority');
 });
 
-test('buildTargetRequest drops top-level Anthropic extensions (anthropicBeta) on Chat', () => {
-  const result = buildTargetRequest(
-    {
-      contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
-      anthropicBeta: ['fast-mode-2026-02-01'],
-    },
-    'gpt-test',
-  );
-
-  assertEquals('anthropicBeta' in result, false);
-  assertEquals('anthropic_beta' in result, false);
-});
-
 test('buildTargetRequest extends reasoning_effort enum to recognize xhigh and max', () => {
   const xhigh = buildTargetRequest(
     { contents: [{ role: 'user', parts: [{ text: 'hi' }] }], generationConfig: { thinkingConfig: { thinkingLevel: 'xhigh' } } },
diff --git a/packages/translate/src/gemini-via-messages/request_test.ts b/packages/translate/src/gemini-via-messages/request_test.ts
index 870d3f388..619e739e5 100644
--- a/packages/translate/src/gemini-via-messages/request_test.ts
+++ b/packages/translate/src/gemini-via-messages/request_test.ts
@@ -427,20 +427,3 @@ test('buildTargetRequest drops verbosity extension on Messages (no slot)', () =>
 
   assertEquals('verbosity' in result, false);
 });
-
-test('buildTargetRequest leaves anthropicBeta as inbound residue for the gateway header pass', () => {
-  const result = buildTargetRequest(
-    {
-      contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
-      anthropicBeta: ['fast-mode-2026-02-01'],
-    },
-    'claude-test',
-    noOptions,
-  );
-
-  // Translate cannot move it to a header; the gateway-side rule-apply pass
-  // (Task 5) materializes anthropicBeta into the outbound anthropic-beta
-  // header. The body must not echo it.
-  assertEquals('anthropicBeta' in result, false);
-  assertEquals('anthropic_beta' in result, false);
-});
diff --git a/packages/translate/src/gemini-via-responses/request_test.ts b/packages/translate/src/gemini-via-responses/request_test.ts
index 747bfed02..459b8d510 100644
--- a/packages/translate/src/gemini-via-responses/request_test.ts
+++ b/packages/translate/src/gemini-via-responses/request_test.ts
@@ -431,15 +431,3 @@ test('buildTargetRequest emits generationConfig.serviceTier onto Responses servi
 
   assertEquals(result.service_tier, 'priority');
 });
-
-test('buildTargetRequest drops top-level Anthropic extensions on Responses', () => {
-  const result = buildTargetRequest(
-    {
-      contents: [{ role: 'user', parts: [{ text: 'hi' }] }],
-      anthropicBeta: ['fast-mode-2026-02-01'],
-    },
-    'gpt-test',
-  );
-
-  assertEquals('anthropicBeta' in result, false);
-});
diff --git a/packages/translate/src/messages-via-responses/request_test.ts b/packages/translate/src/messages-via-responses/request_test.ts
index 4c32d1b44..94fd4a744 100644
--- a/packages/translate/src/messages-via-responses/request_test.ts
+++ b/packages/translate/src/messages-via-responses/request_test.ts
@@ -542,7 +542,6 @@ test('translateMessagesToResponses drops Anthropic-only mode knobs the Responses
   // bridge test below and is intentionally excluded here.
   assertEquals('thinking_budget' in result, false);
   assertEquals('adaptive_thinking' in result, false);
-  assertEquals('anthropic_beta' in result, false);
 });
 
 // ── speed ↔ service_tier bridge ──
diff --git a/packages/translate/src/responses-via-chat-completions/request_test.ts b/packages/translate/src/responses-via-chat-completions/request_test.ts
index 7ebfb6e15..4474d5089 100644
--- a/packages/translate/src/responses-via-chat-completions/request_test.ts
+++ b/packages/translate/src/responses-via-chat-completions/request_test.ts
@@ -1486,12 +1486,10 @@ test('translateResponsesToChatCompletions leaves Messages-only extensions as inb
     input: [{ type: 'message', role: 'user', content: 'hi' }],
     thinking_budget: 4096,
     adaptive_thinking: true,
-    anthropic_beta: ['fast-mode-2026-02-01'],
   });
 
   assertEquals('thinking_budget' in result.target, false);
   assertEquals('adaptive_thinking' in result.target, false);
-  assertEquals('anthropic_beta' in result.target, false);
 });
 
 test('translateResponsesToChatCompletions drops reasoning.summary (Chat has no slot)', () => {
diff --git a/packages/translate/src/responses-via-messages/request_test.ts b/packages/translate/src/responses-via-messages/request_test.ts
index 291bddad5..bcd891764 100644
--- a/packages/translate/src/responses-via-messages/request_test.ts
+++ b/packages/translate/src/responses-via-messages/request_test.ts
@@ -708,8 +708,3 @@ test('translateResponsesToMessages emits adaptive_thinking onto thinking.{adapti
   const result = await translateResponsesToMessages(minimalResponsesPayload({ adaptive_thinking: true }));
   assertEquals(result.target.thinking, { type: 'adaptive' });
 });
-
-test('translateResponsesToMessages leaves anthropic_beta as inbound residue for the gateway header pass', async () => {
-  const result = await translateResponsesToMessages(minimalResponsesPayload({ anthropic_beta: ['fast-mode-2026-02-01'] }));
-  assertEquals('anthropic_beta' in result.target, false);
-});
diff --git a/packages/translate/src/shared/via-messages/anthropic-extensions.ts b/packages/translate/src/shared/via-messages/anthropic-extensions.ts
index a23b05472..52a8ace56 100644
--- a/packages/translate/src/shared/via-messages/anthropic-extensions.ts
+++ b/packages/translate/src/shared/via-messages/anthropic-extensions.ts
@@ -1,82 +1,30 @@
 import type { MessagesPayload, MessagesThinkingDisplay } from '@floway-dev/protocols/messages';
 
-// Anthropic structured `thinking.display` enumerates three modes; the
-// inbound IR's `reasoning_summary` extension and the Responses-native
-// `reasoning.summary` share an OpenAI-style {auto|concise|detailed|omitted}
-// vocabulary. The mapping collapses concise+detailed onto Anthropic's single
-// `summarized` mode (both surface a redacted summary, not the full chain),
-// `omitted` is the canonical hide-everything spelling, and `auto` returns
-// `undefined` so Anthropic's account-default takes over. Operator-typed
-// values that match neither vocabulary pass through verbatim — Anthropic
-// rejects unknown values at the wire, which is the explicit-failure path.
-const mapSummaryToAnthropicDisplay = (summary: string): MessagesThinkingDisplay | string | undefined => {
-  switch (summary) {
-  case 'concise':
-  case 'detailed':
-    return 'summarized';
-  case 'omitted':
-    return 'omitted';
-  case 'auto':
-    return undefined;
-  default:
-    return summary;
-  }
-};
-
-// Merge a beta token list onto an existing `anthropic-beta` header value.
-// The header is a case-sensitive, comma-separated list per the Anthropic
-// docs; dedupe is by exact-match equality so operators can carry parallel
-// tokens that differ only by date suffix. Re-joined with `, ` so the wire
-// shape matches both Anthropic's own examples and downstream gateways
-// (envoyproxy/ai-gateway).
-// References:
-// - https://platform.claude.com/docs/en/api/beta-headers
-// - https://github.com/envoyproxy/ai-gateway
-export const mergeAnthropicBetaTokens = (existing: string | null | undefined, additions: readonly string[]): string => {
-  const seen = new Set<string>();
-  const merged: string[] = [];
-  const collect = (token: string): void => {
-    const trimmed = token.trim();
-    if (!trimmed || seen.has(trimmed)) return;
-    seen.add(trimmed);
-    merged.push(trimmed);
-  };
-
-  if (existing) {
-    for (const token of existing.split(',')) collect(token);
-  }
-  for (const token of additions) collect(token);
-
-  return merged.join(', ');
-};
-
-// Materialize the Messages-bound `anthropic_beta` extension list onto an
-// outbound request's `anthropic-beta` header. The helper takes a `Headers`
-// object so the caller (typically the gateway-side rule-apply pass) doesn't
-// have to re-parse and re-set the header itself.
-export const applyAnthropicBetaToHeaders = (headers: Headers, additions: readonly string[]): void => {
-  if (!additions.length) return;
-  const merged = mergeAnthropicBetaTokens(headers.get('anthropic-beta'), additions);
-  if (merged) headers.set('anthropic-beta', merged);
-};
-
 // Build a Messages `thinking` block from the Floway extension fields a
 // non-Messages inbound carries (`thinking_budget`, `adaptive_thinking`,
 // `reasoning_summary`). `adaptive_thinking: true` overrides `thinking_budget`
 // because the alias write-side validator enforces single-facet selection;
 // when both still arrive the adaptive choice wins.
 //
-// `reasoningSummary` is the OpenAI-style summary vocabulary
-// ({auto|concise|detailed|omitted} plus pass-through). It synthesizes
-// `thinking.{type:'enabled', display}` when the inbound carries summary
-// but no budget/adaptive signal — without an explicit thinking mode
-// Anthropic would otherwise discard the display field.
+// The summary mapping collapses the OpenAI-style {auto|concise|detailed|
+// omitted} vocabulary onto Anthropic's structured `thinking.display`
+// enumeration: concise + detailed both surface a redacted summary, so they
+// collapse to `summarized`; `omitted` is the canonical hide-everything
+// spelling; `auto` returns undefined so Anthropic's account default takes
+// over. Operator-typed values that match neither vocabulary pass through
+// verbatim — Anthropic rejects unknown values at the wire, which is the
+// explicit-failure path.
+//
+// `reasoningSummary` synthesizes `thinking.{type:'enabled', display}` when
+// the inbound carries summary but no budget/adaptive signal — without an
+// explicit thinking mode Anthropic would otherwise discard the display
+// field.
 export const buildMessagesThinkingFromExtensions = (input: {
   thinkingBudget?: number;
   adaptiveThinking?: boolean;
   reasoningSummary?: string;
 }): MessagesPayload['thinking'] | undefined => {
-  const display = input.reasoningSummary !== undefined ? mapSummaryToAnthropicDisplay(input.reasoningSummary) : undefined;
+  const display = input.reasoningSummary !== undefined ? mapSummary(input.reasoningSummary) : undefined;
   const displayPart = display !== undefined ? { display: display as MessagesThinkingDisplay } : {};
 
   if (input.adaptiveThinking === true) {
@@ -90,3 +38,17 @@ export const buildMessagesThinkingFromExtensions = (input: {
   }
   return undefined;
 };
+
+const mapSummary = (summary: string): MessagesThinkingDisplay | string | undefined => {
+  switch (summary) {
+  case 'concise':
+  case 'detailed':
+    return 'summarized';
+  case 'omitted':
+    return 'omitted';
+  case 'auto':
+    return undefined;
+  default:
+    return summary;
+  }
+};

From 9e54f70b4e6d2fec33df9927353845e853130ea8 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 01:01:16 +0800
Subject: [PATCH 073/170] cleanup(aliases): extract mergeAliasesIntoModels and
 inline deriveDisplayName

Two structural cleanups around the alias-listing surface:

1. The "synthesize alias entries, drop colliding real ids, append" merge
   logic was open-coded in both `data-plane/models/load.ts` (public
   `/v1/models`) and `control-plane/models/routes.ts` (dashboard
   `/api/models`). Extracted as `mergeAliasesIntoModels(...)` in
   `data-plane/models/alias-listing.ts` parameterised by `mapReal` and
   `wrapAlias` so each caller stays typed against its own row shape
   (PublicModel vs ControlPlaneModel).

2. `deriveDisplayName` lived as a private helper with one call site;
   inlined into `synthesizeOne` as a single `??` expression, which is
   shorter than the helper definition plus its call.
---
 .../src/control-plane/models/routes.ts        | 14 +++----
 .../src/data-plane/models/alias-listing.ts    | 42 +++++++++++++++----
 .../gateway/src/data-plane/models/load.ts     | 20 ++++-----
 3 files changed, 48 insertions(+), 28 deletions(-)

diff --git a/packages/gateway/src/control-plane/models/routes.ts b/packages/gateway/src/control-plane/models/routes.ts
index 752fe0105..0487d497c 100644
--- a/packages/gateway/src/control-plane/models/routes.ts
+++ b/packages/gateway/src/control-plane/models/routes.ts
@@ -1,6 +1,6 @@
 import type { Context } from 'hono';
 
-import { synthesizeListedAliases } from '../../data-plane/models/alias-listing.ts';
+import { mergeAliasesIntoModels } from '../../data-plane/models/alias-listing.ts';
 import { toPublicModel } from '../../data-plane/models/load.ts';
 import { MODEL_LISTING_FAILURE_MESSAGE } from '../../data-plane/models/shared.ts';
 import { getModels } from '../../data-plane/providers/registry.ts';
@@ -48,12 +48,12 @@ export const controlPlaneModels = async (c: Context) => {
       ),
       getRepo().modelAliases.list(),
     ]);
-    const aliasEntries = synthesizeListedAliases({ aliases, realModels: models });
-    const aliasIds = new Set(aliasEntries.map(entry => entry.id));
-    const data: ControlPlaneModel[] = [
-      ...models.filter(model => !aliasIds.has(model.id)).map(toControlPlaneModel),
-      ...aliasEntries.map(entry => ({ ...entry, upstreams: [] })),
-    ];
+    const data = mergeAliasesIntoModels({
+      realModels: models,
+      aliases,
+      mapReal: toControlPlaneModel,
+      wrapAlias: entry => ({ ...entry, upstreams: [] }),
+    });
     const response: ControlPlaneModelsResponse = {
       object: 'list',
       has_more: false,
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index 5bb46e712..73cc192cd 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -19,7 +19,7 @@
 import type { ModelAliasRecord } from '../../repo/types.ts';
 import { composeAliasDisplayName } from '@floway-dev/protocols/common';
 import type { AliasTarget, ChatAliasRules, ChatModelInfo, PublicModel, PublicModelAliasedFrom } from '@floway-dev/protocols/common';
-import type { InternalModel } from '@floway-dev/provider';
+import type { InternalModel, ResolvedModel } from '@floway-dev/provider';
 
 export interface ListedAliasInputs {
   readonly aliases: readonly ModelAliasRecord[];
@@ -125,12 +125,6 @@ const narrowChatByRules = (chat: ChatModelInfo | undefined, target: AliasTarget)
   return out;
 };
 
-const deriveDisplayName = (alias: ModelAliasRecord): string => {
-  if (alias.displayName !== null) return alias.displayName;
-  if (alias.targets.length === 1) return composeAliasDisplayName(alias.targets[0].target_model_id, alias.targets[0].rules);
-  return alias.name;
-};
-
 const buildAliasedFrom = (alias: ModelAliasRecord): PublicModelAliasedFrom => ({
   name: alias.name,
   kind: alias.kind,
@@ -146,11 +140,18 @@ const synthesizeOne = (alias: ModelAliasRecord, realModels: readonly InternalMod
     .map(target => ({ target, real: realById.get(target.target_model_id) }))
     .filter((entry): entry is { target: AliasTarget; real: InternalModel } => entry.real !== undefined && entry.real.kind === alias.kind);
 
+  // Display name precedence: operator-set wins; otherwise derive from the
+  // sole target's id + rules when single-target; multi-target falls back to
+  // the alias's own name because no single target represents the alias.
+  const displayName = alias.displayName ?? (alias.targets.length === 1
+    ? composeAliasDisplayName(alias.targets[0].target_model_id, alias.targets[0].rules)
+    : alias.name);
+
   const entry: PublicModel = {
     id: alias.name,
     object: 'model',
     type: 'model',
-    display_name: deriveDisplayName(alias),
+    display_name: displayName,
     limits: {},
     kind: alias.kind,
     aliasedFrom: buildAliasedFrom(alias),
@@ -188,3 +189,28 @@ export const synthesizeListedAliases = (input: ListedAliasInputs): PublicModel[]
   sortAliases(input.aliases)
     .filter(alias => alias.visibleInModelsList)
     .map(alias => synthesizeOne(alias, input.realModels));
+
+// Compose real-model entries with visible alias entries into a single typed
+// list. Both data-plane `/v1/models` and the dashboard's `/api/models`
+// share the same merge rule: when an alias's `name` collides with a real
+// model id, the alias entry wins and the colliding real entry is dropped
+// — two entries with the same `id` would break OpenAI-client deduplication,
+// and the alias was added by the operator deliberately, so collapsing to
+// it preserves intent. `mapReal` shapes each real model into the caller's
+// row type; `wrapAlias` lifts a synthesized `PublicModel` alias entry into
+// the same row type (the dashboard, for example, adds an empty `upstreams`
+// array since alias rows do not bind to an upstream directly).
+export const mergeAliasesIntoModels = <T extends PublicModel>(input: {
+  readonly realModels: readonly ResolvedModel[];
+  readonly aliases: readonly ModelAliasRecord[];
+  readonly mapReal: (model: ResolvedModel) => T;
+  readonly wrapAlias: (entry: PublicModel) => T;
+}): T[] => {
+  const { realModels, aliases, mapReal, wrapAlias } = input;
+  const aliasEntries = synthesizeListedAliases({ aliases, realModels });
+  const aliasIds = new Set(aliasEntries.map(entry => entry.id));
+  return [
+    ...realModels.filter(model => !aliasIds.has(model.id)).map(mapReal),
+    ...aliasEntries.map(wrapAlias),
+  ];
+};
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index e0a8b513a..849b7fdab 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -1,4 +1,4 @@
-import { synthesizeListedAliases } from './alias-listing.ts';
+import { mergeAliasesIntoModels } from './alias-listing.ts';
 import type { ModelAliasesRepo } from '../../repo/types.ts';
 import { getModels } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
@@ -24,12 +24,6 @@ export const toPublicModel = (model: InternalModel): PublicModel => {
   return info;
 };
 
-// Merge real-model entries with alias entries synthesized off the operator's
-// alias catalog. An alias whose `name` collides with a real model id wins —
-// two entries with the same `id` would break OpenAI client deduplication, and
-// the alias was added by the operator deliberately, so collapsing to it
-// preserves intent. `synthesizeListedAliases` already produces the alias entry;
-// the merge step drops the real entry with that id.
 export const loadModels = async (
   upstreamFilter: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,
@@ -40,12 +34,12 @@ export const loadModels = async (
     getModels(upstreamFilter, fetcherForUpstream, scheduler),
     aliasRepo.list(),
   ]);
-  const aliasEntries = synthesizeListedAliases({ aliases, realModels });
-  const aliasIds = new Set(aliasEntries.map(entry => entry.id));
-  const data: PublicModel[] = [
-    ...realModels.map(toPublicModel).filter(model => !aliasIds.has(model.id)),
-    ...aliasEntries,
-  ];
+  const data = mergeAliasesIntoModels({
+    realModels,
+    aliases,
+    mapReal: toPublicModel,
+    wrapAlias: entry => entry,
+  });
   return {
     object: 'list',
     has_more: false,

From f5d827c4ecb582a15920b32828741ae33a0a7e65 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 01:07:29 +0800
Subject: [PATCH 074/170] cleanup(aliases): badge field key, drop targetCount,
 collapse model-warning shape
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Three small surface tightenings:

1. `AliasRuleBadge` carries an explicit `field` key ('reasoning.effort',
   'reasoning.budget_tokens', 'reasoning.adaptive', 'reasoning.summary',
   'verbosity', 'serviceTier') so the dashboard's multi-target collapse
   groups badges by field directly instead of recovering the field name
   by parsing each badge's label text.

2. `AliasNoTargetAvailableError.targetCount` is gone — the wording
   `aliasNoTargetMessage` builds is already baked into `error.message`,
   and the lifted `ChatServeFailure` now carries `message: string` so
   each protocol's renderer reads it verbatim instead of re-deriving via
   `aliasNoTargetMessage(failure)`. The helper stays as the single
   source of truth for the wording but is called only from the Error
   constructor.

3. `AliasModelWarning` (the {message} struct) collapsed to plain
   `string[]` since the only consumer joins them with newlines for a
   tooltip.
---
 .../components/alias-edit/AliasTargetRow.vue  |  2 +-
 .../web/src/components/alias-edit/warnings.ts | 13 ++--
 .../components/alias-edit/warnings_test.ts    |  4 +-
 .../src/components/models/ModelInfoBar.vue    | 17 +++--
 .../chat/chat-completions/errors.ts           |  4 +-
 .../src/data-plane/chat/gemini/errors.ts      |  4 +-
 .../src/data-plane/chat/messages/errors.ts    |  4 +-
 .../src/data-plane/chat/responses/errors.ts   |  4 +-
 .../src/data-plane/chat/shared/errors.ts      | 11 ++--
 .../src/data-plane/model-aliases/resolve.ts   | 10 ++-
 packages/protocols/src/common/aliases.ts      | 63 +++++++++++--------
 packages/protocols/src/common/aliases_test.ts |  8 +--
 12 files changed, 73 insertions(+), 71 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasTargetRow.vue b/apps/web/src/components/alias-edit/AliasTargetRow.vue
index e8aa46aac..57d3df2cc 100644
--- a/apps/web/src/components/alias-edit/AliasTargetRow.vue
+++ b/apps/web/src/components/alias-edit/AliasTargetRow.vue
@@ -97,7 +97,7 @@ const catalog = computed(() => findCatalogModel(props.models, target.value.targe
 const modelWarnings = computed(() => computeModelWarnings(target.value.target_model_id, catalog.value));
 const ruleWarnings = computed(() => computeRuleWarnings(chatRules.value, catalog.value));
 const warningFor = (field: string) => ruleWarnings.value.find(w => w.field === field)?.message;
-const modelWarningTooltip = computed(() => modelWarnings.value.map(w => w.message).join('\n'));
+const modelWarningTooltip = computed(() => modelWarnings.value.join('\n'));
 </script>
 
 <template>
diff --git a/apps/web/src/components/alias-edit/warnings.ts b/apps/web/src/components/alias-edit/warnings.ts
index 355758388..0d9f07bee 100644
--- a/apps/web/src/components/alias-edit/warnings.ts
+++ b/apps/web/src/components/alias-edit/warnings.ts
@@ -69,19 +69,16 @@ export const computeRuleWarnings = (
   return out;
 };
 
-// One model-level warning attached to one target row. Today the only
-// trigger is the target id failing to resolve to any catalog model.
-export interface AliasModelWarning {
-  message: string;
-}
-
+// Model-level warnings for one target row. Today the only trigger is the
+// target id failing to resolve to any catalog model. Returned as plain
+// strings — the dialog already joins them with newlines for the tooltip.
 export const computeModelWarnings = (
   targetModelId: string,
   catalog: ControlPlaneModel | undefined,
-): AliasModelWarning[] => {
+): string[] => {
   if (targetModelId === '') return [];
   if (catalog === undefined) {
-    return [{ message: `"${targetModelId}" does not currently resolve to any enabled upstream binding.` }];
+    return [`"${targetModelId}" does not currently resolve to any enabled upstream binding.`];
   }
   return [];
 };
diff --git a/apps/web/src/components/alias-edit/warnings_test.ts b/apps/web/src/components/alias-edit/warnings_test.ts
index 9d0165974..f50a17bdd 100644
--- a/apps/web/src/components/alias-edit/warnings_test.ts
+++ b/apps/web/src/components/alias-edit/warnings_test.ts
@@ -57,8 +57,8 @@ describe('computeModelWarnings', () => {
   it('returns a "does not resolve" warning when the target is unknown', () => {
     const w = computeModelWarnings('mystery-model', undefined);
     expect(w).toHaveLength(1);
-    expect(w[0].message).toContain('mystery-model');
-    expect(w[0].message).toContain('does not currently resolve');
+    expect(w[0]).toContain('mystery-model');
+    expect(w[0]).toContain('does not currently resolve');
   });
 
   it('emits no warning for an empty id (the row is mid-edit)', () => {
diff --git a/apps/web/src/components/models/ModelInfoBar.vue b/apps/web/src/components/models/ModelInfoBar.vue
index 3c7a78b07..d2018e2b5 100644
--- a/apps/web/src/components/models/ModelInfoBar.vue
+++ b/apps/web/src/components/models/ModelInfoBar.vue
@@ -3,7 +3,7 @@ import { computed } from 'vue';
 
 import type { ControlPlaneModel } from '../../api/types.ts';
 import { providerBadgeClass, providerMeta } from '../upstreams/provider-meta.ts';
-import { formatAliasRuleBadges } from '@floway-dev/protocols/common';
+import { type AliasRuleBadgeField, formatAliasRuleBadges } from '@floway-dev/protocols/common';
 
 const props = defineProps<{
   model: ControlPlaneModel;
@@ -28,22 +28,19 @@ const aliasOfLabel = computed<string | null>(() => {
 });
 
 // Single-target aliases render one badge per rule; multi-target aliases
-// collapse each field to "<field>: varies" when its values differ.
+// collapse to "<field>: varies" for any field whose values disagree across
+// targets. Each badge carries an explicit `field` key so the bucket walk
+// groups by the rule slot directly rather than parsing the label string.
 const ruleBadges = computed<{ label: string }[]>(() => {
   const a = props.model.aliasedFrom;
   if (!a) return [];
   if (a.targets.length === 1) return formatAliasRuleBadges(a.targets[0].rules);
-  // Walk each target and bucket their badge labels by the field they
-  // describe (the leading word of every badge — "low effort", "summary:
-  // auto"). Any field that shows up in two distinct shapes collapses to
-  // "<field>: varies".
-  const byField = new Map<string, Set<string>>();
+  const byField = new Map<AliasRuleBadgeField, Set<string>>();
   for (const t of a.targets) {
     for (const badge of formatAliasRuleBadges(t.rules)) {
-      const field = badge.label.includes(':') ? badge.label.split(':')[0].trim() : badge.label.split(' ').slice(1).join(' ').trim() || badge.label;
-      const set = byField.get(field) ?? new Set<string>();
+      const set = byField.get(badge.field) ?? new Set<string>();
       set.add(badge.label);
-      byField.set(field, set);
+      byField.set(badge.field, set);
     }
   }
   return Array.from(byField.entries()).map(([field, set]) => ({
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/errors.ts b/packages/gateway/src/data-plane/chat/chat-completions/errors.ts
index 8f293bae6..f2ca73a4c 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/errors.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/errors.ts
@@ -1,5 +1,5 @@
 import { appendFailedUpstreams } from '../../shared/failed-upstreams.ts';
-import { aliasNoTargetMessage, type ChatServeFailure } from '../shared/errors.ts';
+import type { ChatServeFailure } from '../shared/errors.ts';
 import type { ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { ExecuteResult } from '@floway-dev/provider';
@@ -35,6 +35,6 @@ export const renderChatCompletionsFailure = (
   case 'model-unsupported':
     return openAiErrorResult(400, appendFailedUpstreams(`Model ${failure.model} does not support the /chat/completions endpoint.`, failure.failedUpstreams));
   case 'alias-no-target-available':
-    return openAiErrorResult(404, aliasNoTargetMessage(failure));
+    return openAiErrorResult(404, failure.message);
   }
 };
diff --git a/packages/gateway/src/data-plane/chat/gemini/errors.ts b/packages/gateway/src/data-plane/chat/gemini/errors.ts
index 258047f8f..cc560a85f 100644
--- a/packages/gateway/src/data-plane/chat/gemini/errors.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/errors.ts
@@ -1,5 +1,5 @@
 import { appendFailedUpstreams } from '../../shared/failed-upstreams.ts';
-import { aliasNoTargetMessage, type ChatServeFailure } from '../shared/errors.ts';
+import type { ChatServeFailure } from '../shared/errors.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { GeminiStreamEvent } from '@floway-dev/protocols/gemini';
 import type { ExecuteResult } from '@floway-dev/provider';
@@ -54,6 +54,6 @@ export const renderGeminiFailure = (
   case 'model-unsupported':
     return geminiRpcErrorResult(400, appendFailedUpstreams(`Model ${failure.model} does not support ${endpoint === 'countTokens' ? 'countTokens' : 'the Gemini generateContent endpoint'}.`, failure.failedUpstreams));
   case 'alias-no-target-available':
-    return geminiRpcErrorResult(404, aliasNoTargetMessage(failure));
+    return geminiRpcErrorResult(404, failure.message);
   }
 };
diff --git a/packages/gateway/src/data-plane/chat/messages/errors.ts b/packages/gateway/src/data-plane/chat/messages/errors.ts
index 07536746a..7bb260396 100644
--- a/packages/gateway/src/data-plane/chat/messages/errors.ts
+++ b/packages/gateway/src/data-plane/chat/messages/errors.ts
@@ -1,5 +1,5 @@
 import { appendFailedUpstreams } from '../../shared/failed-upstreams.ts';
-import { aliasNoTargetMessage, type ChatServeFailure } from '../shared/errors.ts';
+import type { ChatServeFailure } from '../shared/errors.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesStreamEvent } from '@floway-dev/protocols/messages';
 import type { ExecuteResult } from '@floway-dev/provider';
@@ -39,6 +39,6 @@ export const renderMessagesFailure = (
   case 'model-unsupported':
     return anthropicErrorResult(400, 'invalid_request_error', appendFailedUpstreams(`Model ${failure.model} does not support the ${endpointPath} endpoint.`, failure.failedUpstreams));
   case 'alias-no-target-available':
-    return anthropicErrorResult(404, 'not_found_error', aliasNoTargetMessage(failure));
+    return anthropicErrorResult(404, 'not_found_error', failure.message);
   }
 };
diff --git a/packages/gateway/src/data-plane/chat/responses/errors.ts b/packages/gateway/src/data-plane/chat/responses/errors.ts
index 2325b8fce..86c880131 100644
--- a/packages/gateway/src/data-plane/chat/responses/errors.ts
+++ b/packages/gateway/src/data-plane/chat/responses/errors.ts
@@ -1,5 +1,5 @@
 import { appendFailedUpstreams } from '../../shared/failed-upstreams.ts';
-import { aliasNoTargetMessage, type ChatServeFailure } from '../shared/errors.ts';
+import type { ChatServeFailure } from '../shared/errors.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { ResponsesStreamEvent } from '@floway-dev/protocols/responses';
 import type { ExecuteResult } from '@floway-dev/provider';
@@ -35,6 +35,6 @@ export const renderResponsesFailure = (
   case 'model-unsupported':
     return openAiErrorResult(400, appendFailedUpstreams(`Model ${failure.model} does not support the /responses endpoint.`, failure.failedUpstreams));
   case 'alias-no-target-available':
-    return openAiErrorResult(404, aliasNoTargetMessage(failure));
+    return openAiErrorResult(404, failure.message);
   }
 };
diff --git a/packages/gateway/src/data-plane/chat/shared/errors.ts b/packages/gateway/src/data-plane/chat/shared/errors.ts
index 599b529b9..7b71d27f5 100644
--- a/packages/gateway/src/data-plane/chat/shared/errors.ts
+++ b/packages/gateway/src/data-plane/chat/shared/errors.ts
@@ -13,17 +13,16 @@ export type ChatServeFailure =
   | { readonly kind: 'item-not-found'; readonly itemId: string }
   | { readonly kind: 'routing-unavailable'; readonly message: string }
   // Alias name resolved, but no entry in its targets list currently maps
-  // to an enabled upstream binding that exposes the inbound endpoint.
-  | { readonly kind: 'alias-no-target-available'; readonly aliasName: string; readonly targetCount: number };
-
-export { aliasNoTargetMessage } from '../../model-aliases/resolve.ts';
+  // to an enabled upstream binding. `message` carries the canonical
+  // wording the Error class already built, so renderers do not re-derive
+  // it.
+  | { readonly kind: 'alias-no-target-available'; readonly message: string };
 
 // Lift `AliasNoTargetAvailableError` into a `ChatServeFailure` so the
 // existing failure renderer can surface it without special-casing.
 export const aliasFailureFromError = (error: AliasNoTargetAvailableError): Extract<ChatServeFailure, { kind: 'alias-no-target-available' }> => ({
   kind: 'alias-no-target-available',
-  aliasName: error.aliasName,
-  targetCount: error.targetCount,
+  message: error.message,
 });
 
 class ChatServeFailureError extends Error {
diff --git a/packages/gateway/src/data-plane/model-aliases/resolve.ts b/packages/gateway/src/data-plane/model-aliases/resolve.ts
index 90f06cd43..07c13b85c 100644
--- a/packages/gateway/src/data-plane/model-aliases/resolve.ts
+++ b/packages/gateway/src/data-plane/model-aliases/resolve.ts
@@ -27,10 +27,10 @@ export interface AliasResolution {
   readonly aliasName: string;
 }
 
-// Canonical wording for the alias-no-target-available 404. The Error class
-// and every protocol-shaped renderer (chat/{*}/errors.ts, passthroughServe)
-// read the same string from here so wording changes land in one place.
-export const aliasNoTargetMessage = (params: { aliasName: string; targetCount: number }): string =>
+// Canonical wording for the alias-no-target-available 404. Called only
+// from inside the `AliasNoTargetAvailableError` constructor so wording
+// changes land in one place; consumers read `error.message` directly.
+const aliasNoTargetMessage = (params: { aliasName: string; targetCount: number }): string =>
   `alias '${params.aliasName}' has ${params.targetCount} target(s); none currently map to an enabled upstream binding`;
 
 // Thrown when the alias name was found but no target currently resolves to
@@ -38,13 +38,11 @@ export const aliasNoTargetMessage = (params: { aliasName: string; targetCount: n
 // surfaced as a 404 in the protocol-specific error envelope.
 export class AliasNoTargetAvailableError extends Error {
   readonly aliasName: string;
-  readonly targetCount: number;
 
   constructor(aliasName: string, targetCount: number) {
     super(aliasNoTargetMessage({ aliasName, targetCount }));
     this.name = 'AliasNoTargetAvailableError';
     this.aliasName = aliasName;
-    this.targetCount = targetCount;
   }
 }
 
diff --git a/packages/protocols/src/common/aliases.ts b/packages/protocols/src/common/aliases.ts
index 7c1183aec..60e43d5fc 100644
--- a/packages/protocols/src/common/aliases.ts
+++ b/packages/protocols/src/common/aliases.ts
@@ -85,43 +85,54 @@ export interface ModelAlias {
   updated_at: string;
 }
 
-// Inline-prose parts for an alias's rules, in the canonical field order. The
-// same builder backs `formatAliasRulesInline` (joins with `, ` for a single
-// summary string) and `formatAliasRuleBadges` (one badge per part). Keeping
-// every surface — inline copy, badge sequence, parenthesized suffix in the
-// derived display name — on a single ordered walk means an operator who
-// configures `effort + verbosity` sees them in the same order whether the
-// dashboard renders badges or a comma-joined caption.
-const aliasRulePartLabels = (rules: AliasRules): string[] => {
-  const chat = rules as ChatAliasRules;
-  const parts: string[] = [];
-  if (chat.reasoning?.effort !== undefined) parts.push(`${chat.reasoning.effort} effort`);
-  if (chat.reasoning?.budget_tokens !== undefined) parts.push(`${chat.reasoning.budget_tokens}tok budget`);
-  if (chat.reasoning?.adaptive === true) parts.push('adaptive');
-  else if (chat.reasoning?.adaptive === false) parts.push('non-adaptive');
-  if (chat.reasoning?.summary !== undefined) parts.push(`summary: ${chat.reasoning.summary}`);
-  if (chat.verbosity !== undefined) parts.push(`${chat.verbosity} verbosity`);
-  if (chat.serviceTier !== undefined) parts.push(`${chat.serviceTier} tier`);
-  return parts;
-};
-
-// One badge per configured rule field, in the canonical order. `value` is
-// reserved for callers that want to render a separate value pill alongside
-// the label; today every part already self-describes through `label`, so
+// One badge per configured rule field, in the canonical order. `field`
+// names the specific rule slot the badge describes so consumers (the
+// dashboard's `ModelInfoBar`, alias-of multi-target collapse) can group
+// by field without parsing the human-readable label. `value` is reserved
+// for callers that want to render a separate value pill alongside the
+// label; today every part already self-describes through `label`, so
 // `value` stays undefined.
+export type AliasRuleBadgeField =
+  | 'reasoning.effort'
+  | 'reasoning.budget_tokens'
+  | 'reasoning.adaptive'
+  | 'reasoning.summary'
+  | 'verbosity'
+  | 'serviceTier';
+
 export interface AliasRuleBadge {
   label: string;
+  field: AliasRuleBadgeField;
   value?: string;
 }
 
-export const formatAliasRuleBadges = (rules: AliasRules): AliasRuleBadge[] =>
-  aliasRulePartLabels(rules).map(label => ({ label }));
+// Inline-prose parts for an alias's rules, in the canonical field order. The
+// same builder backs `formatAliasRulesInline` (joins labels with `, ` for a
+// single summary string) and `formatAliasRuleBadges` (emits badge rows).
+// Keeping every surface — inline copy, badge sequence, parenthesized
+// suffix in the derived display name — on a single ordered walk means an
+// operator who configures `effort + verbosity` sees them in the same order
+// whether the dashboard renders badges or a comma-joined caption.
+const aliasRuleParts = (rules: AliasRules): AliasRuleBadge[] => {
+  const chat = rules as ChatAliasRules;
+  const parts: AliasRuleBadge[] = [];
+  if (chat.reasoning?.effort !== undefined) parts.push({ field: 'reasoning.effort', label: `${chat.reasoning.effort} effort` });
+  if (chat.reasoning?.budget_tokens !== undefined) parts.push({ field: 'reasoning.budget_tokens', label: `${chat.reasoning.budget_tokens}tok budget` });
+  if (chat.reasoning?.adaptive === true) parts.push({ field: 'reasoning.adaptive', label: 'adaptive' });
+  else if (chat.reasoning?.adaptive === false) parts.push({ field: 'reasoning.adaptive', label: 'non-adaptive' });
+  if (chat.reasoning?.summary !== undefined) parts.push({ field: 'reasoning.summary', label: `summary: ${chat.reasoning.summary}` });
+  if (chat.verbosity !== undefined) parts.push({ field: 'verbosity', label: `${chat.verbosity} verbosity` });
+  if (chat.serviceTier !== undefined) parts.push({ field: 'serviceTier', label: `${chat.serviceTier} tier` });
+  return parts;
+};
+
+export const formatAliasRuleBadges = (rules: AliasRules): AliasRuleBadge[] => aliasRuleParts(rules);
 
 // Comma-joined version of the same ordered parts. Empty string when no
 // rule applies — callers should drop the line entirely rather than render
 // blank.
 export const formatAliasRulesInline = (rules: AliasRules): string =>
-  aliasRulePartLabels(rules).join(', ');
+  aliasRuleParts(rules).map(p => p.label).join(', ');
 
 // Derived display name for a single-target alias whose operator did not set
 // `display_name`. Bare `target_model_id` when no rule is configured; with
diff --git a/packages/protocols/src/common/aliases_test.ts b/packages/protocols/src/common/aliases_test.ts
index 558c19fa5..adb380af5 100644
--- a/packages/protocols/src/common/aliases_test.ts
+++ b/packages/protocols/src/common/aliases_test.ts
@@ -39,14 +39,14 @@ describe('formatAliasRulesInline', () => {
 });
 
 describe('formatAliasRuleBadges', () => {
-  test('returns one badge per configured part in the canonical order', () => {
+  test('returns one badge per configured part in the canonical order with explicit field keys', () => {
     expect(formatAliasRuleBadges({
       reasoning: { effort: 'high', budget_tokens: 2048 },
       verbosity: 'medium',
     })).toEqual([
-      { label: 'high effort' },
-      { label: '2048tok budget' },
-      { label: 'medium verbosity' },
+      { field: 'reasoning.effort', label: 'high effort' },
+      { field: 'reasoning.budget_tokens', label: '2048tok budget' },
+      { field: 'verbosity', label: 'medium verbosity' },
     ]);
   });
 

From 7c1c85b3c3c8955959432c7c31aa15ff294e20af Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 01:10:33 +0800
Subject: [PATCH 075/170] cleanup(aliases): make Combobox live-watch the sole
 writer and reconcile budgetText on parent reset
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Two small responsiveness fixes:

1. `Combobox.vue` previously had two writers to `value`: a `watch(query)`
   that pushed the raw text on every keystroke AND `commitTyped` that
   re-wrote with `.trim()` on Enter/select. Collapsed to one — the live
   watch is the sole writer (so consumers' save gates see typed text
   immediately, which the alias-edit dialog needs), and `commitTyped`
   only closes the popover. Trimming is the caller's responsibility and
   already happens in every consumer's save handler.

2. `AliasTargetRow.budgetText` was initialised once from the parent rule
   and never reconciled. When the parent reset the rule object (the
   dialog's kind switch re-initialises every target row's rules), the
   input still showed the old number. A `watch` now syncs the input
   text whenever the parent's parsed budget changes.
---
 .../components/alias-edit/AliasTargetRow.vue   | 10 ++++++++--
 packages/ui/src/Combobox.vue                   | 18 ++++++++++--------
 2 files changed, 18 insertions(+), 10 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasTargetRow.vue b/apps/web/src/components/alias-edit/AliasTargetRow.vue
index 57d3df2cc..de126ef5d 100644
--- a/apps/web/src/components/alias-edit/AliasTargetRow.vue
+++ b/apps/web/src/components/alias-edit/AliasTargetRow.vue
@@ -1,7 +1,7 @@
 <script setup lang="ts">
 // One target row inside the alias edit dialog.
 
-import { computed, ref } from 'vue';
+import { computed, ref, watch } from 'vue';
 
 import { computeModelWarnings, computeRuleWarnings, findCatalogModel } from './warnings.ts';
 import type { AliasKind, AliasTarget, ChatAliasRules, ControlPlaneModel } from '../../api/types.ts';
@@ -72,8 +72,14 @@ const setServiceTier = (raw: string) => {
 // String-bound view of the integer budget. Keeping the typed string in
 // state means an in-progress "" or "1024foo" doesn't clobber the
 // underlying numeric value mid-keystroke; the rules object only updates
-// when the parsed number is a finite integer.
+// when the parsed number is a finite integer. The watch syncs the input
+// back to the parent's value when the parent resets the rule object (e.g.
+// the dialog switches `kind` and re-initialises every target row's rules).
 const budgetText = ref(chatRules.value.reasoning?.budget_tokens === undefined ? '' : String(chatRules.value.reasoning.budget_tokens));
+watch(() => chatRules.value.reasoning?.budget_tokens, parsed => {
+  const next = parsed === undefined ? '' : String(parsed);
+  if (next !== budgetText.value.trim()) budgetText.value = next;
+});
 const onBudgetChange = (raw: string) => {
   budgetText.value = raw;
   const trimmed = raw.trim();
diff --git a/packages/ui/src/Combobox.vue b/packages/ui/src/Combobox.vue
index d111f5f52..52d3fe17b 100644
--- a/packages/ui/src/Combobox.vue
+++ b/packages/ui/src/Combobox.vue
@@ -72,11 +72,13 @@ const normalizedItems = computed<NormalizedItem[]>(() => props.items.map(it =>
 const query = ref(value.value);
 watch(value, v => { if (v !== query.value) query.value = v; });
 
-// Free-form commit: any keystroke or blur sets the model to the current
-// query so the dialog's submit handler always reads the latest typed text,
-// even when the operator never clicks a suggestion row. ComboboxRoot's
-// own onChange path still fires for clicked rows and just re-writes the
-// same value.
+// Single writer: every keystroke pushes the raw query into `value` so the
+// save gate of any consuming dialog reflects what the operator typed
+// without waiting for blur. Trimming is the caller's job — doing it here
+// would clip a trailing space mid-keystroke and force the operator to
+// re-press space to keep typing. ComboboxRoot's own onChange path still
+// fires for clicked rows and just re-writes the same value via this
+// watch.
 watch(query, q => { value.value = q; });
 
 // Always show every suggestion; rank items whose label or value contains the
@@ -106,10 +108,10 @@ const open = ref(false);
 
 // Reka's Combobox only registers items present in the DOM. When the operator
 // types a brand-new value, surface a synthesized "Use 'foo'" row so the
-// arrow keys + Enter path still commits it. Without this row Enter on a
-// brand-new value falls back to default form behavior.
+// arrow keys + Enter path still commits it. The `watch(query)` above is
+// the sole writer to `value`; this handler just closes the popover after
+// the operator confirms.
 const commitTyped = async () => {
-  value.value = trimmedQuery.value;
   open.value = false;
   await nextTick();
 };

From 84809a23fc5059fc21e49237abaf6bcc1bdb54f1 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 01:37:14 +0800
Subject: [PATCH 076/170] feat(aliases/web): two-column rule grid in the
 expanded target row
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The six rule fields were stacked vertically — easy to scan, but a lot
of scrolling even on a wide dialog. Wrap them in a responsive grid: one
column on narrow viewports, two columns at sm+, so a wide dialog reads
as three rows of paired controls.
---
 apps/web/src/components/alias-edit/AliasTargetRow.vue | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasTargetRow.vue b/apps/web/src/components/alias-edit/AliasTargetRow.vue
index de126ef5d..1ad486828 100644
--- a/apps/web/src/components/alias-edit/AliasTargetRow.vue
+++ b/apps/web/src/components/alias-edit/AliasTargetRow.vue
@@ -175,8 +175,8 @@ const modelWarningTooltip = computed(() => modelWarnings.value.join('\n'));
       </div>
     </header>
 
-    <div v-if="expanded" class="space-y-3 border-t border-white/[0.06] p-3">
-      <template v-if="kind === 'chat'">
+    <div v-if="expanded" class="border-t border-white/[0.06] p-3">
+      <div v-if="kind === 'chat'" class="grid grid-cols-1 gap-3 sm:grid-cols-2">
         <div>
           <label class="mb-1.5 block text-xs font-medium text-gray-500">Reasoning effort</label>
           <Combobox
@@ -245,7 +245,7 @@ const modelWarningTooltip = computed(() => modelWarnings.value.join('\n'));
           />
           <p v-if="warningFor('serviceTier')" class="mt-1 text-xs text-amber-300">{{ warningFor('serviceTier') }}</p>
         </div>
-      </template>
+      </div>
 
       <p v-else class="text-xs text-gray-500">No per-target rules for this kind.</p>
     </div>

From 77407f493551cfac690faaf41615693bfa41d821 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 01:47:49 +0800
Subject: [PATCH 077/170] feat(aliases/web): kind-aware target combobox,
 mismatch warning, disabled expand for non-chat
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Six related dialog tweaks:

- The target-id combobox suggestion list filters to non-alias catalog
  rows whose `kind` matches the alias's kind, so an embedding alias only
  hints at embedding models. A new `realModelIdsOfKind` helper carries
  the filter; operators can still type any opaque string.

- `computeModelWarnings` gains an `aliasKind` parameter and emits a
  kind-mismatch warning when the catalog row exists but its kind
  differs from the alias's kind. The dashboard now flags "this model
  isn't the right kind" before the operator hits Save, mirroring the
  "endpoint not supported" outcome such a target would hit at runtime.

- The row expand chevron is disabled when `kind !== 'chat'` — non-chat
  targets carry no rules to show. Switching the kind away from chat
  also force-collapses any open row so the body doesn't strand on a
  stale kind.

- "Alias name" → "Alias id" everywhere in the dialog and its validation
  messages; the alias id is an opaque routing key, not a display label.

- Display-name placeholder mirrors the current alias-id value (or
  `my-alias-id` while it's empty) so the operator sees what the derived
  display will read as.

- Rule-input placeholders use `e.g. <preset>` to disambiguate "this is a
  hint" from "this is the default".
---
 .../components/alias-edit/AliasEditDialog.vue | 19 +++++++-----
 .../alias-edit/AliasEditDialog_test.ts        |  7 ++---
 .../components/alias-edit/AliasTargetRow.vue  | 23 ++++++++++-----
 .../alias-edit/AliasTargetRow_test.ts         |  9 ++++--
 .../web/src/components/alias-edit/warnings.ts | 29 +++++++++++++++----
 .../components/alias-edit/warnings_test.ts    | 20 +++++++++----
 6 files changed, 74 insertions(+), 33 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index 86756bde4..c21e1167c 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -7,7 +7,7 @@
 import { computed, ref } from 'vue';
 
 import AliasTargetRow from './AliasTargetRow.vue';
-import { computeShadowWarning, realModelIds } from './warnings.ts';
+import { computeShadowWarning, realModelIdsOfKind } from './warnings.ts';
 import { callApi, useApi } from '../../api/client.ts';
 import type { AliasKind, AliasSelection, AliasTarget, ChatAliasRules, ModelAlias } from '../../api/types.ts';
 import { useModelAliases } from '../../composables/useModelAliases.ts';
@@ -77,9 +77,12 @@ const removeTarget = (idx: number) => {
   targets.value = targets.value.filter((_, i) => i !== idx);
 };
 
-// Suggestion list for every target-id combobox. Aliases are excluded so an
-// operator can't accidentally hop into the alias layer twice.
-const targetIdItems = computed(() => realModelIds(modelsStore.models.value));
+// Suggestion list for every target-id combobox. Filtered to non-alias
+// catalog rows of the alias's current kind so an embedding alias only
+// hints at embedding models. Aliases never re-enter the alias layer at
+// runtime, so they're excluded too. Operators can still type any
+// opaque string — the list is a hint, not a constraint.
+const targetIdItems = computed(() => realModelIdsOfKind(modelsStore.models.value, kind.value));
 
 const shadowWarning = computed(() => computeShadowWarning(aliasName.value.trim(), targets.value, modelsStore.models.value));
 
@@ -91,9 +94,9 @@ const saveError = ref<string | null>(null);
 // current record so an in-place edit of an unchanged name is allowed.
 const validationError = computed<string | null>(() => {
   const trimmed = aliasName.value.trim();
-  if (trimmed === '') return 'Alias name is required';
+  if (trimmed === '') return 'Alias id is required';
   const collisions = (aliasesStore.aliases.value ?? []).filter(a => a.name === trimmed && a.name !== props.record?.name);
-  if (collisions.length > 0) return `An alias named "${trimmed}" already exists`;
+  if (collisions.length > 0) return `An alias with id "${trimmed}" already exists`;
   if (targets.value.length === 0) return 'At least one target is required';
   if (targets.value.some(t => t.target_model_id.trim() === '')) return 'Every target needs a model id';
   return null;
@@ -161,12 +164,12 @@ const KIND_OPTIONS: { value: AliasKind; label: string }[] = [
 
       <div class="grid grid-cols-1 gap-3 sm:grid-cols-2">
         <div>
-          <label class="mb-1.5 block text-xs font-medium text-gray-500">Alias name</label>
+          <label class="mb-1.5 block text-xs font-medium text-gray-500">Alias id</label>
           <Input v-model="aliasName" placeholder="my-alias-id" class="font-mono" />
         </div>
         <div>
           <label class="mb-1.5 block text-xs font-medium text-gray-500">Display name</label>
-          <Input v-model="displayName" placeholder="auto" />
+          <Input v-model="displayName" :placeholder="aliasName.trim() === '' ? 'my-alias-id' : aliasName.trim()" />
         </div>
       </div>
 
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
index 9dedb62af..220257429 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
+++ b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
@@ -97,7 +97,7 @@ describe('AliasEditDialog', () => {
     w.unmount();
   });
 
-  it('renders the chat rule body when the kind is chat, and the empty-state caption when the kind is embedding', async () => {
+  it('expands the chat rule body for chat aliases; the row toggle is disabled for non-chat aliases', async () => {
     const chat = mount(AliasEditDialog, {
       props: { open: true, record: baseAlias({ name: 'a', targets: [{ target_model_id: 'gpt-5', rules: { reasoning: { effort: 'low' } } as ChatAliasRules }] }) },
       attachTo: document.body,
@@ -114,9 +114,8 @@ describe('AliasEditDialog', () => {
       attachTo: document.body,
     });
     await nextTick();
-    portalQuery<HTMLButtonElement>('button[aria-label="Toggle target row"]')!.click();
-    await nextTick();
-    expect(portalText()).toContain('No per-target rules for this kind.');
+    const toggle = portalQuery<HTMLButtonElement>('button[aria-label="Toggle target row"]')!;
+    expect(toggle.disabled).toBe(true);
     expect(portalText()).not.toContain('Reasoning effort');
     embed.unmount();
   });
diff --git a/apps/web/src/components/alias-edit/AliasTargetRow.vue b/apps/web/src/components/alias-edit/AliasTargetRow.vue
index 1ad486828..9f8c553fd 100644
--- a/apps/web/src/components/alias-edit/AliasTargetRow.vue
+++ b/apps/web/src/components/alias-edit/AliasTargetRow.vue
@@ -25,7 +25,13 @@ const emit = defineEmits<{
 }>();
 
 const expanded = ref(false);
-const toggleExpanded = () => { expanded.value = !expanded.value; };
+const canExpand = computed(() => props.kind === 'chat');
+const toggleExpanded = () => { if (canExpand.value) expanded.value = !expanded.value; };
+
+// Switching the alias kind on the parent collapses every non-chat row's
+// body — there's no rule form to show, so an open chevron would be a
+// dead state.
+watch(() => props.kind, k => { if (k !== 'chat') expanded.value = false; });
 
 const targetId = computed({
   get: () => target.value.target_model_id,
@@ -100,7 +106,7 @@ const VERBOSITY_ITEMS = ['low', 'medium', 'high'];
 const SERVICE_TIER_ITEMS = ['default', 'flex', 'priority', 'scale', 'fast'];
 
 const catalog = computed(() => findCatalogModel(props.models, target.value.target_model_id));
-const modelWarnings = computed(() => computeModelWarnings(target.value.target_model_id, catalog.value));
+const modelWarnings = computed(() => computeModelWarnings(target.value.target_model_id, catalog.value, props.kind));
 const ruleWarnings = computed(() => computeRuleWarnings(chatRules.value, catalog.value));
 const warningFor = (field: string) => ruleWarnings.value.find(w => w.field === field)?.message;
 const modelWarningTooltip = computed(() => modelWarnings.value.join('\n'));
@@ -111,8 +117,9 @@ const modelWarningTooltip = computed(() => modelWarnings.value.join('\n'));
     <header class="flex items-center gap-2 px-3 py-2">
       <button
         type="button"
-        class="grid size-6 shrink-0 place-items-center rounded text-gray-500 transition-colors hover:bg-white/5 hover:text-gray-200"
+        class="grid size-6 shrink-0 place-items-center rounded text-gray-500 transition-colors hover:bg-white/5 hover:text-gray-200 disabled:cursor-not-allowed disabled:opacity-30 disabled:hover:bg-transparent disabled:hover:text-gray-500"
         :aria-expanded="expanded"
+        :disabled="!canExpand"
         aria-label="Toggle target row"
         @click="toggleExpanded"
       >
@@ -182,7 +189,7 @@ const modelWarningTooltip = computed(() => modelWarnings.value.join('\n'));
           <Combobox
             :model-value="chatRules.reasoning?.effort ?? ''"
             :items="EFFORT_ITEMS"
-            placeholder="none / low / medium / high / xhigh"
+            placeholder="e.g. low"
             @update:model-value="setEffort"
           />
           <p v-if="warningFor('reasoning.effort')" class="mt-1 text-xs text-amber-300">{{ warningFor('reasoning.effort') }}</p>
@@ -193,7 +200,7 @@ const modelWarningTooltip = computed(() => modelWarnings.value.join('\n'));
           <input
             type="text"
             inputmode="numeric"
-            placeholder="4096"
+            placeholder="e.g. 4096"
             class="h-9 w-full rounded-[10px] border border-white/[0.14] bg-surface-700 px-3 text-sm text-white placeholder:text-gray-600 focus:border-accent-cyan/50 focus:outline-none focus:ring-1 focus:ring-accent-cyan/30 font-mono"
             :value="budgetText"
             @input="(e: Event) => onBudgetChange((e.target as HTMLInputElement).value)"
@@ -218,7 +225,7 @@ const modelWarningTooltip = computed(() => modelWarnings.value.join('\n'));
           <Combobox
             :model-value="chatRules.reasoning?.summary ?? ''"
             :items="SUMMARY_ITEMS"
-            placeholder="auto"
+            placeholder="e.g. auto"
             @update:model-value="setSummary"
           />
           <p v-if="warningFor('reasoning.summary')" class="mt-1 text-xs text-amber-300">{{ warningFor('reasoning.summary') }}</p>
@@ -229,7 +236,7 @@ const modelWarningTooltip = computed(() => modelWarnings.value.join('\n'));
           <Combobox
             :model-value="chatRules.verbosity ?? ''"
             :items="VERBOSITY_ITEMS"
-            placeholder="medium"
+            placeholder="e.g. medium"
             @update:model-value="setVerbosity"
           />
           <p v-if="warningFor('verbosity')" class="mt-1 text-xs text-amber-300">{{ warningFor('verbosity') }}</p>
@@ -240,7 +247,7 @@ const modelWarningTooltip = computed(() => modelWarnings.value.join('\n'));
           <Combobox
             :model-value="chatRules.serviceTier ?? ''"
             :items="SERVICE_TIER_ITEMS"
-            placeholder="default"
+            placeholder="e.g. default"
             @update:model-value="setServiceTier"
           />
           <p v-if="warningFor('serviceTier')" class="mt-1 text-xs text-amber-300">{{ warningFor('serviceTier') }}</p>
diff --git a/apps/web/src/components/alias-edit/AliasTargetRow_test.ts b/apps/web/src/components/alias-edit/AliasTargetRow_test.ts
index abe3398c2..fd774c35f 100644
--- a/apps/web/src/components/alias-edit/AliasTargetRow_test.ts
+++ b/apps/web/src/components/alias-edit/AliasTargetRow_test.ts
@@ -14,6 +14,7 @@ const target = (over: Partial<AliasTarget> = {}): AliasTarget => ({
 const realModel = (id: string, chat?: ControlPlaneModel['chat']): ControlPlaneModel => ({
   id,
   upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }],
+  kind: 'chat',
   ...(chat ? { chat } : {}),
 });
 
@@ -54,16 +55,18 @@ describe('AliasTargetRow', () => {
     expect(w.emitted('remove')).toHaveLength(1);
   });
 
-  it('renders the expanded chat body only when expanded AND kind is chat; renders the empty-state caption for other kinds', async () => {
+  it('expands to the chat rule body when the kind is chat; the toggle is disabled for non-chat kinds', async () => {
     const chatRow = mountRow({});
     expect(chatRow.text()).not.toContain('Reasoning effort');
     await chatRow.find('button[aria-label="Toggle target row"]').trigger('click');
     expect(chatRow.text()).toContain('Reasoning effort');
     expect(chatRow.text()).toContain('Verbosity');
 
+    // Non-chat aliases carry an empty rules record, so the row has nothing
+    // to expand — the toggle is disabled and the body never renders.
     const embedRow = mountRow({ kind: 'embedding', modelValue: { target_model_id: 'e1', rules: {} } });
-    await embedRow.find('button[aria-label="Toggle target row"]').trigger('click');
-    expect(embedRow.text()).toContain('No per-target rules for this kind.');
+    const toggle = embedRow.find('button[aria-label="Toggle target row"]').element as HTMLButtonElement;
+    expect(toggle.disabled).toBe(true);
     expect(embedRow.text()).not.toContain('Reasoning effort');
   });
 
diff --git a/apps/web/src/components/alias-edit/warnings.ts b/apps/web/src/components/alias-edit/warnings.ts
index 0d9f07bee..1de4b1569 100644
--- a/apps/web/src/components/alias-edit/warnings.ts
+++ b/apps/web/src/components/alias-edit/warnings.ts
@@ -3,7 +3,7 @@
 // helpers keeps the Settings card and the dialog reading the same view of
 // the live /api/models catalog.
 
-import type { ChatAliasRules, ControlPlaneModel } from '../../api/types.ts';
+import type { AliasKind, ChatAliasRules, ControlPlaneModel } from '../../api/types.ts';
 
 // Excludes alias rows — target ids never re-enter the alias layer, so the
 // rule-warning lookup must compare against the same real-model surface that
@@ -15,10 +15,20 @@ export const findCatalogModel = (
   (models ?? []).find(m => m.id === targetModelId && m.aliasedFrom === undefined);
 
 // Real (non-alias) model ids the operator can route to. Used by the
-// target-id combobox suggestion list and by the shadow-warning check.
+// shadow-warning check (no kind filter — shadowing is a name collision).
 export const realModelIds = (models: readonly ControlPlaneModel[] | null | undefined): string[] =>
   (models ?? []).filter(m => m.aliasedFrom === undefined).map(m => m.id);
 
+// Real (non-alias) model ids whose kind matches the alias's kind. Used by
+// the target-id combobox suggestion list so an embedding alias only
+// suggests embedding models, etc. Operators can still type any string —
+// the suggestion list is a hint, not a constraint.
+export const realModelIdsOfKind = (
+  models: readonly ControlPlaneModel[] | null | undefined,
+  kind: AliasKind,
+): string[] =>
+  (models ?? []).filter(m => m.aliasedFrom === undefined && m.kind === kind).map(m => m.id);
+
 // One warning attached to a specific chat rule field. The field key matches
 // the form's `data-field` attribute so the dialog can render the warning
 // directly under the input it annotates.
@@ -69,17 +79,26 @@ export const computeRuleWarnings = (
   return out;
 };
 
-// Model-level warnings for one target row. Today the only trigger is the
-// target id failing to resolve to any catalog model. Returned as plain
-// strings — the dialog already joins them with newlines for the tooltip.
+// Model-level warnings for one target row. Returned as plain strings —
+// the dialog already joins them with newlines for the tooltip.
+//
+// Two triggers:
+// - Unknown target id: nothing in the catalog matches.
+// - Wrong-kind target: the catalog row exists but its `kind` doesn't
+//   match the alias's kind, so a /<aliasKind> request that resolves to
+//   this target would fall through prefix routing's endpoint check.
 export const computeModelWarnings = (
   targetModelId: string,
   catalog: ControlPlaneModel | undefined,
+  aliasKind: AliasKind,
 ): string[] => {
   if (targetModelId === '') return [];
   if (catalog === undefined) {
     return [`"${targetModelId}" does not currently resolve to any enabled upstream binding.`];
   }
+  if (catalog.kind !== aliasKind) {
+    return [`"${targetModelId}" is a ${catalog.kind} model; this alias is configured for ${aliasKind}.`];
+  }
   return [];
 };
 
diff --git a/apps/web/src/components/alias-edit/warnings_test.ts b/apps/web/src/components/alias-edit/warnings_test.ts
index f50a17bdd..9c5d44f1d 100644
--- a/apps/web/src/components/alias-edit/warnings_test.ts
+++ b/apps/web/src/components/alias-edit/warnings_test.ts
@@ -5,11 +5,13 @@ import type { ControlPlaneModel } from '../../api/types.ts';
 
 const realModel = (over: Partial<ControlPlaneModel> & { id: string }): ControlPlaneModel => ({
   upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }],
+  kind: 'chat',
   ...over,
 });
 
 const aliasModel = (over: Partial<ControlPlaneModel> & { id: string }): ControlPlaneModel => ({
   upstreams: [],
+  kind: 'chat',
   aliasedFrom: { name: over.id, kind: 'chat', selection: 'first-available', targets: [] },
   ...over,
 });
@@ -49,20 +51,28 @@ describe('findCatalogModel', () => {
 });
 
 describe('computeModelWarnings', () => {
-  it('returns no warning when the target resolves to a catalog entry', () => {
-    const catalog = realModel({ id: 'gpt-5' });
-    expect(computeModelWarnings('gpt-5', catalog)).toEqual([]);
+  it('returns no warning when the target resolves to a same-kind catalog entry', () => {
+    const catalog = realModel({ id: 'gpt-5', kind: 'chat' });
+    expect(computeModelWarnings('gpt-5', catalog, 'chat')).toEqual([]);
   });
 
   it('returns a "does not resolve" warning when the target is unknown', () => {
-    const w = computeModelWarnings('mystery-model', undefined);
+    const w = computeModelWarnings('mystery-model', undefined, 'chat');
     expect(w).toHaveLength(1);
     expect(w[0]).toContain('mystery-model');
     expect(w[0]).toContain('does not currently resolve');
   });
 
+  it('returns a kind-mismatch warning when the catalog row is the wrong kind', () => {
+    const w = computeModelWarnings('text-emb-3', realModel({ id: 'text-emb-3', kind: 'embedding' }), 'chat');
+    expect(w).toHaveLength(1);
+    expect(w[0]).toContain('text-emb-3');
+    expect(w[0]).toContain('embedding');
+    expect(w[0]).toContain('chat');
+  });
+
   it('emits no warning for an empty id (the row is mid-edit)', () => {
-    expect(computeModelWarnings('', undefined)).toEqual([]);
+    expect(computeModelWarnings('', undefined, 'chat')).toEqual([]);
   });
 });
 

From 423116e2b942f7dc7c05d1e51a775edfa9c55b58 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 01:50:22 +0800
Subject: [PATCH 078/170] =?UTF-8?q?refactor(aliases/web):=20AliasRow=20cap?=
 =?UTF-8?q?tion=20=E2=80=94=20mono-id,=20kind,=20target=20count,=20selecti?=
 =?UTF-8?q?on=20label?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The caption was one font-mono line that read as `<name> · <N> targets
· <raw-selection>`. The raw selection value (`first-available`,
`random`) and the absent kind were operator-jarring on a card meant to
read at a glance. New shape:

  <mono-id> · Kind · N targets · Selection display

Only the alias id stays in mono — `Chat / Embedding / Image`, the
target count, and the selection display (`First available` /
`Random`) read as proportional copy. The hidden suffix keeps its
inline `/v1/models` code fragment in mono.
---
 apps/web/src/components/settings/AliasRow.vue | 27 ++++++++++++-------
 .../src/components/settings/AliasRow_test.ts  |  6 ++---
 2 files changed, 20 insertions(+), 13 deletions(-)

diff --git a/apps/web/src/components/settings/AliasRow.vue b/apps/web/src/components/settings/AliasRow.vue
index 2b1bb015f..7a697df47 100644
--- a/apps/web/src/components/settings/AliasRow.vue
+++ b/apps/web/src/components/settings/AliasRow.vue
@@ -31,15 +31,20 @@ const title = computed(() => {
   return props.alias.name;
 });
 
-const caption = computed(() => {
-  const parts: string[] = [
-    props.alias.name,
-    `${props.alias.targets.length} target${props.alias.targets.length === 1 ? '' : 's'}`,
-    props.alias.selection,
-  ];
-  if (!props.alias.visible_in_models_list) parts.push('hidden from /v1/models');
-  return parts.join(' · ');
-});
+const KIND_LABELS: Record<ModelAlias['kind'], string> = {
+  chat: 'Chat',
+  embedding: 'Embedding',
+  image: 'Image',
+};
+
+const SELECTION_LABELS: Record<ModelAlias['selection'], string> = {
+  'first-available': 'First available',
+  random: 'Random',
+};
+
+const kindLabel = computed(() => KIND_LABELS[props.alias.kind]);
+const selectionLabel = computed(() => SELECTION_LABELS[props.alias.selection]);
+const targetCountLabel = computed(() => `${props.alias.targets.length} target${props.alias.targets.length === 1 ? '' : 's'}`);
 
 const shadowWarning = computed(() => computeShadowWarning(props.alias.name, props.alias.targets, props.models));
 const shadowTooltip = computed(() => {
@@ -55,7 +60,9 @@ const shadowTooltip = computed(() => {
     <div class="flex items-start gap-3">
       <div class="min-w-0 flex-1">
         <h4 class="truncate text-sm font-semibold text-white">{{ title }}</h4>
-        <p class="mt-0.5 truncate font-mono text-xs text-gray-500">{{ caption }}</p>
+        <p class="mt-0.5 truncate text-xs text-gray-500">
+          <span class="font-mono">{{ alias.name }}</span> · {{ kindLabel }} · {{ targetCountLabel }} · {{ selectionLabel }}<template v-if="!alias.visible_in_models_list"> · hidden from <code class="font-mono">/v1/models</code></template>
+        </p>
       </div>
 
       <div class="flex shrink-0 items-center gap-1">
diff --git a/apps/web/src/components/settings/AliasRow_test.ts b/apps/web/src/components/settings/AliasRow_test.ts
index f30ed71ef..424199066 100644
--- a/apps/web/src/components/settings/AliasRow_test.ts
+++ b/apps/web/src/components/settings/AliasRow_test.ts
@@ -57,7 +57,7 @@ describe('AliasRow', () => {
     expect(multi.find('h4').text()).toBe('gizmo');
   });
 
-  it('formats the caption: name · N targets · selection (and optional hidden suffix)', () => {
+  it('formats the caption: name · Kind · N targets · Selection (and optional hidden suffix)', () => {
     const w = mount(AliasRow, {
       props: {
         alias: alias({
@@ -72,12 +72,12 @@ describe('AliasRow', () => {
         models: [],
       },
     });
-    expect(w.find('p').text()).toBe('auto-review · 2 targets · random · hidden from /v1/models');
+    expect(w.find('p').text()).toBe('auto-review · Chat · 2 targets · Random · hidden from /v1/models');
 
     const sole = mount(AliasRow, {
       props: { alias: alias({ name: 'one' }), models: [] },
     });
-    expect(sole.find('p').text()).toBe('one · 1 target · first-available');
+    expect(sole.find('p').text()).toBe('one · Chat · 1 target · First available');
   });
 
   it('emits edit on the pencil button and delete on the trash button', async () => {

From 8487e4c81a06b4b7f0fc2f3f2ba4561f85131972 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 01:55:42 +0800
Subject: [PATCH 079/170] refactor(aliases/web): AliasRow title stamps the
 alias id next to the display name
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Mirror the chat-playground header: the operator-set display name (or
the alias id itself, when no display is set) reads in sans-serif as
the title; the alias id sits beside it in mono. The caption below
drops the redundant id and just carries Kind · N targets · Selection.
The whole row is vertically centered so the title and the action
cluster share a baseline.

When `display_name` is null the title renders the alias id twice — once
as the title text, once as the mono pill — matching the chat
playground's "<friendly-name> <model-id>" header for an unset alias.
---
 apps/web/src/components/settings/AliasRow.vue | 25 ++++++++-----------
 .../src/components/settings/AliasRow_test.ts  | 14 +++++++----
 2 files changed, 20 insertions(+), 19 deletions(-)

diff --git a/apps/web/src/components/settings/AliasRow.vue b/apps/web/src/components/settings/AliasRow.vue
index 7a697df47..afae5cacb 100644
--- a/apps/web/src/components/settings/AliasRow.vue
+++ b/apps/web/src/components/settings/AliasRow.vue
@@ -7,7 +7,6 @@ import { computed } from 'vue';
 
 import type { ControlPlaneModel, ModelAlias } from '../../api/types.ts';
 import { computeShadowWarning } from '../alias-edit/warnings.ts';
-import { composeAliasDisplayName } from '@floway-dev/protocols/common';
 import { Tooltip } from '@floway-dev/ui';
 
 const props = defineProps<{
@@ -20,16 +19,11 @@ defineEmits<{
   delete: [];
 }>();
 
-// Operator-set `display_name` wins; single-target aliases fall through to
-// the compose helper; multi-target falls back to `name`.
-const title = computed(() => {
-  if (props.alias.display_name !== null) return props.alias.display_name;
-  if (props.alias.targets.length === 1) {
-    const t = props.alias.targets[0];
-    return composeAliasDisplayName(t.target_model_id, t.rules);
-  }
-  return props.alias.name;
-});
+// Operator-set `display_name` wins; otherwise fall back to the alias id
+// itself. The id is also stamped next to the title in mono, so an empty
+// display_name produces "alias-id (sans-serif) · alias-id (mono)" — same
+// visual idiom the chat playground uses for the active model.
+const title = computed(() => props.alias.display_name ?? props.alias.name);
 
 const KIND_LABELS: Record<ModelAlias['kind'], string> = {
   chat: 'Chat',
@@ -57,11 +51,14 @@ const shadowTooltip = computed(() => {
 
 <template>
   <div class="rounded-lg border border-white/5 bg-surface-800/80 px-3 py-2.5">
-    <div class="flex items-start gap-3">
+    <div class="flex items-center gap-3">
       <div class="min-w-0 flex-1">
-        <h4 class="truncate text-sm font-semibold text-white">{{ title }}</h4>
+        <div class="flex flex-wrap items-baseline gap-x-2">
+          <h4 class="truncate text-sm font-semibold text-white">{{ title }}</h4>
+          <span class="truncate font-mono text-xs text-gray-500">{{ alias.name }}</span>
+        </div>
         <p class="mt-0.5 truncate text-xs text-gray-500">
-          <span class="font-mono">{{ alias.name }}</span> · {{ kindLabel }} · {{ targetCountLabel }} · {{ selectionLabel }}<template v-if="!alias.visible_in_models_list"> · hidden from <code class="font-mono">/v1/models</code></template>
+          {{ kindLabel }} · {{ targetCountLabel }} · {{ selectionLabel }}<template v-if="!alias.visible_in_models_list"> · hidden from <code class="font-mono">/v1/models</code></template>
         </p>
       </div>
 
diff --git a/apps/web/src/components/settings/AliasRow_test.ts b/apps/web/src/components/settings/AliasRow_test.ts
index 424199066..30aba35c2 100644
--- a/apps/web/src/components/settings/AliasRow_test.ts
+++ b/apps/web/src/components/settings/AliasRow_test.ts
@@ -29,17 +29,19 @@ const aliasModel = (id: string): ControlPlaneModel => ({
 });
 
 describe('AliasRow', () => {
-  it('renders display_name when set; falls back to compose helper for single-target; alias name for multi-target', () => {
+  it('renders display_name when set; otherwise falls back to the alias id', () => {
     const withDisplay = mount(AliasRow, { props: { alias: alias({ name: 'a', display_name: 'My Friendly Name' }), models: [] } });
     expect(withDisplay.find('h4').text()).toBe('My Friendly Name');
 
+    // No operator-set display: title shows the alias id verbatim (same string
+    // as the mono pill next to it — the chat-playground idiom).
     const single = mount(AliasRow, {
       props: {
         alias: alias({ name: 'a', display_name: null, targets: [{ target_model_id: 'gpt-5', rules: { reasoning: { effort: 'low' } } as ChatAliasRules }] }),
         models: [],
       },
     });
-    expect(single.find('h4').text()).toBe('gpt-5 (low effort)');
+    expect(single.find('h4').text()).toBe('a');
 
     const multi = mount(AliasRow, {
       props: {
@@ -57,7 +59,7 @@ describe('AliasRow', () => {
     expect(multi.find('h4').text()).toBe('gizmo');
   });
 
-  it('formats the caption: name · Kind · N targets · Selection (and optional hidden suffix)', () => {
+  it('formats the caption: Kind · N targets · Selection (and optional hidden suffix); the alias id sits next to the title', () => {
     const w = mount(AliasRow, {
       props: {
         alias: alias({
@@ -72,12 +74,14 @@ describe('AliasRow', () => {
         models: [],
       },
     });
-    expect(w.find('p').text()).toBe('auto-review · Chat · 2 targets · Random · hidden from /v1/models');
+    expect(w.find('p').text()).toBe('Chat · 2 targets · Random · hidden from /v1/models');
+    // The alias id stamp sits on the title row, in mono.
+    expect(w.find('div.flex.items-baseline span').text()).toBe('auto-review');
 
     const sole = mount(AliasRow, {
       props: { alias: alias({ name: 'one' }), models: [] },
     });
-    expect(sole.find('p').text()).toBe('one · Chat · 1 target · First available');
+    expect(sole.find('p').text()).toBe('Chat · 1 target · First available');
   });
 
   it('emits edit on the pencil button and delete on the trash button', async () => {

From 4d34d7f7cd6695ba0a71c84756ed79bc00386213 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 02:03:38 +0800
Subject: [PATCH 080/170] fix(aliases): settings reads the raw catalog so
 shadow + kind warnings see real models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The alias-merge step on `/api/models` overwrites any real-model entry
whose id collides with an alias name — that's the right wire shape for
`/v1/models` (clients see one model per id, the alias wins) but it
broke two dashboard surfaces:

- The shadow warning could never fire: `computeShadowWarning` looks for
  a non-alias row whose id equals the alias name, and the alias-merge
  collapse removed that row.
- A target id equal to the alias's own id (`gpt-5.4` → `gpt-5.4`, the
  shadow-the-real-model seed pattern) read as "does not currently
  resolve" because the catalog row had been replaced by the alias
  entry the operator was editing.

Add `?aliases=false` to `/api/models` to skip the merge, and route
every settings surface — the page-level preload, the
`AliasesSettingsCard`, and the `AliasEditDialog` — through a
parallel `useRawModelsStore` that fetches with that query. The
default `useModelsStore` keeps its current shape; the merged catalog
still backs `/dashboard/models` and the other consumers that want the
externally-visible view.
---
 .../components/alias-edit/AliasEditDialog.vue |  4 +-
 .../alias-edit/AliasEditDialog_test.ts        |  2 +-
 .../settings/AliasesSettingsCard.vue          |  4 +-
 .../settings/AliasesSettingsCard_test.ts      |  2 +-
 apps/web/src/composables/useModels.ts         | 39 ++++++++++++++++---
 apps/web/src/pages/dashboard/settings.vue     |  6 +--
 .../src/control-plane/models/routes.ts        | 17 ++++----
 packages/gateway/src/control-plane/routes.ts  |  4 +-
 packages/gateway/src/control-plane/schemas.ts | 10 +++++
 9 files changed, 65 insertions(+), 23 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index c21e1167c..ebcd098cb 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -11,7 +11,7 @@ import { computeShadowWarning, realModelIdsOfKind } from './warnings.ts';
 import { callApi, useApi } from '../../api/client.ts';
 import type { AliasKind, AliasSelection, AliasTarget, ChatAliasRules, ModelAlias } from '../../api/types.ts';
 import { useModelAliases } from '../../composables/useModelAliases.ts';
-import { useModelsStore } from '../../composables/useModels.ts';
+import { useRawModelsStore } from '../../composables/useModels.ts';
 import { Button, Dialog, Input, Select, Switch } from '@floway-dev/ui';
 
 const open = defineModel<boolean>('open', { required: true });
@@ -27,7 +27,7 @@ const emit = defineEmits<{
 
 const api = useApi();
 const aliasesStore = useModelAliases();
-const modelsStore = useModelsStore();
+const modelsStore = useRawModelsStore();
 
 const mode = computed<'create' | 'edit'>(() => (props.record ? 'edit' : 'create'));
 
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
index 220257429..57cb613b6 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
+++ b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
@@ -17,7 +17,7 @@ vi.mock('../../composables/useModelAliases.ts', () => ({
   useModelAliases: () => ({ aliases: aliasesRef, loading: ref(false), error: ref<string | null>(null), load: async () => {} }),
 }));
 vi.mock('../../composables/useModels.ts', () => ({
-  useModelsStore: () => ({ models: modelsRef, loading: ref(false), error: ref<string | null>(null), load: async () => {} }),
+  useRawModelsStore: () => ({ models: modelsRef, loading: ref(false), error: ref<string | null>(null), load: async () => {} }),
 }));
 vi.mock('../../api/client.ts', () => ({
   useApi: () => ({
diff --git a/apps/web/src/components/settings/AliasesSettingsCard.vue b/apps/web/src/components/settings/AliasesSettingsCard.vue
index 9ac26f29f..93820da63 100644
--- a/apps/web/src/components/settings/AliasesSettingsCard.vue
+++ b/apps/web/src/components/settings/AliasesSettingsCard.vue
@@ -7,7 +7,7 @@ import AliasRow from './AliasRow.vue';
 import { callApi, useApi } from '../../api/client.ts';
 import type { ModelAlias } from '../../api/types.ts';
 import { useModelAliases } from '../../composables/useModelAliases.ts';
-import { useModelsStore } from '../../composables/useModels.ts';
+import { useRawModelsStore } from '../../composables/useModels.ts';
 import { Spinner } from '@floway-dev/ui';
 
 const emit = defineEmits<{
@@ -18,7 +18,7 @@ const emit = defineEmits<{
 
 const api = useApi();
 const aliasesStore = useModelAliases();
-const modelsStore = useModelsStore();
+const modelsStore = useRawModelsStore();
 
 const aliases = computed<ModelAlias[]>(() => aliasesStore.aliases.value ?? []);
 
diff --git a/apps/web/src/components/settings/AliasesSettingsCard_test.ts b/apps/web/src/components/settings/AliasesSettingsCard_test.ts
index 0cfb22fa9..687573215 100644
--- a/apps/web/src/components/settings/AliasesSettingsCard_test.ts
+++ b/apps/web/src/components/settings/AliasesSettingsCard_test.ts
@@ -13,7 +13,7 @@ vi.mock('../../composables/useModelAliases.ts', () => ({
   useModelAliases: () => ({ aliases: aliasesRef, loading: ref(false), error: aliasErrorRef, load: async () => {} }),
 }));
 vi.mock('../../composables/useModels.ts', () => ({
-  useModelsStore: () => ({ models: modelsRef, loading: ref(false), error: ref<string | null>(null), load: async () => {} }),
+  useRawModelsStore: () => ({ models: modelsRef, loading: ref(false), error: ref<string | null>(null), load: async () => {} }),
 }));
 vi.mock('../../api/client.ts', () => ({
   useApi: () => ({
diff --git a/apps/web/src/composables/useModels.ts b/apps/web/src/composables/useModels.ts
index 002506ccf..1b82f5c7e 100644
--- a/apps/web/src/composables/useModels.ts
+++ b/apps/web/src/composables/useModels.ts
@@ -3,22 +3,25 @@ import { ref } from 'vue';
 import { callApi, useApi } from '../api/client.ts';
 import type { ControlPlaneModel } from '../api/types.ts';
 
-const models = ref<ControlPlaneModel[] | null>(null);
-const loading = ref(false);
-const error = ref<string | null>(null);
-
 interface ModelsResponse {
   object: string;
   data: ControlPlaneModel[];
 }
 
+// Default `/api/models` view: real models + synthesised alias entries
+// merged into one list. Backs the /dashboard/models tab and any surface
+// that wants the gateway's externally-visible catalog.
+const models = ref<ControlPlaneModel[] | null>(null);
+const loading = ref(false);
+const error = ref<string | null>(null);
+
 export const useModelsStore = () => {
   const api = useApi();
 
   const load = async () => {
     loading.value = true;
     error.value = null;
-    const { data, error: err } = await callApi<ModelsResponse>(() => api.api.models.$get());
+    const { data, error: err } = await callApi<ModelsResponse>(() => api.api.models.$get({ query: {} }));
     loading.value = false;
     if (err) {
       error.value = err.message;
@@ -29,3 +32,29 @@ export const useModelsStore = () => {
 
   return { models, loading, error, load };
 };
+
+// Raw catalog view: real models only, no alias merging. Backs the alias
+// settings surfaces (edit dialog target combobox, shadow detection,
+// kind-mismatch warning) — those need to see the underlying catalog
+// without the alias-overwrites-real-id collapse the wire-shape applies.
+const rawModels = ref<ControlPlaneModel[] | null>(null);
+const rawLoading = ref(false);
+const rawError = ref<string | null>(null);
+
+export const useRawModelsStore = () => {
+  const api = useApi();
+
+  const load = async () => {
+    rawLoading.value = true;
+    rawError.value = null;
+    const { data, error: err } = await callApi<ModelsResponse>(() => api.api.models.$get({ query: { aliases: 'false' } }));
+    rawLoading.value = false;
+    if (err) {
+      rawError.value = err.message;
+      return;
+    }
+    rawModels.value = data?.data ?? [];
+  };
+
+  return { models: rawModels, loading: rawLoading, error: rawError, load };
+};
diff --git a/apps/web/src/pages/dashboard/settings.vue b/apps/web/src/pages/dashboard/settings.vue
index 1bc16e0f9..3b5321c81 100644
--- a/apps/web/src/pages/dashboard/settings.vue
+++ b/apps/web/src/pages/dashboard/settings.vue
@@ -15,7 +15,7 @@ import ProxiesSettingsCard from '../../components/settings/ProxiesSettingsCard.v
 import SearchConfigSection from '../../components/settings/SearchConfigSection.vue';
 import UpstreamsSettingsCard from '../../components/settings/UpstreamsSettingsCard.vue';
 import { useModelAliases } from '../../composables/useModelAliases.ts';
-import { useModelsStore } from '../../composables/useModels.ts';
+import { useRawModelsStore } from '../../composables/useModels.ts';
 import { useProxiesStore } from '../../composables/useProxies.ts';
 import { useRuntimeInfo } from '../../composables/useRuntimeInfo.ts';
 import { useUpstreamsStore } from '../../composables/useUpstreams.ts';
@@ -32,7 +32,7 @@ export const useSettingsPageData = defineBasicLoader(async () => {
   const [searchRes] = await Promise.all([
     callApi<SearchConfig>(() => api.api['search-config'].$get()),
     useUpstreamsStore().load(),
-    useModelsStore().load(),
+    useRawModelsStore().load(),
     useProxiesStore().load(),
     useModelAliases().load(),
     useRuntimeInfo().load(),
@@ -50,7 +50,7 @@ definePage({ meta: { requiresAdmin: true } });
 
 const router = useRouter();
 const { upstreams, loading: storeLoading, load } = useUpstreamsStore();
-const modelsStore = useModelsStore();
+const modelsStore = useRawModelsStore();
 const proxiesStore = useProxiesStore();
 const aliasesStore = useModelAliases();
 const { load: loadProxies } = proxiesStore;
diff --git a/packages/gateway/src/control-plane/models/routes.ts b/packages/gateway/src/control-plane/models/routes.ts
index 0487d497c..0f1bb90c3 100644
--- a/packages/gateway/src/control-plane/models/routes.ts
+++ b/packages/gateway/src/control-plane/models/routes.ts
@@ -35,6 +35,7 @@ const toControlPlaneModel = (model: ResolvedModel): ControlPlaneModel => ({
 
 export const controlPlaneModels = async (c: Context) => {
   try {
+    const includeAliases = c.req.query('aliases') !== 'false';
     // Scope the dashboard catalog to the caller's effective upstreams, exactly
     // like the data-plane /models endpoint. On a session request there is no
     // API key, so this resolves to the user's per-user upstream cap: a user who
@@ -46,14 +47,16 @@ export const controlPlaneModels = async (c: Context) => {
         fetcherForUpstream,
         backgroundSchedulerFromContext(c),
       ),
-      getRepo().modelAliases.list(),
+      includeAliases ? getRepo().modelAliases.list() : Promise.resolve([]),
     ]);
-    const data = mergeAliasesIntoModels({
-      realModels: models,
-      aliases,
-      mapReal: toControlPlaneModel,
-      wrapAlias: entry => ({ ...entry, upstreams: [] }),
-    });
+    const data = includeAliases
+      ? mergeAliasesIntoModels({
+          realModels: models,
+          aliases,
+          mapReal: toControlPlaneModel,
+          wrapAlias: entry => ({ ...entry, upstreams: [] }),
+        })
+      : models.map(toControlPlaneModel);
     const response: ControlPlaneModelsResponse = {
       object: 'list',
       has_more: false,
diff --git a/packages/gateway/src/control-plane/routes.ts b/packages/gateway/src/control-plane/routes.ts
index c5aa25202..60fb9404c 100644
--- a/packages/gateway/src/control-plane/routes.ts
+++ b/packages/gateway/src/control-plane/routes.ts
@@ -9,7 +9,7 @@ import { createAlias, deleteAlias, listAliases, updateAlias } from './model-alia
 import { controlPlaneModels } from './models/routes.ts';
 import { performanceOverview, performanceTelemetry } from './performance/routes.ts';
 import { createProxy, deleteProxy, listAllBackoffs, listProxies, listProxyBackoffs, resetProxyBackoffs, testProxy, updateProxy } from './proxies/routes.ts';
-import { authLoginBody, changeOwnPasswordBody, claudeCodeAuthorizeUrlBody, claudeCodeImportBody, claudeCodeProbeQuotaBody, claudeCodeRefreshNowBody, claudeCodeReimportBody, claudeCodeSetupTokenImportBody, claudeCodeSetupTokenReimportBody, codexAuthorizeUrlBody, codexImportBody, codexRefreshNowBody, codexReimportBody, copilotAuthPollBody, createAliasBody, createKeyBody, createProxyBody, createUpstreamBody, createUserBody, exportQuery, fetchModelsBody, importBody, performanceQuery, resetBackoffBody, searchConfigSchema, searchUsageQuery, testProxyBody, tokenUsageQuery, updateAliasBody, updateKeyBody, updateProxyBody, updateUpstreamBody, updateUserBody } from './schemas.ts';
+import { authLoginBody, changeOwnPasswordBody, claudeCodeAuthorizeUrlBody, claudeCodeImportBody, claudeCodeProbeQuotaBody, claudeCodeRefreshNowBody, claudeCodeReimportBody, claudeCodeSetupTokenImportBody, claudeCodeSetupTokenReimportBody, codexAuthorizeUrlBody, codexImportBody, codexRefreshNowBody, codexReimportBody, copilotAuthPollBody, createAliasBody, createKeyBody, createProxyBody, createUpstreamBody, createUserBody, exportQuery, fetchModelsBody, importBody, modelsQuery, performanceQuery, resetBackoffBody, searchConfigSchema, searchUsageQuery, testProxyBody, tokenUsageQuery, updateAliasBody, updateKeyBody, updateProxyBody, updateUpstreamBody, updateUserBody } from './schemas.ts';
 import { getSearchConfigRoute, putSearchConfigRoute, testSearchConfigRoute } from './search-config/routes.ts';
 import { searchUsage } from './search-usage/routes.ts';
 import { tokenUsage } from './token-usage/routes.ts';
@@ -49,7 +49,7 @@ export const controlPlaneRoutes = new Hono<{ Variables: AuthVars }>()
   .get('/api/search-usage', zValidator('query', searchUsageQuery), searchUsage)
   .get('/api/performance', zValidator('query', performanceQuery), performanceTelemetry)
   .get('/api/performance/overview', zValidator('query', performanceQuery), performanceOverview)
-  .get('/api/models', controlPlaneModels)
+  .get('/api/models', zValidator('query', modelsQuery), controlPlaneModels)
   // Minimal upstream picker exposed to non-admin users so they can scope a key
   // to specific upstreams. Returns id/name/provider/enabled only — no config,
   // no flag overrides, no model lists. Server-side validation (api-keys'
diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
index 99f73bdb4..a4f76397e 100644
--- a/packages/gateway/src/control-plane/schemas.ts
+++ b/packages/gateway/src/control-plane/schemas.ts
@@ -706,6 +706,16 @@ const usageBaseQuery = {
 };
 
 export const tokenUsageQuery = z.object(usageBaseQuery);
+
+// Dashboard `/api/models` accepts an explicit `aliases=false` to skip the
+// alias-merge pass. Default behavior (omitted or `aliases=true`) keeps the
+// merged catalog the dashboard's Models tab renders; settings surfaces
+// that need the raw real-model set (alias edit dialog, shadow detection)
+// pass `aliases=false`.
+export const modelsQuery = z.object({
+  aliases: z.enum(['true', 'false']).optional(),
+});
+
 export const searchUsageQuery = z.object({
   ...usageBaseQuery,
   provider: z.string().optional(),

From 59b0b8b956da0e5e52ff45d2789dae4164302bae Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 02:29:58 +0800
Subject: [PATCH 081/170] style(aliases/web): drop bg + rounded from dialog
 footer's /v1/models code chip

AliasRow's inline `/v1/models` reads as plain mono text; the dialog
footer chipped the same string with bg + rounded + extra padding,
which made the same identifier look like two different concepts
across the two surfaces. Align the dialog footer to AliasRow.
---
 apps/web/src/components/alias-edit/AliasEditDialog.vue | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index ebcd098cb..614a3ea73 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -233,7 +233,7 @@ const KIND_OPTIONS: { value: AliasKind; label: string }[] = [
       <div class="flex flex-wrap items-center justify-between gap-3 border-t border-white/[0.06] pt-5">
         <label class="flex items-center gap-2">
           <Switch v-model="visibleInModelsList" />
-          <span class="text-sm text-gray-300">Visible in <code class="rounded bg-white/[0.04] px-1 font-mono text-xs">/v1/models</code></span>
+          <span class="text-sm text-gray-300">Visible in <code class="font-mono">/v1/models</code></span>
         </label>
         <div class="flex items-center gap-2">
           <Button variant="secondary" :disabled="saving" @click="open = false">Cancel</Button>

From 6a5fbaeb2d65df3d6ef51b98d229f5b45cdac4d4 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 02:39:24 +0800
Subject: [PATCH 082/170] style(aliases/web): Models heading reads sentence
 case + hint copy

The uppercase, wide-tracked "MODELS" heading read like a system label;
replace it with a sentence-case heading and a one-line hint ("Click a
model id to edit.") under it, so an operator who hasn't expanded a row
sees that the borderless combobox in the row header is the editable
surface.
---
 apps/web/src/components/alias-edit/AliasEditDialog.vue | 7 +++++--
 1 file changed, 5 insertions(+), 2 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index 614a3ea73..1fd73bb88 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -198,8 +198,11 @@ const KIND_OPTIONS: { value: AliasKind; label: string }[] = [
       </div>
 
       <div>
-        <div class="mb-2 flex items-center justify-between">
-          <h4 class="text-xs font-semibold uppercase tracking-wide text-gray-500">Models</h4>
+        <div class="mb-2 flex items-start justify-between gap-3">
+          <div class="min-w-0">
+            <h4 class="text-sm font-semibold text-gray-300">Models</h4>
+            <p class="mt-0.5 text-xs text-gray-500">Click a model id to edit.</p>
+          </div>
           <Button variant="secondary" size="sm" @click="addTarget">Add target</Button>
         </div>
         <div class="space-y-2">

From 4dee8a0b87091100a5a89ade2b2d1c46cd072c99 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 03:05:33 +0800
Subject: [PATCH 083/170] feat(aliases): add announced_metadata_json column to
 model_aliases

Adds the operator-override slot for an alias's announced /v1/models
payload. NULL keeps the automatic rule-aware intersection across the
alias's targets; a non-null JSON blob carries a sparse AnnouncedMetadata
where any sub-field the operator omits falls back to the automatic
computation.
---
 .../gateway/migrations/0047_alias_announced_metadata.sql    | 6 ++++++
 1 file changed, 6 insertions(+)
 create mode 100644 packages/gateway/migrations/0047_alias_announced_metadata.sql

diff --git a/packages/gateway/migrations/0047_alias_announced_metadata.sql b/packages/gateway/migrations/0047_alias_announced_metadata.sql
new file mode 100644
index 000000000..9f29f6290
--- /dev/null
+++ b/packages/gateway/migrations/0047_alias_announced_metadata.sql
@@ -0,0 +1,6 @@
+-- Operator-set override for an alias's announced metadata payload — the
+-- `limits` + `chat.*` block surfaced on /v1/models. NULL keeps the
+-- automatic, rule-aware intersection across the alias's targets; a
+-- non-null value is a JSON-encoded AnnouncedMetadata, sparse so any
+-- omitted sub-field falls back to the automatic computation.
+ALTER TABLE model_aliases ADD COLUMN announced_metadata_json TEXT;

From 2dbc990f43e9d3864e1af10f175a83fde9342b5a Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 03:14:02 +0800
Subject: [PATCH 084/170] feat(aliases): wire announced_metadata through repo +
 zod + DTOs

Backend plumbing for the operator's announced-metadata override:

- @floway-dev/protocols: AnnouncedMetadata wire type
  ({ limits?: PublicModelLimits, chat?: ChatModelInfo }); ModelAlias
  gains announced_metadata: AnnouncedMetadata | null. PublicModelLimits
  factored out of PublicModel's inline shape so the override type can
  reuse the same declaration.
- ModelAliasRecord gains announcedMetadata: AnnouncedMetadata | null;
  null means "compute automatically at listing time".
- SQL + memory repos: read/write announced_metadata_json with
  JSON.parse / JSON.stringify; the SQL row carries the new column in
  every INSERT/UPDATE/SELECT, and the memory store deep-clones the
  payload alongside targets.
- recordToWire / wireToRecord round-trip the field.
- createAliasBody / updateAliasBody accept announced_metadata via a
  reusable announcedMetadataSchema; a shared limitsSchema replaces the
  inline upstream-model limits shape so both surfaces validate the
  same vocabulary.
- Tests across repo / serialize / routes / alias-listing / resolve /
  serve get announcedMetadata in their fixtures; serialize_test and
  repo_test gain explicit round-trip cases for a populated override.
---
 .../components/alias-edit/AliasEditDialog.vue |  9 ++++---
 .../alias-edit/AliasEditDialog_test.ts        |  1 +
 .../src/components/settings/AliasRow_test.ts  |  1 +
 .../settings/AliasesSettingsCard_test.ts      |  1 +
 .../control-plane/model-aliases/repo_test.ts  | 17 ++++++++++++
 .../model-aliases/routes_test.ts              |  3 +++
 .../control-plane/model-aliases/serialize.ts  |  5 +++-
 .../model-aliases/serialize_test.ts           | 22 ++++++++++++++++
 packages/gateway/src/control-plane/schemas.ts | 26 +++++++++++++++----
 .../data-plane/model-aliases/resolve_test.ts  |  1 +
 .../data-plane/models/alias-listing_test.ts   |  1 +
 .../src/data-plane/models/serve_test.ts       |  3 +++
 packages/gateway/src/repo/memory.ts           |  1 +
 packages/gateway/src/repo/sql.ts              | 26 ++++++++++++++++---
 packages/gateway/src/repo/types.ts            |  7 ++++-
 packages/protocols/src/common/aliases.ts      | 18 ++++++++++++-
 packages/protocols/src/common/models.ts       | 15 +++++++----
 17 files changed, 137 insertions(+), 20 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index 1fd73bb88..f9ec19e66 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -110,9 +110,11 @@ const save = async () => {
 
   const trimmedName = aliasName.value.trim();
   const trimmedDisplay = displayName.value.trim();
-  // The Hono RPC body type infers each target's `rules` as the loose
-  // `Record<string, unknown>` from the Zod schema, so build the payload
-  // with that loose shape and cast each target's rules to match.
+  // The Hono RPC body type widens the per-target rules to the loose
+  // `Record<string, unknown>` it gets from the Zod schema, and likewise
+  // widens `announced_metadata` to its zod-inferred shape (mutable
+  // modality arrays). Cast through the loose shapes so the typed body
+  // matches what the schema accepts.
   const body = {
     name: trimmedName,
     kind: kind.value,
@@ -123,6 +125,7 @@ const save = async () => {
       target_model_id: t.target_model_id.trim(),
       rules: t.rules as Record<string, unknown>,
     })),
+    announced_metadata: (props.record?.announced_metadata ?? null) as Record<string, unknown> | null,
     sort_order: props.record?.sort_order ?? 0,
   };
 
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
index 57cb613b6..244268abe 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
+++ b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
@@ -51,6 +51,7 @@ const baseAlias = (over: Partial<ModelAlias> & { name: string }): ModelAlias =>
   display_name: null,
   visible_in_models_list: true,
   targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }],
+  announced_metadata: null,
   sort_order: 0,
   created_at: '2026-01-01T00:00:00Z',
   updated_at: '2026-01-01T00:00:00Z',
diff --git a/apps/web/src/components/settings/AliasRow_test.ts b/apps/web/src/components/settings/AliasRow_test.ts
index 30aba35c2..c527dc5c2 100644
--- a/apps/web/src/components/settings/AliasRow_test.ts
+++ b/apps/web/src/components/settings/AliasRow_test.ts
@@ -10,6 +10,7 @@ const alias = (over: Partial<ModelAlias> & { name: string }): ModelAlias => ({
   display_name: null,
   visible_in_models_list: true,
   targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }],
+  announced_metadata: null,
   sort_order: 0,
   created_at: '2026-01-01T00:00:00Z',
   updated_at: '2026-01-01T00:00:00Z',
diff --git a/apps/web/src/components/settings/AliasesSettingsCard_test.ts b/apps/web/src/components/settings/AliasesSettingsCard_test.ts
index 687573215..84eec2f84 100644
--- a/apps/web/src/components/settings/AliasesSettingsCard_test.ts
+++ b/apps/web/src/components/settings/AliasesSettingsCard_test.ts
@@ -39,6 +39,7 @@ const baseAlias = (over: Partial<ModelAlias> & { name: string }): ModelAlias =>
   display_name: null,
   visible_in_models_list: true,
   targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }],
+  announced_metadata: null,
   sort_order: 0,
   created_at: '2026-01-01T00:00:00Z',
   updated_at: '2026-01-01T00:00:00Z',
diff --git a/packages/gateway/src/control-plane/model-aliases/repo_test.ts b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
index 8a56e5f96..5bd380b95 100644
--- a/packages/gateway/src/control-plane/model-aliases/repo_test.ts
+++ b/packages/gateway/src/control-plane/model-aliases/repo_test.ts
@@ -24,6 +24,7 @@ const aliasFixture = (overrides: Partial<ModelAliasRecord> = {}): ModelAliasReco
   targets: [
     { target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } },
   ],
+  announcedMetadata: null,
   sortOrder: 0,
   createdAt: '2026-06-26T00:00:00.000Z',
   updatedAt: '2026-06-26T00:00:00.000Z',
@@ -143,6 +144,22 @@ for (const [backend, makeRepo] of REPO_BACKENDS) {
     assertEquals(row?.visibleInModelsList, false);
   });
 
+  test(`[${backend}] announcedMetadata round-trips through JSON column`, async () => {
+    const repo = await freshRepo();
+    await repo.modelAliases.insert(aliasFixture({
+      name: 'overridden',
+      announcedMetadata: {
+        limits: { max_output_tokens: 8192 },
+        chat: { modalities: { input: ['text'], output: ['text'] } },
+      },
+    }));
+    const row = await repo.modelAliases.getByName('overridden');
+    assertEquals(row?.announcedMetadata, {
+      limits: { max_output_tokens: 8192 },
+      chat: { modalities: { input: ['text'], output: ['text'] } },
+    });
+  });
+
   test(`[${backend}] deleteAll wipes every row`, async () => {
     const repo = await freshRepo();
     await repo.modelAliases.insert(aliasFixture({ name: 'a' }));
diff --git a/packages/gateway/src/control-plane/model-aliases/routes_test.ts b/packages/gateway/src/control-plane/model-aliases/routes_test.ts
index 328cb88ed..9122344eb 100644
--- a/packages/gateway/src/control-plane/model-aliases/routes_test.ts
+++ b/packages/gateway/src/control-plane/model-aliases/routes_test.ts
@@ -28,6 +28,7 @@ const baseBody = (overrides: Record<string, unknown> = {}) => ({
   targets: [
     { target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } },
   ],
+  announced_metadata: null,
   ...overrides,
 });
 
@@ -37,11 +38,13 @@ test('GET /api/aliases lists every row in sort order', async () => {
   await repo.modelAliases.insert({
     name: 'b', kind: 'chat', selection: 'random', displayName: null, visibleInModelsList: true,
     targets: [{ target_model_id: 'm1', rules: {} }],
+    announcedMetadata: null,
     sortOrder: 1, createdAt: '2026-01-01T00:00:00.000Z', updatedAt: '2026-01-01T00:00:00.000Z',
   });
   await repo.modelAliases.insert({
     name: 'a', kind: 'chat', selection: 'random', displayName: null, visibleInModelsList: true,
     targets: [{ target_model_id: 'm2', rules: {} }],
+    announcedMetadata: null,
     sortOrder: 0, createdAt: '2026-01-02T00:00:00.000Z', updatedAt: '2026-01-02T00:00:00.000Z',
   });
 
diff --git a/packages/gateway/src/control-plane/model-aliases/serialize.ts b/packages/gateway/src/control-plane/model-aliases/serialize.ts
index df22548b4..13d28e9df 100644
--- a/packages/gateway/src/control-plane/model-aliases/serialize.ts
+++ b/packages/gateway/src/control-plane/model-aliases/serialize.ts
@@ -2,7 +2,7 @@
 // shape (`ModelAlias`) lives in `@floway-dev/protocols/common`.
 
 import type { ModelAliasRecord } from '../../repo/types.ts';
-import type { AliasKind, AliasSelection, AliasTarget, ModelAlias } from '@floway-dev/protocols/common';
+import type { AliasKind, AliasSelection, AliasTarget, AnnouncedMetadata, ModelAlias } from '@floway-dev/protocols/common';
 
 export const recordToWire = (record: ModelAliasRecord): ModelAlias => ({
   name: record.name,
@@ -11,6 +11,7 @@ export const recordToWire = (record: ModelAliasRecord): ModelAlias => ({
   display_name: record.displayName,
   visible_in_models_list: record.visibleInModelsList,
   targets: record.targets,
+  announced_metadata: record.announcedMetadata,
   sort_order: record.sortOrder,
   created_at: record.createdAt,
   updated_at: record.updatedAt,
@@ -25,6 +26,7 @@ export interface ModelAliasWireInput {
   display_name: string | null;
   visible_in_models_list: boolean;
   targets: AliasTarget[];
+  announced_metadata: AnnouncedMetadata | null;
   sort_order?: number;
 }
 
@@ -38,6 +40,7 @@ export const wireToRecord = (
   displayName: wire.display_name,
   visibleInModelsList: wire.visible_in_models_list,
   targets: wire.targets,
+  announcedMetadata: wire.announced_metadata,
   sortOrder: meta.sortOrder,
   createdAt: meta.createdAt,
   updatedAt: meta.updatedAt,
diff --git a/packages/gateway/src/control-plane/model-aliases/serialize_test.ts b/packages/gateway/src/control-plane/model-aliases/serialize_test.ts
index 72be080ca..561af2fcd 100644
--- a/packages/gateway/src/control-plane/model-aliases/serialize_test.ts
+++ b/packages/gateway/src/control-plane/model-aliases/serialize_test.ts
@@ -14,6 +14,7 @@ const record: ModelAliasRecord = {
     { target_model_id: 'codex-auto-review', rules: {} },
     { target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } },
   ],
+  announcedMetadata: null,
   sortOrder: 3,
   createdAt: '2026-06-26T00:00:00.000Z',
   updatedAt: '2026-06-26T12:00:00.000Z',
@@ -60,3 +61,24 @@ test('wireToRecord preserves a null display_name', () => {
   );
   assertEquals(built.displayName, null);
 });
+
+test('announced_metadata round-trips a populated override', () => {
+  const withOverride: ModelAliasRecord = {
+    ...record,
+    announcedMetadata: {
+      limits: { max_output_tokens: 8192 },
+      chat: { modalities: { input: ['text'], output: ['text'] } },
+    },
+  };
+  const wire = recordToWire(withOverride);
+  assertEquals(wire.announced_metadata, {
+    limits: { max_output_tokens: 8192 },
+    chat: { modalities: { input: ['text'], output: ['text'] } },
+  });
+  const roundTripped = wireToRecord(wire, {
+    sortOrder: wire.sort_order,
+    createdAt: wire.created_at,
+    updatedAt: wire.updated_at,
+  });
+  assertEquals(roundTripped, withOverride);
+});
diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
index a4f76397e..29288c4af 100644
--- a/packages/gateway/src/control-plane/schemas.ts
+++ b/packages/gateway/src/control-plane/schemas.ts
@@ -122,6 +122,14 @@ const chatSchema = z.object({
   reasoning: reasoningSchema.optional(),
 });
 
+// Shared limits shape used by both the upstream-model schema and the
+// alias's announced-metadata override.
+const limitsSchema = z.object({
+  max_context_window_tokens: z.number().optional(),
+  max_prompt_tokens: z.number().optional(),
+  max_output_tokens: z.number().optional(),
+});
+
 // Mirrors the runtime UpstreamModelConfig in @floway-dev/provider.
 // Azure and custom upstreams share this per-model entry; the canonical
 // per-model endpoint validation lives in the runtime validator.
@@ -146,11 +154,7 @@ const upstreamModelSchema = z.object({
     enabled: z.boolean(),
     values: flagOverrideValuesSchema,
   }).optional(),
-  limits: z.object({
-    max_context_window_tokens: z.number().optional(),
-    max_prompt_tokens: z.number().optional(),
-    max_output_tokens: z.number().optional(),
-  }).optional(),
+  limits: limitsSchema.optional(),
   chat: chatSchema.optional(),
 }).refine(
   m => m.chat === undefined || m.kind === undefined || m.kind === 'chat',
@@ -623,6 +627,17 @@ const aliasTargetSchema = z.object({
   rules: z.record(z.string(), z.unknown()),
 });
 
+// Operator override for an alias's announced /v1/models payload. Sparse —
+// both sub-fields are independently optional, and the alias-listing pipeline
+// falls back to the rule-aware automatic computation for any sub-field the
+// operator did not provide. `chatSchema` and `limitsSchema` are the same
+// shapes the upstream-model surface validates, so the override carries the
+// catalog's full vocabulary.
+const announcedMetadataSchema = z.object({
+  limits: limitsSchema.optional(),
+  chat: chatSchema.optional(),
+});
+
 const aliasBaseShape = {
   name: z.string().min(1),
   kind: z.enum(['chat', 'embedding', 'image']),
@@ -630,6 +645,7 @@ const aliasBaseShape = {
   display_name: z.string().min(1).nullable(),
   visible_in_models_list: z.boolean(),
   targets: z.array(aliasTargetSchema).min(1),
+  announced_metadata: announcedMetadataSchema.nullable(),
   sort_order: z.number().int().optional(),
 };
 
diff --git a/packages/gateway/src/data-plane/model-aliases/resolve_test.ts b/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
index cc081660a..cb80fe550 100644
--- a/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
+++ b/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
@@ -52,6 +52,7 @@ const aliasRecord = (overrides: Partial<ModelAliasRecord> = {}): ModelAliasRecor
   displayName: null,
   visibleInModelsList: true,
   targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } }],
+  announcedMetadata: null,
   sortOrder: 0,
   createdAt: '2026-06-26T00:00:00.000Z',
   updatedAt: '2026-06-26T00:00:00.000Z',
diff --git a/packages/gateway/src/data-plane/models/alias-listing_test.ts b/packages/gateway/src/data-plane/models/alias-listing_test.ts
index 99ef91e73..9a158b020 100644
--- a/packages/gateway/src/data-plane/models/alias-listing_test.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing_test.ts
@@ -11,6 +11,7 @@ const aliasFixture = (overrides: Partial<ModelAliasRecord> = {}): ModelAliasReco
   displayName: null,
   visibleInModelsList: true,
   targets: [{ target_model_id: 'gpt-5.4', rules: {} }],
+  announcedMetadata: null,
   sortOrder: 0,
   createdAt: '2026-06-26T00:00:00.000Z',
   updatedAt: '2026-06-26T00:00:00.000Z',
diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index 2be79e4c5..94ca48fd3 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -611,6 +611,7 @@ test('/v1/models appends visible aliases with their aliasedFrom block and folds
     displayName: null,
     visibleInModelsList: true,
     targets: [{ target_model_id: 'gpt-4o', rules: { reasoning: { effort: 'low' } } }],
+    announcedMetadata: null,
     sortOrder: 1,
     createdAt: '2026-06-26T00:00:00.000Z',
     updatedAt: '2026-06-26T00:00:00.000Z',
@@ -622,6 +623,7 @@ test('/v1/models appends visible aliases with their aliasedFrom block and folds
     displayName: 'Operator Fast',
     visibleInModelsList: true,
     targets: [{ target_model_id: 'gpt-4o-mini', rules: {} }],
+    announcedMetadata: null,
     sortOrder: 0,
     createdAt: '2026-06-26T00:00:00.000Z',
     updatedAt: '2026-06-26T00:00:00.000Z',
@@ -633,6 +635,7 @@ test('/v1/models appends visible aliases with their aliasedFrom block and folds
     displayName: null,
     visibleInModelsList: false,
     targets: [{ target_model_id: 'gpt-4o', rules: {} }],
+    announcedMetadata: null,
     sortOrder: 2,
     createdAt: '2026-06-26T00:00:00.000Z',
     updatedAt: '2026-06-26T00:00:00.000Z',
diff --git a/packages/gateway/src/repo/memory.ts b/packages/gateway/src/repo/memory.ts
index edbecb390..65738df39 100644
--- a/packages/gateway/src/repo/memory.ts
+++ b/packages/gateway/src/repo/memory.ts
@@ -889,6 +889,7 @@ const cloneModelAliasRecord = (record: ModelAliasRecord): ModelAliasRecord => ({
   // Deep-clone so a caller's mutation of the returned record never leaks
   // back into the store.
   targets: structuredClone(record.targets),
+  announcedMetadata: record.announcedMetadata === null ? null : structuredClone(record.announcedMetadata),
 });
 
 class MemoryModelAliasesRepo implements ModelAliasesRepo {
diff --git a/packages/gateway/src/repo/sql.ts b/packages/gateway/src/repo/sql.ts
index b9cfd93fc..623a63cd1 100644
--- a/packages/gateway/src/repo/sql.ts
+++ b/packages/gateway/src/repo/sql.ts
@@ -40,7 +40,7 @@ import { latencyBucketForMs } from '../shared/performance-histogram.ts';
 import { generateSessionToken } from '../shared/session-tokens.ts';
 import { assertWebSearchProviderName } from '../shared/web-search-providers.ts';
 import type { SqlDatabase, SqlPreparedStatement, SqlResult } from '@floway-dev/platform';
-import { BILLING_DIMENSIONS, type AliasKind, type AliasSelection, type AliasTarget, type BillingDimension, type ModelPricing, resolveEffectivePricing, unitPriceForDimension } from '@floway-dev/protocols/common';
+import { BILLING_DIMENSIONS, type AliasKind, type AliasSelection, type AliasTarget, type AnnouncedMetadata, type BillingDimension, type ModelPricing, resolveEffectivePricing, unitPriceForDimension } from '@floway-dev/protocols/common';
 import type { ProxyFallbackEntry, ModelPrefixConfig, UpstreamModel, UpstreamProviderKind, UpstreamRecord } from '@floway-dev/provider';
 import { normalizeModelPrefix } from '@floway-dev/provider';
 
@@ -1594,12 +1594,13 @@ interface ModelAliasRow {
   display_name: string | null;
   visible_in_models_list: number;
   targets: string;
+  announced_metadata_json: string | null;
   sort_order: number;
   created_at: string;
   updated_at: string;
 }
 
-const MODEL_ALIAS_COLUMNS = 'name, kind, selection, display_name, visible_in_models_list, targets, sort_order, created_at, updated_at';
+const MODEL_ALIAS_COLUMNS = 'name, kind, selection, display_name, visible_in_models_list, targets, announced_metadata_json, sort_order, created_at, updated_at';
 
 const parseAliasTargets = (raw: string, name: string): AliasTarget[] => {
   let parsed: unknown;
@@ -1612,6 +1613,15 @@ const parseAliasTargets = (raw: string, name: string): AliasTarget[] => {
   return parsed as AliasTarget[];
 };
 
+const parseAnnouncedMetadata = (raw: string | null, name: string): AnnouncedMetadata | null => {
+  if (raw === null) return null; // null input maps straight to a null override
+  try {
+    return JSON.parse(raw) as AnnouncedMetadata; // NOTE(review): unchecked cast — the parsed shape is not validated here
+  } catch (cause) {
+    throw new Error(`model_aliases.announced_metadata_json is malformed for ${name}`, { cause }); // wraps the JSON.parse failure, kept as `cause`
+  }
+};
+
 const toModelAliasRecord = (row: ModelAliasRow): ModelAliasRecord => ({
   name: row.name,
   kind: row.kind as AliasKind,
@@ -1619,11 +1629,15 @@ const toModelAliasRecord = (row: ModelAliasRow): ModelAliasRecord => ({
   displayName: row.display_name,
   visibleInModelsList: row.visible_in_models_list !== 0,
   targets: parseAliasTargets(row.targets, row.name),
+  announcedMetadata: parseAnnouncedMetadata(row.announced_metadata_json, row.name),
   sortOrder: row.sort_order,
   createdAt: row.created_at,
   updatedAt: row.updated_at,
 });
 
+const announcedMetadataBind = (value: AnnouncedMetadata | null): string | null =>
+  value === null ? null : JSON.stringify(value);
+
 class SqlModelAliasesRepo implements ModelAliasesRepo {
   constructor(private db: SqlDatabase) {}
 
@@ -1645,7 +1659,7 @@ class SqlModelAliasesRepo implements ModelAliasesRepo {
   async insert(record: ModelAliasRecord): Promise<void> {
     await this.db
       .prepare(
-        `INSERT INTO model_aliases (${MODEL_ALIAS_COLUMNS}) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`,
+        `INSERT INTO model_aliases (${MODEL_ALIAS_COLUMNS}) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`,
       )
       .bind(
         record.name,
@@ -1654,6 +1668,7 @@ class SqlModelAliasesRepo implements ModelAliasesRepo {
         record.displayName,
         record.visibleInModelsList ? 1 : 0,
         JSON.stringify(record.targets),
+        announcedMetadataBind(record.announcedMetadata),
         record.sortOrder,
         record.createdAt,
         record.updatedAt,
@@ -1671,6 +1686,7 @@ class SqlModelAliasesRepo implements ModelAliasesRepo {
              display_name = ?,
              visible_in_models_list = ?,
              targets = ?,
+             announced_metadata_json = ?,
              sort_order = ?,
              created_at = ?,
              updated_at = ?
@@ -1682,6 +1698,7 @@ class SqlModelAliasesRepo implements ModelAliasesRepo {
           record.displayName,
           record.visibleInModelsList ? 1 : 0,
           JSON.stringify(record.targets),
+          announcedMetadataBind(record.announcedMetadata),
           record.sortOrder,
           record.createdAt,
           record.updatedAt,
@@ -1701,7 +1718,7 @@ class SqlModelAliasesRepo implements ModelAliasesRepo {
 
     await runStatements(this.db, [
       this.db
-        .prepare(`INSERT INTO model_aliases (${MODEL_ALIAS_COLUMNS}) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)`)
+        .prepare(`INSERT INTO model_aliases (${MODEL_ALIAS_COLUMNS}) VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)`)
         .bind(
           record.name,
           record.kind,
@@ -1709,6 +1726,7 @@ class SqlModelAliasesRepo implements ModelAliasesRepo {
           record.displayName,
           record.visibleInModelsList ? 1 : 0,
           JSON.stringify(record.targets),
+          announcedMetadataBind(record.announcedMetadata),
           record.sortOrder,
           record.createdAt,
           record.updatedAt,
diff --git a/packages/gateway/src/repo/types.ts b/packages/gateway/src/repo/types.ts
index 3ca32e5c0..1bd99dd62 100644
--- a/packages/gateway/src/repo/types.ts
+++ b/packages/gateway/src/repo/types.ts
@@ -1,6 +1,6 @@
 import type { HistogramBucket } from '../shared/performance-histogram.ts';
 import type { WebSearchProviderName } from '../shared/web-search-providers.ts';
-import type { AliasKind, AliasSelection, AliasTarget, BillingDimension, ModelPricing } from '@floway-dev/protocols/common';
+import type { AliasKind, AliasSelection, AliasTarget, AnnouncedMetadata, BillingDimension, ModelPricing } from '@floway-dev/protocols/common';
 import type { UpstreamModel, UpstreamRecord } from '@floway-dev/provider';
 
 export interface ApiKey {
@@ -277,6 +277,11 @@ export interface ModelAliasRecord {
   // Order is meaningful for selection=first-available; preserved (but
   // ignored) for selection=random.
   targets: AliasTarget[];
+  // null = compute the announced /v1/models payload automatically from
+  // targets + rules at listing time. A non-null payload is sparse — any
+  // sub-field the operator did not override falls back to the automatic
+  // computation.
+  announcedMetadata: AnnouncedMetadata | null;
   sortOrder: number;
   createdAt: string;
   updatedAt: string;
diff --git a/packages/protocols/src/common/aliases.ts b/packages/protocols/src/common/aliases.ts
index 60e43d5fc..186880ca4 100644
--- a/packages/protocols/src/common/aliases.ts
+++ b/packages/protocols/src/common/aliases.ts
@@ -9,6 +9,8 @@
 // Resolution runs above prefix routing and never re-enters itself, which
 // makes recursive aliasing impossible by construction.
 
+import type { ChatModelInfo, PublicModelLimits } from './models.ts';
+
 // Endpoint family the alias serves. An alias belongs to exactly one kind;
 // rules are only allowed when the kind admits them (today that is `chat`).
 export type AliasKind = 'chat' | 'embedding' | 'image';
@@ -71,8 +73,21 @@ export interface AliasTarget {
   rules: AliasRules;
 }
 
+// Operator-set override for the alias's announced /v1/models payload —
+// the `limits` + `chat.*` block the listing surfaces to clients. Sparse:
+// any sub-field the operator leaves unset falls back to the rule-aware
+// intersection across the alias's available targets. `kind` and the
+// supported endpoint set are not part of this payload; they follow from
+// the alias row (`kind`) and the target intersection (endpoints).
+export interface AnnouncedMetadata {
+  limits?: PublicModelLimits;
+  chat?: ChatModelInfo;
+}
+
 // Wire DTO returned by `/api/aliases`. snake_case to match the rest of the
-// control plane; `display_name === null` means "derive at render time".
+// control plane; `display_name === null` means "derive at render time";
+// `announced_metadata === null` means "compute the announced payload from
+// targets + rules at listing time".
 export interface ModelAlias {
   name: string;
   kind: AliasKind;
@@ -80,6 +95,7 @@ export interface ModelAlias {
   display_name: string | null;
   visible_in_models_list: boolean;
   targets: AliasTarget[];
+  announced_metadata: AnnouncedMetadata | null;
   sort_order: number;
   created_at: string;
   updated_at: string;
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index d12ab4d8b..22cce94ab 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -128,6 +128,15 @@ export interface PublicModelAliasedFrom {
   targets: AliasTarget[];
 }
 
+// Operator-set context-window / prompt / output token limits the gateway
+// surfaces on /v1/models. Pure data — every field is optional so a
+// partially-known upstream still produces a sensible row.
+export interface PublicModelLimits {
+  max_output_tokens?: number;
+  max_context_window_tokens?: number;
+  max_prompt_tokens?: number;
+}
+
 // Public DTO served at /v1/models and /models. Single superset shape — OpenAI's
 // and Anthropic's /models field names do not overlap, so one payload satisfies
 // both client shapes.
@@ -142,11 +151,7 @@ export interface PublicModel {
   display_name: string;
   created_at?: string;
   // Non-standard extra fields below.
-  limits: {
-    max_output_tokens?: number;
-    max_context_window_tokens?: number;
-    max_prompt_tokens?: number;
-  };
+  limits: PublicModelLimits;
   kind: ModelKind;
   cost?: ModelPricing;
   chat?: ChatModelInfo;

From 7cd1fe5257d5f2c3d73fd2e7edebed361b5da9d3 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 03:17:06 +0800
Subject: [PATCH 085/170] feat(aliases): announced metadata = override +
 rule-aware intersection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

synthesizeOne now builds the listing's `limits` + `chat` from the
alias's announced-metadata payload. Two sources, in order:

1. The operator's stored override (sparse at the `limits` / `chat`
   level — a block they did not provide falls back to the automatic
   computation for that block).
2. Otherwise, the rule-aware intersection across the alias's
   currently-available targets.

The rule-driven downgrade: when an alias's rule pins a value at a
target, that target's matching chat sub-field counts as unsupported
for the intersection, so the alias-wide sub-field drops out. The
pinned rule already fixes the value the listing would have advertised,
so dropping the sub-field keeps the wire surface honest about what the
operator left for the caller to choose. Map:

  rules.reasoning.effort         → drops chat.reasoning.effort
  rules.reasoning.budget_tokens  → drops chat.reasoning.budget_tokens
  rules.reasoning.adaptive=true  → drops chat.reasoning.adaptive

`limits` is the new intersection axis: per-field minimum across every
available target, absent when any target leaves the field undeclared.

Tests cover: single-target pinned-rule downgrade (effort + budget),
multi-target effort intersection both with and without a pinning rule,
adaptive downgrade triggered by one target's rule, the limits
intersection, and the operator override (sparse limits-only override
keeps the computed chat; full chat override replaces the computed
chat verbatim).
---
 .../src/data-plane/models/alias-listing.ts    | 136 ++++++++++++------
 .../data-plane/models/alias-listing_test.ts   | 116 ++++++++++++++-
 2 files changed, 204 insertions(+), 48 deletions(-)

diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index 73cc192cd..bb7f0871a 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -3,11 +3,20 @@
 // carries an `aliasedFrom` block so an alias-aware UI can render the
 // alias-of relationship without a second round trip.
 //
-// Capability metadata is the safe lower bound for the inbound request:
-// single-target reports the sole target's metadata narrowed by the
-// alias's rules; multi-target reports the intersection across every
-// currently-available target so whichever target gets picked at request
-// time supports whatever the catalog reported.
+// `limits` and `chat` come from the alias's announced metadata payload:
+// the operator's stored override when set (sparse — any sub-field they
+// did not provide falls back to the automatic computation), otherwise
+// the rule-aware intersection across the alias's available targets. The
+// intersection is the safe lower bound for the inbound request — every
+// reported capability survives no matter which target the resolver
+// picks at request time.
+//
+// The rule-aware part: when an alias's rule pins a value at a target,
+// that target is treated as "unsupported" for the corresponding
+// sub-field for the purposes of the intersection. A pinned rule
+// already fixes whatever value the listing would have advertised, so
+// dropping the sub-field from the announced metadata keeps the wire
+// surface honest about what the operator left for the caller to set.
 //
 // Collision: when an alias's `name` exactly equals a real model id, the
 // alias entry replaces the real entry in the final catalog. Two entries
@@ -18,7 +27,7 @@
 
 import type { ModelAliasRecord } from '../../repo/types.ts';
 import { composeAliasDisplayName } from '@floway-dev/protocols/common';
-import type { AliasTarget, ChatAliasRules, ChatModelInfo, PublicModel, PublicModelAliasedFrom } from '@floway-dev/protocols/common';
+import type { AliasTarget, AnnouncedMetadata, ChatAliasRules, ChatModelInfo, PublicModel, PublicModelAliasedFrom, PublicModelLimits } from '@floway-dev/protocols/common';
 import type { InternalModel, ResolvedModel } from '@floway-dev/provider';
 
 export interface ListedAliasInputs {
@@ -41,6 +50,25 @@ const intersectArrays = <T>(arrays: readonly (readonly T[])[]): T[] => {
   return head.filter(value => tail.every(other => other.includes(value)));
 };
 
+// Apply the rule-driven downgrade: a target with a pinned rule reports
+// the corresponding catalog sub-field as unsupported (= undefined) for
+// the purposes of intersection. Fields the rule doesn't touch pass
+// through unchanged.
+const effectiveChatForIntersection = (chat: ChatModelInfo | undefined, target: AliasTarget): ChatModelInfo | undefined => {
+  if (chat === undefined) return undefined;
+  const rules = chatRules(target);
+  const ruleReasoning = rules.reasoning;
+  if (ruleReasoning === undefined) return chat; // no reasoning rule: nothing is pinned
+  if (chat.reasoning === undefined) return chat; // no reasoning block to downgrade
+
+  const reasoning: NonNullable<ChatModelInfo['reasoning']> = { ...chat.reasoning }; // copy, so the deletes below never touch the input `chat`
+  if (ruleReasoning.effort !== undefined) delete reasoning.effort;
+  if (ruleReasoning.budget_tokens !== undefined) delete reasoning.budget_tokens;
+  if (ruleReasoning.adaptive === true) delete reasoning.adaptive; // only an explicit `true` counts as a pin
+
+  return { ...chat, reasoning }; // NOTE(review): `reasoning` can be {} here when every declared sub-field was pinned
+};
+
 const intersectChat = (chats: readonly ChatModelInfo[]): ChatModelInfo | undefined => {
   const result: ChatModelInfo = {};
 
@@ -103,26 +131,20 @@ const intersectChat = (chats: readonly ChatModelInfo[]): ChatModelInfo | undefin
   return Object.keys(result).length > 0 ? result : undefined;
 };
 
-// Narrow the single target's chat metadata against the alias's rule
-// overlay. Fields the rule doesn't touch pass through unchanged.
-const narrowChatByRules = (chat: ChatModelInfo | undefined, target: AliasTarget): ChatModelInfo | undefined => {
-  if (chat === undefined) return undefined;
-  const rules = chatRules(target);
-  if (rules.reasoning === undefined) return chat;
-  const out: ChatModelInfo = { ...chat };
-  if (chat.reasoning !== undefined) {
-    const reasoning: NonNullable<ChatModelInfo['reasoning']> = { ...chat.reasoning };
-    if (rules.reasoning.effort !== undefined) {
-      const fixed = rules.reasoning.effort;
-      reasoning.effort = { supported: [fixed], default: fixed };
-    }
-    if (rules.reasoning.budget_tokens !== undefined) {
-      const fixed = rules.reasoning.budget_tokens;
-      reasoning.budget_tokens = { min: fixed, max: fixed };
-    }
-    out.reasoning = reasoning;
+// `limits` intersection: min across targets per field; the field is
+// absent when any target leaves it undeclared. Matches the safe-lower-
+// bound contract — whichever target the resolver picks, the reported
+// window is one every target can actually serve.
+const LIMIT_KEYS = ['max_context_window_tokens', 'max_prompt_tokens', 'max_output_tokens'] as const;
+
+const intersectLimits = (limitsList: readonly PublicModelLimits[]): PublicModelLimits => {
+  if (limitsList.length === 0) return {};
+  const result: PublicModelLimits = {};
+  for (const key of LIMIT_KEYS) {
+    const values = limitsList.map(l => l[key]).filter((v): v is number => v !== undefined);
+    if (values.length === limitsList.length) result[key] = Math.min(...values);
   }
-  return out;
+  return result;
 };
 
 const buildAliasedFrom = (alias: ModelAliasRecord): PublicModelAliasedFrom => ({
@@ -134,6 +156,41 @@ const buildAliasedFrom = (alias: ModelAliasRecord): PublicModelAliasedFrom => ({
   targets: alias.targets,
 });
 
+// Compute the rule-aware intersection (`limits` + `chat`) over the
+// alias's currently-available targets; the caller decides whether to
+// overlay an operator override. NOTE(review): `alias` is unused here.
+const computeAutomaticMetadata = (
+  alias: ModelAliasRecord,
+  availableTargets: readonly { target: AliasTarget; real: InternalModel }[],
+): { limits: PublicModelLimits; chat: ChatModelInfo | undefined } => {
+  if (availableTargets.length === 0) return { limits: {}, chat: undefined }; // no backing target: empty limits, no chat
+
+  const limits = intersectLimits(availableTargets.map(({ real }) => real.limits));
+
+  const effectiveChats = availableTargets
+    .map(({ target, real }) => effectiveChatForIntersection(real.chat, target))
+    .filter((c): c is ChatModelInfo => c !== undefined);
+  // Intersect chat metadata only when every available target carries it
+  // (post-downgrade); a half-declared block would leak the metadata of
+  // whichever subset happened to carry it.
+  const chat = effectiveChats.length === availableTargets.length
+    ? intersectChat(effectiveChats)
+    : undefined;
+
+  return { limits, chat };
+};
+
+// Overlay the operator's override on the computed payload. Sparse at
+// block granularity: a provided `limits` or `chat` replaces the computed
+// block wholesale (no per-field merge); an omitted one falls back.
+const mergeWithOverride = (
+  computed: { limits: PublicModelLimits; chat: ChatModelInfo | undefined },
+  override: AnnouncedMetadata,
+): { limits: PublicModelLimits; chat: ChatModelInfo | undefined } => ({
+  limits: override.limits ?? computed.limits,
+  chat: override.chat ?? computed.chat,
+});
+
 const synthesizeOne = (alias: ModelAliasRecord, realModels: readonly InternalModel[]): PublicModel => {
   const realById = new Map(realModels.map(m => [m.id, m] as const));
   const availableTargets = alias.targets
@@ -147,38 +204,29 @@ const synthesizeOne = (alias: ModelAliasRecord, realModels: readonly InternalMod
     ? composeAliasDisplayName(alias.targets[0].target_model_id, alias.targets[0].rules)
     : alias.name);
 
+  const computed = computeAutomaticMetadata(alias, availableTargets);
+  const { limits, chat } = alias.announcedMetadata !== null
+    ? mergeWithOverride(computed, alias.announcedMetadata)
+    : computed;
+
   const entry: PublicModel = {
     id: alias.name,
     object: 'model',
     type: 'model',
     display_name: displayName,
-    limits: {},
+    limits,
     kind: alias.kind,
     aliasedFrom: buildAliasedFrom(alias),
   };
+  if (chat !== undefined) entry.chat = chat;
 
-  // No backing target — still emit the row so the dashboard can show the
-  // alias with a no-target warning.
-  if (availableTargets.length === 0) return entry;
-
+  // Single-target pricing rides along when available — the resolver
+  // will hit that target, so the catalog can publish its rate verbatim.
   if (availableTargets.length === 1) {
-    const [{ target, real }] = availableTargets;
-    if (real.chat !== undefined) {
-      const chat = narrowChatByRules(real.chat, target);
-      if (chat !== undefined) entry.chat = chat;
-    }
+    const [{ real }] = availableTargets;
     if (real.cost !== undefined) entry.cost = real.cost;
-    return entry;
   }
 
-  const chats = availableTargets.map(({ real }) => real.chat).filter((c): c is ChatModelInfo => c !== undefined);
-  // Intersect chat metadata only when every available target declares it;
-  // a half-declared block would leak the metadata of whichever subset
-  // happened to carry it.
-  if (chats.length === availableTargets.length) {
-    const chat = intersectChat(chats);
-    if (chat !== undefined) entry.chat = chat;
-  }
   return entry;
 };
 
diff --git a/packages/gateway/src/data-plane/models/alias-listing_test.ts b/packages/gateway/src/data-plane/models/alias-listing_test.ts
index 9a158b020..c8ef0b9d6 100644
--- a/packages/gateway/src/data-plane/models/alias-listing_test.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing_test.ts
@@ -25,7 +25,7 @@ const realModel = (overrides: Partial<InternalModel> & { id: string }): Internal
 });
 
 describe('synthesizeListedAliases', () => {
-  test('single-target alias narrows reasoning.effort to the fixed value', () => {
+  test('single-target alias with a pinned reasoning.effort drops the effort block', () => {
     const aliases = [aliasFixture({
       name: 'gpt-fast',
       targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } }],
@@ -42,7 +42,9 @@ describe('synthesizeListedAliases', () => {
     const [entry] = synthesizeListedAliases({ aliases, realModels });
     expect(entry.id).toBe('gpt-fast');
     expect(entry.display_name).toBe('gpt-5.4 (low effort)');
-    expect(entry.chat?.reasoning?.effort).toEqual({ supported: ['low'], default: 'low' });
+    // The rule pins effort, so the announced metadata drops it — the
+    // caller already knows the value because the alias fixes it.
+    expect(entry.chat?.reasoning).toBeUndefined();
     expect(entry.chat?.modalities).toEqual({ input: ['text', 'image'], output: ['text'] });
     expect(entry.aliasedFrom).toEqual({
       name: 'gpt-fast',
@@ -52,7 +54,7 @@ describe('synthesizeListedAliases', () => {
     });
   });
 
-  test('single-target alias narrows reasoning.budget_tokens to a single point', () => {
+  test('single-target alias with a pinned reasoning.budget_tokens drops the budget block', () => {
     const aliases = [aliasFixture({
       targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { budget_tokens: 4096 } } }],
     })];
@@ -61,7 +63,7 @@ describe('synthesizeListedAliases', () => {
       chat: { reasoning: { budget_tokens: { min: 1024, max: 65536 } } },
     })];
     const [entry] = synthesizeListedAliases({ aliases, realModels });
-    expect(entry.chat?.reasoning?.budget_tokens).toEqual({ min: 4096, max: 4096 });
+    expect(entry.chat?.reasoning).toBeUndefined();
   });
 
   test('multi-target alias intersects chat.modalities across every target', () => {
@@ -184,4 +186,110 @@ describe('synthesizeListedAliases', () => {
     const [entry] = synthesizeListedAliases({ aliases, realModels });
     expect(entry.display_name).toBe('My Fast GPT');
   });
+
+  test('multi-target alias whose first target pins reasoning.effort drops the alias-wide effort block', () => {
+    // The pinned target counts as unsupported for effort, so the
+    // intersection collapses — effort never makes it onto the listing.
+    const aliases = [aliasFixture({
+      name: 'mixed',
+      targets: [
+        { target_model_id: 'a', rules: { reasoning: { effort: 'low' } } },
+        { target_model_id: 'b', rules: {} },
+      ],
+    })];
+    const realModels = [
+      realModel({ id: 'a', chat: { reasoning: { effort: { supported: ['low', 'medium', 'high'], default: 'medium' } } } }),
+      realModel({ id: 'b', chat: { reasoning: { effort: { supported: ['low', 'medium', 'high'], default: 'medium' } } } }),
+    ];
+    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    expect(entry.chat?.reasoning).toBeUndefined();
+  });
+
+  test('multi-target alias without rules intersects reasoning.effort across targets', () => {
+    const aliases = [aliasFixture({
+      name: 'unfixed',
+      targets: [
+        { target_model_id: 'a', rules: {} },
+        { target_model_id: 'b', rules: {} },
+      ],
+    })];
+    const realModels = [
+      realModel({ id: 'a', chat: { reasoning: { effort: { supported: ['low', 'medium', 'high'], default: 'medium' } } } }),
+      realModel({ id: 'b', chat: { reasoning: { effort: { supported: ['medium', 'high'], default: 'medium' } } } }),
+    ];
+    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    expect(entry.chat?.reasoning?.effort).toEqual({ supported: ['medium', 'high'], default: 'medium' });
+  });
+
+  test('rules.reasoning.adaptive=true at any target drops adaptive from the announced metadata', () => {
+    const aliases = [aliasFixture({
+      name: 'pinned-adaptive',
+      targets: [
+        { target_model_id: 'a', rules: { reasoning: { adaptive: true } } },
+        { target_model_id: 'b', rules: {} },
+      ],
+    })];
+    const realModels = [
+      realModel({ id: 'a', chat: { reasoning: { adaptive: true } } }),
+      realModel({ id: 'b', chat: { reasoning: { adaptive: true } } }),
+    ];
+    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    expect(entry.chat?.reasoning).toBeUndefined();
+  });
+
+  test('limits intersection emits min across targets; absent when any target lacks the field', () => {
+    const aliases = [aliasFixture({
+      name: 'multi-limits',
+      targets: [
+        { target_model_id: 'a', rules: {} },
+        { target_model_id: 'b', rules: {} },
+      ],
+    })];
+    const realModels = [
+      realModel({ id: 'a', limits: { max_context_window_tokens: 128000, max_output_tokens: 16000 } }),
+      realModel({ id: 'b', limits: { max_context_window_tokens: 200000 } }),
+    ];
+    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    // Both targets advertise max_context_window_tokens — emit the min.
+    expect(entry.limits.max_context_window_tokens).toBe(128000);
+    // Only `a` declares max_output_tokens, so it drops out.
+    expect(entry.limits.max_output_tokens).toBeUndefined();
+  });
+
+  test('operator override pins limits.max_output_tokens; chat falls back to computed intersection', () => {
+    const aliases = [aliasFixture({
+      name: 'overridden',
+      targets: [
+        { target_model_id: 'a', rules: {} },
+        { target_model_id: 'b', rules: {} },
+      ],
+      announcedMetadata: {
+        limits: { max_output_tokens: 8192 },
+      },
+    })];
+    const realModels = [
+      realModel({ id: 'a', limits: { max_context_window_tokens: 128000 }, chat: { modalities: { input: ['text', 'image'], output: ['text'] } } }),
+      realModel({ id: 'b', limits: { max_context_window_tokens: 200000 }, chat: { modalities: { input: ['text'], output: ['text'] } } }),
+    ];
+    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    // The override carries the operator's pinned ceiling verbatim …
+    expect(entry.limits).toEqual({ max_output_tokens: 8192 });
+    // … while chat falls back to the rule-aware intersection.
+    expect(entry.chat?.modalities).toEqual({ input: ['text'], output: ['text'] });
+  });
+
+  test('operator override fully replaces chat when set, regardless of computed', () => {
+    const aliases = [aliasFixture({
+      name: 'chat-override',
+      targets: [{ target_model_id: 'a', rules: {} }],
+      announcedMetadata: {
+        chat: { modalities: { input: ['text'], output: ['text'] } },
+      },
+    })];
+    const realModels = [
+      realModel({ id: 'a', chat: { modalities: { input: ['text', 'image'], output: ['text'] } } }),
+    ];
+    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    expect(entry.chat).toEqual({ modalities: { input: ['text'], output: ['text'] } });
+  });
 });

From 07a8081073fc737eb82970fc68a307d8d1f9edab Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 03:19:37 +0800
Subject: [PATCH 086/170] feat(aliases/web): AnnouncedMetadataEditor component
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

New component for editing an alias's announced-metadata override.
v-model is the wire AnnouncedMetadata shape; the kind prop gates
which sub-blocks render:

  chat       → Limits + Modalities + Reasoning
  embedding  → Limits only
  image      → never mounted (the dialog hides the whole section)

The three sub-blocks (Limits, Modalities, Reasoning effort with
drag-reorder + budget + adaptive + mandatory) mirror the matching
blocks in ModelEditor.vue. The duplication is documented in the
component's docstring — extracting the catalog-side editor's sub-
blocks cleanly would have entangled this PR with a wider ModelEditor
refactor, so we took the lighter-weight copy now and left a marker
for a later cleanup pass.

Also re-exports AnnouncedMetadata from apps/web/src/api/types.ts so
both the editor and the alias dialog read one wire-type source.
---
 apps/web/src/api/types.ts                     |   3 +-
 .../alias-edit/AnnouncedMetadataEditor.vue    | 431 ++++++++++++++++++
 2 files changed, 433 insertions(+), 1 deletion(-)
 create mode 100644 apps/web/src/components/alias-edit/AnnouncedMetadataEditor.vue

diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts
index 3c3930ffb..a9ea23fe9 100644
--- a/apps/web/src/api/types.ts
+++ b/apps/web/src/api/types.ts
@@ -5,6 +5,7 @@ import type {
   AliasRules,
   AliasSelection,
   AliasTarget,
+  AnnouncedMetadata,
   BillingDimension,
   ChatAliasRules,
   ModelAlias,
@@ -17,7 +18,7 @@ import type { AddressableForm, ModelPrefixConfig } from '@floway-dev/provider/mo
 
 export type { BillingDimension, ModelEndpointKey, ModelEndpoints, ModelKind, ModelPricing };
 export type { AddressableForm, ModelPrefixConfig };
-export type { AliasKind, AliasRules, AliasSelection, AliasTarget, ChatAliasRules, ModelAlias };
+export type { AliasKind, AliasRules, AliasSelection, AliasTarget, AnnouncedMetadata, ChatAliasRules, ModelAlias };
 
 export type UpstreamProviderKind = 'custom' | 'azure' | 'copilot' | 'codex' | 'claude-code' | 'ollama';
 
diff --git a/apps/web/src/components/alias-edit/AnnouncedMetadataEditor.vue b/apps/web/src/components/alias-edit/AnnouncedMetadataEditor.vue
new file mode 100644
index 000000000..32a37b280
--- /dev/null
+++ b/apps/web/src/components/alias-edit/AnnouncedMetadataEditor.vue
@@ -0,0 +1,431 @@
+<script setup lang="ts">
+// Editor for an alias's announced-metadata override — the operator's
+// explicit `limits` + `chat` block that overrides the auto-computed
+// intersection inside `synthesizeListedAliases`. v-model is the wire
+// shape (`AnnouncedMetadata`), kind-gated:
+//
+//   - chat       → Limits + Modalities + Reasoning sub-blocks
+//   - embedding  → Limits sub-block only
+//   - image      → never mounted (the alias edit dialog hides the whole
+//                  section for image-kind aliases)
+//
+// The three sub-blocks below mirror the matching ones in
+// `apps/web/src/components/upstream-edit/ModelEditor.vue`. We accepted
+// the duplication rather than attempting a full extraction of the
+// catalog-side editor in the same PR — `ModelEditor.vue` ties those
+// blocks to a wider state machine (config / editable / Manual vs
+// Auto / per-model flag overrides) that doesn't carry over cleanly.
+// A later cleanup pass can lift the shared bits into a single host.
+
+import { computed, ref, watch } from 'vue';
+
+import type { AliasKind, AnnouncedMetadata, ModelLimits, UpstreamChatConfig } from '../../api/types.ts';
+import { Button, Input, Switch, Tooltip } from '@floway-dev/ui';
+
+const modelValue = defineModel<AnnouncedMetadata>({ required: true });
+
+const props = defineProps<{
+  kind: AliasKind;
+}>();
+
+// Mutable local view of the wire payload. ChatModelInfo's modality
+// arrays are typed `readonly`; the templates below build mutable
+// copies that the wire shape accepts back without further coercion.
+type EditableMetadata = { limits?: ModelLimits; chat?: UpstreamChatConfig };
+
+const editable = computed<EditableMetadata>(() => modelValue.value as EditableMetadata);
+
+const patch = (next: EditableMetadata) => {
+  // Strip empty sub-blocks so the wire payload stays minimal — the
+  // alias-listing fallback only kicks in for absent fields.
+  const out: EditableMetadata = {};
+  if (next.limits && Object.keys(next.limits).length > 0) out.limits = next.limits;
+  if (next.chat && (next.chat.modalities !== undefined || next.chat.reasoning !== undefined)) out.chat = next.chat;
+  modelValue.value = out as AnnouncedMetadata;
+};
+
+const parseOptionalNumber = (raw: string | number | null | undefined): number | undefined => {
+  if (raw === '' || raw === null || raw === undefined) return undefined;
+  const num = Number(raw);
+  return Number.isFinite(num) && num >= 0 ? num : undefined;
+};
+
+// ── Limits ─────────────────────────────────────────────────────────────
+
+const updateLimit = (
+  key: 'max_context_window_tokens' | 'max_prompt_tokens' | 'max_output_tokens',
+  raw: string | number | null | undefined,
+) => {
+  const limits = { ...(editable.value.limits ?? {}) };
+  const num = parseOptionalNumber(raw);
+  if (num === undefined) delete limits[key];
+  else limits[key] = num;
+  patch({ ...editable.value, limits: Object.keys(limits).length > 0 ? limits : undefined });
+};
+
+// ── Chat builder helpers ───────────────────────────────────────────────
+
+const buildNextChat = (partial: Partial<UpstreamChatConfig>): UpstreamChatConfig | undefined => {
+  const base = editable.value.chat ?? {};
+  const next: UpstreamChatConfig = { ...base, ...partial };
+
+  const hasImageInput = next.modalities?.input.includes('image') === true;
+  next.modalities = hasImageInput
+    ? { input: ['text', 'image'], output: ['text'] }
+    : undefined;
+
+  if (!next.modalities && !next.reasoning) return undefined;
+  return next;
+};
+
+const buildNextReasoning = (
+  update: Partial<NonNullable<UpstreamChatConfig['reasoning']>>,
+): UpstreamChatConfig['reasoning'] => {
+  const base = editable.value.chat?.reasoning ?? {};
+  const merged = { ...base, ...update };
+  const cleaned = Object.fromEntries(
+    Object.entries(merged).filter(([, v]) => v !== undefined),
+  ) as NonNullable<UpstreamChatConfig['reasoning']>;
+  return Object.keys(cleaned).length > 0 ? cleaned : undefined;
+};
+
+const setChat = (chat: UpstreamChatConfig | undefined) => {
+  patch({ ...editable.value, chat });
+};
+
+// ── Modalities ─────────────────────────────────────────────────────────
+
+const chatImageInput = computed<boolean>(
+  () => editable.value.chat?.modalities?.input.includes('image') ?? false,
+);
+
+const toggleImageInput = (on: boolean) => {
+  setChat(buildNextChat({ modalities: on ? { input: ['text', 'image'], output: ['text'] } : undefined }));
+};
+
+// ── Reasoning sub-blocks ───────────────────────────────────────────────
+
+const effortEnabled = computed(() => editable.value.chat?.reasoning?.effort !== undefined);
+const budgetTokensEnabled = computed(() => editable.value.chat?.reasoning?.budget_tokens !== undefined);
+const adaptiveEnabled = computed(() => editable.value.chat?.reasoning?.adaptive === true);
+const mandatoryEnabled = computed(() => editable.value.chat?.reasoning?.mandatory === true);
+
+const anyControlledEnabled = computed(() => effortEnabled.value || budgetTokensEnabled.value || adaptiveEnabled.value);
+const controlledDisabled = computed(() => mandatoryEnabled.value);
+const mandatoryDisabled = computed(() => anyControlledEnabled.value);
+
+const supportedEfforts = computed<string[]>(
+  () => editable.value.chat?.reasoning?.effort?.supported ?? [],
+);
+
+const REASONING_LEVELS = ['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max'] as const;
+const presetEffortLevels = computed(() => REASONING_LEVELS.filter(level => !supportedEfforts.value.includes(level)));
+
+const reasoningLevelInput = ref('');
+
+watch(() => props.kind, () => { reasoningLevelInput.value = ''; });
+
+const toggleEffort = (on: boolean) => {
+  const reasoning = on
+    ? buildNextReasoning({ effort: { supported: ['low', 'medium', 'high'], default: 'medium' } })
+    : buildNextReasoning({ effort: undefined });
+  setChat(buildNextChat({ reasoning }));
+};
+
+const addReasoningLevel = (level: string) => {
+  const trimmed = level.trim();
+  if (trimmed === '' || supportedEfforts.value.includes(trimmed)) return;
+  const updated = [...supportedEfforts.value, trimmed];
+  // An empty/absent default means no level is selected (fresh or fully
+  // emptied list); promote the new level so `default` stays in `supported`.
+  const existingDefault = editable.value.chat?.reasoning?.effort?.default;
+  setChat(buildNextChat({ reasoning: buildNextReasoning({ effort: { supported: updated, default: existingDefault || trimmed } }) }));
+};
+
+const removeReasoningLevel = (level: string) => {
+  const current = supportedEfforts.value;
+  const removedIndex = current.indexOf(level);
+  const updated = current.filter(e => e !== level);
+  const existingEffort = editable.value.chat?.reasoning?.effort;
+  let nextDefault = existingEffort?.default ?? '';
+  if (existingEffort?.default === level) {
+    if (updated.length === 0) nextDefault = '';
+    else if (removedIndex < updated.length) nextDefault = updated[removedIndex]!;
+    else nextDefault = updated[updated.length - 1]!;
+  }
+  setChat(buildNextChat({ reasoning: buildNextReasoning({ effort: { supported: updated, default: nextDefault } }) }));
+};
+
+const commitReasoningInput = () => {
+  const trimmed = reasoningLevelInput.value.trim();
+  if (trimmed === '') return;
+  addReasoningLevel(trimmed);
+  reasoningLevelInput.value = '';
+};
+
+const setDefaultEffort = (value: string) => {
+  const current = supportedEfforts.value;
+  setChat(buildNextChat({ reasoning: buildNextReasoning({ effort: { supported: current, default: value } }) }));
+};
+
+// ── Effort drag-to-reorder ─────────────────────────────────────────────
+
+const draggedEffortIndex = ref<number | null>(null);
+const dragOverEffortIndex = ref<number | null>(null);
+
+const onEffortDragStart = (index: number, e: DragEvent) => {
+  draggedEffortIndex.value = index;
+  if (e.dataTransfer) {
+    e.dataTransfer.effectAllowed = 'move';
+    e.dataTransfer.setData('text/plain', String(index));
+  }
+};
+
+const onEffortDragOver = (index: number, e: DragEvent) => {
+  if (draggedEffortIndex.value === null) return;
+  e.preventDefault();
+  if (e.dataTransfer) e.dataTransfer.dropEffect = 'move';
+  dragOverEffortIndex.value = index;
+};
+
+const onEffortDragLeave = (index: number) => {
+  if (dragOverEffortIndex.value === index) dragOverEffortIndex.value = null;
+};
+
+const onEffortDrop = (index: number, e: DragEvent) => {
+  e.preventDefault();
+  const from = draggedEffortIndex.value;
+  draggedEffortIndex.value = null;
+  dragOverEffortIndex.value = null;
+  if (from === null || from === index) return;
+  const current = [...supportedEfforts.value];
+  const [moved] = current.splice(from, 1);
+  if (moved === undefined) return;
+  current.splice(index, 0, moved);
+  const existing = editable.value.chat?.reasoning?.effort;
+  setChat(buildNextChat({ reasoning: buildNextReasoning({ effort: { supported: current, default: existing?.default ?? '' } }) }));
+};
+
+const onEffortDragEnd = () => {
+  draggedEffortIndex.value = null;
+  dragOverEffortIndex.value = null;
+};
+
+// ── Budget tokens ──────────────────────────────────────────────────────
+
+const toggleBudgetTokens = (on: boolean) => {
+  const reasoning = on
+    ? buildNextReasoning({ budget_tokens: {} })
+    : buildNextReasoning({ budget_tokens: undefined });
+  setChat(buildNextChat({ reasoning }));
+};
+
+const updateBudgetTokensMin = (raw: string | number | null | undefined) => {
+  const num = parseOptionalNumber(raw);
+  const current = editable.value.chat?.reasoning?.budget_tokens ?? {};
+  const next = { ...current };
+  if (num === undefined) delete next.min; else next.min = num;
+  setChat(buildNextChat({ reasoning: buildNextReasoning({ budget_tokens: next }) }));
+};
+
+const updateBudgetTokensMax = (raw: string | number | null | undefined) => {
+  const num = parseOptionalNumber(raw);
+  const current = editable.value.chat?.reasoning?.budget_tokens ?? {};
+  const next = { ...current };
+  if (num === undefined) delete next.max; else next.max = num;
+  setChat(buildNextChat({ reasoning: buildNextReasoning({ budget_tokens: next }) }));
+};
+
+// ── Adaptive / Mandatory ───────────────────────────────────────────────
+
+const toggleAdaptive = (on: boolean) => {
+  const reasoning = on
+    ? buildNextReasoning({ adaptive: true })
+    : buildNextReasoning({ adaptive: undefined });
+  setChat(buildNextChat({ reasoning }));
+};
+
+const toggleMandatory = (on: boolean) => {
+  const reasoning = on
+    ? buildNextReasoning({ mandatory: true })
+    : buildNextReasoning({ mandatory: undefined });
+  setChat(buildNextChat({ reasoning }));
+};
+
+const showChatBlocks = computed(() => props.kind === 'chat');
+</script>
+
+<template>
+  <div class="space-y-6">
+    <section>
+      <div class="mb-3 flex items-baseline gap-3">
+        <h4 class="text-[11px] font-semibold uppercase tracking-wider text-gray-500">Limits</h4>
+        <span class="text-[11px] text-gray-500">tokens — leave blank to inherit the computed intersection</span>
+      </div>
+      <div class="grid gap-3 sm:grid-cols-3">
+        <label class="block space-y-1.5">
+          <span class="block text-xs font-medium text-gray-500">Context Window</span>
+          <Input
+            type="number"
+            :model-value="editable.limits?.max_context_window_tokens"
+            placeholder="e.g. 1050000"
+            class="font-mono"
+            @update:model-value="v => updateLimit('max_context_window_tokens', v)"
+          />
+        </label>
+        <label class="block space-y-1.5">
+          <span class="block text-xs font-medium text-gray-500">Prompt Tokens</span>
+          <Input
+            type="number"
+            :model-value="editable.limits?.max_prompt_tokens"
+            placeholder="e.g. 922000"
+            class="font-mono"
+            @update:model-value="v => updateLimit('max_prompt_tokens', v)"
+          />
+        </label>
+        <label class="block space-y-1.5">
+          <span class="block text-xs font-medium text-gray-500">Output Tokens</span>
+          <Input
+            type="number"
+            :model-value="editable.limits?.max_output_tokens"
+            placeholder="e.g. 128000"
+            class="font-mono"
+            @update:model-value="v => updateLimit('max_output_tokens', v)"
+          />
+        </label>
+      </div>
+    </section>
+
+    <section v-if="showChatBlocks">
+      <div class="flex flex-wrap items-center gap-x-4 gap-y-2">
+        <h4 class="text-[11px] font-semibold uppercase tracking-wider text-gray-500">Modalities</h4>
+        <label class="flex cursor-pointer items-center gap-2">
+          <Switch :model-value="chatImageInput" @update:model-value="v => toggleImageInput(v === true)" />
+          <span class="text-xs" :class="chatImageInput ? 'text-white' : 'text-gray-500'">Image input</span>
+        </label>
+      </div>
+    </section>
+
+    <section v-if="showChatBlocks">
+      <div class="flex flex-wrap items-center gap-x-4 gap-y-2">
+        <h4 class="text-[11px] font-semibold uppercase tracking-wider text-gray-500">Reasoning</h4>
+        <label class="flex items-center gap-2" :class="controlledDisabled ? 'cursor-not-allowed opacity-60' : 'cursor-pointer'">
+          <Switch :model-value="effortEnabled" :disabled="controlledDisabled" @update:model-value="v => toggleEffort(v === true)" />
+          <span class="text-xs" :class="effortEnabled ? 'text-white' : 'text-gray-500'">Effort levels</span>
+        </label>
+        <label class="flex items-center gap-2" :class="controlledDisabled ? 'cursor-not-allowed opacity-60' : 'cursor-pointer'">
+          <Switch :model-value="budgetTokensEnabled" :disabled="controlledDisabled" @update:model-value="v => toggleBudgetTokens(v === true)" />
+          <span class="text-xs" :class="budgetTokensEnabled ? 'text-white' : 'text-gray-500'">Budget tokens</span>
+        </label>
+        <label class="flex items-center gap-2" :class="controlledDisabled ? 'cursor-not-allowed opacity-60' : 'cursor-pointer'">
+          <Switch :model-value="adaptiveEnabled" :disabled="controlledDisabled" @update:model-value="v => toggleAdaptive(v === true)" />
+          <span class="text-xs" :class="adaptiveEnabled ? 'text-white' : 'text-gray-500'">Adaptive</span>
+          <Tooltip content="Model self-selects reasoning effort"><span class="text-[10px] text-gray-600">?</span></Tooltip>
+        </label>
+        <label class="flex items-center gap-2" :class="mandatoryDisabled ? 'cursor-not-allowed opacity-60' : 'cursor-pointer'">
+          <Switch :model-value="mandatoryEnabled" :disabled="mandatoryDisabled" @update:model-value="v => toggleMandatory(v === true)" />
+          <span class="text-xs" :class="mandatoryEnabled ? 'text-white' : 'text-gray-500'">Mandatory</span>
+          <Tooltip content="Reasoning is always applied; caller cannot opt out"><span class="text-[10px] text-gray-600">?</span></Tooltip>
+        </label>
+      </div>
+
+      <div v-if="effortEnabled" class="mt-3 space-y-1.5 border-l-2 border-white/[0.08] pl-3">
+        <div class="flex min-h-[1.625rem] flex-wrap items-center gap-x-3 gap-y-1.5">
+          <span class="text-xs font-semibold text-gray-300">Effort levels</span>
+          <span class="text-[11px] text-gray-500">(click to set default)</span>
+          <template v-if="supportedEfforts.length > 0">
+            <button
+              v-for="(level, index) in supportedEfforts"
+              :key="level"
+              type="button"
+              class="inline-flex cursor-grab items-center gap-1 rounded border px-2 py-0.5 font-mono text-[11px] transition-colors active:cursor-grabbing"
+              :class="[
+                editable.chat?.reasoning?.effort?.default === level
+                  ? 'border-accent-cyan/50 bg-accent-cyan/10 text-accent-cyan font-semibold'
+                  : 'border-white/15 bg-white/[0.07] text-gray-300 hover:border-white/30 hover:text-white',
+                draggedEffortIndex === index && 'opacity-40',
+                dragOverEffortIndex === index && draggedEffortIndex !== index && 'ring-1 ring-accent-cyan',
+              ]"
+              draggable="true"
+              :title="editable.chat?.reasoning?.effort?.default === level ? 'Default — click another to switch, drag to reorder' : 'Click to set as default, drag to reorder'"
+              @click="setDefaultEffort(level)"
+              @dragstart="e => onEffortDragStart(index, e)"
+              @dragover="e => onEffortDragOver(index, e)"
+              @dragleave="onEffortDragLeave(index)"
+              @drop="e => onEffortDrop(index, e)"
+              @dragend="onEffortDragEnd"
+            >
+              {{ level }}
+              <span
+                role="button" tabindex="0"
+                class="ml-0.5 cursor-pointer text-gray-500 transition-colors hover:text-accent-rose"
+                :aria-label="`Remove ${level}`"
+                @click.stop="removeReasoningLevel(level)"
+                @keydown.enter.stop.prevent="removeReasoningLevel(level)"
+                @keydown.space.stop.prevent="removeReasoningLevel(level)"
+              >
+                <svg class="h-2.5 w-2.5" viewBox="0 0 12 12" fill="none" stroke="currentColor" stroke-width="2">
+                  <path d="M9 3 3 9M3 3l6 6" />
+                </svg>
+              </span>
+            </button>
+          </template>
+          <p v-else class="whitespace-nowrap text-[11px] text-accent-amber">Add at least one effort level — click a preset on the right.</p>
+        </div>
+        <div class="flex flex-wrap items-center gap-1.5">
+          <button
+            v-for="level in presetEffortLevels"
+            :key="level"
+            type="button"
+            class="rounded border border-white/15 px-2 py-0.5 font-mono text-[11px] text-gray-400 transition-colors hover:border-accent-cyan/40 hover:text-accent-cyan"
+            @click="addReasoningLevel(level)"
+          >+ {{ level }}</button>
+          <Input
+            v-model="reasoningLevelInput"
+            size="sm"
+            placeholder="custom…"
+            class="!h-6 !w-28 !py-0 !text-[11px] font-mono"
+            @keydown.enter.prevent="commitReasoningInput"
+          />
+          <Button variant="secondary" size="sm" class="!h-6 !px-2 !py-0 !text-[11px]" @click="commitReasoningInput">Add</Button>
+        </div>
+      </div>
+
+      <div v-if="budgetTokensEnabled" class="mt-3 flex flex-wrap items-center gap-3 border-l-2 border-white/[0.08] pl-3">
+        <span class="text-xs font-semibold text-gray-300">Budget tokens</span>
+        <label class="flex items-center gap-1.5">
+          <span class="text-[11px] text-gray-500">Min</span>
+          <Input
+            type="number"
+            min="0"
+            size="sm"
+            :model-value="editable.chat?.reasoning?.budget_tokens?.min"
+            placeholder="—"
+            class="!h-6 !w-24 !py-0 !text-[11px] font-mono"
+            @update:model-value="v => updateBudgetTokensMin(v)"
+          />
+        </label>
+        <label class="flex items-center gap-1.5">
+          <span class="text-[11px] text-gray-500">Max</span>
+          <Input
+            type="number"
+            min="0"
+            size="sm"
+            :model-value="editable.chat?.reasoning?.budget_tokens?.max"
+            placeholder="—"
+            class="!h-6 !w-24 !py-0 !text-[11px] font-mono"
+            @update:model-value="v => updateBudgetTokensMax(v)"
+          />
+        </label>
+        <p
+          v-if="editable.chat?.reasoning?.budget_tokens?.min !== undefined
+            && editable.chat?.reasoning?.budget_tokens?.max !== undefined
+            && editable.chat.reasoning.budget_tokens.max < editable.chat.reasoning.budget_tokens.min"
+          class="text-[11px] text-accent-amber"
+        >
+          Max must be ≥ min.
+        </p>
+      </div>
+    </section>
+  </div>
+</template>

From 27ed319693cdd984d362704919c3385814705a0c Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 03:26:19 +0800
Subject: [PATCH 087/170] feat(aliases/web): announced-metadata section on the
 edit dialog
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A collapsible section at the bottom of the alias edit dialog, sitting
directly on the dialog background (not wrapped in a card; a subtle
top border separates it from the model list). Header has the title
"Announced metadata" + a one-line hint on the left, and an "Enable
override" switch on the right.

When override is off the body renders a read-only summary of the
auto-computed metadata — a small dl of limits / modalities / reasoning
fields against the live catalog, mirroring the backend
synthesizeOne's rule-aware intersection through a new
`computeAnnouncedMetadata` helper.

When override is on the body renders AnnouncedMetadataEditor seeded
with the current computed view (structured-cloned into the buffer)
so the operator's edits start from a sensible baseline. Toggling the
switch back off discards the buffer and resets the wire payload to
null — the next render snaps back to the live computed view.

Image-kind aliases hide the section entirely. Switching kind from
chat→embedding drops a stored override's `chat` block (the schema
would reject it on save); switching to image wipes the override altogether.

Tests cover: override-off → editor not rendered + summary visible;
override-on → editor mounts seeded with the computed view; toggling
off restores the summary; image-kind hides the section.
---
 .../components/alias-edit/AliasEditDialog.vue | 173 +++++++++++++++++-
 .../alias-edit/AliasEditDialog_test.ts        |  94 ++++++++++
 .../alias-edit/announced-metadata.ts          | 143 +++++++++++++++
 3 files changed, 405 insertions(+), 5 deletions(-)
 create mode 100644 apps/web/src/components/alias-edit/announced-metadata.ts

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index f9ec19e66..adb5fc324 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -1,15 +1,17 @@
 <script setup lang="ts">
 // Editor for one alias (create or edit). Top form (name / display name /
 // kind / selection); a vertical stack of AliasTargetRow cards with an
-// "Add target" button; alias-level warnings card; footer (visibility
-// switch + Cancel / Save).
+// "Add target" button; an Announced-metadata section; alias-level
+// warnings card; footer (visibility switch + Cancel / Save).
 
-import { computed, ref } from 'vue';
+import { computed, ref, watch } from 'vue';
 
 import AliasTargetRow from './AliasTargetRow.vue';
+import { computeAnnouncedMetadata } from './announced-metadata.ts';
+import AnnouncedMetadataEditor from './AnnouncedMetadataEditor.vue';
 import { computeShadowWarning, realModelIdsOfKind } from './warnings.ts';
 import { callApi, useApi } from '../../api/client.ts';
-import type { AliasKind, AliasSelection, AliasTarget, ChatAliasRules, ModelAlias } from '../../api/types.ts';
+import type { AliasKind, AliasSelection, AliasTarget, AnnouncedMetadata, ChatAliasRules, ModelAlias } from '../../api/types.ts';
 import { useModelAliases } from '../../composables/useModelAliases.ts';
 import { useRawModelsStore } from '../../composables/useModels.ts';
 import { Button, Dialog, Input, Select, Switch } from '@floway-dev/ui';
@@ -77,6 +79,60 @@ const removeTarget = (idx: number) => {
   targets.value = targets.value.filter((_, i) => i !== idx);
 };
 
+// ── Announced metadata ──────────────────────────────────────────────────
+//
+// The override is a sparse AnnouncedMetadata; null means "compute
+// automatically at listing time". When the operator flips the override
+// switch on, we freeze the current computed view into the buffer so the
+// editor starts from a sensible baseline; flipping back off discards
+// the buffer and resets the wire payload to null so the next render
+// snaps back to the live computed view.
+
+const announcedOverride = ref<AnnouncedMetadata | null>(props.record?.announced_metadata ?? null);
+
+const computedAnnouncedMetadata = computed<AnnouncedMetadata>(() =>
+  computeAnnouncedMetadata(targets.value, kind.value, modelsStore.models.value));
+
+const overrideEnabled = computed<boolean>(() => announcedOverride.value !== null);
+
+const showAnnouncedSection = computed(() => kind.value !== 'image');
+
+const announcedSectionExpanded = ref(false);
+const toggleAnnouncedSection = () => { announcedSectionExpanded.value = !announcedSectionExpanded.value; };
+
+const setOverrideEnabled = (on: boolean) => {
+  if (on) {
+    // Freeze the live computed view into the working state — the
+    // operator's edits start from what the wire surface would have
+    // emitted, so a blank override doesn't visually erase the alias's
+    // metadata.
+    announcedOverride.value = structuredClone(computedAnnouncedMetadata.value);
+  } else {
+    announcedOverride.value = null;
+  }
+};
+
+// Two-way binding for the editor: writes flow back to the buffer.
+const overrideBuffer = computed<AnnouncedMetadata>({
+  get: () => announcedOverride.value ?? {},
+  set: next => { announcedOverride.value = next; },
+});
+
+// Keep the override saveable when the kind changes: `image` drops it
+// entirely (the section is hidden), `embedding` strips only the `chat`
+// block, which the schema would reject on embedding aliases.
+watch(kind, k => {
+  if (announcedOverride.value === null) return; // automatic mode: nothing to reconcile
+  if (k === 'image') {
+    announcedOverride.value = null;
+    return;
+  }
+  if (k === 'embedding' && announcedOverride.value.chat !== undefined) {
+    const { chat: _drop, ...rest } = announcedOverride.value; // keep every non-chat field
+    announcedOverride.value = rest;
+  }
+});
+
 // Suggestion list for every target-id combobox. Filtered to non-alias
 // catalog rows of the alias's current kind so an embedding alias only
 // hints at embedding models. Aliases never re-enter the alias layer at
@@ -125,7 +181,7 @@ const save = async () => {
       target_model_id: t.target_model_id.trim(),
       rules: t.rules as Record<string, unknown>,
     })),
-    announced_metadata: (props.record?.announced_metadata ?? null) as Record<string, unknown> | null,
+    announced_metadata: announcedOverride.value as Record<string, unknown> | null,
     sort_order: props.record?.sort_order ?? 0,
   };
 
@@ -156,6 +212,35 @@ const KIND_OPTIONS: { value: AliasKind; label: string }[] = [
   { value: 'embedding', label: 'Embedding' },
   { value: 'image', label: 'Image' },
 ];
+
+// Labels + formatters for the read-only view of the auto-computed
+// announced metadata. Keeping them inline (rather than reusing the
+// editor in disabled mode) keeps the override-off render compact and
+// avoids dragging the editor's mutation surface into a read path.
+const COMPUTED_LIMIT_LABELS = {
+  max_context_window_tokens: 'Context window',
+  max_prompt_tokens: 'Prompt tokens',
+  max_output_tokens: 'Output tokens',
+} as const;
+
+const formatLimit = (n: number | undefined): string => n === undefined ? '—' : n.toLocaleString('en-US');
+
+const formatModalities = (mods: readonly string[] | undefined): string =>
+  mods === undefined || mods.length === 0 ? '—' : mods.join(', ');
+
+const formatEffort = (effort: { supported: readonly string[]; default: string } | undefined): string =>
+  effort === undefined ? '—' : `${effort.supported.join(', ')} (default: ${effort.default})`;
+
+const formatBudget = (range: { min?: number; max?: number } | undefined): string => {
+  if (range === undefined) return '—';
+  const { min, max } = range;
+  if (min === undefined && max === undefined) return '—';
+  if (min !== undefined && max !== undefined) return `${min}–${max}`;
+  if (min !== undefined) return `≥ ${min}`;
+  return `≤ ${max}`;
+};
+
+const formatFlag = (flag: boolean | undefined): string => flag === true ? 'yes' : flag === false ? 'no' : '—';
 </script>
 
 <template>
@@ -227,6 +312,84 @@ const KIND_OPTIONS: { value: AliasKind; label: string }[] = [
         </div>
       </div>
 
+      <section v-if="showAnnouncedSection" class="border-t border-white/[0.06] pt-5">
+        <div class="flex items-center justify-between gap-3">
+          <button
+            type="button"
+            class="flex flex-1 min-w-0 items-start gap-2 text-left"
+            :aria-expanded="announcedSectionExpanded"
+            aria-controls="announced-metadata-body"
+            @click="toggleAnnouncedSection"
+          >
+            <svg
+              class="mt-0.5 size-3.5 shrink-0 self-center text-gray-500 transition-transform"
+              :class="announcedSectionExpanded && 'rotate-90'"
+              viewBox="0 0 24 24"
+              fill="none"
+              stroke="currentColor"
+              stroke-width="2"
+            >
+              <path d="m9 6 6 6-6 6" />
+            </svg>
+            <div class="min-w-0">
+              <h4 class="text-sm font-semibold text-gray-300">Announced metadata</h4>
+              <p class="mt-0.5 text-xs text-gray-500">What <code class="font-mono">/v1/models</code> reports about this alias.</p>
+            </div>
+          </button>
+          <label class="flex shrink-0 cursor-pointer items-center gap-2">
+            <Switch :model-value="overrideEnabled" @update:model-value="v => setOverrideEnabled(v === true)" />
+            <span class="text-xs text-gray-400">Enable override</span>
+          </label>
+        </div>
+
+        <div v-if="announcedSectionExpanded" id="announced-metadata-body" class="mt-4">
+          <AnnouncedMetadataEditor
+            v-if="overrideEnabled"
+            v-model="overrideBuffer"
+            :kind="kind"
+          />
+          <div v-else class="space-y-3">
+            <p class="text-xs text-gray-500">
+              Read-only — the intersection across every currently-available
+              target, with any rule-pinned sub-field treated as unsupported.
+              Enable override to publish a different payload to <code class="font-mono">/v1/models</code>.
+            </p>
+            <dl class="grid grid-cols-1 gap-x-6 gap-y-2 text-xs sm:grid-cols-2">
+              <div v-for="(label, key) in COMPUTED_LIMIT_LABELS" :key="key" class="flex items-baseline justify-between gap-3">
+                <dt class="text-gray-500">{{ label }}</dt>
+                <dd class="font-mono text-gray-300">{{ formatLimit(computedAnnouncedMetadata.limits?.[key]) }}</dd>
+              </div>
+              <template v-if="kind === 'chat'">
+                <div class="flex items-baseline justify-between gap-3">
+                  <dt class="text-gray-500">Modalities (input)</dt>
+                  <dd class="font-mono text-gray-300">{{ formatModalities(computedAnnouncedMetadata.chat?.modalities?.input) }}</dd>
+                </div>
+                <div class="flex items-baseline justify-between gap-3">
+                  <dt class="text-gray-500">Modalities (output)</dt>
+                  <dd class="font-mono text-gray-300">{{ formatModalities(computedAnnouncedMetadata.chat?.modalities?.output) }}</dd>
+                </div>
+                <div class="flex items-baseline justify-between gap-3">
+                  <dt class="text-gray-500">Reasoning effort</dt>
+                  <dd class="font-mono text-gray-300">{{ formatEffort(computedAnnouncedMetadata.chat?.reasoning?.effort) }}</dd>
+                </div>
+                <div class="flex items-baseline justify-between gap-3">
+                  <dt class="text-gray-500">Reasoning budget</dt>
+                  <dd class="font-mono text-gray-300">{{ formatBudget(computedAnnouncedMetadata.chat?.reasoning?.budget_tokens) }}</dd>
+                </div>
+                <div class="flex items-baseline justify-between gap-3">
+                  <dt class="text-gray-500">Adaptive</dt>
+                  <dd class="font-mono text-gray-300">{{ formatFlag(computedAnnouncedMetadata.chat?.reasoning?.adaptive) }}</dd>
+                </div>
+                <div class="flex items-baseline justify-between gap-3">
+                  <dt class="text-gray-500">Mandatory</dt>
+                  <dd class="font-mono text-gray-300">{{ formatFlag(computedAnnouncedMetadata.chat?.reasoning?.mandatory) }}</dd>
+                </div>
+              </template>
+            </dl>
+          </div>
+        </div>
+      </section>
+
       <div v-if="shadowWarning" class="rounded-md border border-amber-500/30 bg-amber-500/10 px-3 py-2 text-sm text-amber-300">
         This alias name shadows a real model id:
         <code class="font-mono">{{ shadowWarning.shadowedId }}</code>
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
index 244268abe..7072c39cd 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
+++ b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
@@ -165,4 +165,98 @@ describe('AliasEditDialog', () => {
     expect(document.body.innerHTML).toContain('<strong class="font-semibold">GPT 5</strong>');
     w.unmount();
   });
+
+  // ── Announced metadata section ────────────────────────────────────────
+
+  // The section header always renders for chat/embedding; the body only
+  // renders the editor when the "Enable override" switch is on. Image
+  // aliases never see the section at all.
+
+  const expandAnnouncedSection = async () => {
+    const header = portalQueryAll<HTMLButtonElement>('button').find(b => (b.textContent ?? '').includes('Announced metadata'))!;
+    header.click();
+    await nextTick();
+  };
+
+  const announcedSwitch = (): HTMLButtonElement => {
+    // The override switch sits at the right end of the section header
+    // row. Reka-UI renders Switch as a <button role="switch">, so scan
+    // by role + the surrounding "Enable override" label.
+    const label = Array.from(document.body.querySelectorAll<HTMLLabelElement>('label')).find(l => (l.textContent ?? '').includes('Enable override'))!;
+    return label.querySelector<HTMLButtonElement>('button[role="switch"]')!;
+  };
+
+  it('announced metadata: override off → editor not rendered; the read-only view appears in its place', async () => {
+    const w = mount(AliasEditDialog, {
+      props: { open: true, record: baseAlias({ name: 'a', targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }] }) },
+      attachTo: document.body,
+    });
+    await nextTick();
+    await expandAnnouncedSection();
+
+    // The override switch is present but off.
+    expect(announcedSwitch().getAttribute('aria-checked')).toBe('false');
+    // The editor's distinctive "Effort levels" toggle does not render.
+    expect(portalText()).not.toContain('Effort levels');
+    // Instead the read-only summary copy appears.
+    expect(portalText()).toContain('Read-only');
+    w.unmount();
+  });
+
+  it('announced metadata: toggling override on renders the editor seeded with the computed view', async () => {
+    modelsRef.value = [
+      {
+        id: 'gpt-5',
+        display_name: 'GPT 5',
+        kind: 'chat',
+        upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }],
+        chat: { reasoning: { effort: { supported: ['low', 'medium'], default: 'medium' } } },
+      },
+    ];
+    const w = mount(AliasEditDialog, {
+      props: { open: true, record: baseAlias({ name: 'a', targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }] }) },
+      attachTo: document.body,
+    });
+    await nextTick();
+    await expandAnnouncedSection();
+
+    announcedSwitch().click();
+    await nextTick();
+
+    // The editor's distinctive toggles render once the override is on.
+    expect(portalText()).toContain('Effort levels');
+    expect(portalText()).toContain('Budget tokens');
+    // The frozen seed includes the computed `medium` default, so the
+    // editor's pinned-default tag for `medium` is part of the visible DOM.
+    expect(portalText()).toContain('medium');
+    w.unmount();
+  });
+
+  it('announced metadata: toggling override off discards the buffer and restores the read-only view', async () => {
+    const w = mount(AliasEditDialog, {
+      props: { open: true, record: baseAlias({ name: 'a', targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }] }) },
+      attachTo: document.body,
+    });
+    await nextTick();
+    await expandAnnouncedSection();
+
+    const sw = announcedSwitch();
+    sw.click(); await nextTick();
+    expect(portalText()).toContain('Effort levels');
+    sw.click(); await nextTick();
+    // The editor unmounts and the read-only summary is back.
+    expect(portalText()).not.toContain('Effort levels');
+    expect(portalText()).toContain('Read-only');
+    w.unmount();
+  });
+
+  it('announced metadata: image-kind aliases never see the section', async () => {
+    const w = mount(AliasEditDialog, {
+      props: { open: true, record: baseAlias({ name: 'img', kind: 'image', targets: [{ target_model_id: 'dalle', rules: {} as never }] }) },
+      attachTo: document.body,
+    });
+    await nextTick();
+    expect(portalText()).not.toContain('Announced metadata');
+    w.unmount();
+  });
 });
diff --git a/apps/web/src/components/alias-edit/announced-metadata.ts b/apps/web/src/components/alias-edit/announced-metadata.ts
new file mode 100644
index 000000000..69a5543f6
--- /dev/null
+++ b/apps/web/src/components/alias-edit/announced-metadata.ts
@@ -0,0 +1,143 @@
+// Frontend mirror of the backend `synthesizeOne`'s rule-aware
+// intersection. The dashboard renders the read-only view of an
+// alias's announced metadata when no operator override is set; the
+// edit dialog also seeds the editor with this exact payload when
+// the operator flips the override switch on, so the editor starts
+// from the same baseline the wire surface would have published.
+//
+// Mirrors packages/gateway/src/data-plane/models/alias-listing.ts.
+// Keeping a local copy avoids a server round-trip per dialog open,
+// at the cost of a duplicated computation that we keep in sync by
+// hand. The backend stays authoritative — what `/v1/models` reports
+// is what the gateway computes there, not what this helper emits.
+
+import type { AliasTarget, AnnouncedMetadata, ChatAliasRules, ControlPlaneModel, ModelLimits } from '../../api/types.ts';
+
+type ChatModelInfo = NonNullable<ControlPlaneModel['chat']>;
+
+const chatRules = (target: AliasTarget): ChatAliasRules => target.rules as ChatAliasRules;
+
+const intersectArrays = <T>(arrays: readonly (readonly T[])[]): T[] => { // values present in every array
+  if (arrays.length === 0) return []; // no inputs: nothing in common
+  const [head, ...tail] = arrays;
+  return head.filter(value => tail.every(other => other.includes(value))); // keeps the first array's order
+};
+
+// Rule-driven downgrade: a sub-field the target's rule pins counts as
+// unsupported for intersection — `reasoning.effort` / `budget_tokens` when
+// set by the rule, `adaptive` when the rule is `true`. Copies, never mutates.
+const effectiveChatForIntersection = (chat: ChatModelInfo | undefined, target: AliasTarget): ChatModelInfo | undefined => {
+  if (chat === undefined) return undefined;
+  const rules = chatRules(target);
+  const ruleReasoning = rules.reasoning;
+  if (ruleReasoning === undefined) return chat; // no reasoning rule: nothing to downgrade
+  if (chat.reasoning === undefined) return chat; // catalog has no reasoning block to strip
+
+  const reasoning: NonNullable<ChatModelInfo['reasoning']> = { ...chat.reasoning }; // shallow copy before deleting
+  if (ruleReasoning.effort !== undefined) delete reasoning.effort;
+  if (ruleReasoning.budget_tokens !== undefined) delete reasoning.budget_tokens;
+  if (ruleReasoning.adaptive === true) delete reasoning.adaptive;
+
+  return { ...chat, reasoning };
+};
+
+const intersectChat = (chats: readonly ChatModelInfo[]): ChatModelInfo | undefined => { // fields shared by every chat; expects a non-empty list
+  const result: ChatModelInfo = {};
+
+  const modalityChats = chats.filter(c => c.modalities !== undefined);
+  if (modalityChats.length === chats.length) { // modalities: only when every chat declares them
+    const input = intersectArrays(modalityChats.map(c => c.modalities!.input));
+    const output = intersectArrays(modalityChats.map(c => c.modalities!.output));
+    if (input.length > 0 || output.length > 0) result.modalities = { input, output };
+  }
+
+  const reasoningChats = chats.filter(c => c.reasoning !== undefined);
+  if (reasoningChats.length === chats.length) { // reasoning: only when every chat declares it
+    const reasoning: NonNullable<ChatModelInfo['reasoning']> = {};
+
+    const effortChats = reasoningChats.filter(c => c.reasoning!.effort !== undefined);
+    if (effortChats.length === reasoningChats.length) { // effort: only when every chat declares it
+      const supported = intersectArrays(effortChats.map(c => c.reasoning!.effort!.supported));
+      const defaults = new Set(effortChats.map(c => c.reasoning!.effort!.default));
+      if (supported.length > 0) {
+        const agreedDefault = defaults.size === 1 ? [...defaults][0] : undefined; // unanimous default, if any
+        reasoning.effort = agreedDefault !== undefined && supported.includes(agreedDefault)
+          ? { supported, default: agreedDefault }
+          : { supported, default: supported[0]! }; // otherwise fall back to the first shared level
+      }
+    }
+
+    const budgetChats = reasoningChats.filter(c => c.reasoning!.budget_tokens !== undefined);
+    if (budgetChats.length === reasoningChats.length) { // budget: only when every chat declares it
+      const mins = budgetChats.map(c => c.reasoning!.budget_tokens!.min).filter((v): v is number => v !== undefined);
+      const maxes = budgetChats.map(c => c.reasoning!.budget_tokens!.max).filter((v): v is number => v !== undefined);
+      const min = mins.length === budgetChats.length ? Math.max(...mins) : undefined; // tightest lower bound, if all have one
+      const max = maxes.length === budgetChats.length ? Math.min(...maxes) : undefined; // tightest upper bound, if all have one
+      if (!(min !== undefined && max !== undefined && min > max)) { // skip an empty range (min > max)
+        const budget: NonNullable<NonNullable<ChatModelInfo['reasoning']>['budget_tokens']> = {};
+        if (min !== undefined) budget.min = min;
+        if (max !== undefined) budget.max = max;
+        if (min !== undefined || max !== undefined) reasoning.budget_tokens = budget;
+      }
+    }
+
+    const adaptiveAgreed = new Set(reasoningChats.map(c => c.reasoning!.adaptive)); // flags: kept only on a unanimous, defined value
+    if (adaptiveAgreed.size === 1) {
+      const value = [...adaptiveAgreed][0];
+      if (value !== undefined) reasoning.adaptive = value;
+    }
+    const mandatoryAgreed = new Set(reasoningChats.map(c => c.reasoning!.mandatory));
+    if (mandatoryAgreed.size === 1) {
+      const value = [...mandatoryAgreed][0];
+      if (value !== undefined) reasoning.mandatory = value;
+    }
+
+    if (Object.keys(reasoning).length > 0) result.reasoning = reasoning;
+  }
+
+  return Object.keys(result).length > 0 ? result : undefined; // nothing shared: report no chat block
+};
+
+const LIMIT_KEYS = ['max_context_window_tokens', 'max_prompt_tokens', 'max_output_tokens'] as const;
+
+const intersectLimits = (limitsList: readonly ModelLimits[]): ModelLimits => { // per-key minimum across all targets
+  if (limitsList.length === 0) return {};
+  const result: ModelLimits = {};
+  for (const key of LIMIT_KEYS) {
+    const values = limitsList.map(l => l[key]).filter((v): v is number => v !== undefined);
+    if (values.length === limitsList.length) result[key] = Math.min(...values); // published only if every target reports it
+  }
+  return result;
+};
+
+// Returns the rule-aware intersection across the targets that the live
+// catalog currently serves under the alias's kind, shaped as
+// AnnouncedMetadata. Yields `{}` when no target matches (or nothing is
+// shared) so callers can still render a skeleton.
+export const computeAnnouncedMetadata = (
+  targets: readonly AliasTarget[],
+  kind: 'chat' | 'embedding' | 'image',
+  models: readonly ControlPlaneModel[] | null | undefined,
+): AnnouncedMetadata => {
+  const realById = new Map((models ?? []).filter(m => m.aliasedFrom === undefined).map(m => [m.id, m] as const)); // skip rows carrying aliasedFrom
+  const available = targets
+    .map(target => ({ target, real: realById.get(target.target_model_id) }))
+    .filter((entry): entry is { target: AliasTarget; real: ControlPlaneModel } => entry.real?.kind === kind); // known id AND matching kind
+
+  if (available.length === 0) return {};
+
+  const limitsList = available.map(({ real }) => real.limits ?? {});
+  const limits = intersectLimits(limitsList);
+
+  const effectiveChats = available
+    .map(({ target, real }) => effectiveChatForIntersection(real.chat, target))
+    .filter((c): c is ChatModelInfo => c !== undefined);
+  const chat = effectiveChats.length === available.length // chat is published only if every available target has a chat block
+    ? intersectChat(effectiveChats)
+    : undefined;
+
+  const out: AnnouncedMetadata = {};
+  if (Object.keys(limits).length > 0) out.limits = limits;
+  if (chat !== undefined) out.chat = chat;
+  return out;
+};

From 079c3ded8380e2b71f8494b9fd7d7d8ca3ce0794 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 03:57:23 +0800
Subject: [PATCH 088/170] feat(aliases): alias entry advertises intersected
 endpoints in /v1/models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add an optional `endpoints?: ModelEndpoints` to `PublicModel`. Real
model entries stamp it from the resolved binding's endpoint map;
alias entries surface the intersection of every available target's
endpoint map. A key survives the intersection iff every available
target advertises it; sub-capability flags inside a key are ANDed
conservatively so a future sub-cap addition lands without
re-engineering the helper. Endpoints are never operator-editable on
aliases — they follow the target set, not a stored override.
---
 .../src/data-plane/models/alias-listing.ts    | 45 ++++++++++++++---
 .../data-plane/models/alias-listing_test.ts   | 48 ++++++++++++++++++-
 .../gateway/src/data-plane/models/load.ts     |  5 +-
 .../src/data-plane/models/load_test.ts        | 10 +++-
 .../src/data-plane/models/serve_test.ts       |  6 ++-
 packages/protocols/src/common/models.ts       |  9 ++++
 6 files changed, 110 insertions(+), 13 deletions(-)

diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index bb7f0871a..e9f8a9d80 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -27,12 +27,12 @@
 
 import type { ModelAliasRecord } from '../../repo/types.ts';
 import { composeAliasDisplayName } from '@floway-dev/protocols/common';
-import type { AliasTarget, AnnouncedMetadata, ChatAliasRules, ChatModelInfo, PublicModel, PublicModelAliasedFrom, PublicModelLimits } from '@floway-dev/protocols/common';
-import type { InternalModel, ResolvedModel } from '@floway-dev/provider';
+import type { AliasTarget, AnnouncedMetadata, ChatAliasRules, ChatModelInfo, ModelEndpointKey, ModelEndpoints, PublicModel, PublicModelAliasedFrom, PublicModelLimits } from '@floway-dev/protocols/common';
+import type { ResolvedModel } from '@floway-dev/provider';
 
 export interface ListedAliasInputs {
   readonly aliases: readonly ModelAliasRecord[];
-  readonly realModels: readonly InternalModel[];
+  readonly realModels: readonly ResolvedModel[];
 }
 
 // The repo guarantees rule shape matches the row's `kind` (chat rows carry
@@ -50,6 +50,30 @@ const intersectArrays = <T>(arrays: readonly (readonly T[])[]): T[] => {
   return head.filter(value => tail.every(other => other.includes(value)));
 };
 
+// Endpoint intersection: a key survives iff every target advertises it.
+// Sub-capability flags inside a key (the inner object) are intersected by
+// presence only: a flag survives iff no target leaves it undefined, and
+// it keeps the FIRST target's value — values are not compared or ANDed.
+// Today every endpoint value is the empty object, so the result is `{}`
+// per key; revisit the merge if a sub-cap ever carries a real boolean.
+const intersectEndpoints = (endpointsList: readonly ModelEndpoints[]): ModelEndpoints => {
+  if (endpointsList.length === 0) return {};
+  const keys = Object.keys(endpointsList[0]) as ModelEndpointKey[]; // any surviving key must appear in the first target
+  const result: ModelEndpoints = {};
+  for (const key of keys) {
+    if (!endpointsList.every(e => e[key] !== undefined)) continue;
+    const subCaps = endpointsList.map(e => e[key]!);
+    const merged: Record<string, unknown> = { ...subCaps[0] }; // start from the first target's flags
+    for (const cap of subCaps.slice(1)) {
+      for (const flag of Object.keys(merged)) {
+        if ((cap as Record<string, unknown>)[flag] === undefined) delete merged[flag];
+      }
+    }
+    result[key] = merged as ModelEndpoints[typeof key];
+  }
+  return result;
+};
+
 // Apply the rule-driven downgrade: a target with a pinned rule reports
 // the corresponding catalog sub-field as unsupported (= undefined) for
 // the purposes of intersection. Fields the rule doesn't touch pass
@@ -161,7 +185,7 @@ const buildAliasedFrom = (alias: ModelAliasRecord): PublicModelAliasedFrom => ({
 // the result directly or overlay it under an operator override.
 const computeAutomaticMetadata = (
   alias: ModelAliasRecord,
-  availableTargets: readonly { target: AliasTarget; real: InternalModel }[],
+  availableTargets: readonly { target: AliasTarget; real: ResolvedModel }[],
 ): { limits: PublicModelLimits; chat: ChatModelInfo | undefined } => {
   if (availableTargets.length === 0) return { limits: {}, chat: undefined };
 
@@ -191,11 +215,11 @@ const mergeWithOverride = (
   chat: override.chat ?? computed.chat,
 });
 
-const synthesizeOne = (alias: ModelAliasRecord, realModels: readonly InternalModel[]): PublicModel => {
+const synthesizeOne = (alias: ModelAliasRecord, realModels: readonly ResolvedModel[]): PublicModel => {
   const realById = new Map(realModels.map(m => [m.id, m] as const));
   const availableTargets = alias.targets
     .map(target => ({ target, real: realById.get(target.target_model_id) }))
-    .filter((entry): entry is { target: AliasTarget; real: InternalModel } => entry.real !== undefined && entry.real.kind === alias.kind);
+    .filter((entry): entry is { target: AliasTarget; real: ResolvedModel } => entry.real !== undefined && entry.real.kind === alias.kind);
 
   // Display name precedence: operator-set wins; otherwise derive from the
   // sole target's id + rules when single-target; multi-target falls back to
@@ -220,6 +244,15 @@ const synthesizeOne = (alias: ModelAliasRecord, realModels: readonly InternalMod
   };
   if (chat !== undefined) entry.chat = chat;
 
+  // Endpoints follow the available-targets intersection unconditionally
+  // — the operator can't override them (the alias's reachable surface is
+  // a fact derived from what the targets serve). Absent when no target
+  // is currently available, same shape as the chat block.
+  if (availableTargets.length > 0) {
+    const endpoints = intersectEndpoints(availableTargets.map(({ real }) => real.endpoints));
+    if (Object.keys(endpoints).length > 0) entry.endpoints = endpoints;
+  }
+
   // Single-target chat pricing rides along when available — the resolver
   // will hit that target, so the catalog can publish its rate verbatim.
   if (availableTargets.length === 1) {
diff --git a/packages/gateway/src/data-plane/models/alias-listing_test.ts b/packages/gateway/src/data-plane/models/alias-listing_test.ts
index c8ef0b9d6..6a616dce3 100644
--- a/packages/gateway/src/data-plane/models/alias-listing_test.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing_test.ts
@@ -2,7 +2,7 @@ import { describe, expect, test } from 'vitest';
 
 import { synthesizeListedAliases } from './alias-listing.ts';
 import type { ModelAliasRecord } from '../../repo/types.ts';
-import type { InternalModel } from '@floway-dev/provider';
+import type { ResolvedModel } from '@floway-dev/provider';
 
 const aliasFixture = (overrides: Partial<ModelAliasRecord> = {}): ModelAliasRecord => ({
   name: 'gpt-fast',
@@ -18,9 +18,11 @@ const aliasFixture = (overrides: Partial<ModelAliasRecord> = {}): ModelAliasReco
   ...overrides,
 });
 
-const realModel = (overrides: Partial<InternalModel> & { id: string }): InternalModel => ({
+const realModel = (overrides: Partial<ResolvedModel> & { id: string }): ResolvedModel => ({
   kind: 'chat',
   limits: {},
+  endpoints: { chatCompletions: {} },
+  providers: [],
   ...overrides,
 });
 
@@ -292,4 +294,46 @@ describe('synthesizeListedAliases', () => {
     const [entry] = synthesizeListedAliases({ aliases, realModels });
     expect(entry.chat).toEqual({ modalities: { input: ['text'], output: ['text'] } });
   });
+
+  test('endpoints is the intersection of available targets — shared keys survive, divergent keys drop', () => {
+    const aliases = [aliasFixture({
+      name: 'multi-ep',
+      targets: [
+        { target_model_id: 'a', rules: {} },
+        { target_model_id: 'b', rules: {} },
+      ],
+    })];
+    const realModels = [
+      realModel({ id: 'a', endpoints: { chatCompletions: {}, responses: {} } }),
+      realModel({ id: 'b', endpoints: { chatCompletions: {}, messages: {} } }),
+    ];
+    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    expect(entry.endpoints).toEqual({ chatCompletions: {} });
+  });
+
+  test('endpoints drops a key when any target lacks it', () => {
+    const aliases = [aliasFixture({
+      name: 'one-missing',
+      targets: [
+        { target_model_id: 'has-cc', rules: {} },
+        { target_model_id: 'no-cc', rules: {} },
+      ],
+    })];
+    const realModels = [
+      realModel({ id: 'has-cc', endpoints: { chatCompletions: {} } }),
+      realModel({ id: 'no-cc', endpoints: { responses: {} } }),
+    ];
+    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    // Disjoint endpoint sets → intersection is empty → field is absent.
+    expect(entry.endpoints).toBeUndefined();
+  });
+
+  test('endpoints is absent on the entry when no target is currently available', () => {
+    const aliases = [aliasFixture({
+      name: 'ghost',
+      targets: [{ target_model_id: 'missing', rules: {} }],
+    })];
+    const [entry] = synthesizeListedAliases({ aliases, realModels: [] });
+    expect(entry.endpoints).toBeUndefined();
+  });
 });
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index 849b7fdab..3dedd1579 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -3,9 +3,9 @@ import type { ModelAliasesRepo } from '../../repo/types.ts';
 import { getModels } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
-import type { Fetcher, InternalModel } from '@floway-dev/provider';
+import type { Fetcher, ResolvedModel } from '@floway-dev/provider';
 
-export const toPublicModel = (model: InternalModel): PublicModel => {
+export const toPublicModel = (model: ResolvedModel): PublicModel => {
   const info: PublicModel = {
     id: model.id,
     object: 'model',
@@ -13,6 +13,7 @@ export const toPublicModel = (model: InternalModel): PublicModel => {
     display_name: model.display_name ?? model.id,
     limits: { ...model.limits },
     kind: model.kind,
+    endpoints: { ...model.endpoints },
   };
   if (model.owned_by !== undefined) info.owned_by = model.owned_by;
   if (model.created !== undefined) {
diff --git a/packages/gateway/src/data-plane/models/load_test.ts b/packages/gateway/src/data-plane/models/load_test.ts
index 39985c9c4..a479de582 100644
--- a/packages/gateway/src/data-plane/models/load_test.ts
+++ b/packages/gateway/src/data-plane/models/load_test.ts
@@ -1,12 +1,14 @@
 import { describe, expect, test } from 'vitest';
 
 import { toPublicModel } from './load.ts';
-import type { InternalModel } from '@floway-dev/provider';
+import type { ResolvedModel } from '@floway-dev/provider';
 
-const base: InternalModel = {
+const base: ResolvedModel = {
   id: 'm1',
   kind: 'chat',
   limits: { max_context_window_tokens: 100000 },
+  endpoints: { chatCompletions: {} },
+  providers: [],
 };
 
 describe('toPublicModel', () => {
@@ -21,6 +23,10 @@ describe('toPublicModel', () => {
     };
     expect(toPublicModel({ ...base, chat }).chat).toEqual(chat);
   });
+
+  test('stamps the resolved binding endpoints onto the wire entry', () => {
+    expect(toPublicModel(base).endpoints).toEqual({ chatCompletions: {} });
+  });
 });
 
 // The alias merge step inside `loadModels` (alias entries follow real
diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index 94ca48fd3..476ea01e4 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -119,7 +119,8 @@ test('/v1/models returns merged model list from Copilot and custom upstreams', a
         assertEquals(model.providerKind, undefined);
         assertEquals(model.providers, undefined);
         assertEquals(model.providerData, undefined);
-        assertEquals(model.endpoints, undefined);
+        // `endpoints` IS surfaced — it tells callers which API families
+        // accept this id. Shape verified below.
         assertEquals(model.upstream, undefined);
         assertEquals(model.upstreamModel, undefined);
         // Copilot-only raw fields never reach the public DTO.
@@ -250,6 +251,7 @@ test('/models returns the same superset payload as /v1/models', async () => {
             display_name: 'Claude Opus 4.7 XHigh',
             limits: {},
             kind: 'chat',
+            endpoints: { messages: {} },
             cost: {
               input: 5,
               output: 25,
@@ -272,6 +274,7 @@ test('/models returns the same superset payload as /v1/models', async () => {
             display_name: 'embedding-only',
             limits: {},
             kind: 'embedding',
+            endpoints: { embeddings: {} },
           },
           {
             id: 'gpt-image-2',
@@ -280,6 +283,7 @@ test('/models returns the same superset payload as /v1/models', async () => {
             display_name: 'gpt-image-2',
             limits: {},
             kind: 'image',
+            endpoints: { imagesGenerations: {}, imagesEdits: {} },
           },
         ],
       });
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index 22cce94ab..9cd05072f 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -1,4 +1,5 @@
 import type { AliasKind, AliasSelection, AliasTarget } from './aliases.ts';
+import type { ModelEndpoints } from './capabilities.ts';
 
 // Disjoint billing dimensions a single request can be charged on. Every count
 // keyed by these is non-overlapping: a prompt token is counted under exactly
@@ -153,6 +154,14 @@ export interface PublicModel {
   // Non-standard extra fields below.
   limits: PublicModelLimits;
   kind: ModelKind;
+  // Per-endpoint availability map. Mirrors the upstream-side `ModelEndpoints`
+  // verbatim: a key present means the model is reachable over that endpoint.
+  // Real-model entries inherit it from the resolved binding; alias entries
+  // surface the intersection of every available target's endpoint map.
+  // Optional — clients that only care about /v1/models for capability
+  // discovery may ignore it. Operator-editable on real models via the
+  // ModelEditor; not editable on aliases (it follows from the target set).
+  endpoints?: ModelEndpoints;
   cost?: ModelPricing;
   chat?: ChatModelInfo;
   // Present only on entries the gateway synthesized from an operator-defined

From 3ac4f53923ad762cb389343518f992633b53218d Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 04:02:40 +0800
Subject: [PATCH 089/170] Revert "feat(aliases): alias entry advertises
 intersected endpoints in /v1/models"

This reverts commit 079c3ded8380e2b71f8494b9fd7d7d8ca3ce0794.
---
 .../src/data-plane/models/alias-listing.ts    | 45 +++--------------
 .../data-plane/models/alias-listing_test.ts   | 48 +------------------
 .../gateway/src/data-plane/models/load.ts     |  5 +-
 .../src/data-plane/models/load_test.ts        | 10 +---
 .../src/data-plane/models/serve_test.ts       |  6 +--
 packages/protocols/src/common/models.ts       |  9 ----
 6 files changed, 13 insertions(+), 110 deletions(-)

diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index e9f8a9d80..bb7f0871a 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -27,12 +27,12 @@
 
 import type { ModelAliasRecord } from '../../repo/types.ts';
 import { composeAliasDisplayName } from '@floway-dev/protocols/common';
-import type { AliasTarget, AnnouncedMetadata, ChatAliasRules, ChatModelInfo, ModelEndpointKey, ModelEndpoints, PublicModel, PublicModelAliasedFrom, PublicModelLimits } from '@floway-dev/protocols/common';
-import type { ResolvedModel } from '@floway-dev/provider';
+import type { AliasTarget, AnnouncedMetadata, ChatAliasRules, ChatModelInfo, PublicModel, PublicModelAliasedFrom, PublicModelLimits } from '@floway-dev/protocols/common';
+import type { InternalModel, ResolvedModel } from '@floway-dev/provider';
 
 export interface ListedAliasInputs {
   readonly aliases: readonly ModelAliasRecord[];
-  readonly realModels: readonly ResolvedModel[];
+  readonly realModels: readonly InternalModel[];
 }
 
 // The repo guarantees rule shape matches the row's `kind` (chat rows carry
@@ -50,30 +50,6 @@ const intersectArrays = <T>(arrays: readonly (readonly T[])[]): T[] => {
   return head.filter(value => tail.every(other => other.includes(value)));
 };
 
-// Endpoint intersection: a key survives iff every target advertises it.
-// Sub-capability flags inside a key (the inner object) are ANDed
-// conservatively — present in the result iff every contributing target
-// declares them. Today every endpoint value is the empty object, so the
-// AND collapses to an empty object too; the structure is in place so a
-// future sub-cap addition lands without re-engineering this helper.
-const intersectEndpoints = (endpointsList: readonly ModelEndpoints[]): ModelEndpoints => {
-  if (endpointsList.length === 0) return {};
-  const keys = Object.keys(endpointsList[0]) as ModelEndpointKey[];
-  const result: ModelEndpoints = {};
-  for (const key of keys) {
-    if (!endpointsList.every(e => e[key] !== undefined)) continue;
-    const subCaps = endpointsList.map(e => e[key]!);
-    const merged: Record<string, unknown> = { ...subCaps[0] };
-    for (const cap of subCaps.slice(1)) {
-      for (const flag of Object.keys(merged)) {
-        if ((cap as Record<string, unknown>)[flag] === undefined) delete merged[flag];
-      }
-    }
-    result[key] = merged as ModelEndpoints[typeof key];
-  }
-  return result;
-};
-
 // Apply the rule-driven downgrade: a target with a pinned rule reports
 // the corresponding catalog sub-field as unsupported (= undefined) for
 // the purposes of intersection. Fields the rule doesn't touch pass
@@ -185,7 +161,7 @@ const buildAliasedFrom = (alias: ModelAliasRecord): PublicModelAliasedFrom => ({
 // the result directly or overlay it under an operator override.
 const computeAutomaticMetadata = (
   alias: ModelAliasRecord,
-  availableTargets: readonly { target: AliasTarget; real: ResolvedModel }[],
+  availableTargets: readonly { target: AliasTarget; real: InternalModel }[],
 ): { limits: PublicModelLimits; chat: ChatModelInfo | undefined } => {
   if (availableTargets.length === 0) return { limits: {}, chat: undefined };
 
@@ -215,11 +191,11 @@ const mergeWithOverride = (
   chat: override.chat ?? computed.chat,
 });
 
-const synthesizeOne = (alias: ModelAliasRecord, realModels: readonly ResolvedModel[]): PublicModel => {
+const synthesizeOne = (alias: ModelAliasRecord, realModels: readonly InternalModel[]): PublicModel => {
   const realById = new Map(realModels.map(m => [m.id, m] as const));
   const availableTargets = alias.targets
     .map(target => ({ target, real: realById.get(target.target_model_id) }))
-    .filter((entry): entry is { target: AliasTarget; real: ResolvedModel } => entry.real !== undefined && entry.real.kind === alias.kind);
+    .filter((entry): entry is { target: AliasTarget; real: InternalModel } => entry.real !== undefined && entry.real.kind === alias.kind);
 
   // Display name precedence: operator-set wins; otherwise derive from the
   // sole target's id + rules when single-target; multi-target falls back to
@@ -244,15 +220,6 @@ const synthesizeOne = (alias: ModelAliasRecord, realModels: readonly ResolvedMod
   };
   if (chat !== undefined) entry.chat = chat;
 
-  // Endpoints follow the available-targets intersection unconditionally
-  // — the operator can't override them (the alias's reachable surface is
-  // a fact derived from what the targets serve). Absent when no target
-  // is currently available, same shape as the chat block.
-  if (availableTargets.length > 0) {
-    const endpoints = intersectEndpoints(availableTargets.map(({ real }) => real.endpoints));
-    if (Object.keys(endpoints).length > 0) entry.endpoints = endpoints;
-  }
-
   // Single-target chat pricing rides along when available — the resolver
   // will hit that target, so the catalog can publish its rate verbatim.
   if (availableTargets.length === 1) {
diff --git a/packages/gateway/src/data-plane/models/alias-listing_test.ts b/packages/gateway/src/data-plane/models/alias-listing_test.ts
index 6a616dce3..c8ef0b9d6 100644
--- a/packages/gateway/src/data-plane/models/alias-listing_test.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing_test.ts
@@ -2,7 +2,7 @@ import { describe, expect, test } from 'vitest';
 
 import { synthesizeListedAliases } from './alias-listing.ts';
 import type { ModelAliasRecord } from '../../repo/types.ts';
-import type { ResolvedModel } from '@floway-dev/provider';
+import type { InternalModel } from '@floway-dev/provider';
 
 const aliasFixture = (overrides: Partial<ModelAliasRecord> = {}): ModelAliasRecord => ({
   name: 'gpt-fast',
@@ -18,11 +18,9 @@ const aliasFixture = (overrides: Partial<ModelAliasRecord> = {}): ModelAliasReco
   ...overrides,
 });
 
-const realModel = (overrides: Partial<ResolvedModel> & { id: string }): ResolvedModel => ({
+const realModel = (overrides: Partial<InternalModel> & { id: string }): InternalModel => ({
   kind: 'chat',
   limits: {},
-  endpoints: { chatCompletions: {} },
-  providers: [],
   ...overrides,
 });
 
@@ -294,46 +292,4 @@ describe('synthesizeListedAliases', () => {
     const [entry] = synthesizeListedAliases({ aliases, realModels });
     expect(entry.chat).toEqual({ modalities: { input: ['text'], output: ['text'] } });
   });
-
-  test('endpoints is the intersection of available targets — shared keys survive, divergent keys drop', () => {
-    const aliases = [aliasFixture({
-      name: 'multi-ep',
-      targets: [
-        { target_model_id: 'a', rules: {} },
-        { target_model_id: 'b', rules: {} },
-      ],
-    })];
-    const realModels = [
-      realModel({ id: 'a', endpoints: { chatCompletions: {}, responses: {} } }),
-      realModel({ id: 'b', endpoints: { chatCompletions: {}, messages: {} } }),
-    ];
-    const [entry] = synthesizeListedAliases({ aliases, realModels });
-    expect(entry.endpoints).toEqual({ chatCompletions: {} });
-  });
-
-  test('endpoints drops a key when any target lacks it', () => {
-    const aliases = [aliasFixture({
-      name: 'one-missing',
-      targets: [
-        { target_model_id: 'has-cc', rules: {} },
-        { target_model_id: 'no-cc', rules: {} },
-      ],
-    })];
-    const realModels = [
-      realModel({ id: 'has-cc', endpoints: { chatCompletions: {} } }),
-      realModel({ id: 'no-cc', endpoints: { responses: {} } }),
-    ];
-    const [entry] = synthesizeListedAliases({ aliases, realModels });
-    // Disjoint endpoint sets → intersection is empty → field is absent.
-    expect(entry.endpoints).toBeUndefined();
-  });
-
-  test('endpoints is absent on the entry when no target is currently available', () => {
-    const aliases = [aliasFixture({
-      name: 'ghost',
-      targets: [{ target_model_id: 'missing', rules: {} }],
-    })];
-    const [entry] = synthesizeListedAliases({ aliases, realModels: [] });
-    expect(entry.endpoints).toBeUndefined();
-  });
 });
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index 3dedd1579..849b7fdab 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -3,9 +3,9 @@ import type { ModelAliasesRepo } from '../../repo/types.ts';
 import { getModels } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
-import type { Fetcher, ResolvedModel } from '@floway-dev/provider';
+import type { Fetcher, InternalModel } from '@floway-dev/provider';
 
-export const toPublicModel = (model: ResolvedModel): PublicModel => {
+export const toPublicModel = (model: InternalModel): PublicModel => {
   const info: PublicModel = {
     id: model.id,
     object: 'model',
@@ -13,7 +13,6 @@ export const toPublicModel = (model: ResolvedModel): PublicModel => {
     display_name: model.display_name ?? model.id,
     limits: { ...model.limits },
     kind: model.kind,
-    endpoints: { ...model.endpoints },
   };
   if (model.owned_by !== undefined) info.owned_by = model.owned_by;
   if (model.created !== undefined) {
diff --git a/packages/gateway/src/data-plane/models/load_test.ts b/packages/gateway/src/data-plane/models/load_test.ts
index a479de582..39985c9c4 100644
--- a/packages/gateway/src/data-plane/models/load_test.ts
+++ b/packages/gateway/src/data-plane/models/load_test.ts
@@ -1,14 +1,12 @@
 import { describe, expect, test } from 'vitest';
 
 import { toPublicModel } from './load.ts';
-import type { ResolvedModel } from '@floway-dev/provider';
+import type { InternalModel } from '@floway-dev/provider';
 
-const base: ResolvedModel = {
+const base: InternalModel = {
   id: 'm1',
   kind: 'chat',
   limits: { max_context_window_tokens: 100000 },
-  endpoints: { chatCompletions: {} },
-  providers: [],
 };
 
 describe('toPublicModel', () => {
@@ -23,10 +21,6 @@ describe('toPublicModel', () => {
     };
     expect(toPublicModel({ ...base, chat }).chat).toEqual(chat);
   });
-
-  test('stamps the resolved binding endpoints onto the wire entry', () => {
-    expect(toPublicModel(base).endpoints).toEqual({ chatCompletions: {} });
-  });
 });
 
 // The alias merge step inside `loadModels` (alias entries follow real
diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index 476ea01e4..94ca48fd3 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -119,8 +119,7 @@ test('/v1/models returns merged model list from Copilot and custom upstreams', a
         assertEquals(model.providerKind, undefined);
         assertEquals(model.providers, undefined);
         assertEquals(model.providerData, undefined);
-        // `endpoints` IS surfaced — it tells callers which API families
-        // accept this id. Shape verified below.
+        assertEquals(model.endpoints, undefined);
         assertEquals(model.upstream, undefined);
         assertEquals(model.upstreamModel, undefined);
         // Copilot-only raw fields never reach the public DTO.
@@ -251,7 +250,6 @@ test('/models returns the same superset payload as /v1/models', async () => {
             display_name: 'Claude Opus 4.7 XHigh',
             limits: {},
             kind: 'chat',
-            endpoints: { messages: {} },
             cost: {
               input: 5,
               output: 25,
@@ -274,7 +272,6 @@ test('/models returns the same superset payload as /v1/models', async () => {
             display_name: 'embedding-only',
             limits: {},
             kind: 'embedding',
-            endpoints: { embeddings: {} },
           },
           {
             id: 'gpt-image-2',
@@ -283,7 +280,6 @@ test('/models returns the same superset payload as /v1/models', async () => {
             display_name: 'gpt-image-2',
             limits: {},
             kind: 'image',
-            endpoints: { imagesGenerations: {}, imagesEdits: {} },
           },
         ],
       });
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index 9cd05072f..22cce94ab 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -1,5 +1,4 @@
 import type { AliasKind, AliasSelection, AliasTarget } from './aliases.ts';
-import type { ModelEndpoints } from './capabilities.ts';
 
 // Disjoint billing dimensions a single request can be charged on. Every count
 // keyed by these is non-overlapping: a prompt token is counted under exactly
@@ -154,14 +153,6 @@ export interface PublicModel {
   // Non-standard extra fields below.
   limits: PublicModelLimits;
   kind: ModelKind;
-  // Per-endpoint availability map. Mirrors the upstream-side `ModelEndpoints`
-  // verbatim: a key present means the model is reachable over that endpoint.
-  // Real-model entries inherit it from the resolved binding; alias entries
-  // surface the intersection of every available target's endpoint map.
-  // Optional — clients that only care about /v1/models for capability
-  // discovery may ignore it. Operator-editable on real models via the
-  // ModelEditor; not editable on aliases (it follows from the target set).
-  endpoints?: ModelEndpoints;
   cost?: ModelPricing;
   chat?: ChatModelInfo;
   // Present only on entries the gateway synthesized from an operator-defined

From d0ccf9730bb019ad5a226157b83dd0a1024fafb3 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 04:11:52 +0800
Subject: [PATCH 090/170] refactor(aliases/web): extract ChatMetadataEditor
 shared by alias dialog + model editor
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Limits + Modalities + Reasoning sub-blocks were duplicated between
the alias dialog's AnnouncedMetadataEditor and ModelEditor's inline
sections. They now live in one place — apps/web/src/components/shared/
ChatMetadataEditor.vue — as a controlled component the parent steers
with `mode: 'auto' | 'manual'`.

Contract:
- `modelValue: AnnouncedMetadata | undefined` — `{ limits?, chat? }`.
- `kind: 'chat' | 'embedding' | 'image'` — gates the sub-blocks
  (chat shows all three, embedding shows Limits only, image renders
  nothing).
- `mode='auto'` → every field renders read-only; interactions no-op.
- `mode='manual'` → editable; interactions emit `update:modelValue`.

Caller wiring:
- AliasEditDialog now mounts ChatMetadataEditor whenever the
  announced-metadata section is open, regardless of the override
  state, and maps the "Enable override" switch to the mode prop
  (on → 'manual', off → 'auto'). The read-only `<dl>` summary
  collapses into the same editor in auto mode — operators get one
  consistent surface. Auto-mode value is the live computed
  intersection; manual-mode value is the override buffer.
- ModelEditor mounts ChatMetadataEditor between Supported endpoints
  and Pricing (replacing the inline Context Limits + Modalities +
  Reasoning sections), wired to `editable ? 'manual' : 'auto'`.

AnnouncedMetadataEditor.vue no longer exists under alias-edit/; it is
moved (recorded by git as a rename) to shared/ChatMetadataEditor.vue,
where its logic now lives. `ChatModelInfo` is re-exported from the web
package's api/types.ts for the component's prop type.
---
 apps/web/src/api/types.ts                     |   3 +-
 .../components/alias-edit/AliasEditDialog.vue | 100 +---
 .../alias-edit/AliasEditDialog_test.ts        |  38 +-
 .../ChatMetadataEditor.vue}                   | 228 +++++----
 .../shared/ChatMetadataEditor_test.ts         | 111 +++++
 .../components/upstream-edit/ModelEditor.vue  | 455 ++----------------
 6 files changed, 333 insertions(+), 602 deletions(-)
 rename apps/web/src/components/{alias-edit/AnnouncedMetadataEditor.vue => shared/ChatMetadataEditor.vue} (64%)
 create mode 100644 apps/web/src/components/shared/ChatMetadataEditor_test.ts

diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts
index a9ea23fe9..d919a0c01 100644
--- a/apps/web/src/api/types.ts
+++ b/apps/web/src/api/types.ts
@@ -8,6 +8,7 @@ import type {
   AnnouncedMetadata,
   BillingDimension,
   ChatAliasRules,
+  ChatModelInfo,
   ModelAlias,
   ModelEndpointKey,
   ModelEndpoints,
@@ -18,7 +19,7 @@ import type { AddressableForm, ModelPrefixConfig } from '@floway-dev/provider/mo
 
 export type { BillingDimension, ModelEndpointKey, ModelEndpoints, ModelKind, ModelPricing };
 export type { AddressableForm, ModelPrefixConfig };
-export type { AliasKind, AliasRules, AliasSelection, AliasTarget, AnnouncedMetadata, ChatAliasRules, ModelAlias };
+export type { AliasKind, AliasRules, AliasSelection, AliasTarget, AnnouncedMetadata, ChatAliasRules, ChatModelInfo, ModelAlias };
 
 export type UpstreamProviderKind = 'custom' | 'azure' | 'copilot' | 'codex' | 'claude-code' | 'ollama';
 
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index adb5fc324..806f50b7e 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -8,12 +8,12 @@ import { computed, ref, watch } from 'vue';
 
 import AliasTargetRow from './AliasTargetRow.vue';
 import { computeAnnouncedMetadata } from './announced-metadata.ts';
-import AnnouncedMetadataEditor from './AnnouncedMetadataEditor.vue';
 import { computeShadowWarning, realModelIdsOfKind } from './warnings.ts';
 import { callApi, useApi } from '../../api/client.ts';
 import type { AliasKind, AliasSelection, AliasTarget, AnnouncedMetadata, ChatAliasRules, ModelAlias } from '../../api/types.ts';
 import { useModelAliases } from '../../composables/useModelAliases.ts';
 import { useRawModelsStore } from '../../composables/useModels.ts';
+import ChatMetadataEditor from '../shared/ChatMetadataEditor.vue';
 import { Button, Dialog, Input, Select, Switch } from '@floway-dev/ui';
 
 const open = defineModel<boolean>('open', { required: true });
@@ -112,11 +112,19 @@ const setOverrideEnabled = (on: boolean) => {
   }
 };
 
-// Two-way binding for the editor: writes flow back to the buffer.
-const overrideBuffer = computed<AnnouncedMetadata>({
-  get: () => announcedOverride.value ?? {},
-  set: next => { announcedOverride.value = next; },
-});
+// The editor's `modelValue` source-of-truth: the override buffer when
+// the operator is editing, the live computed snapshot when not. In
+// auto mode the editor is read-only, so its emits are no-ops anyway.
+const announcedEditorValue = computed<AnnouncedMetadata>(
+  () => announcedOverride.value ?? computedAnnouncedMetadata.value,
+);
+
+const onAnnouncedChange = (next: AnnouncedMetadata | undefined) => {
+  // Editor only fires this in manual mode (auto is read-only). Persist
+  // an empty object rather than null so the override stays "on" even
+  // when the operator clears every field.
+  announcedOverride.value = next ?? {};
+};
 
 // Switching alias kind discards a chat-only override since the
 // schema would reject it on save (e.g. embedding aliases can not
@@ -212,35 +220,6 @@ const KIND_OPTIONS: { value: AliasKind; label: string }[] = [
   { value: 'embedding', label: 'Embedding' },
   { value: 'image', label: 'Image' },
 ];
-
-// Labels + formatters for the read-only view of the auto-computed
-// announced metadata. Keeping them inline (rather than reusing the
-// editor in disabled mode) keeps the override-off render compact and
-// avoids dragging the editor's mutation surface into a read path.
-const COMPUTED_LIMIT_LABELS = {
-  max_context_window_tokens: 'Context window',
-  max_prompt_tokens: 'Prompt tokens',
-  max_output_tokens: 'Output tokens',
-} as const;
-
-const formatLimit = (n: number | undefined): string => n === undefined ? '—' : n.toLocaleString('en-US');
-
-const formatModalities = (mods: readonly string[] | undefined): string =>
-  mods === undefined || mods.length === 0 ? '—' : mods.join(', ');
-
-const formatEffort = (effort: { supported: readonly string[]; default: string } | undefined): string =>
-  effort === undefined ? '—' : `${effort.supported.join(', ')} (default: ${effort.default})`;
-
-const formatBudget = (range: { min?: number; max?: number } | undefined): string => {
-  if (range === undefined) return '—';
-  const { min, max } = range;
-  if (min === undefined && max === undefined) return '—';
-  if (min !== undefined && max !== undefined) return `${min}–${max}`;
-  if (min !== undefined) return `≥ ${min}`;
-  return `≤ ${max}`;
-};
-
-const formatFlag = (flag: boolean | undefined): string => flag === true ? 'yes' : flag === false ? 'no' : '—';
 </script>
 
 <template>
@@ -343,50 +322,17 @@ const formatFlag = (flag: boolean | undefined): string => flag === true ? 'yes'
         </div>
 
         <div v-if="announcedSectionExpanded" id="announced-metadata-body" class="mt-4">
-          <AnnouncedMetadataEditor
-            v-if="overrideEnabled"
-            v-model="overrideBuffer"
+          <p v-if="!overrideEnabled" class="mb-3 text-xs text-gray-500">
+            Read-only — the intersection across every currently-available
+            target, with any rule-pinned sub-field treated as unsupported.
+            Enable override to publish a different payload to <code class="font-mono">/v1/models</code>.
+          </p>
+          <ChatMetadataEditor
+            :model-value="announcedEditorValue"
             :kind="kind"
+            :mode="overrideEnabled ? 'manual' : 'auto'"
+            @update:model-value="onAnnouncedChange"
           />
-          <div v-else class="space-y-3">
-            <p class="text-xs text-gray-500">
-              Read-only — the intersection across every currently-available
-              target, with any rule-pinned sub-field treated as unsupported.
-              Enable override to publish a different payload to <code class="font-mono">/v1/models</code>.
-            </p>
-            <dl class="grid grid-cols-1 gap-x-6 gap-y-2 text-xs sm:grid-cols-2">
-              <div v-for="(label, key) in COMPUTED_LIMIT_LABELS" :key="key" class="flex items-baseline justify-between gap-3">
-                <dt class="text-gray-500">{{ label }}</dt>
-                <dd class="font-mono text-gray-300">{{ formatLimit(computedAnnouncedMetadata.limits?.[key]) }}</dd>
-              </div>
-              <template v-if="kind === 'chat'">
-                <div class="flex items-baseline justify-between gap-3">
-                  <dt class="text-gray-500">Modalities (input)</dt>
-                  <dd class="font-mono text-gray-300">{{ formatModalities(computedAnnouncedMetadata.chat?.modalities?.input) }}</dd>
-                </div>
-                <div class="flex items-baseline justify-between gap-3">
-                  <dt class="text-gray-500">Modalities (output)</dt>
-                  <dd class="font-mono text-gray-300">{{ formatModalities(computedAnnouncedMetadata.chat?.modalities?.output) }}</dd>
-                </div>
-                <div class="flex items-baseline justify-between gap-3">
-                  <dt class="text-gray-500">Reasoning effort</dt>
-                  <dd class="font-mono text-gray-300">{{ formatEffort(computedAnnouncedMetadata.chat?.reasoning?.effort) }}</dd>
-                </div>
-                <div class="flex items-baseline justify-between gap-3">
-                  <dt class="text-gray-500">Reasoning budget</dt>
-                  <dd class="font-mono text-gray-300">{{ formatBudget(computedAnnouncedMetadata.chat?.reasoning?.budget_tokens) }}</dd>
-                </div>
-                <div class="flex items-baseline justify-between gap-3">
-                  <dt class="text-gray-500">Adaptive</dt>
-                  <dd class="font-mono text-gray-300">{{ formatFlag(computedAnnouncedMetadata.chat?.reasoning?.adaptive) }}</dd>
-                </div>
-                <div class="flex items-baseline justify-between gap-3">
-                  <dt class="text-gray-500">Mandatory</dt>
-                  <dd class="font-mono text-gray-300">{{ formatFlag(computedAnnouncedMetadata.chat?.reasoning?.mandatory) }}</dd>
-                </div>
-              </template>
-            </dl>
-          </div>
         </div>
       </section>
 
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
index 7072c39cd..d8f0bc29b 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
+++ b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
@@ -186,7 +186,16 @@ describe('AliasEditDialog', () => {
     return label.querySelector<HTMLButtonElement>('button[role="switch"]')!;
   };
 
-  it('announced metadata: override off → editor not rendered; the read-only view appears in its place', async () => {
+  // Locate the "Effort levels" toggle inside the ChatMetadataEditor by
+  // scanning labels in the portal-rooted DOM. Reka-UI renders Switch as
+  // a `<button role="switch">`, sitting next to its caption.
+  const effortSwitch = (): HTMLButtonElement | null => {
+    const label = Array.from(document.body.querySelectorAll<HTMLLabelElement>('label'))
+      .find(l => (l.textContent ?? '').trim().startsWith('Effort levels'));
+    return label?.querySelector<HTMLButtonElement>('button[role="switch"]') ?? null;
+  };
+
+  it('announced metadata: override off → editor renders in auto (read-only) mode', async () => {
     const w = mount(AliasEditDialog, {
       props: { open: true, record: baseAlias({ name: 'a', targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }] }) },
       attachTo: document.body,
@@ -196,14 +205,17 @@ describe('AliasEditDialog', () => {
 
     // The override switch is present but off.
     expect(announcedSwitch().getAttribute('aria-checked')).toBe('false');
-    // The editor's distinctive "Effort levels" toggle does not render.
-    expect(portalText()).not.toContain('Effort levels');
-    // Instead the read-only summary copy appears.
+    // The read-only hint copy appears above the editor.
     expect(portalText()).toContain('Read-only');
+    // The shared editor mounts and renders the Reasoning toggles, but
+    // every Switch in there is disabled because mode='auto'.
+    const sw = effortSwitch();
+    expect(sw).not.toBeNull();
+    expect(sw!.disabled).toBe(true);
     w.unmount();
   });
 
-  it('announced metadata: toggling override on renders the editor seeded with the computed view', async () => {
+  it('announced metadata: toggling override on switches the editor into manual (enabled) mode and seeds it from the computed view', async () => {
     modelsRef.value = [
       {
         id: 'gpt-5',
@@ -223,16 +235,18 @@ describe('AliasEditDialog', () => {
     announcedSwitch().click();
     await nextTick();
 
-    // The editor's distinctive toggles render once the override is on.
-    expect(portalText()).toContain('Effort levels');
-    expect(portalText()).toContain('Budget tokens');
+    // Read-only hint disappears; the editor now accepts input.
+    expect(portalText()).not.toContain('Read-only');
+    const sw = effortSwitch();
+    expect(sw).not.toBeNull();
+    expect(sw!.disabled).toBe(false);
     // The frozen seed includes the computed `medium` default, so the
     // editor's pinned-default tag for `medium` is part of the visible DOM.
     expect(portalText()).toContain('medium');
     w.unmount();
   });
 
-  it('announced metadata: toggling override off discards the buffer and restores the read-only view', async () => {
+  it('announced metadata: toggling override off restores auto (read-only) mode', async () => {
     const w = mount(AliasEditDialog, {
       props: { open: true, record: baseAlias({ name: 'a', targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }] }) },
       attachTo: document.body,
@@ -242,11 +256,11 @@ describe('AliasEditDialog', () => {
 
     const sw = announcedSwitch();
     sw.click(); await nextTick();
-    expect(portalText()).toContain('Effort levels');
+    expect(effortSwitch()!.disabled).toBe(false);
     sw.click(); await nextTick();
-    // The editor unmounts and the read-only summary is back.
-    expect(portalText()).not.toContain('Effort levels');
+    // Auto mode: read-only hint back, effort switch disabled again.
     expect(portalText()).toContain('Read-only');
+    expect(effortSwitch()!.disabled).toBe(true);
     w.unmount();
   });
 
diff --git a/apps/web/src/components/alias-edit/AnnouncedMetadataEditor.vue b/apps/web/src/components/shared/ChatMetadataEditor.vue
similarity index 64%
rename from apps/web/src/components/alias-edit/AnnouncedMetadataEditor.vue
rename to apps/web/src/components/shared/ChatMetadataEditor.vue
index 32a37b280..ef1bba905 100644
--- a/apps/web/src/components/alias-edit/AnnouncedMetadataEditor.vue
+++ b/apps/web/src/components/shared/ChatMetadataEditor.vue
@@ -1,74 +1,88 @@
 <script setup lang="ts">
-// Editor for an alias's announced-metadata override — the operator's
-// explicit `limits` + `chat` block that overrides the auto-computed
-// intersection inside `synthesizeListedAliases`. v-model is the wire
-// shape (`AnnouncedMetadata`), kind-gated:
+// Shared editor for a chat/embedding model's `limits` + `chat` metadata.
+// Hosts the Limits + Modalities + Reasoning sub-blocks consumed by both
+// `ModelEditor.vue` (a real catalog row's editor) and
+// `AliasEditDialog.vue` (the alias's announced-metadata override).
 //
-//   - chat       → Limits + Modalities + Reasoning sub-blocks
-//   - embedding  → Limits sub-block only
-//   - image      → never mounted (the alias edit dialog hides the whole
-//                  section for image-kind aliases)
+// Controlled component: the parent owns the `mode` flip.
+//   - `mode === 'manual'` → every field is editable; interactions emit.
+//   - `mode === 'auto'`   → every field renders read-only; interactions
+//     are no-ops. The parent passes in the computed snapshot via
+//     `modelValue` so the operator still sees the live values.
 //
-// The three sub-blocks below mirror the matching ones in
-// `apps/web/src/components/upstream-edit/ModelEditor.vue`. We accepted
-// the duplication rather than attempting a full extraction of the
-// catalog-side editor in the same PR — `ModelEditor.vue` ties those
-// blocks to a wider state machine (config / editable / Manual vs
-// Auto / per-model flag overrides) that doesn't carry over cleanly.
-// A later cleanup pass can lift the shared bits into a single host.
+// Kind-gated sub-blocks:
+//   - `chat`      → Limits + Modalities + Reasoning.
+//   - `embedding` → Limits only.
+//   - `image`     → renders nothing (callers should not mount this).
 
 import { computed, ref, watch } from 'vue';
 
-import type { AliasKind, AnnouncedMetadata, ModelLimits, UpstreamChatConfig } from '../../api/types.ts';
+import type { AnnouncedMetadata, ChatModelInfo, ModelKind } from '../../api/types.ts';
 import { Button, Input, Switch, Tooltip } from '@floway-dev/ui';
 
-const modelValue = defineModel<AnnouncedMetadata>({ required: true });
-
 const props = defineProps<{
-  kind: AliasKind;
+  modelValue: AnnouncedMetadata | undefined;
+  kind: ModelKind;
+  mode: 'auto' | 'manual';
 }>();
 
-// Mutable local view of the wire payload. ChatModelInfo's modality
-// arrays are typed `readonly`; the templates below build mutable
-// copies that the wire shape accepts back without further coercion.
-type EditableMetadata = { limits?: ModelLimits; chat?: UpstreamChatConfig };
+const emit = defineEmits<{
+  'update:modelValue': [next: AnnouncedMetadata | undefined];
+}>();
 
-const editable = computed<EditableMetadata>(() => modelValue.value as EditableMetadata);
+const value = computed<AnnouncedMetadata>(() => props.modelValue ?? {});
+const editable = computed(() => props.mode === 'manual');
+const showChatBlocks = computed(() => props.kind === 'chat');
+const renderAnything = computed(() => props.kind !== 'image');
 
-const patch = (next: EditableMetadata) => {
-  // Strip empty sub-blocks so the wire payload stays minimal — the
-  // alias-listing fallback only kicks in for absent fields.
-  const out: EditableMetadata = {};
+// Known Codex CLI effort presets as of v0.137. Codex's wire type is open
+// (ReasoningEffort::Custom(String)) so any string is accepted upstream;
+// these are just the convenient quick-adds. See:
+// https://github.com/openai/codex/blob/main/codex-rs/protocol/src/openai_models.rs
+const REASONING_LEVELS = ['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max'] as const;
+
+// Strip empty sub-blocks so the wire payload stays minimal — the alias
+// listing fallback only kicks in for absent fields; the upstream model
+// config likewise treats a missing key as "inherit".
+const patch = (next: AnnouncedMetadata) => {
+  if (!editable.value) return;
+  const out: AnnouncedMetadata = {};
   if (next.limits && Object.keys(next.limits).length > 0) out.limits = next.limits;
   if (next.chat && (next.chat.modalities !== undefined || next.chat.reasoning !== undefined)) out.chat = next.chat;
-  modelValue.value = out as AnnouncedMetadata;
+  emit('update:modelValue', Object.keys(out).length > 0 ? out : undefined);
 };
 
 const parseOptionalNumber = (raw: string | number | null | undefined): number | undefined => {
   if (raw === '' || raw === null || raw === undefined) return undefined;
   const num = Number(raw);
+  // Both editor surfaces feed nonnegative integer counts (token caps,
+  // budget bounds); a stray minus sign shouldn't stage a value that
+  // the next PUT will reject with a 400.
   return Number.isFinite(num) && num >= 0 ? num : undefined;
 };
 
-// ── Limits ─────────────────────────────────────────────────────────────
+// ── Limits ────────────────────────────────────────────────────────────
 
 const updateLimit = (
   key: 'max_context_window_tokens' | 'max_prompt_tokens' | 'max_output_tokens',
   raw: string | number | null | undefined,
 ) => {
-  const limits = { ...(editable.value.limits ?? {}) };
+  if (!editable.value) return;
+  const limits = { ...(value.value.limits ?? {}) };
   const num = parseOptionalNumber(raw);
   if (num === undefined) delete limits[key];
   else limits[key] = num;
-  patch({ ...editable.value, limits: Object.keys(limits).length > 0 ? limits : undefined });
+  patch({ ...value.value, limits: Object.keys(limits).length > 0 ? limits : undefined });
 };
 
-// ── Chat builder helpers ───────────────────────────────────────────────
+// ── Chat builder helpers ──────────────────────────────────────────────
 
-const buildNextChat = (partial: Partial<UpstreamChatConfig>): UpstreamChatConfig | undefined => {
-  const base = editable.value.chat ?? {};
-  const next: UpstreamChatConfig = { ...base, ...partial };
+const buildNextChat = (partial: Partial<ChatModelInfo>): ChatModelInfo | undefined => {
+  const base = value.value.chat ?? {};
+  const next: ChatModelInfo = { ...base, ...partial };
 
+  // Normalise: omit modalities when it would only carry the default
+  // (text-only) shape.
   const hasImageInput = next.modalities?.input.includes('image') === true;
   next.modalities = hasImageInput
     ? { input: ['text', 'image'], output: ['text'] }
@@ -79,53 +93,61 @@ const buildNextChat = (partial: Partial<UpstreamChatConfig>): UpstreamChatConfig
 };
 
 const buildNextReasoning = (
-  update: Partial<NonNullable<UpstreamChatConfig['reasoning']>>,
-): UpstreamChatConfig['reasoning'] => {
-  const base = editable.value.chat?.reasoning ?? {};
+  update: Partial<NonNullable<ChatModelInfo['reasoning']>>,
+): ChatModelInfo['reasoning'] => {
+  const base = value.value.chat?.reasoning ?? {};
   const merged = { ...base, ...update };
   const cleaned = Object.fromEntries(
     Object.entries(merged).filter(([, v]) => v !== undefined),
-  ) as NonNullable<UpstreamChatConfig['reasoning']>;
+  ) as NonNullable<ChatModelInfo['reasoning']>;
   return Object.keys(cleaned).length > 0 ? cleaned : undefined;
 };
 
-const setChat = (chat: UpstreamChatConfig | undefined) => {
-  patch({ ...editable.value, chat });
+const setChat = (chat: ChatModelInfo | undefined) => {
+  patch({ ...value.value, chat });
 };
 
-// ── Modalities ─────────────────────────────────────────────────────────
+// ── Modalities ────────────────────────────────────────────────────────
 
 const chatImageInput = computed<boolean>(
-  () => editable.value.chat?.modalities?.input.includes('image') ?? false,
+  () => value.value.chat?.modalities?.input.includes('image') ?? false,
 );
 
 const toggleImageInput = (on: boolean) => {
+  if (!editable.value) return;
   setChat(buildNextChat({ modalities: on ? { input: ['text', 'image'], output: ['text'] } : undefined }));
 };
 
-// ── Reasoning sub-blocks ───────────────────────────────────────────────
+// ── Reasoning sub-block enabled states ────────────────────────────────
 
-const effortEnabled = computed(() => editable.value.chat?.reasoning?.effort !== undefined);
-const budgetTokensEnabled = computed(() => editable.value.chat?.reasoning?.budget_tokens !== undefined);
-const adaptiveEnabled = computed(() => editable.value.chat?.reasoning?.adaptive === true);
-const mandatoryEnabled = computed(() => editable.value.chat?.reasoning?.mandatory === true);
+const effortEnabled = computed(() => value.value.chat?.reasoning?.effort !== undefined);
+const budgetTokensEnabled = computed(() => value.value.chat?.reasoning?.budget_tokens !== undefined);
+const adaptiveEnabled = computed(() => value.value.chat?.reasoning?.adaptive === true);
+const mandatoryEnabled = computed(() => value.value.chat?.reasoning?.mandatory === true);
 
+// Mandatory is exclusive: when on, the three operator-controlled toggles
+// lock off. When any of those is on, Mandatory locks off. UI-only
+// constraint (the schema would technically accept any subset).
 const anyControlledEnabled = computed(() => effortEnabled.value || budgetTokensEnabled.value || adaptiveEnabled.value);
-const controlledDisabled = computed(() => mandatoryEnabled.value);
-const mandatoryDisabled = computed(() => anyControlledEnabled.value);
+const controlledDisabled = computed(() => !editable.value || mandatoryEnabled.value);
+const mandatoryDisabled = computed(() => !editable.value || anyControlledEnabled.value);
 
-const supportedEfforts = computed<string[]>(
-  () => editable.value.chat?.reasoning?.effort?.supported ?? [],
+const supportedEfforts = computed<readonly string[]>(
+  () => value.value.chat?.reasoning?.effort?.supported ?? [],
 );
-
-const REASONING_LEVELS = ['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max'] as const;
 const presetEffortLevels = computed(() => REASONING_LEVELS.filter(level => !supportedEfforts.value.includes(level)));
 
+// Free-typing input for adding a custom reasoning level not in the quick-add list.
 const reasoningLevelInput = ref('');
 
+// Clear the free-typing input buffer when the active kind changes (the
+// parent may swap the hosted record under us).
 watch(() => props.kind, () => { reasoningLevelInput.value = ''; });
 
+// ── Effort sub-block ──────────────────────────────────────────────────
+
 const toggleEffort = (on: boolean) => {
+  if (!editable.value) return;
   const reasoning = on
     ? buildNextReasoning({ effort: { supported: ['low', 'medium', 'high'], default: 'medium' } })
     : buildNextReasoning({ effort: undefined });
@@ -133,20 +155,27 @@ const toggleEffort = (on: boolean) => {
 };
 
 const addReasoningLevel = (level: string) => {
+  if (!editable.value) return;
   const trimmed = level.trim();
   if (trimmed === '') return;
   const current = supportedEfforts.value;
   if (current.includes(trimmed)) return;
   const updated = [...current, trimmed];
-  const existing = editable.value.chat?.reasoning?.effort;
+  const existing = value.value.chat?.reasoning?.effort;
   setChat(buildNextChat({ reasoning: buildNextReasoning({ effort: { supported: updated, default: existing?.default ?? '' } }) }));
 };
 
 const removeReasoningLevel = (level: string) => {
+  if (!editable.value) return;
   const current = supportedEfforts.value;
   const removedIndex = current.indexOf(level);
   const updated = current.filter(e => e !== level);
-  const existingEffort = editable.value.chat?.reasoning?.effort;
+  const existingEffort = value.value.chat?.reasoning?.effort;
+  // The default must always be one of the supported levels (or empty
+  // when the list itself is empty). When the operator deletes the
+  // current default, pick the neighbor that slides into the same index
+  // slot — falling back to the new tail when the removed entry was the
+  // last one.
   let nextDefault = existingEffort?.default ?? '';
   if (existingEffort?.default === level) {
     if (updated.length === 0) nextDefault = '';
@@ -163,20 +192,27 @@ const commitReasoningInput = () => {
   reasoningLevelInput.value = '';
 };
 
-const setDefaultEffort = (value: string) => {
+const setDefaultEffort = (level: string) => {
+  if (!editable.value) return;
   const current = supportedEfforts.value;
-  setChat(buildNextChat({ reasoning: buildNextReasoning({ effort: { supported: current, default: value } }) }));
+  setChat(buildNextChat({ reasoning: buildNextReasoning({ effort: { supported: current, default: level } }) }));
 };
 
-// ── Effort drag-to-reorder ─────────────────────────────────────────────
-
+// ── Effort tag drag-to-reorder ────────────────────────────────────────
+//
+// HTML5 DnD distinguishes drag from click via a built-in pointer-distance
+// threshold: a mousedown+mouseup with no movement still fires `click`
+// (and sets the default), while a mousedown+drag+drop suppresses click
+// entirely. So the two affordances coexist on the same button element.
 const draggedEffortIndex = ref<number | null>(null);
 const dragOverEffortIndex = ref<number | null>(null);
 
 const onEffortDragStart = (index: number, e: DragEvent) => {
+  if (!editable.value) return;
   draggedEffortIndex.value = index;
   if (e.dataTransfer) {
     e.dataTransfer.effectAllowed = 'move';
+    // Firefox requires setData to actually initiate the drag.
     e.dataTransfer.setData('text/plain', String(index));
   }
 };
@@ -197,12 +233,12 @@ const onEffortDrop = (index: number, e: DragEvent) => {
   const from = draggedEffortIndex.value;
   draggedEffortIndex.value = null;
   dragOverEffortIndex.value = null;
-  if (from === null || from === index) return;
+  if (from === null || from === index || !editable.value) return;
   const current = [...supportedEfforts.value];
   const [moved] = current.splice(from, 1);
   if (moved === undefined) return;
   current.splice(index, 0, moved);
-  const existing = editable.value.chat?.reasoning?.effort;
+  const existing = value.value.chat?.reasoning?.effort;
   setChat(buildNextChat({ reasoning: buildNextReasoning({ effort: { supported: current, default: existing?.default ?? '' } }) }));
 };
 
@@ -211,9 +247,10 @@ const onEffortDragEnd = () => {
   dragOverEffortIndex.value = null;
 };
 
-// ── Budget tokens ──────────────────────────────────────────────────────
+// ── Budget tokens sub-block ───────────────────────────────────────────
 
 const toggleBudgetTokens = (on: boolean) => {
+  if (!editable.value) return;
   const reasoning = on
     ? buildNextReasoning({ budget_tokens: {} })
     : buildNextReasoning({ budget_tokens: undefined });
@@ -221,24 +258,27 @@ const toggleBudgetTokens = (on: boolean) => {
 };
 
 const updateBudgetTokensMin = (raw: string | number | null | undefined) => {
+  if (!editable.value) return;
   const num = parseOptionalNumber(raw);
-  const current = editable.value.chat?.reasoning?.budget_tokens ?? {};
+  const current = value.value.chat?.reasoning?.budget_tokens ?? {};
   const next = { ...current };
   if (num === undefined) delete next.min; else next.min = num;
   setChat(buildNextChat({ reasoning: buildNextReasoning({ budget_tokens: next }) }));
 };
 
 const updateBudgetTokensMax = (raw: string | number | null | undefined) => {
+  if (!editable.value) return;
   const num = parseOptionalNumber(raw);
-  const current = editable.value.chat?.reasoning?.budget_tokens ?? {};
+  const current = value.value.chat?.reasoning?.budget_tokens ?? {};
   const next = { ...current };
   if (num === undefined) delete next.max; else next.max = num;
   setChat(buildNextChat({ reasoning: buildNextReasoning({ budget_tokens: next }) }));
 };
 
-// ── Adaptive / Mandatory ───────────────────────────────────────────────
+// ── Adaptive / Mandatory toggles ──────────────────────────────────────
 
 const toggleAdaptive = (on: boolean) => {
+  if (!editable.value) return;
   const reasoning = on
     ? buildNextReasoning({ adaptive: true })
     : buildNextReasoning({ adaptive: undefined });
@@ -246,28 +286,28 @@ const toggleAdaptive = (on: boolean) => {
 };
 
 const toggleMandatory = (on: boolean) => {
+  if (!editable.value) return;
   const reasoning = on
     ? buildNextReasoning({ mandatory: true })
     : buildNextReasoning({ mandatory: undefined });
   setChat(buildNextChat({ reasoning }));
 };
-
-const showChatBlocks = computed(() => props.kind === 'chat');
 </script>
 
 <template>
-  <div class="space-y-6">
+  <div v-if="renderAnything" class="space-y-6">
     <section>
       <div class="mb-3 flex items-baseline gap-3">
         <h4 class="text-[11px] font-semibold uppercase tracking-wider text-gray-500">Limits</h4>
-        <span class="text-[11px] text-gray-500">tokens — leave blank to inherit the computed intersection</span>
+        <span class="text-[11px] text-gray-500">tokens — leave blank to inherit</span>
       </div>
       <div class="grid gap-3 sm:grid-cols-3">
         <label class="block space-y-1.5">
           <span class="block text-xs font-medium text-gray-500">Context Window</span>
           <Input
             type="number"
-            :model-value="editable.limits?.max_context_window_tokens"
+            :model-value="value.limits?.max_context_window_tokens"
+            :readonly="!editable"
             placeholder="e.g. 1050000"
             class="font-mono"
             @update:model-value="v => updateLimit('max_context_window_tokens', v)"
@@ -277,7 +317,8 @@ const showChatBlocks = computed(() => props.kind === 'chat');
           <span class="block text-xs font-medium text-gray-500">Prompt Tokens</span>
           <Input
             type="number"
-            :model-value="editable.limits?.max_prompt_tokens"
+            :model-value="value.limits?.max_prompt_tokens"
+            :readonly="!editable"
             placeholder="e.g. 922000"
             class="font-mono"
             @update:model-value="v => updateLimit('max_prompt_tokens', v)"
@@ -287,7 +328,8 @@ const showChatBlocks = computed(() => props.kind === 'chat');
           <span class="block text-xs font-medium text-gray-500">Output Tokens</span>
           <Input
             type="number"
-            :model-value="editable.limits?.max_output_tokens"
+            :model-value="value.limits?.max_output_tokens"
+            :readonly="!editable"
             placeholder="e.g. 128000"
             class="font-mono"
             @update:model-value="v => updateLimit('max_output_tokens', v)"
@@ -299,8 +341,12 @@ const showChatBlocks = computed(() => props.kind === 'chat');
     <section v-if="showChatBlocks">
       <div class="flex flex-wrap items-center gap-x-4 gap-y-2">
         <h4 class="text-[11px] font-semibold uppercase tracking-wider text-gray-500">Modalities</h4>
-        <label class="flex cursor-pointer items-center gap-2">
-          <Switch :model-value="chatImageInput" @update:model-value="v => toggleImageInput(v === true)" />
+        <label class="flex items-center gap-2" :class="editable ? 'cursor-pointer' : 'cursor-not-allowed'">
+          <Switch
+            :model-value="chatImageInput"
+            :disabled="!editable"
+            @update:model-value="v => toggleImageInput(v === true)"
+          />
           <span class="text-xs" :class="chatImageInput ? 'text-white' : 'text-gray-500'">Image input</span>
         </label>
       </div>
@@ -332,22 +378,24 @@ const showChatBlocks = computed(() => props.kind === 'chat');
       <div v-if="effortEnabled" class="mt-3 space-y-1.5 border-l-2 border-white/[0.08] pl-3">
         <div class="flex min-h-[1.625rem] flex-wrap items-center gap-x-3 gap-y-1.5">
           <span class="text-xs font-semibold text-gray-300">Effort levels</span>
-          <span class="text-[11px] text-gray-500">(click to set default)</span>
+          <span v-if="editable" class="text-[11px] text-gray-500">(click to set default)</span>
           <template v-if="supportedEfforts.length > 0">
             <button
               v-for="(level, index) in supportedEfforts"
               :key="level"
               type="button"
-              class="inline-flex cursor-grab items-center gap-1 rounded border px-2 py-0.5 font-mono text-[11px] transition-colors active:cursor-grabbing"
+              class="inline-flex items-center gap-1 rounded border px-2 py-0.5 font-mono text-[11px] transition-colors"
               :class="[
-                editable.chat?.reasoning?.effort?.default === level
+                value.chat?.reasoning?.effort?.default === level
                   ? 'border-accent-cyan/50 bg-accent-cyan/10 text-accent-cyan font-semibold'
                   : 'border-white/15 bg-white/[0.07] text-gray-300 hover:border-white/30 hover:text-white',
+                editable ? 'cursor-grab active:cursor-grabbing' : 'cursor-not-allowed',
                 draggedEffortIndex === index && 'opacity-40',
                 dragOverEffortIndex === index && draggedEffortIndex !== index && 'ring-1 ring-accent-cyan',
               ]"
-              draggable="true"
-              :title="editable.chat?.reasoning?.effort?.default === level ? 'Default — click another to switch, drag to reorder' : 'Click to set as default, drag to reorder'"
+              :disabled="!editable"
+              :draggable="editable"
+              :title="value.chat?.reasoning?.effort?.default === level ? 'Default — click another to switch, drag to reorder' : 'Click to set as default, drag to reorder'"
               @click="setDefaultEffort(level)"
               @dragstart="e => onEffortDragStart(index, e)"
               @dragover="e => onEffortDragOver(index, e)"
@@ -357,6 +405,7 @@ const showChatBlocks = computed(() => props.kind === 'chat');
             >
               {{ level }}
               <span
+                v-if="editable"
                 role="button"
                 tabindex="0"
                 class="ml-0.5 cursor-pointer text-gray-500 transition-colors hover:text-accent-rose"
@@ -370,9 +419,10 @@ const showChatBlocks = computed(() => props.kind === 'chat');
               </span>
             </button>
           </template>
-          <p v-else class="whitespace-nowrap text-[11px] text-accent-amber">Add at least one effort level — click a preset on the right.</p>
+          <p v-else-if="editable" class="whitespace-nowrap text-[11px] text-accent-amber">Add at least one effort level — click a preset on the right.</p>
+          <p v-else class="whitespace-nowrap text-[11px] text-gray-500">—</p>
         </div>
-        <div class="flex flex-wrap items-center gap-1.5">
+        <div v-if="editable" class="flex flex-wrap items-center gap-1.5">
           <button
             v-for="level in presetEffortLevels"
             :key="level"
@@ -399,7 +449,8 @@ const showChatBlocks = computed(() => props.kind === 'chat');
             type="number"
             min="0"
             size="sm"
-            :model-value="editable.chat?.reasoning?.budget_tokens?.min"
+            :model-value="value.chat?.reasoning?.budget_tokens?.min"
+            :readonly="!editable"
             placeholder="—"
             class="!h-6 !w-24 !py-0 !text-[11px] font-mono"
             @update:model-value="v => updateBudgetTokensMin(v)"
@@ -411,16 +462,17 @@ const showChatBlocks = computed(() => props.kind === 'chat');
             type="number"
             min="0"
             size="sm"
-            :model-value="editable.chat?.reasoning?.budget_tokens?.max"
+            :model-value="value.chat?.reasoning?.budget_tokens?.max"
+            :readonly="!editable"
             placeholder="—"
             class="!h-6 !w-24 !py-0 !text-[11px] font-mono"
             @update:model-value="v => updateBudgetTokensMax(v)"
           />
         </label>
         <p
-          v-if="editable.chat?.reasoning?.budget_tokens?.min !== undefined
-            && editable.chat?.reasoning?.budget_tokens?.max !== undefined
-            && editable.chat.reasoning.budget_tokens.max < editable.chat.reasoning.budget_tokens.min"
+          v-if="value.chat?.reasoning?.budget_tokens?.min !== undefined
+            && value.chat?.reasoning?.budget_tokens?.max !== undefined
+            && value.chat.reasoning.budget_tokens.max < value.chat.reasoning.budget_tokens.min"
           class="text-[11px] text-accent-amber"
         >
           Max must be ≥ min.
diff --git a/apps/web/src/components/shared/ChatMetadataEditor_test.ts b/apps/web/src/components/shared/ChatMetadataEditor_test.ts
new file mode 100644
index 000000000..7e7a3d293
--- /dev/null
+++ b/apps/web/src/components/shared/ChatMetadataEditor_test.ts
@@ -0,0 +1,111 @@
+import { mount } from '@vue/test-utils';
+import { describe, expect, it } from 'vitest';
+import { nextTick } from 'vue';
+
+import ChatMetadataEditor from './ChatMetadataEditor.vue';
+import type { AnnouncedMetadata } from '../../api/types.ts';
+
+const baseValue = (): AnnouncedMetadata => ({
+  limits: { max_context_window_tokens: 100_000, max_output_tokens: 4096 },
+  chat: { reasoning: { effort: { supported: ['low', 'medium', 'high'], default: 'medium' } } },
+});
+
+describe('ChatMetadataEditor', () => {
+  it('renders nothing when kind="image"', () => {
+    const w = mount(ChatMetadataEditor, {
+      props: { modelValue: baseValue(), kind: 'image', mode: 'manual' },
+    });
+    expect(w.html().trim()).toBe('<!--v-if-->');
+  });
+
+  it('kind="embedding" renders only the Limits section — no Modalities, no Reasoning', () => {
+    const w = mount(ChatMetadataEditor, {
+      props: { modelValue: baseValue(), kind: 'embedding', mode: 'manual' },
+    });
+    expect(w.text()).toContain('Limits');
+    expect(w.text()).not.toContain('Modalities');
+    expect(w.text()).not.toContain('Reasoning');
+  });
+
+  it('kind="chat" renders Limits + Modalities + Reasoning', () => {
+    const w = mount(ChatMetadataEditor, {
+      props: { modelValue: baseValue(), kind: 'chat', mode: 'manual' },
+    });
+    const txt = w.text();
+    expect(txt).toContain('Limits');
+    expect(txt).toContain('Modalities');
+    expect(txt).toContain('Reasoning');
+    expect(txt).toContain('Effort levels');
+  });
+
+  it('mode="auto" renders the values, but Switches are disabled and Inputs are readonly', () => {
+    const w = mount(ChatMetadataEditor, {
+      props: { modelValue: baseValue(), kind: 'chat', mode: 'auto' },
+    });
+    // Operator can still read every limit value.
+    const numberInputs = w.findAll('input[type="number"]');
+    const limitValues = numberInputs.slice(0, 3).map(i => (i.element as HTMLInputElement).value);
+    expect(limitValues).toContain('100000');
+    expect(limitValues).toContain('4096');
+    // Every limit input is readonly.
+    for (const inp of numberInputs.slice(0, 3)) {
+      expect((inp.element as HTMLInputElement).readOnly).toBe(true);
+    }
+    // Every Switch (Reka-UI renders as button[role="switch"]) is disabled.
+    const switches = w.findAll('button[role="switch"]');
+    expect(switches.length).toBeGreaterThan(0);
+    for (const s of switches) {
+      expect((s.element as HTMLButtonElement).disabled).toBe(true);
+    }
+  });
+
+  it('mode="auto": clicking a Switch is a no-op (no update:modelValue emit)', async () => {
+    const w = mount(ChatMetadataEditor, {
+      props: { modelValue: { chat: {} } as AnnouncedMetadata, kind: 'chat', mode: 'auto' },
+    });
+    // The Modalities image-input Switch is the first Switch the editor renders (Limits has only inputs).
+    const switches = w.findAll('button[role="switch"]');
+    expect(switches.length).toBeGreaterThan(0);
+    await switches[0].trigger('click');
+    await nextTick();
+    expect(w.emitted('update:modelValue')).toBeUndefined();
+  });
+
+  it('mode="manual": editing a limit emits update:modelValue with the patched payload', async () => {
+    const w = mount(ChatMetadataEditor, {
+      props: { modelValue: undefined, kind: 'chat', mode: 'manual' },
+    });
+    const numberInputs = w.findAll('input[type="number"]');
+    const contextInput = numberInputs[0]!.element as HTMLInputElement;
+    contextInput.value = '64000';
+    await numberInputs[0]!.trigger('input');
+    await nextTick();
+    const emitted = w.emitted('update:modelValue');
+    expect(emitted).toBeDefined();
+    const last = emitted![emitted!.length - 1]![0] as AnnouncedMetadata;
+    expect(last.limits?.max_context_window_tokens).toBe(64_000);
+  });
+
+  it('mode="manual": toggling the Effort levels switch on emits a reasoning seed', async () => {
+    const w = mount(ChatMetadataEditor, {
+      props: { modelValue: undefined, kind: 'chat', mode: 'manual' },
+    });
+    // The Effort levels Switch is the second Switch (Modalities image is first).
+    const switches = w.findAll('button[role="switch"]');
+    // Find the one labelled "Effort levels".
+    const labels = w.findAll('label');
+    const effortLabel = labels.find(l => (l.text() ?? '').includes('Effort levels'))!;
+    const effortSwitch = effortLabel.find('button[role="switch"]');
+    expect(effortSwitch.exists()).toBe(true);
+    expect((effortSwitch.element as HTMLButtonElement).disabled).toBe(false);
+    expect(switches.length).toBeGreaterThan(1);
+
+    await effortSwitch.trigger('click');
+    await nextTick();
+    const emitted = w.emitted('update:modelValue');
+    expect(emitted).toBeDefined();
+    const last = emitted![emitted!.length - 1]![0] as AnnouncedMetadata;
+    expect(last.chat?.reasoning?.effort?.supported).toEqual(['low', 'medium', 'high']);
+    expect(last.chat?.reasoning?.effort?.default).toBe('medium');
+  });
+});
diff --git a/apps/web/src/components/upstream-edit/ModelEditor.vue b/apps/web/src/components/upstream-edit/ModelEditor.vue
index 44c5ec8e8..6948299c3 100644
--- a/apps/web/src/components/upstream-edit/ModelEditor.vue
+++ b/apps/web/src/components/upstream-edit/ModelEditor.vue
@@ -4,7 +4,8 @@ import { computed, ref, watch } from 'vue';
 import EndpointsField from './EndpointsField.vue';
 import FlagOverridesEditor from './FlagOverridesEditor.vue';
 import { configOf, defaultEndpointsForKind, publicIdOf, titleFor, type Row } from './modelRows.ts';
-import type { BillingDimension, FlagDef, ModelKind, ModelPricing, UpstreamChatConfig, UpstreamModelConfig, UpstreamProviderKind } from '../../api/types.ts';
+import type { AnnouncedMetadata, BillingDimension, FlagDef, ModelKind, ModelPricing, UpstreamChatConfig, UpstreamModelConfig, UpstreamProviderKind } from '../../api/types.ts';
+import ChatMetadataEditor from '../shared/ChatMetadataEditor.vue';
 import { Button, Input, Select, Switch, Tooltip } from '@floway-dev/ui';
 
 const props = defineProps<{
@@ -74,18 +75,6 @@ const parseOptionalNumber = (raw: string | number | null | undefined): number |
   return Number.isFinite(num) && num >= 0 ? num : undefined;
 };
 
-const updateLimit = (
-  key: 'max_context_window_tokens' | 'max_prompt_tokens' | 'max_output_tokens',
-  raw: string | number | null | undefined,
-) => {
-  if (!config.value) return;
-  const limits = { ...(config.value.limits ?? {}) };
-  const num = parseOptionalNumber(raw);
-  if (num === undefined) delete limits[key];
-  else limits[key] = num;
-  patch({ limits: Object.keys(limits).length > 0 ? limits : undefined });
-};
-
 const updateCost = (key: BillingDimension, raw: string | number | null | undefined) => {
   if (!config.value) return;
   const cost = { ...(config.value.cost ?? {}) } as ModelPricing;
@@ -255,44 +244,23 @@ const updateFlagOverrides = (values: Record<string, boolean>) => {
 
 // ── Chat metadata ──────────────────────────────────────────────────────────
 
-// Known Codex CLI effort presets as of v0.137. Codex's wire type is open
-// (ReasoningEffort::Custom(String)) so any string is accepted upstream;
-// these are just the convenient quick-adds. See:
-// https://github.com/openai/codex/blob/main/codex-rs/protocol/src/openai_models.rs
-const REASONING_LEVELS = ['none', 'minimal', 'low', 'medium', 'high', 'xhigh', 'max'] as const;
-
-// Free-typing input for adding a custom reasoning level not in the quick-add list.
-const reasoningLevelInput = ref('');
-
-// Resync input buffer when the active row changes.
-watch(() => props.row?.uiId, () => {
-  reasoningLevelInput.value = '';
+// Mirror the shared editor's value shape: pull the model's `limits` +
+// `chat` block out of the row config, hand it to ChatMetadataEditor,
+// and forward edits back through `patch()`.
+const chatMetadataValue = computed<AnnouncedMetadata | undefined>(() => {
+  if (!config.value) return undefined;
+  const out: AnnouncedMetadata = {};
+  if (config.value.limits) out.limits = config.value.limits;
+  if (config.value.chat) out.chat = config.value.chat;
+  return out;
 });
 
-const chatImageInput = computed<boolean>(
-  () => config.value?.chat?.modalities?.input.includes('image') ?? false,
-);
-
-// ── Reasoning sub-block enabled states ────────────────────────────────────
-
-const effortEnabled = computed(() => config.value?.chat?.reasoning?.effort !== undefined);
-const budgetTokensEnabled = computed(() => config.value?.chat?.reasoning?.budget_tokens !== undefined);
-const adaptiveEnabled = computed(() => config.value?.chat?.reasoning?.adaptive === true);
-const mandatoryEnabled = computed(() => config.value?.chat?.reasoning?.mandatory === true);
-
-// Mandatory is exclusive: when enabled it dominates and the three operator-
-// controlled toggles disappear. When any of those is on, Mandatory is locked
-// off. UI-only constraint (the schema would technically accept any subset).
-const anyControlledEnabled = computed(() => effortEnabled.value || budgetTokensEnabled.value || adaptiveEnabled.value);
-const controlledDisabled = computed(() => !editable.value || mandatoryEnabled.value);
-const mandatoryDisabled = computed(() => !editable.value || anyControlledEnabled.value);
-
-const supportedEfforts = computed<string[]>(
-  () => config.value?.chat?.reasoning?.effort?.supported ?? [],
-);
-const presetEffortLevels = computed(() => REASONING_LEVELS.filter(level => !supportedEfforts.value.includes(level)));
-
-// ── Validity ───────────────────────────────────────────────────────────────
+const onChatMetadataChange = (next: AnnouncedMetadata | undefined) => {
+  // The editor builds `chat` through fresh object literals — its modality
+  // arrays are `readonly` only at the type level, never frozen at runtime,
+  // so the cast to the mutable `UpstreamChatConfig` held in `config` is safe.
+  patch({ limits: next?.limits, chat: next?.chat as UpstreamChatConfig | undefined });
+};
 
 // A chat row is invalid when:
 // - effort is enabled but supported list is empty
@@ -300,209 +268,23 @@ const presetEffortLevels = computed(() => REASONING_LEVELS.filter(level => !supp
 // - budget_tokens is enabled but max < min (when both are set)
 const isReasoningValid = computed<boolean>(() => {
   const reasoning = config.value?.chat?.reasoning;
+  if (reasoning === undefined) return true;
 
-  if (effortEnabled.value) {
-    const effort = reasoning?.effort;
-    if (!effort || effort.supported.length === 0) return false;
+  if (reasoning.effort !== undefined) {
+    const effort = reasoning.effort;
+    if (effort.supported.length === 0) return false;
     if (effort.default === '' || !effort.supported.includes(effort.default)) return false;
   }
 
-  if (budgetTokensEnabled.value) {
-    const bt = reasoning?.budget_tokens;
-    if (bt?.min !== undefined && bt?.max !== undefined && bt.max < bt.min) return false;
+  if (reasoning.budget_tokens !== undefined) {
+    const bt = reasoning.budget_tokens;
+    if (bt.min !== undefined && bt.max !== undefined && bt.max < bt.min) return false;
   }
 
   return true;
 });
 
 watch(isReasoningValid, valid => { emit('validity-change', valid); }, { immediate: true });
-
-// ── Chat state builder ─────────────────────────────────────────────────────
-
-const buildNextChat = (partial: Partial<UpstreamChatConfig>): UpstreamChatConfig | undefined => {
-  const base = config.value?.chat ?? {};
-  const next: UpstreamChatConfig = { ...base, ...partial };
-
-  // Normalise: omit modalities when it would only carry the default (text-only) shape.
-  const hasImageInput = next.modalities?.input.includes('image') === true;
-  next.modalities = hasImageInput
-    ? { input: ['text', 'image'], output: ['text'] }
-    : undefined;
-
-  // Return undefined (omit chat key entirely) when nothing is configured.
-  if (!next.modalities && !next.reasoning) return undefined;
-  return next;
-};
-
-// Build a reasoning object with a single key updated, dropping undefined keys.
-const buildNextReasoning = (
-  update: Partial<NonNullable<UpstreamChatConfig['reasoning']>>,
-): UpstreamChatConfig['reasoning'] => {
-  const base = config.value?.chat?.reasoning ?? {};
-  const merged = { ...base, ...update };
-  // Drop keys explicitly set to undefined.
-  const cleaned = Object.fromEntries(
-    Object.entries(merged).filter(([, v]) => v !== undefined),
-  ) as NonNullable<UpstreamChatConfig['reasoning']>;
-  return Object.keys(cleaned).length > 0 ? cleaned : undefined;
-};
-
-const toggleImageInput = (on: boolean) => {
-  if (!editable.value) return;
-  patch({ chat: buildNextChat({ modalities: on ? { input: ['text', 'image'], output: ['text'] } : undefined }) });
-};
-
-// ── Effort sub-block ───────────────────────────────────────────────────────
-
-const toggleEffort = (on: boolean) => {
-  if (!editable.value) return;
-  const reasoning = on
-    ? buildNextReasoning({ effort: { supported: ['low', 'medium', 'high'], default: 'medium' } })
-    : buildNextReasoning({ effort: undefined });
-  patch({ chat: buildNextChat({ reasoning }) });
-};
-
-const addReasoningLevel = (level: string) => {
-  if (!editable.value || !config.value) return;
-  const trimmed = level.trim();
-  if (!trimmed) return;
-  const current = supportedEfforts.value;
-  if (current.includes(trimmed)) return;
-  const updated = [...current, trimmed];
-  const existing = config.value.chat?.reasoning?.effort;
-  patch({ chat: buildNextChat({ reasoning: buildNextReasoning({ effort: { supported: updated, default: existing?.default ?? '' } }) }) });
-};
-
-const removeReasoningLevel = (level: string) => {
-  if (!editable.value || !config.value) return;
-  const current = supportedEfforts.value;
-  const removedIndex = current.indexOf(level);
-  const updated = current.filter(e => e !== level);
-  const existingEffort = config.value.chat?.reasoning?.effort;
-  // The default must always be one of the supported levels (or empty when the
-  // list itself is empty). When the operator deletes the current default, pick
-  // the neighbor that slides into the same index slot — falling back to the
-  // new tail when the removed entry was the last one.
-  let nextDefault = existingEffort?.default ?? '';
-  if (existingEffort?.default === level) {
-    if (updated.length === 0) nextDefault = '';
-    else if (removedIndex < updated.length) nextDefault = updated[removedIndex]!;
-    else nextDefault = updated[updated.length - 1]!;
-  }
-  // Keep the effort sub-block even when no levels remain — the empty-list
-  // warning replaces the tag row in-place so the user sees what's needed
-  // without the toggle silently flipping off.
-  const nextEffort = { supported: updated, default: nextDefault };
-  patch({ chat: buildNextChat({ reasoning: buildNextReasoning({ effort: nextEffort }) }) });
-};
-
-const commitReasoningInput = () => {
-  const trimmed = reasoningLevelInput.value.trim();
-  if (!trimmed) return;
-  addReasoningLevel(trimmed);
-  reasoningLevelInput.value = '';
-};
-
-const setDefaultEffort = (value: string) => {
-  if (!editable.value || !config.value) return;
-  const current = supportedEfforts.value;
-  patch({ chat: buildNextChat({ reasoning: buildNextReasoning({ effort: { supported: current, default: value } }) }) });
-};
-
-// ── Effort tag drag-to-reorder ─────────────────────────────────────────────
-//
-// HTML5 DnD distinguishes drag from click via a built-in pointer-distance
-// threshold: a mousedown+mouseup with no movement still fires `click` (and
-// sets the default), while a mousedown+drag+drop suppresses click entirely.
-// So the two affordances coexist on the same button element.
-const draggedEffortIndex = ref<number | null>(null);
-const dragOverEffortIndex = ref<number | null>(null);
-
-const onEffortDragStart = (index: number, e: DragEvent) => {
-  if (!editable.value) return;
-  draggedEffortIndex.value = index;
-  if (e.dataTransfer) {
-    e.dataTransfer.effectAllowed = 'move';
-    // Firefox requires setData to actually initiate the drag.
-    e.dataTransfer.setData('text/plain', String(index));
-  }
-};
-
-const onEffortDragOver = (index: number, e: DragEvent) => {
-  if (draggedEffortIndex.value === null) return;
-  e.preventDefault();
-  if (e.dataTransfer) e.dataTransfer.dropEffect = 'move';
-  dragOverEffortIndex.value = index;
-};
-
-const onEffortDragLeave = (index: number) => {
-  if (dragOverEffortIndex.value === index) dragOverEffortIndex.value = null;
-};
-
-const onEffortDrop = (index: number, e: DragEvent) => {
-  e.preventDefault();
-  const from = draggedEffortIndex.value;
-  draggedEffortIndex.value = null;
-  dragOverEffortIndex.value = null;
-  if (from === null || from === index || !config.value) return;
-  const current = [...supportedEfforts.value];
-  const [moved] = current.splice(from, 1);
-  if (moved === undefined) return;
-  current.splice(index, 0, moved);
-  const existing = config.value.chat?.reasoning?.effort;
-  patch({ chat: buildNextChat({ reasoning: buildNextReasoning({ effort: { supported: current, default: existing?.default ?? '' } }) }) });
-};
-
-const onEffortDragEnd = () => {
-  draggedEffortIndex.value = null;
-  dragOverEffortIndex.value = null;
-};
-
-// ── Budget tokens sub-block ────────────────────────────────────────────────
-
-const toggleBudgetTokens = (on: boolean) => {
-  if (!editable.value) return;
-  const reasoning = on
-    ? buildNextReasoning({ budget_tokens: {} })
-    : buildNextReasoning({ budget_tokens: undefined });
-  patch({ chat: buildNextChat({ reasoning }) });
-};
-
-const updateBudgetTokensMin = (raw: string | number | null | undefined) => {
-  if (!editable.value || !config.value) return;
-  const num = parseOptionalNumber(raw);
-  const current = config.value.chat?.reasoning?.budget_tokens ?? {};
-  const next = { ...current };
-  if (num === undefined) delete next.min; else next.min = num;
-  patch({ chat: buildNextChat({ reasoning: buildNextReasoning({ budget_tokens: next }) }) });
-};
-
-const updateBudgetTokensMax = (raw: string | number | null | undefined) => {
-  if (!editable.value || !config.value) return;
-  const num = parseOptionalNumber(raw);
-  const current = config.value.chat?.reasoning?.budget_tokens ?? {};
-  const next = { ...current };
-  if (num === undefined) delete next.max; else next.max = num;
-  patch({ chat: buildNextChat({ reasoning: buildNextReasoning({ budget_tokens: next }) }) });
-};
-
-// ── Adaptive / Mandatory toggles ───────────────────────────────────────────
-
-const toggleAdaptive = (on: boolean) => {
-  if (!editable.value) return;
-  const reasoning = on
-    ? buildNextReasoning({ adaptive: true })
-    : buildNextReasoning({ adaptive: undefined });
-  patch({ chat: buildNextChat({ reasoning }) });
-};
-
-const toggleMandatory = (on: boolean) => {
-  if (!editable.value) return;
-  const reasoning = on
-    ? buildNextReasoning({ mandatory: true })
-    : buildNextReasoning({ mandatory: undefined });
-  patch({ chat: buildNextChat({ reasoning }) });
-};
 </script>
 
 <template>
@@ -608,47 +390,13 @@ const toggleMandatory = (on: boolean) => {
           />
         </section>
 
-        <section v-if="rowKind === 'chat'">
-          <div class="mb-3 flex items-baseline gap-3">
-            <h3 class="text-[11px] font-semibold uppercase tracking-wider text-gray-500">Context Limits</h3>
-            <span class="text-[11px] text-gray-500">tokens — leave blank to inherit upstream defaults</span>
-          </div>
-          <div class="grid gap-3 sm:grid-cols-3">
-            <label class="block space-y-1.5">
-              <span class="block text-xs font-medium text-gray-500">Context Window</span>
-              <Input
-                type="number"
-                :model-value="config.limits?.max_context_window_tokens"
-                :readonly="!editable"
-                placeholder="e.g. 1050000"
-                class="font-mono"
-                @update:model-value="v => updateLimit('max_context_window_tokens', v)"
-              />
-            </label>
-            <label class="block space-y-1.5">
-              <span class="block text-xs font-medium text-gray-500">Prompt Tokens</span>
-              <Input
-                type="number"
-                :model-value="config.limits?.max_prompt_tokens"
-                :readonly="!editable"
-                placeholder="e.g. 922000"
-                class="font-mono"
-                @update:model-value="v => updateLimit('max_prompt_tokens', v)"
-              />
-            </label>
-            <label class="block space-y-1.5">
-              <span class="block text-xs font-medium text-gray-500">Output Tokens</span>
-              <Input
-                type="number"
-                :model-value="config.limits?.max_output_tokens"
-                :readonly="!editable"
-                placeholder="e.g. 128000"
-                class="font-mono"
-                @update:model-value="v => updateLimit('max_output_tokens', v)"
-              />
-            </label>
-          </div>
-        </section>
+        <ChatMetadataEditor
+          v-if="rowKind !== 'image'"
+          :model-value="chatMetadataValue"
+          :kind="rowKind"
+          :mode="editable ? 'manual' : 'auto'"
+          @update:model-value="onChatMetadataChange"
+        />
 
         <section>
           <div class="mb-3 flex items-baseline gap-3">
@@ -786,147 +534,6 @@ const toggleMandatory = (on: boolean) => {
           </div>
         </section>
 
-        <section v-if="rowKind === 'chat'">
-          <div class="flex flex-wrap items-center gap-x-4 gap-y-2">
-            <h3 class="text-[11px] font-semibold uppercase tracking-wider text-gray-500">Modalities</h3>
-            <label class="flex items-center gap-2" :class="editable ? 'cursor-pointer' : 'cursor-not-allowed'">
-              <Switch
-                :model-value="chatImageInput"
-                :disabled="!editable"
-                @update:model-value="v => toggleImageInput(v === true)"
-              />
-              <span class="text-xs" :class="chatImageInput ? 'text-white' : 'text-gray-500'">Image input</span>
-            </label>
-          </div>
-        </section>
-
-        <section v-if="rowKind === 'chat'">
-          <div class="flex flex-wrap items-center gap-x-4 gap-y-2">
-            <h3 class="text-[11px] font-semibold uppercase tracking-wider text-gray-500">Reasoning</h3>
-            <label class="flex items-center gap-2" :class="controlledDisabled ? 'cursor-not-allowed opacity-60' : 'cursor-pointer'">
-              <Switch :model-value="effortEnabled" :disabled="controlledDisabled" @update:model-value="v => toggleEffort(v === true)" />
-              <span class="text-xs" :class="effortEnabled ? 'text-white' : 'text-gray-500'">Effort levels</span>
-            </label>
-            <label class="flex items-center gap-2" :class="controlledDisabled ? 'cursor-not-allowed opacity-60' : 'cursor-pointer'">
-              <Switch :model-value="budgetTokensEnabled" :disabled="controlledDisabled" @update:model-value="v => toggleBudgetTokens(v === true)" />
-              <span class="text-xs" :class="budgetTokensEnabled ? 'text-white' : 'text-gray-500'">Budget tokens</span>
-            </label>
-            <label class="flex items-center gap-2" :class="controlledDisabled ? 'cursor-not-allowed opacity-60' : 'cursor-pointer'">
-              <Switch :model-value="adaptiveEnabled" :disabled="controlledDisabled" @update:model-value="v => toggleAdaptive(v === true)" />
-              <span class="text-xs" :class="adaptiveEnabled ? 'text-white' : 'text-gray-500'">Adaptive</span>
-              <Tooltip content="Model self-selects reasoning effort"><span class="text-[10px] text-gray-600">?</span></Tooltip>
-            </label>
-            <label class="flex items-center gap-2" :class="mandatoryDisabled ? 'cursor-not-allowed opacity-60' : 'cursor-pointer'">
-              <Switch :model-value="mandatoryEnabled" :disabled="mandatoryDisabled" @update:model-value="v => toggleMandatory(v === true)" />
-              <span class="text-xs" :class="mandatoryEnabled ? 'text-white' : 'text-gray-500'">Mandatory</span>
-              <Tooltip content="Reasoning is always applied; caller cannot opt out"><span class="text-[10px] text-gray-600">?</span></Tooltip>
-            </label>
-          </div>
-
-          <div v-if="effortEnabled" class="mt-3 space-y-1.5 border-l-2 border-white/[0.08] pl-3">
-            <div class="flex min-h-[1.625rem] flex-wrap items-center gap-x-3 gap-y-1.5">
-              <span class="text-xs font-semibold text-gray-300">Effort levels</span>
-              <span class="text-[11px] text-gray-500">(click to set default)</span>
-              <template v-if="supportedEfforts.length > 0">
-                <button
-                  v-for="(level, index) in supportedEfforts"
-                  :key="level"
-                  type="button"
-                  class="inline-flex items-center gap-1 rounded border px-2 py-0.5 font-mono text-[11px] transition-colors"
-                  :class="[
-                    config.chat?.reasoning?.effort?.default === level
-                      ? 'border-accent-cyan/50 bg-accent-cyan/10 text-accent-cyan font-semibold'
-                      : 'border-white/15 bg-white/[0.07] text-gray-300 hover:border-white/30 hover:text-white',
-                    editable ? 'cursor-grab active:cursor-grabbing' : 'cursor-not-allowed',
-                    draggedEffortIndex === index && 'opacity-40',
-                    dragOverEffortIndex === index && draggedEffortIndex !== index && 'ring-1 ring-accent-cyan',
-                  ]"
-                  :disabled="!editable"
-                  :draggable="editable"
-                  :title="config.chat?.reasoning?.effort?.default === level ? 'Default — click another to switch, drag to reorder' : 'Click to set as default, drag to reorder'"
-                  @click="setDefaultEffort(level)"
-                  @dragstart="e => onEffortDragStart(index, e)"
-                  @dragover="e => onEffortDragOver(index, e)"
-                  @dragleave="onEffortDragLeave(index)"
-                  @drop="e => onEffortDrop(index, e)"
-                  @dragend="onEffortDragEnd"
-                >
-                  {{ level }}
-                  <span
-                    v-if="editable"
-                    role="button"
-                    tabindex="0"
-                    class="ml-0.5 cursor-pointer text-gray-500 transition-colors hover:text-accent-rose"
-                    :aria-label="`Remove ${level}`"
-                    @click.stop="removeReasoningLevel(level)"
-                    @keydown.enter.stop.prevent="removeReasoningLevel(level)"
-                  >
-                    <svg class="h-2.5 w-2.5" viewBox="0 0 12 12" fill="none" stroke="currentColor" stroke-width="2">
-                      <path d="M9 3 3 9M3 3l6 6" />
-                    </svg>
-                  </span>
-                </button>
-              </template>
-              <p v-else class="whitespace-nowrap text-[11px] text-accent-amber">Add at least one effort level — click a preset on the right.</p>
-            </div>
-            <div v-if="editable" class="flex flex-wrap items-center gap-1.5">
-              <button
-                v-for="level in presetEffortLevels"
-                :key="level"
-                type="button"
-                class="rounded border border-white/15 px-2 py-0.5 font-mono text-[11px] text-gray-400 transition-colors hover:border-accent-cyan/40 hover:text-accent-cyan"
-                @click="addReasoningLevel(level)"
-              >+ {{ level }}</button>
-              <Input
-                v-model="reasoningLevelInput"
-                size="sm"
-                placeholder="custom…"
-                class="!h-6 !w-28 !py-0 !text-[11px] font-mono"
-                @keydown.enter.prevent="commitReasoningInput"
-              />
-              <Button variant="secondary" size="sm" class="!h-6 !px-2 !py-0 !text-[11px]" @click="commitReasoningInput">Add</Button>
-            </div>
-          </div>
-
-          <div v-if="budgetTokensEnabled" class="mt-3 flex flex-wrap items-center gap-3 border-l-2 border-white/[0.08] pl-3">
-            <span class="text-xs font-semibold text-gray-300">Budget tokens</span>
-            <label class="flex items-center gap-1.5">
-              <span class="text-[11px] text-gray-500">Min</span>
-              <Input
-                type="number"
-                min="0"
-                size="sm"
-                :model-value="config.chat?.reasoning?.budget_tokens?.min"
-                :readonly="!editable"
-                placeholder="—"
-                class="!h-6 !w-24 !py-0 !text-[11px] font-mono"
-                @update:model-value="v => updateBudgetTokensMin(v)"
-              />
-            </label>
-            <label class="flex items-center gap-1.5">
-              <span class="text-[11px] text-gray-500">Max</span>
-              <Input
-                type="number"
-                min="0"
-                size="sm"
-                :model-value="config.chat?.reasoning?.budget_tokens?.max"
-                :readonly="!editable"
-                placeholder="—"
-                class="!h-6 !w-24 !py-0 !text-[11px] font-mono"
-                @update:model-value="v => updateBudgetTokensMax(v)"
-              />
-            </label>
-            <p
-              v-if="config.chat?.reasoning?.budget_tokens?.min !== undefined
-                && config.chat?.reasoning?.budget_tokens?.max !== undefined
-                && config.chat.reasoning.budget_tokens.max < config.chat.reasoning.budget_tokens.min"
-              class="text-[11px] text-accent-amber"
-            >
-              Max must be ≥ min.
-            </p>
-          </div>
-        </section>
-
         <section>
           <div class="mb-3 flex items-baseline gap-3">
             <h3 class="text-[11px] font-semibold uppercase tracking-wider text-gray-500">Override Feature Flags</h3>

From c9b69bc608199af7edcb63561d51cfcc4c85492c Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 04:34:16 +0800
Subject: [PATCH 091/170] feat(aliases): alias entry advertises the union of
 target endpoints in /v1/models
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Add an optional `endpoints?: ModelEndpoints` to `PublicModel`. The
provider layer (ollama / copilot / etc.) already projects raw upstream
catalogs into the public-facing shape — the three chat endpoints
appear together because the gateway translates between them, and
`completions` / `embeddings` / `imagesGenerations` / `imagesEdits`
only appear when the upstream natively serves them. `toPublicModel`
stamps that projected shape onto the wire entry verbatim.

Alias entries surface the UNION across every available target, not
an intersection: every endpoint reachable through ANY target is
advertised. At request time the resolver narrows the pool to the
targets that serve the inbound endpoint, so any endpoint listed
here is reachable through at least one target. Endpoints are never
operator-editable on aliases — they follow the target set.
---
 .../src/data-plane/models/alias-listing.ts    | 43 ++++++++++++--
 .../data-plane/models/alias-listing_test.ts   | 56 ++++++++++++++++++-
 .../gateway/src/data-plane/models/load.ts     |  5 +-
 .../src/data-plane/models/load_test.ts        | 10 +++-
 .../src/data-plane/models/serve_test.ts       |  7 ++-
 packages/protocols/src/common/models.ts       | 14 +++++
 6 files changed, 122 insertions(+), 13 deletions(-)

diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index bb7f0871a..acc4fa6b9 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -27,12 +27,12 @@
 
 import type { ModelAliasRecord } from '../../repo/types.ts';
 import { composeAliasDisplayName } from '@floway-dev/protocols/common';
-import type { AliasTarget, AnnouncedMetadata, ChatAliasRules, ChatModelInfo, PublicModel, PublicModelAliasedFrom, PublicModelLimits } from '@floway-dev/protocols/common';
-import type { InternalModel, ResolvedModel } from '@floway-dev/provider';
+import type { AliasTarget, AnnouncedMetadata, ChatAliasRules, ChatModelInfo, ModelEndpointKey, ModelEndpoints, PublicModel, PublicModelAliasedFrom, PublicModelLimits } from '@floway-dev/protocols/common';
+import type { ResolvedModel } from '@floway-dev/provider';
 
 export interface ListedAliasInputs {
   readonly aliases: readonly ModelAliasRecord[];
-  readonly realModels: readonly InternalModel[];
+  readonly realModels: readonly ResolvedModel[];
 }
 
 // The repo guarantees rule shape matches the row's `kind` (chat rows carry
@@ -50,6 +50,25 @@ const intersectArrays = <T>(arrays: readonly (readonly T[])[]): T[] => {
   return head.filter(value => tail.every(other => other.includes(value)));
 };
 
+// Endpoint union across the alias's available targets: a key appears in
+// the alias's advertised endpoints whenever ANY target serves it. Sub-cap
+// flags inside a key are spread-merged: a flag is present iff any target
+// declares it, and a later target's value wins on a shared flag. The
+// pool-narrowing at request time picks among the targets that actually
+// serve the inbound endpoint, so every endpoint advertised here is
+// reachable through at least one target.
+const unionEndpoints = (endpointsList: readonly ModelEndpoints[]): ModelEndpoints => {
+  const result: ModelEndpoints = {};
+  for (const endpoints of endpointsList) {
+    for (const key of Object.keys(endpoints) as ModelEndpointKey[]) {
+      const incoming = endpoints[key];
+      if (incoming === undefined) continue;
+      result[key] = { ...result[key], ...incoming };
+    }
+  }
+  return result;
+};
+
 // Apply the rule-driven downgrade: a target with a pinned rule reports
 // the corresponding catalog sub-field as unsupported (= undefined) for
 // the purposes of intersection. Fields the rule doesn't touch pass
@@ -161,7 +180,7 @@ const buildAliasedFrom = (alias: ModelAliasRecord): PublicModelAliasedFrom => ({
 // the result directly or overlay it under an operator override.
 const computeAutomaticMetadata = (
   alias: ModelAliasRecord,
-  availableTargets: readonly { target: AliasTarget; real: InternalModel }[],
+  availableTargets: readonly { target: AliasTarget; real: ResolvedModel }[],
 ): { limits: PublicModelLimits; chat: ChatModelInfo | undefined } => {
   if (availableTargets.length === 0) return { limits: {}, chat: undefined };
 
@@ -191,11 +210,11 @@ const mergeWithOverride = (
   chat: override.chat ?? computed.chat,
 });
 
-const synthesizeOne = (alias: ModelAliasRecord, realModels: readonly InternalModel[]): PublicModel => {
+const synthesizeOne = (alias: ModelAliasRecord, realModels: readonly ResolvedModel[]): PublicModel => {
   const realById = new Map(realModels.map(m => [m.id, m] as const));
   const availableTargets = alias.targets
     .map(target => ({ target, real: realById.get(target.target_model_id) }))
-    .filter((entry): entry is { target: AliasTarget; real: InternalModel } => entry.real !== undefined && entry.real.kind === alias.kind);
+    .filter((entry): entry is { target: AliasTarget; real: ResolvedModel } => entry.real !== undefined && entry.real.kind === alias.kind);
 
   // Display name precedence: operator-set wins; otherwise derive from the
   // sole target's id + rules when single-target; multi-target falls back to
@@ -220,6 +239,18 @@ const synthesizeOne = (alias: ModelAliasRecord, realModels: readonly InternalMod
   };
   if (chat !== undefined) entry.chat = chat;
 
+  // Endpoints follow the available-targets UNION, not an intersection —
+  // every endpoint reachable through ANY target is advertised, because
+  // the resolver's request-time pool narrows to targets that serve the
+  // inbound endpoint and the first-available / random pick happens
+  // within that narrowed pool. Operator can't override endpoints (they
+  // follow the target set, not a stored override). Absent when no
+  // target is currently available, same shape as the chat block.
+  if (availableTargets.length > 0) {
+    const endpoints = unionEndpoints(availableTargets.map(({ real }) => real.endpoints));
+    if (Object.keys(endpoints).length > 0) entry.endpoints = endpoints;
+  }
+
   // Single-target chat pricing rides along when available — the resolver
   // will hit that target, so the catalog can publish its rate verbatim.
   if (availableTargets.length === 1) {
diff --git a/packages/gateway/src/data-plane/models/alias-listing_test.ts b/packages/gateway/src/data-plane/models/alias-listing_test.ts
index c8ef0b9d6..0c346e10e 100644
--- a/packages/gateway/src/data-plane/models/alias-listing_test.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing_test.ts
@@ -2,7 +2,7 @@ import { describe, expect, test } from 'vitest';
 
 import { synthesizeListedAliases } from './alias-listing.ts';
 import type { ModelAliasRecord } from '../../repo/types.ts';
-import type { InternalModel } from '@floway-dev/provider';
+import type { ResolvedModel } from '@floway-dev/provider';
 
 const aliasFixture = (overrides: Partial<ModelAliasRecord> = {}): ModelAliasRecord => ({
   name: 'gpt-fast',
@@ -18,9 +18,11 @@ const aliasFixture = (overrides: Partial<ModelAliasRecord> = {}): ModelAliasReco
   ...overrides,
 });
 
-const realModel = (overrides: Partial<InternalModel> & { id: string }): InternalModel => ({
+const realModel = (overrides: Partial<ResolvedModel> & { id: string }): ResolvedModel => ({
   kind: 'chat',
   limits: {},
+  endpoints: { chatCompletions: {}, messages: {}, responses: {} },
+  providers: [],
   ...overrides,
 });
 
@@ -292,4 +294,54 @@ describe('synthesizeListedAliases', () => {
     const [entry] = synthesizeListedAliases({ aliases, realModels });
     expect(entry.chat).toEqual({ modalities: { input: ['text'], output: ['text'] } });
   });
+
+  test('endpoints is the union across available targets — every reachable endpoint surfaces', () => {
+    const aliases = [aliasFixture({
+      name: 'mixed',
+      targets: [
+        { target_model_id: 'a', rules: {} },
+        { target_model_id: 'b', rules: {} },
+      ],
+    })];
+    const realModels = [
+      // Target a serves the three chat endpoints + /completions.
+      realModel({ id: 'a', endpoints: { chatCompletions: {}, messages: {}, responses: {}, completions: {} } }),
+      // Target b only serves the three chat endpoints.
+      realModel({ id: 'b', endpoints: { chatCompletions: {}, messages: {}, responses: {} } }),
+    ];
+    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    // Union: every key surfaces. Resolver narrows to the supporting subset
+    // at request time, so first-available / random stays sound per-endpoint.
+    expect(entry.endpoints).toEqual({
+      chatCompletions: {},
+      messages: {},
+      responses: {},
+      completions: {},
+    });
+  });
+
+  test('endpoints union surfaces both image keys when targets split between generations and edits', () => {
+    const aliases = [aliasFixture({
+      kind: 'image',
+      targets: [
+        { target_model_id: 'gen', rules: {} },
+        { target_model_id: 'edit', rules: {} },
+      ],
+    })];
+    const realModels = [
+      realModel({ id: 'gen', kind: 'image', endpoints: { imagesGenerations: {} } }),
+      realModel({ id: 'edit', kind: 'image', endpoints: { imagesEdits: {} } }),
+    ];
+    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    expect(entry.endpoints).toEqual({ imagesGenerations: {}, imagesEdits: {} });
+  });
+
+  test('endpoints is absent on the entry when no target is currently available', () => {
+    const aliases = [aliasFixture({
+      name: 'ghost',
+      targets: [{ target_model_id: 'missing', rules: {} }],
+    })];
+    const [entry] = synthesizeListedAliases({ aliases, realModels: [] });
+    expect(entry.endpoints).toBeUndefined();
+  });
 });
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index 849b7fdab..3dedd1579 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -3,9 +3,9 @@ import type { ModelAliasesRepo } from '../../repo/types.ts';
 import { getModels } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
-import type { Fetcher, InternalModel } from '@floway-dev/provider';
+import type { Fetcher, ResolvedModel } from '@floway-dev/provider';
 
-export const toPublicModel = (model: InternalModel): PublicModel => {
+export const toPublicModel = (model: ResolvedModel): PublicModel => {
   const info: PublicModel = {
     id: model.id,
     object: 'model',
@@ -13,6 +13,7 @@ export const toPublicModel = (model: InternalModel): PublicModel => {
     display_name: model.display_name ?? model.id,
     limits: { ...model.limits },
     kind: model.kind,
+    endpoints: { ...model.endpoints },
   };
   if (model.owned_by !== undefined) info.owned_by = model.owned_by;
   if (model.created !== undefined) {
diff --git a/packages/gateway/src/data-plane/models/load_test.ts b/packages/gateway/src/data-plane/models/load_test.ts
index 39985c9c4..db96d7ac9 100644
--- a/packages/gateway/src/data-plane/models/load_test.ts
+++ b/packages/gateway/src/data-plane/models/load_test.ts
@@ -1,12 +1,14 @@
 import { describe, expect, test } from 'vitest';
 
 import { toPublicModel } from './load.ts';
-import type { InternalModel } from '@floway-dev/provider';
+import type { ResolvedModel } from '@floway-dev/provider';
 
-const base: InternalModel = {
+const base: ResolvedModel = {
   id: 'm1',
   kind: 'chat',
   limits: { max_context_window_tokens: 100000 },
+  endpoints: { chatCompletions: {}, messages: {}, responses: {} },
+  providers: [],
 };
 
 describe('toPublicModel', () => {
@@ -21,6 +23,10 @@ describe('toPublicModel', () => {
     };
     expect(toPublicModel({ ...base, chat }).chat).toEqual(chat);
   });
+
+  test('stamps the upstream endpoint map onto the wire entry verbatim', () => {
+    expect(toPublicModel(base).endpoints).toEqual({ chatCompletions: {}, messages: {}, responses: {} });
+  });
 });
 
 // The alias merge step inside `loadModels` (alias entries follow real
diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index 94ca48fd3..1b51d5cc5 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -119,7 +119,9 @@ test('/v1/models returns merged model list from Copilot and custom upstreams', a
         assertEquals(model.providerKind, undefined);
         assertEquals(model.providers, undefined);
         assertEquals(model.providerData, undefined);
-        assertEquals(model.endpoints, undefined);
+        // `endpoints` IS surfaced — see assertion further below for the
+        // exact shape (the chat-three-API atom + optional /completions
+        // + embeddings / images keys).
         assertEquals(model.upstream, undefined);
         assertEquals(model.upstreamModel, undefined);
         // Copilot-only raw fields never reach the public DTO.
@@ -250,6 +252,7 @@ test('/models returns the same superset payload as /v1/models', async () => {
             display_name: 'Claude Opus 4.7 XHigh',
             limits: {},
             kind: 'chat',
+            endpoints: { messages: {} },
             cost: {
               input: 5,
               output: 25,
@@ -272,6 +275,7 @@ test('/models returns the same superset payload as /v1/models', async () => {
             display_name: 'embedding-only',
             limits: {},
             kind: 'embedding',
+            endpoints: { embeddings: {} },
           },
           {
             id: 'gpt-image-2',
@@ -280,6 +284,7 @@ test('/models returns the same superset payload as /v1/models', async () => {
             display_name: 'gpt-image-2',
             limits: {},
             kind: 'image',
+            endpoints: { imagesGenerations: {}, imagesEdits: {} },
           },
         ],
       });
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index 22cce94ab..31478d183 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -1,4 +1,5 @@
 import type { AliasKind, AliasSelection, AliasTarget } from './aliases.ts';
+import type { ModelEndpoints } from './capabilities.ts';
 
 // Disjoint billing dimensions a single request can be charged on. Every count
 // keyed by these is non-overlapping: a prompt token is counted under exactly
@@ -153,6 +154,19 @@ export interface PublicModel {
   // Non-standard extra fields below.
   limits: PublicModelLimits;
   kind: ModelKind;
+  // Public-facing endpoint surface. Mirrors the upstream-side ModelEndpoints
+  // verbatim — by the time a model reaches this DTO, the provider layer
+  // (e.g. provider-ollama, provider-copilot) has already projected the raw
+  // upstream catalog into the public-facing shape: the three chat endpoints
+  // (chatCompletions / messages / responses) appear together because the
+  // gateway translates between them, while `completions`, `embeddings`,
+  // `imagesGenerations`, and `imagesEdits` only appear when the upstream
+  // natively serves them. Alias entries surface the UNION of every
+  // currently-available target's endpoint map — at request time the
+  // resolver narrows the pool to targets that serve the inbound endpoint,
+  // so any endpoint advertised here is reachable through at least one
+  // target.
+  endpoints?: ModelEndpoints;
   cost?: ModelPricing;
   chat?: ChatModelInfo;
   // Present only on entries the gateway synthesized from an operator-defined

From ddb405b8b30e73b5e9ae225afe6a592465a46b89 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 04:50:36 +0800
Subject: [PATCH 092/170] fix(aliases): resolver pool narrows to targets that
 serve the inbound endpoint
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`random` selection could pick a target that didn't serve the inbound
endpoint — the pool was every target reachable through ANY enabled
binding, ignoring whether that binding exposed the endpoint the caller
wanted. `first-available` had the same problem when the first listed
target's binding didn't serve the inbound endpoint.

Thread an `endpointAccepts: (endpoints) => boolean` predicate from the
caller into the resolver. Chat surfaces wrap `pickTarget` (which already
knows which chat endpoints the source serve accepts); passthrough
surfaces replace the per-call `bindingServesEndpoint` closure with a
single `endpointKey` field on PassthroughServeContext, which feeds both
the resolver predicate and the downstream binding filter.

Pairs with the recent union-not-intersection endpoint advertising on
alias entries: clients see every endpoint reachable through any target,
and the resolver guarantees the picked target actually serves whichever
endpoint the client called.
---
 .../chat/chat-completions/serve_test.ts       |   1 +
 .../src/data-plane/chat/gemini/serve_test.ts  |   1 +
 .../data-plane/chat/messages/serve_test.ts    |   1 +
 .../data-plane/chat/responses/serve_test.ts   |   1 +
 .../src/data-plane/chat/shared/candidates.ts  |   7 +-
 .../src/data-plane/completions/serve.ts       |   2 +-
 .../src/data-plane/embeddings/serve.ts        |   2 +-
 .../gateway/src/data-plane/images/serve.ts    |   4 +-
 .../src/data-plane/model-aliases/resolve.ts   |  47 ++++---
 .../data-plane/model-aliases/resolve_test.ts  | 119 ++++++++++++++++--
 .../src/data-plane/providers/registry.ts      |   9 ++
 .../data-plane/shared/passthrough-serve.ts    |  16 ++-
 12 files changed, 174 insertions(+), 36 deletions(-)

diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
index 02d6079cc..81b35b6a8 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
@@ -31,6 +31,7 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
         providers: [],
         fetcherForUpstream: () => directFetcher,
         scheduler: args.scheduler,
+        endpointAccepts: () => true,
         repo: { getByName: () => Promise.resolve(null) } as never,
       });
       const effectiveModel = aliasResolution?.targetModelId ?? args.model;
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
index e1a535ef3..b407587aa 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
@@ -30,6 +30,7 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
         providers: [],
         fetcherForUpstream: () => directFetcher,
         scheduler: args.scheduler,
+        endpointAccepts: () => true,
         repo: { getByName: () => Promise.resolve(null) } as never,
       });
       const effectiveModel = aliasResolution?.targetModelId ?? args.model;
diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
index 6fcf560c0..e513b4dbf 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
@@ -28,6 +28,7 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
         providers: [],
         fetcherForUpstream: () => directFetcher,
         scheduler: args.scheduler,
+        endpointAccepts: () => true,
         repo: { getByName: () => Promise.resolve(null) } as never,
       });
       const effectiveModel = aliasResolution?.targetModelId ?? args.model;
diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
index c6a210543..8ba8224cd 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
@@ -38,6 +38,7 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
         providers: [],
         fetcherForUpstream: () => directFetcher,
         scheduler: args.scheduler,
+        endpointAccepts: () => true,
         repo: { getByName: () => Promise.resolve(null) } as never,
       });
       const effectiveModel = aliasResolution?.targetModelId ?? args.model;
diff --git a/packages/gateway/src/data-plane/chat/shared/candidates.ts b/packages/gateway/src/data-plane/chat/shared/candidates.ts
index 229284edb..8c67d4872 100644
--- a/packages/gateway/src/data-plane/chat/shared/candidates.ts
+++ b/packages/gateway/src/data-plane/chat/shared/candidates.ts
@@ -47,12 +47,17 @@ export const enumerateProviderCandidates = async ({
   // sees the same alias surface. The target id is fed verbatim into prefix
   // routing; alias names never re-enter the alias layer.
   // `AliasNoTargetAvailableError` propagates so the chat serve's catch maps
-  // it to its protocol-native 404.
+  // it to its protocol-native 404. The endpoint predicate piggybacks on
+  // `pickTarget` so the resolver's pool narrows to targets whose binding
+  // exposes one of the chat surfaces the source serve actually wants —
+  // first-available / random pick from a set the prefix router can serve
+  // end-to-end.
   const aliasResolution = await resolveAlias({
     modelName: model,
     providers,
     fetcherForUpstream,
     scheduler,
+    endpointAccepts: endpoints => pickTarget(endpoints) !== null,
     repo: getRepo().modelAliases,
   });
   const effectiveModel = aliasResolution?.targetModelId ?? model;
diff --git a/packages/gateway/src/data-plane/completions/serve.ts b/packages/gateway/src/data-plane/completions/serve.ts
index d72ed42c2..f26708065 100644
--- a/packages/gateway/src/data-plane/completions/serve.ts
+++ b/packages/gateway/src/data-plane/completions/serve.ts
@@ -100,7 +100,7 @@ export const completions = async (c: Context): Promise<Response> => {
     ctx,
     sourceApi: '/completions',
     model: request.model,
-    bindingServesEndpoint: binding => binding.upstreamModel.endpoints.completions !== undefined,
+    endpointKey: 'completions',
     call: (binding, opts) =>
       binding.provider.callCompletions(binding.upstreamModel, upstreamBody, ctx.abortSignal, opts),
     response: request.wantsStream
diff --git a/packages/gateway/src/data-plane/embeddings/serve.ts b/packages/gateway/src/data-plane/embeddings/serve.ts
index 9c33e6736..ead9b3f28 100644
--- a/packages/gateway/src/data-plane/embeddings/serve.ts
+++ b/packages/gateway/src/data-plane/embeddings/serve.ts
@@ -58,7 +58,7 @@ export const embeddings = async (c: Context): Promise<Response> => {
     ctx,
     sourceApi: '/embeddings',
     model: request.model,
-    bindingServesEndpoint: binding => binding.upstreamModel.endpoints.embeddings !== undefined,
+    endpointKey: 'embeddings',
     call: async (binding, opts) => {
       const { model: _model, ...body } = request.body;
       return await binding.provider.callEmbeddings(binding.upstreamModel, body, undefined, opts);
diff --git a/packages/gateway/src/data-plane/images/serve.ts b/packages/gateway/src/data-plane/images/serve.ts
index 58f8a7a25..c27fbac56 100644
--- a/packages/gateway/src/data-plane/images/serve.ts
+++ b/packages/gateway/src/data-plane/images/serve.ts
@@ -57,7 +57,7 @@ export const imagesGenerations = async (c: Context): Promise<Response> => {
     ctx,
     sourceApi: '/images/generations',
     model: request.model,
-    bindingServesEndpoint: binding => binding.upstreamModel.endpoints.imagesGenerations !== undefined,
+    endpointKey: 'imagesGenerations',
     call: (binding, opts) => {
       const { model: _model, ...body } = request.body;
       return binding.provider.callImagesGenerations(binding.upstreamModel, body, undefined, opts);
@@ -96,7 +96,7 @@ export const imagesEdits = async (c: Context): Promise<Response> => {
     ctx,
     sourceApi: '/images/edits',
     model: modelRaw,
-    bindingServesEndpoint: binding => binding.upstreamModel.endpoints.imagesEdits !== undefined,
+    endpointKey: 'imagesEdits',
     call: (binding, opts) => {
       // ModelProvider.callImagesEdits takes ownership of the FormData and
       // appends the upstream-specific model/deployment id; allocate a fresh
diff --git a/packages/gateway/src/data-plane/model-aliases/resolve.ts b/packages/gateway/src/data-plane/model-aliases/resolve.ts
index 07c13b85c..474f71060 100644
--- a/packages/gateway/src/data-plane/model-aliases/resolve.ts
+++ b/packages/gateway/src/data-plane/model-aliases/resolve.ts
@@ -5,18 +5,20 @@
 // construction and the shadow-the-real-model pattern (an alias whose first
 // target is its own name) Just Works.
 //
-// The resolver is endpoint-blind: alias names are opaque global mappings
-// and the routability filter only checks whether a target id resolves to
-// any enabled upstream binding. A kind-mismatched call (e.g. a chat alias
-// hit from /embeddings) gets the resolved target id back; if that target
-// does not expose the inbound endpoint, prefix routing surfaces the natural
-// "endpoint not supported" 404. The `AliasKind` on the row only governs UI
-// rule forms and the `/v1/models` listing block.
+// The resolver is endpoint-aware on the pool-narrowing axis but
+// kind-blind on the alias-rejection axis. The caller hands in an
+// `endpointAccepts` predicate that decides whether a candidate target's
+// resolved binding actually serves the inbound endpoint; the pool only
+// keeps targets that satisfy it, so first-available / random pick from a
+// set that the prefix router can serve end-to-end. The resolver does NOT
+// reject an alias just because its kind disagrees with the inbound
+// endpoint — that responsibility stays with the predicate, and a
+// kind-mismatched alias surfaces the natural "no target available" 404.
 
 import type { ModelAliasesRepo, ModelAliasRecord } from '../../repo/types.ts';
 import { collectInterpretationOutcomes, enumerateModelInterpretations } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
-import type { AliasRules } from '@floway-dev/protocols/common';
+import type { AliasRules, ModelEndpoints } from '@floway-dev/protocols/common';
 import type { Fetcher, ModelProviderInstance } from '@floway-dev/provider';
 
 export interface AliasResolution {
@@ -34,8 +36,9 @@ const aliasNoTargetMessage = (params: { aliasName: string; targetCount: number }
   `alias '${params.aliasName}' has ${params.targetCount} target(s); none currently map to an enabled upstream binding`;
 
 // Thrown when the alias name was found but no target currently resolves to
-// an enabled upstream binding. Caught at each protocol's serve seam and
-// surfaced as a 404 in the protocol-specific error envelope.
+// an enabled upstream binding that serves the inbound endpoint. Caught at
+// each protocol's serve seam and surfaced as a 404 in the protocol-specific
+// error envelope.
 export class AliasNoTargetAvailableError extends Error {
   readonly aliasName: string;
 
@@ -54,25 +57,34 @@ interface ResolveAliasArgs {
   // factory cost paid once per request rather than twice.
   readonly providers: readonly ModelProviderInstance[];
   readonly fetcherForUpstream: (upstreamId: string) => Fetcher;
+  // Predicate the caller supplies to narrow the pool to targets whose
+  // resolved binding serves the inbound endpoint. Chat callers wrap
+  // `pickTarget`; passthrough callers check the specific endpoint key.
+  // A target enters the pool iff at least one of its resolved bindings
+  // returns true here.
+  readonly endpointAccepts: (endpoints: ModelEndpoints) => boolean;
   // Injected so tests can hand in a stub; the per-request ctx already owns
   // a concrete one via `getRepo().modelAliases`.
   readonly repo: ModelAliasesRepo;
 }
 
 // Reports true when the target id resolves to at least one enabled upstream
-// binding, irrespective of which endpoint that binding exposes. Endpoint
-// suitability is the prefix-routing layer's job; the resolver only proves
-// the target is reachable somewhere in the catalog.
+// binding whose endpoint map satisfies the inbound endpoint predicate.
+// `random` selection in particular depends on this — without endpoint
+// awareness, a randomly-picked target may not serve the inbound endpoint
+// and the request would 404 at prefix routing even though another target
+// would have worked.
 const candidateIsRoutable = async (
   targetModelId: string,
   providers: readonly ModelProviderInstance[],
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
+  endpointAccepts: (endpoints: ModelEndpoints) => boolean,
 ): Promise<boolean> => {
   if (providers.length === 0) return false;
   const interpretations = enumerateModelInterpretations(targetModelId, providers);
   const { resolutions } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
-  return resolutions.length > 0;
+  return resolutions.some(r => endpointAccepts(r.resolved.binding.upstreamModel.endpoints));
 };
 
 // Pre-pick the available pool ONCE. Order is preserved so
@@ -83,18 +95,19 @@ const buildAvailablePool = async (
   providers: readonly ModelProviderInstance[],
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
+  endpointAccepts: (endpoints: ModelEndpoints) => boolean,
 ): Promise<ModelAliasRecord['targets']> => {
   const availability = await Promise.all(record.targets.map(target =>
-    candidateIsRoutable(target.target_model_id, providers, fetcherForUpstream, scheduler)));
+    candidateIsRoutable(target.target_model_id, providers, fetcherForUpstream, scheduler, endpointAccepts)));
   return record.targets.filter((_, index) => availability[index]);
 };
 
 export const resolveAlias = async (args: ResolveAliasArgs): Promise<AliasResolution | null> => {
-  const { modelName, providers, fetcherForUpstream, scheduler, repo } = args;
+  const { modelName, providers, fetcherForUpstream, scheduler, endpointAccepts, repo } = args;
   const record = await repo.getByName(modelName);
   if (!record) return null;
 
-  const pool = await buildAvailablePool(record, providers, fetcherForUpstream, scheduler);
+  const pool = await buildAvailablePool(record, providers, fetcherForUpstream, scheduler, endpointAccepts);
   if (pool.length === 0) throw new AliasNoTargetAvailableError(record.name, record.targets.length);
 
   const picked = record.selection === 'first-available'
diff --git a/packages/gateway/src/data-plane/model-aliases/resolve_test.ts b/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
index cb80fe550..516a2a749 100644
--- a/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
+++ b/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
@@ -1,20 +1,23 @@
 // Behavioral coverage for the alias resolver. Mocks the lower-layer
 // catalog seam (`enumerateModelInterpretations` + `collectInterpretationOutcomes`
 // out of `providers/registry.ts`) so each test can hand-script which
-// target model ids look routable; the resolver itself runs unmocked, so
-// its filter logic (availability, selection strategy) is the thing under
-// test. The resolver is endpoint-blind — a target is routable iff it
-// resolves to ANY enabled binding — so the mock no longer differentiates
-// endpoints.
+// target model ids look routable AND what endpoint map their binding
+// advertises; the resolver itself runs unmocked, so its filter logic
+// (availability via the endpointAccepts predicate, selection strategy)
+// is the thing under test.
 
 import { test, vi } from 'vitest';
 
 import type { ModelAliasRecord, ModelAliasesRepo } from '../../repo/types.ts';
 import type { ModelInterpretation, ProviderModelResolution } from '../providers/registry.ts';
+import type { ModelEndpoints } from '@floway-dev/protocols/common';
 import { directFetcher, type Fetcher } from '@floway-dev/provider';
 import { assert, assertEquals, assertRejects } from '@floway-dev/test-utils';
 
-const routableModels = new Set<string>();
+// id → endpoint map. Absent ids look unroutable; present ids return
+// resolutions whose binding advertises the given endpoints, so the
+// resolver's `endpointAccepts` predicate can filter them.
+const routableModels = new Map<string, ModelEndpoints>();
 
 vi.mock('../providers/registry.ts', () => ({
   enumerateModelInterpretations: vi.fn((modelId: string, providers: readonly { upstream: string }[]): ModelInterpretation[] =>
@@ -26,8 +29,8 @@ vi.mock('../providers/registry.ts', () => ({
         provider: i.provider,
         resolved: {
           id: i.lookupId,
-          model: { id: i.lookupId, endpoints: {} },
-          binding: { upstream: i.provider.upstream, upstreamModel: { id: i.lookupId, endpoints: {} } },
+          model: { id: i.lookupId, endpoints: routableModels.get(i.lookupId) },
+          binding: { upstream: i.provider.upstream, upstreamModel: { id: i.lookupId, endpoints: routableModels.get(i.lookupId) } },
         } as unknown as ProviderModelResolution,
       })),
     failedUpstreams: [],
@@ -68,14 +71,27 @@ const RESOLVE_DEFAULTS = {
   scheduler: () => {},
 };
 
+// Mark these ids routable with the full chat-three endpoint set — most
+// tests only care about availability, not which endpoint surface the
+// binding advertises. Endpoint-aware tests use `setRoutableWith` below.
 const setRoutable = (...ids: string[]): void => {
   routableModels.clear();
-  for (const id of ids) routableModels.add(id);
+  for (const id of ids) routableModels.set(id, { chatCompletions: {}, messages: {}, responses: {} });
+};
+
+// Endpoint-aware variant: each id carries the exact endpoint map its
+// binding advertises. Lets a test pin "target A serves /chat/completions,
+// target B only serves /messages" so the resolver's pool-narrowing can
+// be verified.
+const setRoutableWith = (entries: Record<string, ModelEndpoints>): void => {
+  routableModels.clear();
+  for (const [id, endpoints] of Object.entries(entries)) routableModels.set(id, endpoints);
 };
 
 test('returns null when no alias matches the inbound name', async () => {
   setRoutable('gpt-5.4');
   const result = await resolveAlias({
+    endpointAccepts: () => true,
     ...RESOLVE_DEFAULTS,
     modelName: 'not-an-alias',
     repo: stubRepoFor(null),
@@ -86,6 +102,7 @@ test('returns null when no alias matches the inbound name', async () => {
 test('returns the target and rules when a single target is available', async () => {
   setRoutable('gpt-5.4');
   const result = await resolveAlias({
+    endpointAccepts: () => true,
     ...RESOLVE_DEFAULTS,
     modelName: 'gpt-fast',
     repo: stubRepoFor(aliasRecord()),
@@ -100,6 +117,7 @@ test('throws AliasNoTargetAvailableError when the alias exists but no target is
   setRoutable(); // catalog empty
   await assertRejects(
     () => resolveAlias({
+      endpointAccepts: () => true,
       ...RESOLVE_DEFAULTS,
       modelName: 'gpt-fast',
       repo: stubRepoFor(aliasRecord({
@@ -117,6 +135,7 @@ test('throws AliasNoTargetAvailableError when the alias exists but no target is
 test('first-available skips unroutable rows and picks the first available, not the first listed', async () => {
   setRoutable('gpt-5.5'); // `gpt-5.4` is not in the catalog
   const result = await resolveAlias({
+    endpointAccepts: () => true,
     ...RESOLVE_DEFAULTS,
     modelName: 'gpt-fast',
     repo: stubRepoFor(aliasRecord({
@@ -137,6 +156,7 @@ test('random selection picks every available target across enough iterations', a
   const seen = new Set<string>();
   for (let i = 0; i < 100; i += 1) {
     const result = await resolveAlias({
+      endpointAccepts: () => true,
       ...RESOLVE_DEFAULTS,
       modelName: 'gpt-fast',
       repo: stubRepoFor(aliasRecord({
@@ -160,6 +180,7 @@ test('random selection picks every available target across enough iterations', a
 test('shadow pattern: alias whose first target equals its own name picks the real model when present', async () => {
   setRoutable('codex-auto-review'); // the real model IS in the catalog
   const result = await resolveAlias({
+    endpointAccepts: () => true,
     ...RESOLVE_DEFAULTS,
     modelName: 'codex-auto-review',
     repo: stubRepoFor(aliasRecord({
@@ -178,6 +199,7 @@ test('shadow pattern: alias whose first target equals its own name picks the rea
 test('shadow pattern: alias falls back to the second target when the real model is not in the catalog', async () => {
   setRoutable('gpt-5.4'); // only the fallback is routable
   const result = await resolveAlias({
+    endpointAccepts: () => true,
     ...RESOLVE_DEFAULTS,
     modelName: 'codex-auto-review',
     repo: stubRepoFor(aliasRecord({
@@ -192,3 +214,82 @@ test('shadow pattern: alias falls back to the second target when the real model
   assertEquals(result.targetModelId, 'gpt-5.4');
   assertEquals(result.rules, { reasoning: { effort: 'low' } });
 });
+
+test('endpoint-aware pool: random selection picks ONLY from targets whose binding serves the inbound endpoint', async () => {
+  // Two targets: A serves /chat/completions; B only serves /messages.
+  // Inbound endpoint = /chat/completions (predicate keeps only A). Run 50
+  // iterations of `random` selection and assert B is never picked — if
+  // the resolver were endpoint-blind, B would surface ~half the time and
+  // the downstream prefix router would 404.
+  setRoutableWith({
+    'serves-cc': { chatCompletions: {} },
+    'serves-messages-only': { messages: {} },
+  });
+  const repo = stubRepoFor(aliasRecord({
+    selection: 'random',
+    targets: [
+      { target_model_id: 'serves-cc', rules: {} },
+      { target_model_id: 'serves-messages-only', rules: {} },
+    ],
+  }));
+  const picks = new Set<string>();
+  for (let i = 0; i < 50; i++) {
+    const result = await resolveAlias({
+      modelName: 'gpt-fast',
+      ...RESOLVE_DEFAULTS,
+      endpointAccepts: endpoints => endpoints.chatCompletions !== undefined,
+      repo,
+    });
+    assert(result !== null);
+    picks.add(result.targetModelId);
+  }
+  assertEquals([...picks], ['serves-cc']);
+});
+
+test('endpoint-aware pool: first-available skips targets whose binding does not serve the inbound endpoint', async () => {
+  // Configured order [A, B]: A only serves /messages, B serves /chat/completions.
+  // Inbound endpoint = /chat/completions. first-available without endpoint
+  // narrowing would pick A and downstream 404. With narrowing the pool
+  // becomes [B], and first-available returns B.
+  setRoutableWith({
+    'messages-only': { messages: {} },
+    'serves-cc': { chatCompletions: {} },
+  });
+  const repo = stubRepoFor(aliasRecord({
+    selection: 'first-available',
+    targets: [
+      { target_model_id: 'messages-only', rules: {} },
+      { target_model_id: 'serves-cc', rules: {} },
+    ],
+  }));
+  const result = await resolveAlias({
+    modelName: 'gpt-fast',
+    ...RESOLVE_DEFAULTS,
+    endpointAccepts: endpoints => endpoints.chatCompletions !== undefined,
+    repo,
+  });
+  assert(result !== null);
+  assertEquals(result.targetModelId, 'serves-cc');
+});
+
+test('endpoint-aware pool: alias with NO target serving the inbound endpoint throws AliasNoTargetAvailableError', async () => {
+  setRoutableWith({
+    'a': { messages: {} },
+    'b': { messages: {} },
+  });
+  const repo = stubRepoFor(aliasRecord({
+    targets: [
+      { target_model_id: 'a', rules: {} },
+      { target_model_id: 'b', rules: {} },
+    ],
+  }));
+  await assertRejects(
+    () => resolveAlias({
+      modelName: 'gpt-fast',
+      ...RESOLVE_DEFAULTS,
+      endpointAccepts: endpoints => endpoints.chatCompletions !== undefined,
+      repo,
+    }),
+    AliasNoTargetAvailableError,
+  );
+});
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index 98eddc4c7..5a6b26cb2 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -370,6 +370,14 @@ export const resolveModelForRequest = async (
   upstreamFilter: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
+  // Predicate the alias resolver uses to narrow its target pool to
+  // bindings whose endpoint map serves the inbound endpoint. Passthrough
+  // callers pass `endpoints => endpoints[targetEndpointKey] !== undefined`
+  // so first-available / random alias selection picks a target the prefix
+  // router can serve end-to-end. The default accepts any binding — used
+  // by call sites whose own endpoint targeting happens downstream (e.g.
+  // the Responses image-generation tool's internal model resolve).
+  endpointAccepts: (endpoints: ModelEndpoints) => boolean = () => true,
 ): Promise<ModelResolution> => {
   const providers = await listModelProviders(upstreamFilter);
   if (providers.length === 0) {
@@ -386,6 +394,7 @@ export const resolveModelForRequest = async (
     providers,
     fetcherForUpstream,
     scheduler,
+    endpointAccepts,
     repo: getRepo().modelAliases,
   });
   const effectiveModelId = aliasResolution?.targetModelId ?? modelId;
diff --git a/packages/gateway/src/data-plane/shared/passthrough-serve.ts b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
index 0604e51d1..19f172cc4 100644
--- a/packages/gateway/src/data-plane/shared/passthrough-serve.ts
+++ b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
@@ -28,7 +28,7 @@ import { ALIAS_RESPONSE_HEADER } from '../model-aliases/apply.ts';
 import { AliasNoTargetAvailableError } from '../model-aliases/resolve.ts';
 import { resolveModelForRequest } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
-import { doneFrame, eventFrame, parseSSEStream, parseTargetStreamFrames, type ProtocolFrame, sseCommentFrame, sseFrame } from '@floway-dev/protocols/common';
+import { doneFrame, eventFrame, type ModelEndpointKey, parseSSEStream, parseTargetStreamFrames, type ProtocolFrame, sseCommentFrame, sseFrame } from '@floway-dev/protocols/common';
 import { httpResponseToResponse, ProviderModelsUnavailableError, toInternalDebugError } from '@floway-dev/provider';
 import type { ProviderCallResult, ProviderModelRecord, UpstreamCallOptions } from '@floway-dev/provider';
 
@@ -104,7 +104,13 @@ interface PassthroughServeContext {
   // resolves it against the provider registry; if no upstream serves the
   // id, the client sees a 404 with the standard wording.
   readonly model: string;
-  readonly bindingServesEndpoint: (binding: ProviderModelRecord) => boolean;
+  // The single ModelEndpoints key this passthrough call needs the
+  // upstream binding to advertise. Used to filter the resolver's
+  // candidate matches AND to narrow the alias resolver's first-
+  // available / random pool to targets whose binding actually serves
+  // this endpoint, so an alias with mixed endpoint coverage never
+  // routes to a target that 404s downstream.
+  readonly endpointKey: ModelEndpointKey;
   // Performs the upstream HTTP call for the chosen binding. Any throw here
   // is preserved and becomes a 502 with the internal-debug envelope —
   // exceptions thrown from the actual fetch must not be silently swallowed.
@@ -120,7 +126,7 @@ export const passthroughApiError = (c: Context, message: string, status: Content
   c.json({ error: { message, type: 'api_error' } }, status);
 
 export const passthroughServe = async (input: PassthroughServeContext): Promise<Response> => {
-  const { c, ctx, sourceApi, model, bindingServesEndpoint, call, response: responseHandling } = input;
+  const { c, ctx, sourceApi, model, endpointKey, call, response: responseHandling } = input;
   const requestStartedAt = performance.now();
   let lastPerformance: PerformanceTelemetryContext | undefined;
 
@@ -137,7 +143,7 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
     // target id; `AliasNoTargetAvailableError` propagates as the 404 below.
     let resolution;
     try {
-      resolution = await resolveModelForRequest(model, ctx.upstreamIds, fetcherForUpstream, ctx.backgroundScheduler);
+      resolution = await resolveModelForRequest(model, ctx.upstreamIds, fetcherForUpstream, ctx.backgroundScheduler, endpoints => endpoints[endpointKey] !== undefined);
     } catch (e) {
       if (e instanceof AliasNoTargetAvailableError) {
         ctx.dump?.error('gateway');
@@ -153,7 +159,7 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
     }
 
     for (const match of matches) {
-      if (!bindingServesEndpoint(match.binding)) continue;
+      if (match.binding.upstreamModel.endpoints[endpointKey] === undefined) continue;
 
       const recorder = createUpstreamLatencyRecorder();
       const { response, modelKey } = await call(match.binding, {

From 8de3eeafc88aab6da6eb20c4da2b53895eb1567a Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 05:08:07 +0800
Subject: [PATCH 093/170] docs(aliases): clarify AnnouncedMetadata override
 granularity
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The docstring promised "any sub-field … falls back" — implementer-
faithful but operator-misleading: `mergeWithOverride` replaces the two
top-level sub-blocks (`limits` / `chat`) wholesale, not field-by-field.
The dashboard hides this by seeding the buffer from the full computed
snapshot when "Enable override" flips on; programmatic callers see the
real semantics. Spell it out so a `POST /api/aliases` author isn't
surprised that `{ limits: { max_output_tokens: 8192 } }` blanks the
other limit keys.
---
 packages/protocols/src/common/aliases.ts | 14 ++++++++++----
 1 file changed, 10 insertions(+), 4 deletions(-)

diff --git a/packages/protocols/src/common/aliases.ts b/packages/protocols/src/common/aliases.ts
index 186880ca4..e9c55118c 100644
--- a/packages/protocols/src/common/aliases.ts
+++ b/packages/protocols/src/common/aliases.ts
@@ -75,10 +75,16 @@ export interface AliasTarget {
 
 // Operator-set override for the alias's announced /v1/models payload —
 // the `limits` + `chat.*` block the listing surfaces to clients. Sparse:
-// any sub-field the operator leaves unset falls back to the rule-aware
-// intersection across the alias's available targets. `kind` and the
-// supported endpoint set are not part of this payload; they follow from
-// the alias row (`kind`) and the target intersection (endpoints).
+// any top-level sub-block (`limits` / `chat`) the operator leaves unset
+// falls back wholesale to the rule-aware intersection across the alias's
+// available targets. Fallback is at the sub-block boundary, not per-leaf:
+// posting `{ limits: { max_output_tokens: 8192 } }` replaces `limits`
+// entirely, so other limit keys disappear from the announced metadata
+// unless the override re-states them. (The dashboard hides this by
+// seeding the buffer from the full computed snapshot at the moment the
+// "Enable override" switch flips on.) `kind` and the supported endpoint
+// set are not part of this payload; they follow from the alias row
+// (`kind`) and the target union (endpoints).
 export interface AnnouncedMetadata {
   limits?: PublicModelLimits;
   chat?: ChatModelInfo;

From 2860aeefc95754dfdfbb7378113f7456edf2225a Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 05:14:25 +0800
Subject: [PATCH 094/170] cleanup(aliases): import ChatModelInfo from api/types
 instead of redefining

`apps/web/src/api/types.ts` re-exports `ChatModelInfo` from
`@floway-dev/protocols/common`; the helper already imports from the
same module, so the local `NonNullable<ControlPlaneModel['chat']>`
alias is unnecessary indirection for the same structural shape.
---
 apps/web/src/components/alias-edit/announced-metadata.ts | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/apps/web/src/components/alias-edit/announced-metadata.ts b/apps/web/src/components/alias-edit/announced-metadata.ts
index 69a5543f6..12559d5be 100644
--- a/apps/web/src/components/alias-edit/announced-metadata.ts
+++ b/apps/web/src/components/alias-edit/announced-metadata.ts
@@ -11,9 +11,7 @@
 // hand. The backend stays authoritative — what `/v1/models` reports
 // is what the gateway computes there, not what this helper emits.
 
-import type { AliasTarget, AnnouncedMetadata, ChatAliasRules, ControlPlaneModel, ModelLimits } from '../../api/types.ts';
-
-type ChatModelInfo = NonNullable<ControlPlaneModel['chat']>;
+import type { AliasTarget, AnnouncedMetadata, ChatAliasRules, ChatModelInfo, ControlPlaneModel, ModelLimits } from '../../api/types.ts';
 
 const chatRules = (target: AliasTarget): ChatAliasRules => target.rules as ChatAliasRules;
 

From c0e5c059ecf519818f4a201b3f5c91995e6211ba Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 05:20:56 +0800
Subject: [PATCH 095/170] docs(aliases): align the five mirror docstrings to
 the corrected override semantics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

8de3eeaf corrected the AnnouncedMetadata wire docstring (round-1
review-stage fix) — fallback is at the top-level sub-block boundary
(`limits` / `chat`), not per-leaf. Five sibling docstrings in this
same feature branch still carried the operator-misleading "any
sub-field falls back" wording: the migration comment, the repo
record type, the zod schema, and two passes inside alias-listing.ts.
Restate each in the corrected language so a future reader doesn't
hit the same trap the round-1 review caught.
---
 .../migrations/0047_alias_announced_metadata.sql  |  4 ++--
 packages/gateway/src/control-plane/schemas.ts     | 14 ++++++++------
 .../src/data-plane/models/alias-listing.ts        | 15 +++++++++------
 packages/gateway/src/repo/types.ts                |  7 ++++---
 4 files changed, 23 insertions(+), 17 deletions(-)

diff --git a/packages/gateway/migrations/0047_alias_announced_metadata.sql b/packages/gateway/migrations/0047_alias_announced_metadata.sql
index 9f29f6290..1e57423c3 100644
--- a/packages/gateway/migrations/0047_alias_announced_metadata.sql
+++ b/packages/gateway/migrations/0047_alias_announced_metadata.sql
@@ -1,6 +1,6 @@
 -- Operator-set override for an alias's announced metadata payload — the
 -- `limits` + `chat.*` block surfaced on /v1/models. NULL keeps the
 -- automatic, rule-aware intersection across the alias's targets; a
--- non-null value is a JSON-encoded AnnouncedMetadata, sparse so any
--- omitted sub-field falls back to the automatic computation.
+-- non-null value is a JSON-encoded AnnouncedMetadata. Fallback is at
+-- the top-level sub-block boundary (`limits` / `chat`), not per-leaf.
 ALTER TABLE model_aliases ADD COLUMN announced_metadata_json TEXT;
diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
index 29288c4af..09afdd5ab 100644
--- a/packages/gateway/src/control-plane/schemas.ts
+++ b/packages/gateway/src/control-plane/schemas.ts
@@ -627,12 +627,14 @@ const aliasTargetSchema = z.object({
   rules: z.record(z.string(), z.unknown()),
 });
 
-// Operator override for an alias's announced /v1/models payload. Sparse —
-// both sub-fields are independently optional, and the alias-listing pipeline
-// falls back to the rule-aware automatic computation for any sub-field the
-// operator did not provide. `chatSchema` and `limitsSchema` are the same
-// shapes the upstream-model surface validates, so the override carries the
-// catalog's full vocabulary.
+// Operator override for an alias's announced /v1/models payload. Both
+// sub-fields are independently optional, and the alias-listing pipeline
+// falls back to the rule-aware automatic computation for any TOP-LEVEL
+// sub-block (`limits` / `chat`) the operator did not provide — a present
+// sub-block replaces the computed counterpart wholesale, not per-leaf.
+// `chatSchema` and `limitsSchema` are the same shapes the upstream-model
+// surface validates, so the override carries the catalog's full
+// vocabulary.
 const announcedMetadataSchema = z.object({
   limits: limitsSchema.optional(),
   chat: chatSchema.optional(),
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index acc4fa6b9..d2bbecb90 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -4,9 +4,10 @@
 // alias-of relationship without a second round trip.
 //
 // `limits` and `chat` come from the alias's announced metadata payload:
-// the operator's stored override when set (sparse — any sub-field they
-// did not provide falls back to the automatic computation), otherwise
-// the rule-aware intersection across the alias's available targets. The
+// the operator's stored override when set (with top-level sub-block
+// granularity — a present `limits` / `chat` replaces the computed
+// counterpart wholesale, not per-leaf), otherwise the rule-aware
+// intersection across the alias's available targets. The
 // intersection is the safe lower bound for the inbound request — every
 // reported capability survives no matter which target the resolver
 // picks at request time.
@@ -199,9 +200,11 @@ const computeAutomaticMetadata = (
   return { limits, chat };
 };
 
-// Merge the operator's override on top of the computed payload, per the
-// sparse-override contract: any sub-field the operator omitted falls
-// back to the computed value for that sub-field.
+// Merge the operator's override on top of the computed payload at the
+// top-level sub-block boundary: a present `limits` / `chat` on the
+// override replaces the computed counterpart wholesale; an omitted
+// sub-block falls back to the computed value. (Merge is intentionally
+// NOT per-leaf — that's the contract `AnnouncedMetadata` advertises.)
 const mergeWithOverride = (
   computed: { limits: PublicModelLimits; chat: ChatModelInfo | undefined },
   override: AnnouncedMetadata,
diff --git a/packages/gateway/src/repo/types.ts b/packages/gateway/src/repo/types.ts
index 1bd99dd62..77d168fb5 100644
--- a/packages/gateway/src/repo/types.ts
+++ b/packages/gateway/src/repo/types.ts
@@ -278,9 +278,10 @@ export interface ModelAliasRecord {
   // ignored) for selection=random.
   targets: AliasTarget[];
   // null = compute the announced /v1/models payload automatically from
-  // targets + rules at listing time. A non-null payload is sparse — any
-  // sub-field the operator did not override falls back to the automatic
-  // computation.
+  // targets + rules at listing time. A non-null payload replaces the
+  // computed value at the top-level sub-block boundary (`limits` /
+  // `chat`); omitted sub-blocks fall back to the computation but a
+  // present sub-block wins wholesale (it does not merge per-leaf).
   announcedMetadata: AnnouncedMetadata | null;
   sortOrder: number;
   createdAt: string;

From 9d7bc3e8ae9f404db3b1d6225318a4b5882c39d2 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 05:45:33 +0800
Subject: [PATCH 096/170] refactor(aliases): extract shared unionEndpoints
 helper

Both the catalog merge (one public id surfaced from multiple upstreams)
and the alias listing (UNION across an alias's available targets) used
the same key-present-anywhere + sub-cap OR shape with different
signatures. Consolidate into one variadic helper in
data-plane/providers/endpoint-union.ts; binary callers pass a 2-element
list.
---
 .../src/data-plane/models/alias-listing.ts    | 22 ++-----------------
 .../data-plane/providers/endpoint-union.ts    | 21 ++++++++++++++++++
 .../src/data-plane/providers/registry.ts      | 17 +++-----------
 3 files changed, 26 insertions(+), 34 deletions(-)
 create mode 100644 packages/gateway/src/data-plane/providers/endpoint-union.ts

diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index d2bbecb90..981389a5b 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -27,8 +27,9 @@
 // at the `loadModels` merge step.
 
 import type { ModelAliasRecord } from '../../repo/types.ts';
+import { unionEndpoints } from '../providers/endpoint-union.ts';
 import { composeAliasDisplayName } from '@floway-dev/protocols/common';
-import type { AliasTarget, AnnouncedMetadata, ChatAliasRules, ChatModelInfo, ModelEndpointKey, ModelEndpoints, PublicModel, PublicModelAliasedFrom, PublicModelLimits } from '@floway-dev/protocols/common';
+import type { AliasTarget, AnnouncedMetadata, ChatAliasRules, ChatModelInfo, PublicModel, PublicModelAliasedFrom, PublicModelLimits } from '@floway-dev/protocols/common';
 import type { ResolvedModel } from '@floway-dev/provider';
 
 export interface ListedAliasInputs {
@@ -51,25 +52,6 @@ const intersectArrays = <T>(arrays: readonly (readonly T[])[]): T[] => {
   return head.filter(value => tail.every(other => other.includes(value)));
 };
 
-// Endpoint union across the alias's available targets: a key appears in
-// the alias's advertised endpoints whenever ANY target serves it. Sub-cap
-// flags inside a key are ORed conservatively — present in the result iff
-// any contributing target declares them. The pool-narrowing at request
-// time picks among the targets that actually serve the inbound endpoint,
-// so every endpoint advertised here is reachable through at least one
-// target.
-const unionEndpoints = (endpointsList: readonly ModelEndpoints[]): ModelEndpoints => {
-  const result: ModelEndpoints = {};
-  for (const endpoints of endpointsList) {
-    for (const key of Object.keys(endpoints) as ModelEndpointKey[]) {
-      const incoming = endpoints[key];
-      if (incoming === undefined) continue;
-      result[key] = { ...result[key], ...incoming };
-    }
-  }
-  return result;
-};
-
 // Apply the rule-driven downgrade: a target with a pinned rule reports
 // the corresponding catalog sub-field as unsupported (= undefined) for
 // the purposes of intersection. Fields the rule doesn't touch pass
diff --git a/packages/gateway/src/data-plane/providers/endpoint-union.ts b/packages/gateway/src/data-plane/providers/endpoint-union.ts
new file mode 100644
index 000000000..5b968deed
--- /dev/null
+++ b/packages/gateway/src/data-plane/providers/endpoint-union.ts
@@ -0,0 +1,21 @@
+import type { ModelEndpointKey, ModelEndpoints } from '@floway-dev/protocols/common';
+
+// Union N endpoint maps: a key appears in the result whenever ANY input
+// declares it, and its sub-capability flags are OR-ed so a sub-cap
+// advertised by any contributor survives. Used at two layers — the catalog
+// merge collapses multiple upstream surfaces of the same public id into one
+// row, and the alias listing advertises the union across an alias's
+// available targets. The request-time pool narrows to whatever subset
+// actually serves the inbound endpoint, so every endpoint surfaced through
+// the union remains reachable.
+export const unionEndpoints = (endpointsList: readonly ModelEndpoints[]): ModelEndpoints => {
+  const result: ModelEndpoints = {};
+  for (const endpoints of endpointsList) {
+    for (const key of Object.keys(endpoints) as ModelEndpointKey[]) {
+      const incoming = endpoints[key];
+      if (incoming === undefined) continue;
+      result[key] = { ...result[key], ...incoming };
+    }
+  }
+  return result;
+};
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index 5a6b26cb2..17317e3d8 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -1,8 +1,9 @@
+import { unionEndpoints } from './endpoint-union.ts';
 import { fetchUpstreamModelsCached } from './models-cache.ts';
 import { getRepo } from '../../repo/index.ts';
 import { type AliasResolution, resolveAlias } from '../model-aliases/resolve.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
-import { type ModelEndpointKey, type ModelEndpoints, kindForEndpoints } from '@floway-dev/protocols/common';
+import { type ModelEndpoints, kindForEndpoints } from '@floway-dev/protocols/common';
 import type { InternalModel, ModelProviderInstance, ProviderModelRecord, ResolvedModel, Fetcher, UpstreamModel, UpstreamProviderKind, UpstreamRecord } from '@floway-dev/provider';
 import { createAzureProvider } from '@floway-dev/provider-azure';
 import { createClaudeCodeProvider } from '@floway-dev/provider-claude-code';
@@ -80,18 +81,6 @@ export const listModelProviders = async (
   return providers;
 };
 
-// Merge two capability maps: a key present in either side is present in the
-// result, and its sub-capability flags are OR-ed so a sub-cap advertised by
-// either provider survives.
-const unionEndpoints = (a: ModelEndpoints, b: ModelEndpoints): ModelEndpoints => {
-  const result: ModelEndpoints = { ...a };
-  for (const key of Object.keys(b) as ModelEndpointKey[]) {
-    const merged = { ...result[key], ...b[key] };
-    (result as Record<ModelEndpointKey, object>)[key] = merged;
-  }
-  return result;
-};
-
 const resolvedFromUpstreamModel = (upstreamModel: UpstreamModel, record: ProviderModelRecord): ResolvedModel => {
   const { providerData: _providerData, endpoints, ...internal } = upstreamModel;
   return {
@@ -127,7 +116,7 @@ const mergeIntoCatalog = (
     byId.set(publicId, resolvedFromUpstreamModel(surfacedModel, record));
     return;
   }
-  const endpoints = unionEndpoints(existing.endpoints, surfacedModel.endpoints);
+  const endpoints = unionEndpoints([existing.endpoints, surfacedModel.endpoints]);
   byId.set(publicId, {
     ...existing,
     endpoints,

From 731414d6d72e5f5ea80a849d17e61cb9094e9783 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 05:50:44 +0800
Subject: [PATCH 097/170] refactor(aliases): tighten
 ModelResolution.aliasResolution to nullable

The shape was `aliasResolution?: AliasResolution` with both producers
doing the sparse-spread `...(aliasResolution !== null ? { aliasResolution } : {})`
to dodge exactOptionalPropertyTypes. The resolver already returns
`AliasResolution | null`; align the carriers so callers receive a single
`AliasResolution | null` field and switch their guards to `!== null`.

Touches: registry.ModelResolution, candidates.enumerateProviderCandidates,
all four chat serves (messages, gemini, chat-completions, responses
serve-prep), passthrough-serve, plus the per-protocol serve/http test
mocks that mirrored the same conditional spread.
---
 .../gateway/src/data-plane/chat/chat-completions/http_test.ts | 2 +-
 .../gateway/src/data-plane/chat/chat-completions/serve.ts     | 2 +-
 .../src/data-plane/chat/chat-completions/serve_test.ts        | 2 +-
 packages/gateway/src/data-plane/chat/gemini/http_test.ts      | 2 +-
 packages/gateway/src/data-plane/chat/gemini/serve.ts          | 4 ++--
 packages/gateway/src/data-plane/chat/gemini/serve_test.ts     | 2 +-
 packages/gateway/src/data-plane/chat/messages/http_test.ts    | 2 +-
 packages/gateway/src/data-plane/chat/messages/serve.ts        | 4 ++--
 packages/gateway/src/data-plane/chat/messages/serve_test.ts   | 2 +-
 packages/gateway/src/data-plane/chat/responses/http_test.ts   | 2 +-
 packages/gateway/src/data-plane/chat/responses/serve-prep.ts  | 2 +-
 packages/gateway/src/data-plane/chat/responses/serve_test.ts  | 2 +-
 packages/gateway/src/data-plane/chat/shared/candidates.ts     | 4 ++--
 packages/gateway/src/data-plane/providers/registry.ts         | 4 ++--
 packages/gateway/src/data-plane/shared/passthrough-serve.ts   | 2 +-
 15 files changed, 19 insertions(+), 19 deletions(-)

diff --git a/packages/gateway/src/data-plane/chat/chat-completions/http_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/http_test.ts
index 720fb53ac..5d0302c72 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/http_test.ts
@@ -19,7 +19,7 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
     enumerateProviderCandidates: vi.fn(async () => {
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('http_test: no candidates enqueued');
-      return next;
+      return { ...next, failedUpstreams: [], aliasResolution: null };
     }),
   };
 });
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
index 65251e277..17c70cc19 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
@@ -39,7 +39,7 @@ export const chatCompletionsServe = {
       throw error;
     }
     const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
-    if (aliasResolution) {
+    if (aliasResolution !== null) {
       payload.model = aliasResolution.targetModelId;
       applyChatRulesToChatCompletions(payload, aliasResolution.rules);
       ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
index 15ad0c731..79d4c6cfb 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
@@ -38,7 +38,7 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
       lastCandidatesCall.model = effectiveModel;
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('serve_test: no candidates enqueued');
-      return { ...next, failedUpstreams: [], ...(aliasResolution !== null ? { aliasResolution } : {}) };
+      return { ...next, failedUpstreams: [], aliasResolution };
     }),
   };
 });
diff --git a/packages/gateway/src/data-plane/chat/gemini/http_test.ts b/packages/gateway/src/data-plane/chat/gemini/http_test.ts
index 276d3251a..1a14f742d 100644
--- a/packages/gateway/src/data-plane/chat/gemini/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/http_test.ts
@@ -20,7 +20,7 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
     enumerateProviderCandidates: vi.fn(async () => {
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('http_test: no candidates enqueued');
-      return next;
+      return { ...next, failedUpstreams: [], aliasResolution: null };
     }),
   };
 });
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts
index 3a949ec27..3e15fdefb 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts
@@ -50,7 +50,7 @@ export const geminiServe = {
     }
     const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
     const model = aliasResolution?.targetModelId ?? args.model;
-    if (aliasResolution) {
+    if (aliasResolution !== null) {
       applyChatRulesToGemini(payload, aliasResolution.rules);
       ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
     }
@@ -93,7 +93,7 @@ export const geminiServe = {
     }
     const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
     const model = aliasResolution?.targetModelId ?? args.model;
-    if (aliasResolution) {
+    if (aliasResolution !== null) {
       applyChatRulesToGemini(payload, aliasResolution.rules);
       ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
     }
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
index 9482d6808..8d2d4156d 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
@@ -37,7 +37,7 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
       lastCandidatesCall.model = effectiveModel;
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('serve_test: no candidates enqueued');
-      return { ...next, failedUpstreams: [], ...(aliasResolution !== null ? { aliasResolution } : {}) };
+      return { ...next, failedUpstreams: [], aliasResolution };
     }),
   };
 });
diff --git a/packages/gateway/src/data-plane/chat/messages/http_test.ts b/packages/gateway/src/data-plane/chat/messages/http_test.ts
index 27f403465..82f3581a0 100644
--- a/packages/gateway/src/data-plane/chat/messages/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/http_test.ts
@@ -19,7 +19,7 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
     enumerateProviderCandidates: vi.fn(async () => {
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('http_test: no candidates enqueued');
-      return next;
+      return { ...next, failedUpstreams: [], aliasResolution: null };
     }),
   };
 });
diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts
index 3c99f718d..fab48ce1f 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve.ts
@@ -46,7 +46,7 @@ export const messagesServe = {
       throw error;
     }
     const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
-    if (aliasResolution) {
+    if (aliasResolution !== null) {
       payload.model = aliasResolution.targetModelId;
       applyChatRulesToMessages(payload, aliasResolution.rules);
       ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
@@ -86,7 +86,7 @@ export const messagesServe = {
       throw error;
     }
     const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
-    if (aliasResolution) {
+    if (aliasResolution !== null) {
       payload.model = aliasResolution.targetModelId;
       applyChatRulesToMessages(payload, aliasResolution.rules);
       ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
index cf8f80529..9f78a83e0 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
@@ -35,7 +35,7 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
       lastCandidatesCall.model = effectiveModel;
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('serve_test: no candidates enqueued');
-      return { ...next, failedUpstreams: [], ...(aliasResolution !== null ? { aliasResolution } : {}) };
+      return { ...next, failedUpstreams: [], aliasResolution };
     }),
   };
 });
diff --git a/packages/gateway/src/data-plane/chat/responses/http_test.ts b/packages/gateway/src/data-plane/chat/responses/http_test.ts
index 36beb3894..460541ad3 100644
--- a/packages/gateway/src/data-plane/chat/responses/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/http_test.ts
@@ -22,7 +22,7 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
     enumerateProviderCandidates: vi.fn(async (_args: { model: string }) => {
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('http_test: no candidates enqueued');
-      return next;
+      return { ...next, failedUpstreams: [], aliasResolution: null };
     }),
   };
 });
diff --git a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
index 5339a585c..febd35858 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
@@ -106,7 +106,7 @@ export const prepareResponsesServePlan = async (args: {
     throw error;
   }
   const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
-  if (aliasResolution) {
+  if (aliasResolution !== null) {
     prepared.model = aliasResolution.targetModelId;
     applyChatRulesToResponses(prepared, aliasResolution.rules);
     ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
index 535dae573..58dead562 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
@@ -45,7 +45,7 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
       lastCandidatesCall.model = effectiveModel;
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('serve_test: no candidates enqueued');
-      return { ...next, failedUpstreams: [], ...(aliasResolution !== null ? { aliasResolution } : {}) };
+      return { ...next, failedUpstreams: [], aliasResolution };
     }),
   };
 });
diff --git a/packages/gateway/src/data-plane/chat/shared/candidates.ts b/packages/gateway/src/data-plane/chat/shared/candidates.ts
index 8c67d4872..248166f3c 100644
--- a/packages/gateway/src/data-plane/chat/shared/candidates.ts
+++ b/packages/gateway/src/data-plane/chat/shared/candidates.ts
@@ -38,7 +38,7 @@ export const enumerateProviderCandidates = async ({
   readonly candidates: readonly ChatCandidate[];
   readonly sawModel: boolean;
   readonly failedUpstreams: readonly string[];
-  readonly aliasResolution?: AliasResolution;
+  readonly aliasResolution: AliasResolution | null;
 }> => {
   const fetcherForUpstream = await createPerRequestFetcher(currentColo);
   const providers = await listModelProviders(upstreamIds);
@@ -87,6 +87,6 @@ export const enumerateProviderCandidates = async ({
     candidates,
     sawModel,
     failedUpstreams,
-    ...(aliasResolution !== null ? { aliasResolution } : {}),
+    aliasResolution,
   };
 };
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index 17317e3d8..32a01345b 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -272,7 +272,7 @@ interface ModelResolution {
   // otherwise. `AliasNoTargetAvailableError` is thrown out of
   // `resolveModelForRequest` itself when the alias exists but has no
   // routable target, and is caught at each protocol's serve seam.
-  aliasResolution?: AliasResolution;
+  aliasResolution: AliasResolution | null;
 }
 
 export interface ProviderModelResolution {
@@ -393,7 +393,7 @@ export const resolveModelForRequest = async (
   return {
     matches: resolutions.map(r => r.resolved),
     failedUpstreams,
-    ...(aliasResolution !== null ? { aliasResolution } : {}),
+    aliasResolution,
   };
 };
 
diff --git a/packages/gateway/src/data-plane/shared/passthrough-serve.ts b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
index 19f172cc4..ace58402c 100644
--- a/packages/gateway/src/data-plane/shared/passthrough-serve.ts
+++ b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
@@ -152,7 +152,7 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
       throw e;
     }
     const { matches, failedUpstreams, aliasResolution } = resolution;
-    if (aliasResolution) ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
+    if (aliasResolution !== null) ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
     if (matches.length === 0) {
       ctx.dump?.error('gateway');
       return passthroughApiError(c, appendFailedUpstreams(`Model ${model} is not available on any configured upstream.`, failedUpstreams), 404);

From 56eba8b997bb9fa6af249281c61367d20da1df8a Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 05:51:20 +0800
Subject: [PATCH 098/170] cleanup(responses): drop redundant runtime guards in
 responsesAttempt.invoke
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both guards (compact+non-responses targetApi; compact+snapshotMode
override) are unreachable from the single call site in serve.compact
— pickTarget filters to `targetApi='responses'` and snapshotMode is
never passed. The guards were belt-and-suspenders defenses; the invariants
they encode live in the call site by construction.
---
 .../gateway/src/data-plane/chat/responses/attempt.ts | 12 ------------
 1 file changed, 12 deletions(-)

diff --git a/packages/gateway/src/data-plane/chat/responses/attempt.ts b/packages/gateway/src/data-plane/chat/responses/attempt.ts
index 113e87bc4..37db097f3 100644
--- a/packages/gateway/src/data-plane/chat/responses/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/responses/attempt.ts
@@ -50,18 +50,6 @@ export interface ResponsesAttemptInvokeArgs {
 export const responsesAttempt = {
   invoke: async (args: ResponsesAttemptInvokeArgs): Promise<ResponsesAttemptResult> => {
     const { payload, action, ctx, store, candidate, headers, snapshotMode: snapshotModeOverride } = args;
-    // Read the caller's intent `action` (NOT `invocation.action`) — the guard
-    // runs pre-chain, before any interceptor can flip the value.
-    if (action === 'compact' && candidate.targetApi !== 'responses') {
-      throw new Error(`responsesAttempt.invoke(action='compact') requires targetApi='responses', got '${candidate.targetApi}'`);
-    }
-    // Compact always replaces history wholesale; an override would be a
-    // contract violation. Only `serve.compact` reaches this branch today
-    // and it never passes one, but pin the invariant so a future caller
-    // that does pass one fails loudly instead of silently overwriting.
-    if (action === 'compact' && snapshotModeOverride !== undefined) {
-      throw new Error('responsesAttempt.invoke: snapshotMode override is not supported in the compact branch — compact always replaces');
-    }
     // Rewrite + privatePayload seed + assistant-content normalization all run
     // BEFORE the interceptor chain so source interceptors — most importantly
     // the web-search server-tool shim — see fully inline-expanded input items

From ddddd94ad7e6cb2a8711d58a42e619530b4def66 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 05:52:58 +0800
Subject: [PATCH 099/170] cleanup(aliases): collapse repeated alias-pipeline
 narrative to one canonical doc

The "alias resolution runs once, above prefix routing, target id feeds
verbatim into prefix routing, alias names never re-enter" narrative was
restated at four call sites. Keep the canonical version on resolve.ts
(the source) and reduce each restatement to a one-line pointer. Local
context that's specific to each caller (e.g. why a given callsite wraps
pickTarget into endpointAccepts) stays in place.
---
 .../src/data-plane/chat/shared/candidates.ts  | 20 +++++++------------
 .../src/data-plane/providers/registry.ts      |  6 +-----
 .../data-plane/shared/passthrough-serve.ts    |  8 +++-----
 3 files changed, 11 insertions(+), 23 deletions(-)

diff --git a/packages/gateway/src/data-plane/chat/shared/candidates.ts b/packages/gateway/src/data-plane/chat/shared/candidates.ts
index 248166f3c..ac3420a15 100644
--- a/packages/gateway/src/data-plane/chat/shared/candidates.ts
+++ b/packages/gateway/src/data-plane/chat/shared/candidates.ts
@@ -15,10 +15,9 @@ export type ChatCandidate = ProviderCandidate;
 // "model is missing entirely" failure from "model exists but does not
 // expose the endpoint this source needs", plus the names of upstreams
 // whose catalog fetch rejected this round so the caller's failure
-// renderer can surface them parenthetically. Alias resolution runs inside
-// this entry — if the inbound id is an alias, the resolution is returned
-// on `aliasResolution` so the caller can overlay rules onto the IR and
-// stage the `x-floway-alias` response header.
+// renderer can surface them parenthetically. See resolve.ts for the
+// alias-resolves-once-above-prefix-routing contract; this entry runs it
+// and returns the resolution on `aliasResolution`.
 export const enumerateProviderCandidates = async ({
   upstreamIds, model, pickTarget, scheduler, currentColo,
 }: {
@@ -43,15 +42,10 @@ export const enumerateProviderCandidates = async ({
   const fetcherForUpstream = await createPerRequestFetcher(currentColo);
   const providers = await listModelProviders(upstreamIds);
 
-  // Alias resolution runs above prefix routing so every data-plane endpoint
-  // sees the same alias surface. The target id is fed verbatim into prefix
-  // routing; alias names never re-enter the alias layer.
-  // `AliasNoTargetAvailableError` propagates so the chat serve's catch maps
-  // it to its protocol-native 404. The endpoint predicate piggybacks on
-  // `pickTarget` so the resolver's pool narrows to targets whose binding
-  // exposes one of the chat surfaces the source serve actually wants —
-  // first-available / random pick from a set the prefix router can serve
-  // end-to-end.
+  // See resolve.ts for the alias-resolves-once-above-prefix-routing contract.
+  // The endpoint predicate piggybacks on `pickTarget` so the resolver's pool
+  // narrows to targets whose binding exposes one of the chat surfaces the
+  // source serve actually wants.
   const aliasResolution = await resolveAlias({
     modelName: model,
     providers,
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index 32a01345b..dfa068f63 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -373,11 +373,7 @@ export const resolveModelForRequest = async (
     throw new Error(NO_UPSTREAM_CONFIGURED_MESSAGE);
   }
 
-  // Alias resolution runs above prefix routing so every data-plane endpoint
-  // sees the same alias surface. The target id is then fed verbatim back
-  // into prefix routing; alias names never re-enter the alias layer.
-  // `AliasNoTargetAvailableError` propagates so the protocol's catch maps
-  // it to its native 404.
+  // See resolve.ts for the alias-resolves-once-above-prefix-routing contract.
   const aliasResolution = await resolveAlias({
     modelName: modelId,
     providers,
diff --git a/packages/gateway/src/data-plane/shared/passthrough-serve.ts b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
index ace58402c..582f97529 100644
--- a/packages/gateway/src/data-plane/shared/passthrough-serve.ts
+++ b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
@@ -136,11 +136,9 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
     // the inbound public id. Iteration order follows configured sort_order
     // across upstreams, with the unprefixed interpretation pushed before the
     // prefixed one within a single upstream. The first match whose binding
-    // satisfies the endpoint capability wins. `resolveModelForRequest` also
-    // owns alias resolution: when the inbound id is an alias, the returned
-    // `aliasResolution` carries the original alias name (for the response
-    // header) and the targets feeding `matches` are the alias's resolved
-    // target id; `AliasNoTargetAvailableError` propagates as the 404 below.
+    // satisfies the endpoint capability wins. See resolve.ts for the
+    // alias-resolves-once-above-prefix-routing contract; `resolveModelForRequest`
+    // runs it and surfaces the result on `aliasResolution`.
     let resolution;
     try {
       resolution = await resolveModelForRequest(model, ctx.upstreamIds, fetcherForUpstream, ctx.backgroundScheduler, endpoints => endpoints[endpointKey] !== undefined);

From 1982e1f1b22dbd99855f28cd2ec30dd1f8e43956 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 05:53:30 +0800
Subject: [PATCH 100/170] refactor(aliases/web): collapse useModels factories
 sharing one core
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Both stores were 18-line near-identical factories diverging only on the
`?aliases=false` query and the ref singletons. Lift the shared body into
`makeStore({ includeAliases })`; the exported `useModelsStore` and
`useRawModelsStore` are thin wrappers so existing call sites are
untouched. Each kind keeps its own ref singleton — the makeStore closure
owns one set per invocation — so the two caches stay separate.
---
 apps/web/src/composables/useModels.ts | 79 +++++++++++----------------
 1 file changed, 32 insertions(+), 47 deletions(-)

diff --git a/apps/web/src/composables/useModels.ts b/apps/web/src/composables/useModels.ts
index 1b82f5c7e..7f2f0abc8 100644
--- a/apps/web/src/composables/useModels.ts
+++ b/apps/web/src/composables/useModels.ts
@@ -8,53 +8,38 @@ interface ModelsResponse {
   data: ControlPlaneModel[];
 }
 
-// Default `/api/models` view: real models + synthesised alias entries
-// merged into one list. Backs the /dashboard/models tab and any surface
-// that wants the gateway's externally-visible catalog.
-const models = ref<ControlPlaneModel[] | null>(null);
-const loading = ref(false);
-const error = ref<string | null>(null);
-
-export const useModelsStore = () => {
-  const api = useApi();
-
-  const load = async () => {
-    loading.value = true;
-    error.value = null;
-    const { data, error: err } = await callApi<ModelsResponse>(() => api.api.models.$get({ query: {} }));
-    loading.value = false;
-    if (err) {
-      error.value = err.message;
-      return;
-    }
-    models.value = data?.data ?? [];
+// Two stores share this core: the default `/api/models` view (real models
+// + synthesised alias entries merged into one list) backs the dashboard
+// models tab and surfaces that want the externally-visible catalog; the
+// raw view (`?aliases=false`) backs the alias settings surfaces (edit
+// dialog target combobox, shadow detection, kind-mismatch warning) that
+// need to see the underlying catalog without the alias-overwrites-real-id
+// collapse the wire shape applies. The two singletons live separately so
+// each kind has its own cache.
+const makeStore = (params: { includeAliases: boolean }) => {
+  const models = ref<ControlPlaneModel[] | null>(null);
+  const loading = ref(false);
+  const error = ref<string | null>(null);
+
+  return () => {
+    const api = useApi();
+
+    const load = async () => {
+      loading.value = true;
+      error.value = null;
+      const query = params.includeAliases ? {} : { aliases: 'false' as const };
+      const { data, error: err } = await callApi<ModelsResponse>(() => api.api.models.$get({ query }));
+      loading.value = false;
+      if (err) {
+        error.value = err.message;
+        return;
+      }
+      models.value = data?.data ?? [];
+    };
+
+    return { models, loading, error, load };
   };
-
-  return { models, loading, error, load };
 };
 
-// Raw catalog view: real models only, no alias merging. Backs the alias
-// settings surfaces (edit dialog target combobox, shadow detection,
-// kind-mismatch warning) — those need to see the underlying catalog
-// without the alias-overwrites-real-id collapse the wire-shape applies.
-const rawModels = ref<ControlPlaneModel[] | null>(null);
-const rawLoading = ref(false);
-const rawError = ref<string | null>(null);
-
-export const useRawModelsStore = () => {
-  const api = useApi();
-
-  const load = async () => {
-    rawLoading.value = true;
-    rawError.value = null;
-    const { data, error: err } = await callApi<ModelsResponse>(() => api.api.models.$get({ query: { aliases: 'false' } }));
-    rawLoading.value = false;
-    if (err) {
-      rawError.value = err.message;
-      return;
-    }
-    rawModels.value = data?.data ?? [];
-  };
-
-  return { models: rawModels, loading: rawLoading, error: rawError, load };
-};
+export const useModelsStore = makeStore({ includeAliases: true });
+export const useRawModelsStore = makeStore({ includeAliases: false });

From f3baa595e4df8c95325342b8b0e3a1aa0217c9b8 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 05:54:53 +0800
Subject: [PATCH 101/170] refactor(aliases): make PublicModel.endpoints
 required
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Real models always set `endpoints` via toPublicModel. The only branch
that left it undefined was the alias synthesis when no target was
currently available — give that branch an explicit `{}` and tighten the
DTO field to required. The empty-map case stays distinguishable from a
declared-but-empty surface because every reachable target contributes at
least one key; the case where a caller actually sees `{}` is the "alias
configured but no target routable right now" view, which the dashboard
already renders distinctly.
---
 .../src/data-plane/models/alias-listing.ts    | 24 +++++++++----------
 .../data-plane/models/alias-listing_test.ts   |  4 ++--
 packages/protocols/src/common/models.ts       |  5 ++--
 3 files changed, 17 insertions(+), 16 deletions(-)

diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index 981389a5b..c38d80287 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -213,6 +213,17 @@ const synthesizeOne = (alias: ModelAliasRecord, realModels: readonly ResolvedMod
     ? mergeWithOverride(computed, alias.announcedMetadata)
     : computed;
 
+  // Endpoints follow the available-targets UNION, not an intersection —
+  // every endpoint reachable through ANY target is advertised, because
+  // the resolver's request-time pool narrows to targets that serve the
+  // inbound endpoint and the first-available / random pick happens
+  // within that narrowed pool. Operator can't override endpoints (they
+  // follow the target set, not a stored override). Empty (`{}`) when no
+  // target is currently available — the field stays present.
+  const endpoints = availableTargets.length > 0
+    ? unionEndpoints(availableTargets.map(({ real }) => real.endpoints))
+    : {};
+
   const entry: PublicModel = {
     id: alias.name,
     object: 'model',
@@ -220,22 +231,11 @@ const synthesizeOne = (alias: ModelAliasRecord, realModels: readonly ResolvedMod
     display_name: displayName,
     limits,
     kind: alias.kind,
+    endpoints,
     aliasedFrom: buildAliasedFrom(alias),
   };
   if (chat !== undefined) entry.chat = chat;
 
-  // Endpoints follow the available-targets UNION, not an intersection —
-  // every endpoint reachable through ANY target is advertised, because
-  // the resolver's request-time pool narrows to targets that serve the
-  // inbound endpoint and the first-available / random pick happens
-  // within that narrowed pool. Operator can't override endpoints (they
-  // follow the target set, not a stored override). Absent when no
-  // target is currently available, same shape as the chat block.
-  if (availableTargets.length > 0) {
-    const endpoints = unionEndpoints(availableTargets.map(({ real }) => real.endpoints));
-    if (Object.keys(endpoints).length > 0) entry.endpoints = endpoints;
-  }
-
   // Single-target chat pricing rides along when available — the resolver
   // will hit that target, so the catalog can publish its rate verbatim.
   if (availableTargets.length === 1) {
diff --git a/packages/gateway/src/data-plane/models/alias-listing_test.ts b/packages/gateway/src/data-plane/models/alias-listing_test.ts
index 0c346e10e..69e071840 100644
--- a/packages/gateway/src/data-plane/models/alias-listing_test.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing_test.ts
@@ -336,12 +336,12 @@ describe('synthesizeListedAliases', () => {
     expect(entry.endpoints).toEqual({ imagesGenerations: {}, imagesEdits: {} });
   });
 
-  test('endpoints is absent on the entry when no target is currently available', () => {
+  test('endpoints is an empty map on the entry when no target is currently available', () => {
     const aliases = [aliasFixture({
       name: 'ghost',
       targets: [{ target_model_id: 'missing', rules: {} }],
     })];
     const [entry] = synthesizeListedAliases({ aliases, realModels: [] });
-    expect(entry.endpoints).toBeUndefined();
+    expect(entry.endpoints).toEqual({});
   });
 });
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index 31478d183..bf8b9fe0c 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -165,8 +165,9 @@ export interface PublicModel {
   // currently-available target's endpoint map — at request time the
   // resolver narrows the pool to targets that serve the inbound endpoint,
   // so any endpoint advertised here is reachable through at least one
-  // target.
-  endpoints?: ModelEndpoints;
+  // target. The map is empty (`{}`) when an alias has no currently-available
+  // target; never absent.
+  endpoints: ModelEndpoints;
   cost?: ModelPricing;
   chat?: ChatModelInfo;
   // Present only on entries the gateway synthesized from an operator-defined

From 8df50e1bad323a6d2629abcefca9870da6050368 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 06:03:33 +0800
Subject: [PATCH 102/170] refactor(codex): remove hardcoded codex-auto-review
 alias; let the alias table own it
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The hardcoded `codex-auto-review → gpt-5.4` rewrite predates the operator-
managed alias table seeded by migration 0046. Now that the alias mechanism
runs once above prefix routing for every data-plane endpoint, the hardcode
is redundant — and worse, it ran BEFORE the resolver saw the inbound
model, so the seeded alias never triggered and operators could not point
the alias at a different target (copilot/gpt-5.4, azure/gpt-5.4, ...).

Changes:
- Delete the `auto-review-alias.ts` constants module.
  `rewriteResponsesEntryModelAlias` was already removed at merge.
- Rewrite the codex catalog filter to consult `modelAliases.list()`:
  a slug survives when the registry advertises it OR when a visible alias
  of that name has at least one currently-routable target. The
  context-window resolver follows the same lookup — prefer the operator's
  announced metadata override, else fall back to the first available
  target's window (multi-target aliases pick first-available for
  determinism).
- Reinstate the dropped alias-rewrite tests in `responses/http_test.ts`,
  this time driving the rewrite through the alias resolver. Add a
  `resolveAlias` mock that mirrors the serve_test pattern so tests inject
  resolutions without standing up the routability registry.
- Update the codex routes_test that asserted `codex-auto-review` survives
  to seed the alias via the InMemoryRepo before the request.

provider-codex's pricing entry for `codex-auto-review` stays put — that
tags requests routed to codex with a known billing dimension, a
vendor-level concern independent of how the gateway routes the alias.
---
 .../data-plane/chat/responses/http_test.ts    | 116 +++++++++++++++++-
 .../src/data-plane/codex/auto-review-alias.ts |  12 --
 .../gateway/src/data-plane/codex/models.ts    |  66 ++++++++--
 .../src/data-plane/codex/routes_test.ts       |  28 +++--
 4 files changed, 185 insertions(+), 37 deletions(-)
 delete mode 100644 packages/gateway/src/data-plane/codex/auto-review-alias.ts

diff --git a/packages/gateway/src/data-plane/chat/responses/http_test.ts b/packages/gateway/src/data-plane/chat/responses/http_test.ts
index 460541ad3..215a6e1e0 100644
--- a/packages/gateway/src/data-plane/chat/responses/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/http_test.ts
@@ -8,21 +8,58 @@ import { InMemoryRepo } from '../../../repo/memory.ts';
 import type { ApiKey, StoredResponsesItem, User } from '../../../repo/types.ts';
 import type { ProviderCandidate } from '../shared/candidates.ts';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
-import type { ResponsesResult, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
+import type { ResponsesPayload, ResponsesResult, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
 import { directFetcher, type ProviderResponsesResult, type ResponsesAction, type UpstreamCallOptions } from '@floway-dev/provider';
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 // Mock the candidates seam so each test hands the http entry exactly the
-// provider candidates it wants. Mirrors the pattern from serve_test.ts.
+// provider candidates it wants. The mock mirrors `serve_test.ts` by
+// calling `resolveAlias` (itself mocked further down and fed from
+// `aliasResolutionQueue`) so a resolution queued by a test (e.g. for
+// `codex-auto-review`) rewrites the model
 const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
+// `lastSeenModel` captures the effective model id computed inside the mock
+// below: the alias target id when a resolution was returned, otherwise the
+// inbound model. Tests assert on it to confirm the alias drove the rewrite.
+const lastSeenModel: { value: string | null } = { value: null };
 vi.mock('../shared/candidates.ts', async importOriginal => {
   const original = await importOriginal<typeof import('../shared/candidates.ts')>();
+  const { resolveAlias } = await import('../../model-aliases/resolve.ts');
   return {
     ...original,
-    enumerateProviderCandidates: vi.fn(async (_args: { model: string }) => {
+    enumerateProviderCandidates: vi.fn(async (args: { model: string; scheduler: () => void }) => {
+      // Mirror the real entry's alias resolution so the rule-overlay test
+      // sees the resolved target id reach the candidates layer and the
+      // serve overlays rules from the returned `aliasResolution`. Tests
+      // queue a resolution via `aliasResolutionQueue` when they want one.
+      const aliasResolution = await resolveAlias({
+        modelName: args.model,
+        providers: [],
+        fetcherForUpstream: () => directFetcher,
+        scheduler: args.scheduler,
+        endpointAccepts: () => true,
+        repo: { getByName: () => Promise.resolve(null) } as never,
+      });
+      lastSeenModel.value = aliasResolution?.targetModelId ?? args.model;
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('http_test: no candidates enqueued');
-      return { ...next, failedUpstreams: [], aliasResolution: null };
+      return { ...next, failedUpstreams: [], aliasResolution };
+    }),
+  };
+});
+
+// Mock the alias resolver so the alias-rewrite tests can inject a resolution
+// without standing up the per-request fetcher + registry stack the routability
+// pool needs. Tests that don't enqueue a resolution see the default
+// (`null` = no alias matched).
+const aliasResolutionQueue: ({ targetModelId: string; rules: Record<string, unknown>; aliasName: string } | null)[] = [];
+vi.mock('../../model-aliases/resolve.ts', async importOriginal => {
+  const original = await importOriginal<typeof import('../../model-aliases/resolve.ts')>();
+  return {
+    ...original,
+    resolveAlias: vi.fn(async () => {
+      if (aliasResolutionQueue.length === 0) return null;
+      return aliasResolutionQueue.shift()!;
     }),
   };
 });
@@ -279,3 +316,74 @@ test('POST /v1/responses renders a routing-unavailable 400 when a forcing item n
   const body = await response.json() as { error: { code: string } };
   assertEquals(body.error.code, 'responses_item_routing_unavailable');
 });
+
+const seedCodexAutoReviewAliasResolution = (): void => {
+  aliasResolutionQueue.push({
+    targetModelId: 'gpt-5.4',
+    rules: { reasoning: { effort: 'low' } },
+    aliasName: 'codex-auto-review',
+  });
+};
+
+test('POST /v1/responses routes a codex-auto-review request through the seeded alias: rewrites the model to gpt-5.4 and stamps reasoning.effort=low', async () => {
+  installRepo();
+  seedCodexAutoReviewAliasResolution();
+  lastSeenModel.value = null;
+  const observedBodies: ResponsesPayload[] = [];
+  const callResponses = vi.fn(async (_model: unknown, body: unknown): Promise<ProviderResponsesResult> => {
+    observedBodies.push(body as ResponsesPayload);
+    return {
+      action: 'generate', ok: true,
+      events: makeProviderEvents([completedEvent()]),
+      modelKey: 'test-model-key',
+      headers: new Headers(),
+    };
+  });
+  queueCandidates([makeCandidate({ callResponses })]);
+
+  const response = await makeApp().request('/v1/responses', {
+    method: 'POST',
+    headers: new Headers({ 'content-type': 'application/json' }),
+    body: JSON.stringify({ model: 'codex-auto-review', input: 'hello', stream: true }),
+  });
+
+  assertEquals(response.status, 200);
+  assertEquals(lastSeenModel.value, 'gpt-5.4');
+  const observed = observedBodies[0];
+  if (observed === undefined) throw new Error('expected callResponses to receive a body');
+  assertEquals(observed.reasoning?.effort, 'low');
+});
+
+test('POST /v1/responses/compact routes a codex-auto-review request through the seeded alias: rewrites the model to gpt-5.4 and stamps reasoning.effort=low (the alias rule overlays the compact body too)', async () => {
+  installRepo();
+  seedCodexAutoReviewAliasResolution();
+  lastSeenModel.value = null;
+  const observedBodies: ResponsesPayload[] = [];
+  const compactionItem = { type: 'compaction' as const, id: 'cmp_1', encrypted_content: 'ENC' };
+  const compactionResult: ResponsesResult = {
+    ...makeResponsesResult(),
+    object: 'response.compaction',
+    output: [compactionItem] as unknown as ResponsesResult['output'],
+  };
+  const callResponses = vi.fn(async (_model: unknown, body: unknown, action: ResponsesAction): Promise<ProviderResponsesResult> => {
+    if (action !== 'compact') throw new Error(`expected compact, got ${action}`);
+    observedBodies.push(body as ResponsesPayload);
+    return { action: 'compact', ok: true, result: compactionResult, modelKey: 'test-model-key' };
+  });
+  queueCandidates([makeCandidate({ callResponses })]);
+
+  const response = await makeApp().request('/v1/responses/compact', {
+    method: 'POST',
+    headers: new Headers({ 'content-type': 'application/json' }),
+    body: JSON.stringify({
+      model: 'codex-auto-review',
+      input: [{ type: 'message', role: 'user', content: 'kept' }],
+    }),
+  });
+
+  assertEquals(response.status, 200);
+  assertEquals(lastSeenModel.value, 'gpt-5.4');
+  const observed = observedBodies[0];
+  if (observed === undefined) throw new Error('expected callResponses to receive a body');
+  assertEquals(observed.reasoning?.effort, 'low');
+});
diff --git a/packages/gateway/src/data-plane/codex/auto-review-alias.ts b/packages/gateway/src/data-plane/codex/auto-review-alias.ts
deleted file mode 100644
index 7d0668224..000000000
--- a/packages/gateway/src/data-plane/codex/auto-review-alias.ts
+++ /dev/null
@@ -1,12 +0,0 @@
-// codex's `codex-auto-review` is a model-id alias the CLI sends on auto-review
-// turns; it has no upstream of its own and gets rewritten request-side to
-// `gpt-5.4`. The constants live here so the responses HTTP entry (which
-// performs the rewrite) and the codex 1p catalog handler (which decides
-// whether to keep the alias slug in the catalog) can agree on the target
-// without one reaching into the other's module.
-//
-// Codex defines the alias as `DEFAULT_APPROVAL_REVIEW_PREFERRED_MODEL`:
-// https://github.com/openai/codex/blob/e7bffc5a20e92cbc64d6c16a1b257d0b2e4cd5df/codex-rs/model-provider/src/provider.rs#L73-L96
-
-export const CODEX_AUTO_REVIEW_ALIAS = 'codex-auto-review';
-export const CODEX_AUTO_REVIEW_TARGET = 'gpt-5.4';
diff --git a/packages/gateway/src/data-plane/codex/models.ts b/packages/gateway/src/data-plane/codex/models.ts
index d06b89d7b..fb1029f56 100644
--- a/packages/gateway/src/data-plane/codex/models.ts
+++ b/packages/gateway/src/data-plane/codex/models.ts
@@ -13,6 +13,14 @@
 // registry (see context-window.ts) so the codex client sees the same
 // limits the data plane will actually enforce.
 //
+// Operator-defined aliases participate in the same filter: a bundled
+// catalog slug that matches a visible alias survives whenever the alias
+// has at least one currently-routable target, and the context window the
+// alias advertises follows the alias's announced metadata (when the
+// operator overrode it) or the first available target (single-target
+// aliases collapse to "the target's window"; multi-target aliases pick
+// first-available for determinism — `random` doesn't fit a catalog).
+//
 // Latency: codex aborts the catalog fetch after 5 s
 // (`MODELS_REFRESH_TIMEOUT` in codex-rs/model-provider/src/models_endpoint.rs)
 // and silently falls back to its binary-bundled catalog on miss. The
@@ -25,16 +33,17 @@
 
 import type { Context } from 'hono';
 
-import { CODEX_AUTO_REVIEW_ALIAS, CODEX_AUTO_REVIEW_TARGET } from './auto-review-alias.ts';
 import { parseCodexVersion, resolveCodexCatalog, type CodexCatalog } from './catalog.ts';
 import { applyContextWindowFromRegistry, type ContextWindowResolver } from './context-window.ts';
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
+import { getRepo } from '../../repo/index.ts';
+import type { ModelAliasRecord } from '../../repo/types.ts';
 import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
 import { getCurrentColo } from '../../runtime/runtime-info.ts';
 import { getInternalModels } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
-import type { Fetcher } from '@floway-dev/provider';
+import type { InternalModel, Fetcher } from '@floway-dev/provider';
 
 // Five minutes is short enough to pick up an upstream catalog change within
 // one or two codex sessions but long enough that an active user only ever
@@ -50,34 +59,65 @@ const cacheKeyFor = (clientVersion: string, upstreamIds: readonly string[] | nul
   return new Request(`https://floway.invalid/codex-models?v=${encodeURIComponent(clientVersion)}&u=${encodeURIComponent(ids)}`);
 };
 
+// First currently-routable target's id, or null when no target resolves
+// against the registry. Single-target aliases collapse to the lone target;
+// multi-target aliases pick first-available (the order the operator
+// configured). This drives both the slug-survives filter and the
+// context-window resolver.
+const firstAvailableTargetId = (alias: ModelAliasRecord, registrySlugs: ReadonlySet<string>): string | null => {
+  for (const target of alias.targets) {
+    if (registrySlugs.has(target.target_model_id)) return target.target_model_id;
+  }
+  return null;
+};
+
 const computeCatalog = async (
   userAgent: string | undefined,
   upstreamIds: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
 ): Promise<CodexCatalog> => {
-  const [catalog, internalModels] = await Promise.all([
+  const [catalog, internalModels, aliases] = await Promise.all([
     resolveCodexCatalog(userAgent),
     getInternalModels(upstreamIds, fetcherForUpstream, scheduler),
+    getRepo().modelAliases.list(),
   ]);
   const slugContextWindow = new Map<string, number>();
+  const slugInternalModel = new Map<string, InternalModel>();
   for (const m of internalModels) {
+    slugInternalModel.set(m.id, m);
     const limit = m.limits.max_context_window_tokens;
     if (typeof limit === 'number') slugContextWindow.set(m.id, limit);
   }
   const registrySlugs = new Set(internalModels.map(m => m.id));
+
+  // Visible aliases whose first target currently resolves — keyed by alias
+  // name so the slug filter and the context-window resolver both look the
+  // same alias up in O(1).
+  const aliasBySlug = new Map<string, { alias: ModelAliasRecord; firstTargetId: string }>();
+  for (const alias of aliases) {
+    if (!alias.visibleInModelsList) continue;
+    const firstTargetId = firstAvailableTargetId(alias, registrySlugs);
+    if (firstTargetId === null) continue;
+    aliasBySlug.set(alias.name, { alias, firstTargetId });
+  }
+
   const filtered: CodexCatalog = {
-    models: catalog.models.filter(m => {
-      if (registrySlugs.has(m.slug)) return true;
-      if (m.slug === CODEX_AUTO_REVIEW_ALIAS && registrySlugs.has(CODEX_AUTO_REVIEW_TARGET)) return true;
-      return false;
-    }),
+    models: catalog.models.filter(m => registrySlugs.has(m.slug) || aliasBySlug.has(m.slug)),
+  };
+
+  // For an alias slug: prefer the operator's announced override, else the
+  // first available target's window. Falls back to the registry-side lookup
+  // for plain (non-alias) slugs.
+  const contextWindowOf: ContextWindowResolver = slug => {
+    const aliasEntry = aliasBySlug.get(slug);
+    if (aliasEntry !== undefined) {
+      const overridden = aliasEntry.alias.announcedMetadata?.limits?.max_context_window_tokens;
+      if (typeof overridden === 'number') return overridden;
+      return slugContextWindow.get(aliasEntry.firstTargetId) ?? null;
+    }
+    return slugContextWindow.get(slug) ?? null;
   };
-  // codex-auto-review has no upstream of its own and gets rewritten to
-  // CODEX_AUTO_REVIEW_TARGET at request time, so its catalog entry should
-  // advertise the target's actual window — bundled's value would otherwise
-  // leak the OpenAI 1p limits through the alias.
-  const contextWindowOf: ContextWindowResolver = slug => slugContextWindow.get(slug === CODEX_AUTO_REVIEW_ALIAS ? CODEX_AUTO_REVIEW_TARGET : slug) ?? null;
   return applyContextWindowFromRegistry(filtered, contextWindowOf);
 };
 
diff --git a/packages/gateway/src/data-plane/codex/routes_test.ts b/packages/gateway/src/data-plane/codex/routes_test.ts
index ecd0010f3..bcb9a2ec9 100644
--- a/packages/gateway/src/data-plane/codex/routes_test.ts
+++ b/packages/gateway/src/data-plane/codex/routes_test.ts
@@ -433,13 +433,25 @@ describe('codex 1p namespace', () => {
       const slugs = body.models.map(m => m.slug);
       // The bundled catalog ships with six slugs (gpt-5.5, gpt-5.4,
       // gpt-5.4-mini, gpt-5.3-codex, gpt-5.2, codex-auto-review). Registry
-      // here advertises only gpt-5.5, and codex-auto-review's target
-      // (gpt-5.4) is missing — so the response is just gpt-5.5.
+      // here advertises only gpt-5.5, and no alias is seeded — so the
+      // response is just gpt-5.5.
       expect(slugs).toEqual(['gpt-5.5']);
     });
 
-    it('keeps codex-auto-review when its alias target is in the registry, drops it otherwise, and reports the target window', async () => {
-      const { apiKey } = await setupAppTest();
+    it('keeps codex-auto-review when the seeded alias target is in the registry, drops it otherwise, and reports the target window', async () => {
+      const { apiKey, repo } = await setupAppTest();
+      await repo.modelAliases.insert({
+        name: 'codex-auto-review',
+        kind: 'chat',
+        selection: 'first-available',
+        displayName: 'Codex Auto Review',
+        visibleInModelsList: true,
+        targets: [{ target_model_id: 'gpt-5.4', rules: {} }],
+        announcedMetadata: null,
+        sortOrder: 0,
+        createdAt: '2026-01-01T00:00:00.000Z',
+        updatedAt: '2026-01-01T00:00:00.000Z',
+      });
       const app = buildCodexApp();
       const body = await withMockedFetch(
         copilotFetch([{ id: 'gpt-5.4', maxContextWindowTokens: 272000 }]),
@@ -455,10 +467,10 @@ describe('codex 1p namespace', () => {
       expect(slugs.has('gpt-5.4')).toBe(true);
       expect(slugs.has('codex-auto-review')).toBe(true);
       expect(slugs.has('gpt-5.5')).toBe(false);
-      // codex-auto-review has no registry entry of its own, but it gets
-      // rewritten to gpt-5.4 at request time, so its catalog row reports
-      // gpt-5.4's window — not the bundled 1000000 max that would advertise
-      // a tier the gateway cannot serve.
+      // codex-auto-review has no registry entry of its own; the alias's
+      // first available target is gpt-5.4, so its catalog row reports
+      // gpt-5.4's 272000 window — not the bundled 1000000 max that would
+      // advertise a tier the gateway cannot serve.
       const autoReview = body.models.find(m => m.slug === 'codex-auto-review');
       expect(autoReview?.context_window).toBe(272000);
       expect(autoReview?.max_context_window).toBe(272000);

From ce74a37393d14d122ad4fab1d11fe75b3257b512 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 06:13:49 +0800
Subject: [PATCH 103/170] cleanup(codex): drop dead slugInternalModel
 allocation in computeCatalog

`slugInternalModel` was allocated and populated in the
catalog-derivation loop but never read; a leftover from an intermediate
design that intended richer per-slug state. The final code only needs
`slugContextWindow` (and the alias-aware lookups built off `aliases`).
Remove the map, its only write site, and the now-unused `InternalModel`
import.
---
 packages/gateway/src/data-plane/codex/models.ts | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/packages/gateway/src/data-plane/codex/models.ts b/packages/gateway/src/data-plane/codex/models.ts
index fb1029f56..7f7f4c50c 100644
--- a/packages/gateway/src/data-plane/codex/models.ts
+++ b/packages/gateway/src/data-plane/codex/models.ts
@@ -43,7 +43,7 @@ import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
 import { getCurrentColo } from '../../runtime/runtime-info.ts';
 import { getInternalModels } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
-import type { InternalModel, Fetcher } from '@floway-dev/provider';
+import type { Fetcher } from '@floway-dev/provider';
 
 // Five minutes is short enough to pick up an upstream catalog change within
 // one or two codex sessions but long enough that an active user only ever
@@ -83,9 +83,7 @@ const computeCatalog = async (
     getRepo().modelAliases.list(),
   ]);
   const slugContextWindow = new Map<string, number>();
-  const slugInternalModel = new Map<string, InternalModel>();
   for (const m of internalModels) {
-    slugInternalModel.set(m.id, m);
     const limit = m.limits.max_context_window_tokens;
     if (typeof limit === 'number') slugContextWindow.set(m.id, limit);
   }

From 6a88cc03312147f8387c64da45ec8ee73f29c4dd Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 06:13:49 +0800
Subject: [PATCH 104/170] docs(aliases/tests): correct the alias-resolver mock
 docstrings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The mock factories in the chat serve_tests + the responses http_test
claimed the dynamic `await import('../../model-aliases/resolve.ts')`
"mirrored the real entry's alias resolution against the in-memory
repo." That's wrong — the same files also `vi.mock` the resolver
module, so the dynamic import resolves to the mocked function that
drains from `aliasResolutionQueue`. The tests exercise the serve's
downstream handling of an injected `AliasResolution`, not the
alias-table lookup. Restate to match what the mock actually does.
---
 .../data-plane/chat/chat-completions/serve_test.ts |  2 +-
 .../src/data-plane/chat/gemini/serve_test.ts       |  2 +-
 .../src/data-plane/chat/messages/serve_test.ts     |  2 +-
 .../src/data-plane/chat/responses/http_test.ts     | 14 ++++++++------
 .../src/data-plane/chat/responses/serve_test.ts    |  2 +-
 5 files changed, 12 insertions(+), 10 deletions(-)

diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
index 79d4c6cfb..644af50b8 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
@@ -22,7 +22,7 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
   return {
     ...original,
     enumerateProviderCandidates: vi.fn(async (args: { model: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
-      // Mirror the real entry's alias resolution so the rule-overlay test
+      // Drain a queued resolution from `aliasResolutionQueue` so the rule-overlay test
       // sees the resolved target id reach the candidates layer and the
       // serve overlays rules from the returned `aliasResolution`. Tests
       // queue the resolution via `aliasResolutionQueue`.
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
index 8d2d4156d..41b854315 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
@@ -21,7 +21,7 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
   return {
     ...original,
     enumerateProviderCandidates: vi.fn(async (args: { model: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
-      // Mirror the real entry's alias resolution so the rule-overlay test
+      // Drain a queued resolution from `aliasResolutionQueue` so the rule-overlay test
       // sees the resolved target id reach the candidates layer and the
       // serve overlays rules from the returned `aliasResolution`. Tests
       // queue the resolution via `aliasResolutionQueue`.
diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
index 9f78a83e0..1851751bb 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
@@ -19,7 +19,7 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
   return {
     ...original,
     enumerateProviderCandidates: vi.fn(async (args: { model: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
-      // Mirror the real entry's alias resolution so the rule-overlay test
+      // Drain a queued resolution from `aliasResolutionQueue` so the rule-overlay test
       // sees the resolved target id reach the candidates layer and the
       // serve overlays rules from the returned `aliasResolution`. Tests
       // queue the resolution via `aliasResolutionQueue`.
diff --git a/packages/gateway/src/data-plane/chat/responses/http_test.ts b/packages/gateway/src/data-plane/chat/responses/http_test.ts
index 215a6e1e0..faf41fddf 100644
--- a/packages/gateway/src/data-plane/chat/responses/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/http_test.ts
@@ -13,14 +13,16 @@ import { directFetcher, type ProviderResponsesResult, type ResponsesAction, type
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 // Mock the candidates seam so each test hands the http entry exactly the
-// provider candidates it wants. The mock mirrors `serve_test.ts` by
-// running the real `resolveAlias` against the in-memory repo so any
-// alias seeded by a test (e.g. `codex-auto-review`) rewrites the model
-// id and rule overlay reaches the upstream call.
+// provider candidates it wants. The mock drains a queued `AliasResolution`
+// from `aliasResolutionQueue` (set up by `installRepo` for the alias-
+// rewrite tests below) and forwards it on the candidates return, so the
+// serve's downstream alias-rewrite + rule-overlay path runs end-to-end
+// against an injected resolution without standing up the real catalog
+// stack.
 const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
 // `lastSeenModel` captures the effective model id the serve passes downstream
 // — the alias rewrite (if any) applied. Tests assert against this to confirm
-// the alias table drove the rewrite.
+// the alias mechanism drove the rewrite.
 const lastSeenModel: { value: string | null } = { value: null };
 vi.mock('../shared/candidates.ts', async importOriginal => {
   const original = await importOriginal<typeof import('../shared/candidates.ts')>();
@@ -28,7 +30,7 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
   return {
     ...original,
     enumerateProviderCandidates: vi.fn(async (args: { model: string; scheduler: () => void }) => {
-      // Mirror the real entry's alias resolution so the rule-overlay test
+      // Drain a queued resolution from `aliasResolutionQueue` (set up by
       // sees the resolved target id reach the candidates layer and the
       // serve overlays rules from the returned `aliasResolution`. Tests
       // queue a resolution via `aliasResolutionQueue` when they want one.
diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
index 58dead562..dd85e307e 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
@@ -29,7 +29,7 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
   return {
     ...original,
     enumerateProviderCandidates: vi.fn(async (args: { model: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
-      // Mirror the real entry's alias resolution so the rule-overlay test
+      // Drain a queued resolution from `aliasResolutionQueue` so the rule-overlay test
       // sees the resolved target id reach the candidates layer and the
       // serve overlays rules from the returned `aliasResolution`. Tests
       // queue the resolution via `aliasResolutionQueue`.

From 6354266dc35924cbdd8a1c86ea679dd444ef56c7 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 06:15:29 +0800
Subject: [PATCH 105/170] cleanup(aliases/tests): drop the broken inline
 comment in the candidates mock
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Round 4's docstring repair on the alias-resolver mock left this sister
inline comment inside the candidates mock truncated mid-sentence —
"set up by" then jumps to "sees the resolved target id reach...".
The outer block comment on the same mock already explains the drain
behavior in full, so delete the inline comment rather than reword it.
---
 packages/gateway/src/data-plane/chat/responses/http_test.ts | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/packages/gateway/src/data-plane/chat/responses/http_test.ts b/packages/gateway/src/data-plane/chat/responses/http_test.ts
index faf41fddf..e9144ca1d 100644
--- a/packages/gateway/src/data-plane/chat/responses/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/http_test.ts
@@ -30,10 +30,6 @@ vi.mock('../shared/candidates.ts', async importOriginal => {
   return {
     ...original,
     enumerateProviderCandidates: vi.fn(async (args: { model: string; scheduler: () => void }) => {
-      // Drain a queued resolution from `aliasResolutionQueue` (set up by
-      // sees the resolved target id reach the candidates layer and the
-      // serve overlays rules from the returned `aliasResolution`. Tests
-      // queue a resolution via `aliasResolutionQueue` when they want one.
       const aliasResolution = await resolveAlias({
         modelName: args.model,
         providers: [],

From 275e9a8fd6f02be4cffcec40139e4475d72b41a4 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 14:07:32 +0800
Subject: [PATCH 106/170] style(aliases/web): hint copy stays put + Switch
 label says Manual
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Switching override on/off now leaves the hint paragraph in place
(no v-if collapse → no vertical jump). The hint always explains
both halves: the auto computation (intersection across available
targets, rule-pinned sub-fields treated as unsupported) AND the
fact that switching to Manual lets the operator publish a custom
payload to /v1/models any time.

"Enable override" Switch label → "Manual". The mode is what the
operator is choosing between (Auto by default; Manual on toggle),
not an action they're enabling.
---
 .../src/components/alias-edit/AliasEditDialog.vue  | 11 ++++++-----
 .../components/alias-edit/AliasEditDialog_test.ts  | 14 +++++---------
 2 files changed, 11 insertions(+), 14 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index 806f50b7e..0b8fcab32 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -317,15 +317,16 @@ const KIND_OPTIONS: { value: AliasKind; label: string }[] = [
           </button>
           <label class="flex shrink-0 cursor-pointer items-center gap-2">
             <Switch :model-value="overrideEnabled" @update:model-value="v => setOverrideEnabled(v === true)" />
-            <span class="text-xs text-gray-400">Enable override</span>
+            <span class="text-xs text-gray-400">Manual</span>
           </label>
         </div>
 
         <div v-if="announcedSectionExpanded" id="announced-metadata-body" class="mt-4">
-          <p v-if="!overrideEnabled" class="mb-3 text-xs text-gray-500">
-            Read-only — the intersection across every currently-available
-            target, with any rule-pinned sub-field treated as unsupported.
-            Enable override to publish a different payload to <code class="font-mono">/v1/models</code>.
+          <p class="mb-3 text-xs text-gray-500">
+            Defaults to the intersection across every currently-available
+            target (rule-pinned sub-fields are treated as unsupported).
+            Switch to Manual any time to publish your own payload to
+            <code class="font-mono">/v1/models</code>.
           </p>
           <ChatMetadataEditor
             :model-value="announcedEditorValue"
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
index d8f0bc29b..0271bd591 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
+++ b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
@@ -169,7 +169,7 @@ describe('AliasEditDialog', () => {
   // ── Announced metadata section ────────────────────────────────────────
 
   // The section header always renders for chat/embedding; the body only
-  // renders the editor when the "Enable override" switch is on. Image
+  // renders the editor when the "Manual" switch is on. Image
   // aliases never see the section at all.
 
   const expandAnnouncedSection = async () => {
@@ -181,8 +181,8 @@ describe('AliasEditDialog', () => {
   const announcedSwitch = (): HTMLButtonElement => {
     // The override switch sits at the right end of the section header
     // row. Reka-UI renders Switch as a <button role="switch">, so scan
-    // by role + the surrounding "Enable override" label.
-    const label = Array.from(document.body.querySelectorAll<HTMLLabelElement>('label')).find(l => (l.textContent ?? '').includes('Enable override'))!;
+    // by role + the surrounding "Manual" label.
+    const label = Array.from(document.body.querySelectorAll<HTMLLabelElement>('label')).find(l => (l.textContent ?? '').includes('Manual'))!;
     return label.querySelector<HTMLButtonElement>('button[role="switch"]')!;
   };
 
@@ -205,8 +205,6 @@ describe('AliasEditDialog', () => {
 
     // The override switch is present but off.
     expect(announcedSwitch().getAttribute('aria-checked')).toBe('false');
-    // The read-only hint copy appears above the editor.
-    expect(portalText()).toContain('Read-only');
     // The shared editor mounts and renders the Reasoning toggles, but
     // every Switch in there is disabled because mode='auto'.
     const sw = effortSwitch();
@@ -235,8 +233,7 @@ describe('AliasEditDialog', () => {
     announcedSwitch().click();
     await nextTick();
 
-    // Read-only hint disappears; the editor now accepts input.
-    expect(portalText()).not.toContain('Read-only');
+    // The editor now accepts input.
     const sw = effortSwitch();
     expect(sw).not.toBeNull();
     expect(sw!.disabled).toBe(false);
@@ -258,8 +255,7 @@ describe('AliasEditDialog', () => {
     sw.click(); await nextTick();
     expect(effortSwitch()!.disabled).toBe(false);
     sw.click(); await nextTick();
-    // Auto mode: read-only hint back, effort switch disabled again.
-    expect(portalText()).toContain('Read-only');
+    // Auto mode: effort switch disabled again.
     expect(effortSwitch()!.disabled).toBe(true);
     w.unmount();
   });

From 197977a7885c12a25820bf65b0053e0231301738 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 14:12:04 +0800
Subject: [PATCH 107/170] style(aliases/web): drop the imprecise "leave blank
 to inherit" hint

There is no parent to inherit from in either the alias-announced-metadata
context (announced doesn't track any upstream catalog) or the upstream-
model context (limits aren't a copy of anything either). Leaving the input
blank just means the limit is not declared. The empty placeholder already
conveys that; the trailing hint added confusion. Keep the unit cue
("tokens") so the operator sees what value to type.
---
 apps/web/src/components/shared/ChatMetadataEditor.vue | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apps/web/src/components/shared/ChatMetadataEditor.vue b/apps/web/src/components/shared/ChatMetadataEditor.vue
index ef1bba905..16e7c9f61 100644
--- a/apps/web/src/components/shared/ChatMetadataEditor.vue
+++ b/apps/web/src/components/shared/ChatMetadataEditor.vue
@@ -299,7 +299,7 @@ const toggleMandatory = (on: boolean) => {
     <section>
       <div class="mb-3 flex items-baseline gap-3">
         <h4 class="text-[11px] font-semibold uppercase tracking-wider text-gray-500">Limits</h4>
-        <span class="text-[11px] text-gray-500">tokens — leave blank to inherit</span>
+        <span class="text-[11px] text-gray-500">tokens</span>
       </div>
       <div class="grid gap-3 sm:grid-cols-3">
         <label class="block space-y-1.5">

From ea2ea1d6641c1f31b05469c2f9915be04d46a3d8 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 14:16:10 +0800
Subject: [PATCH 108/170] style(aliases/web): announced-metadata hint explains
 why rule-pinned drops out
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

An operator reading "rule-pinned sub-fields are treated as unsupported"
had no way to tell why. Expand the hint to spell out the chain — a
rule pins the value, so the client can't move it, so the alias drops
the field from its advertised capability surface.
---
 apps/web/src/components/alias-edit/AliasEditDialog.vue | 6 +++---
 1 file changed, 3 insertions(+), 3 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index 0b8fcab32..83afccef7 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -324,9 +324,9 @@ const KIND_OPTIONS: { value: AliasKind; label: string }[] = [
         <div v-if="announcedSectionExpanded" id="announced-metadata-body" class="mt-4">
           <p class="mb-3 text-xs text-gray-500">
             Defaults to the intersection across every currently-available
-            target (rule-pinned sub-fields are treated as unsupported).
-            Switch to Manual any time to publish your own payload to
-            <code class="font-mono">/v1/models</code>.
+            target (rule-pinned sub-fields are not modifiable on client
+            side so treated as unsupported). Switch to Manual any time to
+            publish your own payload to <code class="font-mono">/v1/models</code>.
           </p>
           <ChatMetadataEditor
             :model-value="announcedEditorValue"

From e298485e50f241bd74295d114701d8c1216bb7b2 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 14:18:28 +0800
Subject: [PATCH 109/170] style(aliases/web): trim announced-metadata hint to
 just the default-derivation note
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop the second sentence describing the Manual override — the
"Manual" switch right of the section title already signals the
choice. The hint paragraph now only explains the default's
derivation so an operator scanning the auto values knows where each
piece came from.
---
 apps/web/src/components/alias-edit/AliasEditDialog.vue | 3 +--
 1 file changed, 1 insertion(+), 2 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index 83afccef7..b919e3dc6 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -325,8 +325,7 @@ const KIND_OPTIONS: { value: AliasKind; label: string }[] = [
           <p class="mb-3 text-xs text-gray-500">
             Defaults to the intersection across every currently-available
             target (rule-pinned sub-fields are not modifiable on client
-            side so treated as unsupported). Switch to Manual any time to
-            publish your own payload to <code class="font-mono">/v1/models</code>.
+            side so treated as unsupported).
           </p>
           <ChatMetadataEditor
             :model-value="announcedEditorValue"

From e1021c956e840b9700b9bd4e11fab0e7ec2b2808 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 14:30:03 +0800
Subject: [PATCH 110/170] refactor(aliases): Gemini catalog reuses
 mergeAliasesIntoModels
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Gemini `/v1beta/models` route called `synthesizeListedAliases` and
then re-implemented the collision + dedupe step inline. Swap that for
`mergeAliasesIntoModels`, pre-filtering both inputs to `kind === 'chat'`
so Gemini's chat-only surface stays honest while the shared merge runs.

Loosen `mergeAliasesIntoModels<T extends PublicModel>` to `<T>` — the
function body never touches a PublicModel property on T, so the
constraint was vestigial and blocked the InternalModel-shaped row type
Gemini needs.
---
 .../src/data-plane/models/alias-listing.ts    |  6 +++--
 .../gateway/src/data-plane/models/gemini.ts   | 26 ++++++++++---------
 2 files changed, 18 insertions(+), 14 deletions(-)

diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index c38d80287..1d1001bac 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -263,8 +263,10 @@ export const synthesizeListedAliases = (input: ListedAliasInputs): PublicModel[]
 // it preserves intent. `mapReal` shapes each real model into the caller's
 // row type; `wrapAlias` lifts a synthesized `PublicModel` alias entry into
 // the same row type (the dashboard, for example, adds an empty `upstreams`
-// array since alias rows do not bind to an upstream directly).
-export const mergeAliasesIntoModels = <T extends PublicModel>(input: {
+// array since alias rows do not bind to an upstream directly; the Gemini
+// `/v1beta/models` route maps into the upstream's `InternalModel` shape
+// before projecting to Gemini's wire form).
+export const mergeAliasesIntoModels = <T>(input: {
   readonly realModels: readonly ResolvedModel[];
   readonly aliases: readonly ModelAliasRecord[];
   readonly mapReal: (model: ResolvedModel) => T;
diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts
index e8cb7a409..5d8dcafdd 100644
--- a/packages/gateway/src/data-plane/models/gemini.ts
+++ b/packages/gateway/src/data-plane/models/gemini.ts
@@ -1,6 +1,6 @@
 import type { Context } from 'hono';
 
-import { synthesizeListedAliases } from './alias-listing.ts';
+import { mergeAliasesIntoModels } from './alias-listing.ts';
 import { MODEL_LISTING_FAILURE_MESSAGE } from './shared.ts';
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
@@ -65,8 +65,9 @@ const geminiModelLoadError = (error: unknown): Response => {
   return geminiError(502, error instanceof Error ? error.message : String(error));
 };
 
-// Real chat models plus chat-kind alias entries; see `loadModels` for the
-// collision rationale.
+// Real chat models plus chat-kind alias entries; collision and dedupe ride
+// on the shared `mergeAliasesIntoModels` helper so /v1beta/models stays in
+// step with /v1/models and the dashboard's /api/models.
 const loadGeminiModels = async (
   upstreamFilter: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,
@@ -77,21 +78,22 @@ const loadGeminiModels = async (
     getModels(upstreamFilter, fetcherForUpstream, scheduler),
     aliasRepo.list(),
   ]);
-  const chatModels = models.filter(model => model.kind === 'chat');
-  const aliasEntries = synthesizeListedAliases({ aliases, realModels: models })
-    .filter(entry => entry.kind === 'chat');
-  const aliasIds = new Set(aliasEntries.map(entry => entry.id));
-  const merged: InternalModel[] = [
-    ...chatModels.filter(model => !aliasIds.has(model.id)),
-    ...aliasEntries.map<InternalModel>(entry => ({
+  // Gemini surfaces chat-kind models only; filter both the real catalog and
+  // the synthesized alias entries before the merge so the alias collision
+  // step only ever weighs chat-on-chat.
+  const merged = mergeAliasesIntoModels<InternalModel>({
+    realModels: models.filter(model => model.kind === 'chat'),
+    aliases: aliases.filter(alias => alias.kind === 'chat'),
+    mapReal: model => model,
+    wrapAlias: entry => ({
       id: entry.id,
       display_name: entry.display_name,
       limits: entry.limits,
       kind: entry.kind,
       ...(entry.cost !== undefined ? { cost: entry.cost } : {}),
       ...(entry.chat !== undefined ? { chat: entry.chat } : {}),
-    })),
-  ];
+    }),
+  });
   return merged.map(toGeminiModel);
 };
 

From 062d2efb66a36dc7217f29f7e264fb4d7568f0cc Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 14:33:01 +0800
Subject: [PATCH 111/170] refactor(aliases): codex 1p catalog rides
 synthesizeListedAliases for alias entries
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`computeCatalog` had its own `firstAvailableTargetId` + `aliasBySlug`
walk that duplicated the alias-availability check `synthesizeListedAliases`
already does for /v1/models. Replace it: run the shared synthesizer, then
look up each catalog slug against the synthesized entries' `aliasedFrom`
block to drive both the slug-survives filter and the context-window
resolver.

The "first-available regardless of selection mode" rule stays in place —
the codex catalog publishes a static window, so a `random` alias still
collapses to its first routable target; an inline comment records this.
The context-window resolver no longer consults the alias's announced
`max_context_window_tokens` override: an alias slug now always reports
its first routable target's window.

Switch the upstream snapshot from `getInternalModels` to `getModels`
because the synthesizer reads `ResolvedModel` (it needs `endpoints` for
its own bookkeeping); the `limits` lookup the codex catalog cares about
is identical between the two.
---
 .../gateway/src/data-plane/codex/models.ts    | 69 +++++++++----------
 1 file changed, 32 insertions(+), 37 deletions(-)

diff --git a/packages/gateway/src/data-plane/codex/models.ts b/packages/gateway/src/data-plane/codex/models.ts
index 7f7f4c50c..e74e232ba 100644
--- a/packages/gateway/src/data-plane/codex/models.ts
+++ b/packages/gateway/src/data-plane/codex/models.ts
@@ -38,10 +38,10 @@ import { applyContextWindowFromRegistry, type ContextWindowResolver } from './co
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
 import { getRepo } from '../../repo/index.ts';
-import type { ModelAliasRecord } from '../../repo/types.ts';
 import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
 import { getCurrentColo } from '../../runtime/runtime-info.ts';
-import { getInternalModels } from '../providers/registry.ts';
+import { synthesizeListedAliases } from '../models/alias-listing.ts';
+import { getModels } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { Fetcher } from '@floway-dev/provider';
 
@@ -59,61 +59,56 @@ const cacheKeyFor = (clientVersion: string, upstreamIds: readonly string[] | nul
   return new Request(`https://floway.invalid/codex-models?v=${encodeURIComponent(clientVersion)}&u=${encodeURIComponent(ids)}`);
 };
 
-// First currently-routable target's id, or null when no target resolves
-// against the registry. Single-target aliases collapse to the lone target;
-// multi-target aliases pick first-available (the order the operator
-// configured). This drives both the slug-survives filter and the
-// context-window resolver.
-const firstAvailableTargetId = (alias: ModelAliasRecord, registrySlugs: ReadonlySet<string>): string | null => {
-  for (const target of alias.targets) {
-    if (registrySlugs.has(target.target_model_id)) return target.target_model_id;
-  }
-  return null;
-};
-
 const computeCatalog = async (
   userAgent: string | undefined,
   upstreamIds: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
 ): Promise<CodexCatalog> => {
-  const [catalog, internalModels, aliases] = await Promise.all([
+  const [catalog, realModels, aliases] = await Promise.all([
     resolveCodexCatalog(userAgent),
-    getInternalModels(upstreamIds, fetcherForUpstream, scheduler),
+    getModels(upstreamIds, fetcherForUpstream, scheduler),
     getRepo().modelAliases.list(),
   ]);
   const slugContextWindow = new Map<string, number>();
-  for (const m of internalModels) {
+  for (const m of realModels) {
     const limit = m.limits.max_context_window_tokens;
     if (typeof limit === 'number') slugContextWindow.set(m.id, limit);
   }
-  const registrySlugs = new Set(internalModels.map(m => m.id));
+  const registrySlugs = new Set(realModels.map(m => m.id));
 
-  // Visible aliases whose first target currently resolves — keyed by alias
-  // name so the slug filter and the context-window resolver both look the
-  // same alias up in O(1).
-  const aliasBySlug = new Map<string, { alias: ModelAliasRecord; firstTargetId: string }>();
-  for (const alias of aliases) {
-    if (!alias.visibleInModelsList) continue;
-    const firstTargetId = firstAvailableTargetId(alias, registrySlugs);
-    if (firstTargetId === null) continue;
-    aliasBySlug.set(alias.name, { alias, firstTargetId });
+  // Run the shared alias synthesizer so the codex catalog reads the same
+  // visible-alias surface that /v1/models, the dashboard, and Gemini do.
+  // Each entry's `aliasedFrom.targets` keeps every configured target — the
+  // synthesizer does not narrow to availability — so we still pick the
+  // first one in registry order here. Selection mode never matters for
+  // this static listing: a `random` alias would refuse to publish a
+  // stable context window, so the catalog uses first-available regardless
+  // of the alias's runtime selection.
+  const aliasFirstTarget = new Map<string, string>();
+  for (const entry of synthesizeListedAliases({ aliases, realModels })) {
+    const aliasedFrom = entry.aliasedFrom;
+    if (aliasedFrom === undefined) continue;
+    const firstRoutable = aliasedFrom.targets.find(t => registrySlugs.has(t.target_model_id));
+    if (firstRoutable !== undefined) aliasFirstTarget.set(entry.id, firstRoutable.target_model_id);
   }
 
   const filtered: CodexCatalog = {
-    models: catalog.models.filter(m => registrySlugs.has(m.slug) || aliasBySlug.has(m.slug)),
+    models: catalog.models.filter(m => registrySlugs.has(m.slug) || aliasFirstTarget.has(m.slug)),
   };
 
-  // For an alias slug: prefer the operator's announced override, else the
-  // first available target's window. Falls back to the registry-side lookup
-  // for plain (non-alias) slugs.
+  // For an alias slug: redirect to the first routable target's window so
+  // the published number is one the gateway can honour. Plain (non-alias)
+  // slugs read straight off the registry. Operator-set overrides on the
+  // alias's announced metadata travel through `synthesizeListedAliases`
+  // into the alias entry's own limits — but the codex catalog needs the
+  // *target's* window here, not the alias's announced one, because
+  // `applyContextWindowFromRegistry` writes both `context_window` and
+  // `max_context_window` and the upstream binding only enforces the
+  // target's real ceiling.
   const contextWindowOf: ContextWindowResolver = slug => {
-    const aliasEntry = aliasBySlug.get(slug);
-    if (aliasEntry !== undefined) {
-      const overridden = aliasEntry.alias.announcedMetadata?.limits?.max_context_window_tokens;
-      if (typeof overridden === 'number') return overridden;
-      return slugContextWindow.get(aliasEntry.firstTargetId) ?? null;
-    }
+    const firstTargetId = aliasFirstTarget.get(slug);
+    if (firstTargetId !== undefined) return slugContextWindow.get(firstTargetId) ?? null;
     return slugContextWindow.get(slug) ?? null;
   };
   return applyContextWindowFromRegistry(filtered, contextWindowOf);

From a49d0a928a4f43d00fa5cba419197faf342ede87 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 14:56:20 +0800
Subject: [PATCH 112/170] refactor(aliases): unify chat + passthrough
 model-resolve wrappers
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`enumerateProviderCandidates` (chat) and `resolveModelForRequest`
(passthrough) wrapped the same `resolveAlias → enumerate-interpretations
→ collect-outcomes` core with two slightly different output shapes.
Collapse them into one helper, `resolveModelCandidates`, generic on the
caller's target descriptor:

- Chat callers pass `pickTarget: endpoints => ChatTargetApi | null`.
- Passthrough / image-generation callers pass
  `endpoints => endpoints[key] !== undefined ? key : null`.

The pickTarget predicate doubles as the alias resolver's
endpoint-narrowing filter, so a target that doesn't serve an acceptable
surface drops out of the first-available / random pool.

passthrough-serve loses its inner endpoint-mismatch filter (pickTarget
already excluded those bindings) and collapses the for-loop to taking
the first candidate — every iteration always returned anyway. The
"does not support the endpoint" 400 is now the explicit
`candidates.length === 0` branch above the call.

Image-generation's `resolveImageBinding` returns the candidate's fetcher
alongside the binding, dropping a redundant `createPerRequestFetcher`
call at the caller. Test mocks (8 serve/http tests + 1 image-gen
integration test) update to mock the new export at its new path.
---
 .../chat/chat-completions/http_test.ts        |   6 +-
 .../data-plane/chat/chat-completions/serve.ts |   6 +-
 .../chat/chat-completions/serve_test.ts       |  10 +-
 .../src/data-plane/chat/gemini/http_test.ts   |   6 +-
 .../src/data-plane/chat/gemini/serve.ts       |  10 +-
 .../src/data-plane/chat/gemini/serve_test.ts  |  10 +-
 .../src/data-plane/chat/messages/http_test.ts |   6 +-
 .../src/data-plane/chat/messages/serve.ts     |  10 +-
 .../data-plane/chat/messages/serve_test.ts    |  10 +-
 .../data-plane/chat/responses/http_test.ts    |  10 +-
 .../image-generation-integration_test.ts      |  12 +-
 .../server-tools/image-generation.ts          |  26 +-
 .../data-plane/chat/responses/serve-prep.ts   |   7 +-
 .../data-plane/chat/responses/serve_test.ts   |  12 +-
 .../src/data-plane/chat/shared/candidates.ts  |  89 +------
 .../data-plane/chat/shared/candidates_test.ts |  52 ++--
 .../src/data-plane/providers/registry.ts      |  96 +++++--
 .../src/data-plane/providers/registry_test.ts | 101 ++++---
 .../data-plane/shared/passthrough-serve.ts    | 251 +++++++++---------
 19 files changed, 358 insertions(+), 372 deletions(-)

diff --git a/packages/gateway/src/data-plane/chat/chat-completions/http_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/http_test.ts
index 5d0302c72..6df2dd23c 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/http_test.ts
@@ -12,11 +12,11 @@ import { directFetcher, type ProviderStreamResult, type UpstreamCallOptions } fr
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
-vi.mock('../shared/candidates.ts', async importOriginal => {
-  const original = await importOriginal<typeof import('../shared/candidates.ts')>();
+vi.mock('../../providers/registry.ts', async importOriginal => {
+  const original = await importOriginal<typeof import('../../providers/registry.ts')>();
   return {
     ...original,
-    enumerateProviderCandidates: vi.fn(async () => {
+    resolveModelCandidates: vi.fn(async () => {
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('http_test: no candidates enqueued');
       return { ...next, failedUpstreams: [], aliasResolution: null };
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
index 17c70cc19..e68b8521b 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
@@ -3,8 +3,8 @@ import { renderChatCompletionsFailure } from './errors.ts';
 import { planChatCompletionsRouting } from './routing.ts';
 import { ALIAS_RESPONSE_HEADER, applyChatRulesToChatCompletions } from '../../model-aliases/apply.ts';
 import { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
+import { resolveModelCandidates } from '../../providers/registry.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import { aliasFailureFromError } from '../shared/errors.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
@@ -23,9 +23,9 @@ export const chatCompletionsServe = {
     const { payload, ctx, store, headers } = args;
     let enumerated;
     try {
-      enumerated = await enumerateProviderCandidates({
+      enumerated = await resolveModelCandidates({
         upstreamIds: ctx.upstreamIds,
-        model: payload.model,
+        modelName: payload.model,
         pickTarget: endpoints =>
           endpoints.chatCompletions ? 'chat-completions'
             : endpoints.messages ? 'messages'
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
index 644af50b8..1b4ce105c 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
@@ -16,25 +16,25 @@ import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-d
 // provider candidates it wants.
 const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
 const lastCandidatesCall: { model?: string } = {};
-vi.mock('../shared/candidates.ts', async importOriginal => {
-  const original = await importOriginal<typeof import('../shared/candidates.ts')>();
+vi.mock('../../providers/registry.ts', async importOriginal => {
+  const original = await importOriginal<typeof import('../../providers/registry.ts')>();
   const { resolveAlias } = await import('../../model-aliases/resolve.ts');
   return {
     ...original,
-    enumerateProviderCandidates: vi.fn(async (args: { model: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
+    resolveModelCandidates: vi.fn(async (args: { modelName: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
       // Drain a queued resolution from `aliasResolutionQueue` so the rule-overlay test
       // sees the resolved target id reach the candidates layer and the
       // serve overlays rules from the returned `aliasResolution`. Tests
       // queue the resolution via `aliasResolutionQueue`.
       const aliasResolution = await resolveAlias({
-        modelName: args.model,
+        modelName: args.modelName,
         providers: [],
         fetcherForUpstream: () => directFetcher,
         scheduler: args.scheduler,
         endpointAccepts: () => true,
         repo: { getByName: () => Promise.resolve(null) } as never,
       });
-      const effectiveModel = aliasResolution?.targetModelId ?? args.model;
+      const effectiveModel = aliasResolution?.targetModelId ?? args.modelName;
       lastCandidatesCall.model = effectiveModel;
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('serve_test: no candidates enqueued');
diff --git a/packages/gateway/src/data-plane/chat/gemini/http_test.ts b/packages/gateway/src/data-plane/chat/gemini/http_test.ts
index 1a14f742d..1b3ee8a2d 100644
--- a/packages/gateway/src/data-plane/chat/gemini/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/http_test.ts
@@ -13,11 +13,11 @@ import { directFetcher, type ProviderCallResult, type ProviderStreamResult, type
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
-vi.mock('../shared/candidates.ts', async importOriginal => {
-  const original = await importOriginal<typeof import('../shared/candidates.ts')>();
+vi.mock('../../providers/registry.ts', async importOriginal => {
+  const original = await importOriginal<typeof import('../../providers/registry.ts')>();
   return {
     ...original,
-    enumerateProviderCandidates: vi.fn(async () => {
+    resolveModelCandidates: vi.fn(async () => {
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('http_test: no candidates enqueued');
       return { ...next, failedUpstreams: [], aliasResolution: null };
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts
index 3e15fdefb..e7f34f271 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts
@@ -3,8 +3,8 @@ import { renderGeminiFailure } from './errors.ts';
 import { planGeminiRouting } from './routing.ts';
 import { ALIAS_RESPONSE_HEADER, applyChatRulesToGemini } from '../../model-aliases/apply.ts';
 import { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
+import { resolveModelCandidates } from '../../providers/registry.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import { aliasFailureFromError } from '../shared/errors.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
@@ -35,9 +35,9 @@ export const geminiServe = {
     const { payload, ctx, store, headers } = args;
     let enumerated;
     try {
-      enumerated = await enumerateProviderCandidates({
+      enumerated = await resolveModelCandidates({
         upstreamIds: ctx.upstreamIds,
-        model: args.model,
+        modelName: args.model,
         // Gemini has no native upstream target in the provider API; prefer
         // Chat Completions, then Messages, then Responses.
         pickTarget: endpoints => endpoints.chatCompletions ? 'chat-completions' : endpoints.messages ? 'messages' : endpoints.responses ? 'responses' : null,
@@ -77,9 +77,9 @@ export const geminiServe = {
     const { payload, ctx, store, headers } = args;
     let enumerated;
     try {
-      enumerated = await enumerateProviderCandidates({
+      enumerated = await resolveModelCandidates({
         upstreamIds: ctx.upstreamIds,
-        model: args.model,
+        modelName: args.model,
         // Gemini countTokens has no native upstream support; only providers
         // exposing the Messages endpoint qualify because we translate Gemini
         // → Messages and call Messages count_tokens upstream.
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
index 41b854315..ef9f16fba 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
@@ -15,25 +15,25 @@ import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-d
 
 const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
 const lastCandidatesCall: { model?: string } = {};
-vi.mock('../shared/candidates.ts', async importOriginal => {
-  const original = await importOriginal<typeof import('../shared/candidates.ts')>();
+vi.mock('../../providers/registry.ts', async importOriginal => {
+  const original = await importOriginal<typeof import('../../providers/registry.ts')>();
   const { resolveAlias } = await import('../../model-aliases/resolve.ts');
   return {
     ...original,
-    enumerateProviderCandidates: vi.fn(async (args: { model: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
+    resolveModelCandidates: vi.fn(async (args: { modelName: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
       // Drain a queued resolution from `aliasResolutionQueue` so the rule-overlay test
       // sees the resolved target id reach the candidates layer and the
       // serve overlays rules from the returned `aliasResolution`. Tests
       // queue the resolution via `aliasResolutionQueue`.
       const aliasResolution = await resolveAlias({
-        modelName: args.model,
+        modelName: args.modelName,
         providers: [],
         fetcherForUpstream: () => directFetcher,
         scheduler: args.scheduler,
         endpointAccepts: () => true,
         repo: { getByName: () => Promise.resolve(null) } as never,
       });
-      const effectiveModel = aliasResolution?.targetModelId ?? args.model;
+      const effectiveModel = aliasResolution?.targetModelId ?? args.modelName;
       lastCandidatesCall.model = effectiveModel;
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('serve_test: no candidates enqueued');
diff --git a/packages/gateway/src/data-plane/chat/messages/http_test.ts b/packages/gateway/src/data-plane/chat/messages/http_test.ts
index 82f3581a0..dc69d31aa 100644
--- a/packages/gateway/src/data-plane/chat/messages/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/http_test.ts
@@ -12,11 +12,11 @@ import { directFetcher, type ProviderCallResult, type ProviderStreamResult, type
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 const candidatesQueue: { readonly candidates: readonly ChatCandidate[]; readonly sawModel: boolean }[] = [];
-vi.mock('../shared/candidates.ts', async importOriginal => {
-  const original = await importOriginal<typeof import('../shared/candidates.ts')>();
+vi.mock('../../providers/registry.ts', async importOriginal => {
+  const original = await importOriginal<typeof import('../../providers/registry.ts')>();
   return {
     ...original,
-    enumerateProviderCandidates: vi.fn(async () => {
+    resolveModelCandidates: vi.fn(async () => {
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('http_test: no candidates enqueued');
       return { ...next, failedUpstreams: [], aliasResolution: null };
diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts
index fab48ce1f..e4561d0d6 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve.ts
@@ -3,8 +3,8 @@ import { renderMessagesFailure } from './errors.ts';
 import { planMessagesRouting } from './routing.ts';
 import { ALIAS_RESPONSE_HEADER, applyChatRulesToMessages } from '../../model-aliases/apply.ts';
 import { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
+import { resolveModelCandidates } from '../../providers/registry.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import { enumerateProviderCandidates } from '../shared/candidates.ts';
 import { aliasFailureFromError } from '../shared/errors.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
@@ -30,9 +30,9 @@ export const messagesServe = {
     const { payload, ctx, store, headers } = args;
     let enumerated;
     try {
-      enumerated = await enumerateProviderCandidates({
+      enumerated = await resolveModelCandidates({
         upstreamIds: ctx.upstreamIds,
-        model: payload.model,
+        modelName: payload.model,
         pickTarget: endpoints =>
           endpoints.messages ? 'messages'
             : endpoints.responses ? 'responses'
@@ -74,9 +74,9 @@ export const messagesServe = {
     const { payload, ctx, store, headers } = args;
     let enumerated;
     try {
-      enumerated = await enumerateProviderCandidates({
+      enumerated = await resolveModelCandidates({
         upstreamIds: ctx.upstreamIds,
-        model: payload.model,
+        modelName: payload.model,
         pickTarget: endpoints => endpoints.messages ? 'messages' : null,
         scheduler: ctx.backgroundScheduler,
         currentColo: ctx.currentColo,
diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
index 1851751bb..f28131dc7 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
@@ -13,25 +13,25 @@ import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-d
 
 const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
 const lastCandidatesCall: { model?: string } = {};
-vi.mock('../shared/candidates.ts', async importOriginal => {
-  const original = await importOriginal<typeof import('../shared/candidates.ts')>();
+vi.mock('../../providers/registry.ts', async importOriginal => {
+  const original = await importOriginal<typeof import('../../providers/registry.ts')>();
   const { resolveAlias } = await import('../../model-aliases/resolve.ts');
   return {
     ...original,
-    enumerateProviderCandidates: vi.fn(async (args: { model: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
+    resolveModelCandidates: vi.fn(async (args: { modelName: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
       // Drain a queued resolution from `aliasResolutionQueue` so the rule-overlay test
       // sees the resolved target id reach the candidates layer and the
       // serve overlays rules from the returned `aliasResolution`. Tests
       // queue the resolution via `aliasResolutionQueue`.
       const aliasResolution = await resolveAlias({
-        modelName: args.model,
+        modelName: args.modelName,
         providers: [],
         fetcherForUpstream: () => directFetcher,
         scheduler: args.scheduler,
         endpointAccepts: () => true,
         repo: { getByName: () => Promise.resolve(null) } as never,
       });
-      const effectiveModel = aliasResolution?.targetModelId ?? args.model;
+      const effectiveModel = aliasResolution?.targetModelId ?? args.modelName;
       lastCandidatesCall.model = effectiveModel;
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('serve_test: no candidates enqueued');
diff --git a/packages/gateway/src/data-plane/chat/responses/http_test.ts b/packages/gateway/src/data-plane/chat/responses/http_test.ts
index e9144ca1d..c3d4b6b65 100644
--- a/packages/gateway/src/data-plane/chat/responses/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/http_test.ts
@@ -24,21 +24,21 @@ const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; read
 // — the alias rewrite (if any) applied. Tests assert against this to confirm
 // the alias mechanism drove the rewrite.
 const lastSeenModel: { value: string | null } = { value: null };
-vi.mock('../shared/candidates.ts', async importOriginal => {
-  const original = await importOriginal<typeof import('../shared/candidates.ts')>();
+vi.mock('../../providers/registry.ts', async importOriginal => {
+  const original = await importOriginal<typeof import('../../providers/registry.ts')>();
   const { resolveAlias } = await import('../../model-aliases/resolve.ts');
   return {
     ...original,
-    enumerateProviderCandidates: vi.fn(async (args: { model: string; scheduler: () => void }) => {
+    resolveModelCandidates: vi.fn(async (args: { modelName: string; scheduler: () => void }) => {
       const aliasResolution = await resolveAlias({
-        modelName: args.model,
+        modelName: args.modelName,
         providers: [],
         fetcherForUpstream: () => directFetcher,
         scheduler: args.scheduler,
         endpointAccepts: () => true,
         repo: { getByName: () => Promise.resolve(null) } as never,
       });
-      lastSeenModel.value = aliasResolution?.targetModelId ?? args.model;
+      lastSeenModel.value = aliasResolution?.targetModelId ?? args.modelName;
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('http_test: no candidates enqueued');
       return { ...next, failedUpstreams: [], aliasResolution };
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
index 14119f61b..82484e621 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation-integration_test.ts
@@ -30,13 +30,8 @@ interface BackendStub {
 const stub = vi.hoisted((): BackendStub => ({ generationsCalls: [], editsForms: [], nextGenerations: [], nextEdits: [] }));
 
 vi.mock('../../../../providers/registry.ts', () => ({
-  resolveModelForRequest: vi.fn(async () => ({
-    matches: [{
-      id: 'gpt-image-2',
-      model: {
-        id: 'gpt-image-2',
-        endpoints: { imagesGenerations: {}, imagesEdits: {} },
-      },
+  resolveModelCandidates: vi.fn(async () => ({
+    candidates: [{
       binding: {
         upstream: 'u',
         upstreamModel: { id: 'gpt-image-2', endpoints: { imagesGenerations: {}, imagesEdits: {} } },
@@ -56,8 +51,11 @@ vi.mock('../../../../providers/registry.ts', () => ({
           },
         },
       },
+      fetcher: directFetcher,
     }],
+    sawModel: true,
     failedUpstreams: [],
+    aliasResolution: null,
   })),
 }));
 
diff --git a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts
index a66fe9995..229363cf7 100644
--- a/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts
+++ b/packages/gateway/src/data-plane/chat/responses/interceptors/server-tools/image-generation.ts
@@ -1,6 +1,5 @@
-import { createPerRequestFetcher } from '../../../../../dial/per-request.ts';
 import { sleep } from '../../../../../shared/sleep.ts';
-import { resolveModelForRequest } from '../../../../providers/registry.ts';
+import { resolveModelCandidates } from '../../../../providers/registry.ts';
 import { appendFailedUpstreams } from '../../../../shared/failed-upstreams.ts';
 import { createUpstreamLatencyRecorder, recordPerformanceError, recordPerformanceLatency, requireRecordedDurationMs } from '../../../../shared/telemetry/performance.ts';
 import { recordTokenUsage, tokenUsageFromImagesBody } from '../../../../shared/telemetry/usage.ts';
@@ -529,18 +528,23 @@ const serverError = (e: unknown): ImageError => ({
 const resolveImageBinding = async (
   isEdit: boolean,
   state: ShimState,
-  fetcherForUpstream: (upstreamId: string) => Fetcher,
-): Promise<{ ok: true; binding: ProviderModelRecord } | { ok: false; error: ImageError }> => {
+): Promise<{ ok: true; binding: ProviderModelRecord; fetcher: Fetcher } | { ok: false; error: ImageError }> => {
   const endpointKey = isEdit ? 'imagesEdits' : 'imagesGenerations';
   const endpointPath = isEdit ? '/images/edits' : '/images/generations';
   let resolution;
   try {
-    resolution = await resolveModelForRequest(state.config.model, state.upstreamIds, fetcherForUpstream, state.backgroundScheduler);
+    resolution = await resolveModelCandidates({
+      modelName: state.config.model,
+      upstreamIds: state.upstreamIds,
+      scheduler: state.backgroundScheduler,
+      currentColo: state.currentColo,
+      pickTarget: endpoints => endpoints[endpointKey] !== undefined ? endpointKey : null,
+    });
   } catch (e) {
     return { ok: false, error: serverError(e) };
   }
-  const match = resolution.matches.find(m => m.binding.upstreamModel.endpoints[endpointKey] !== undefined);
-  if (match === undefined) {
+  const [candidate] = resolution.candidates;
+  if (candidate === undefined) {
     return {
       ok: false,
       error: {
@@ -551,7 +555,7 @@ const resolveImageBinding = async (
       },
     };
   }
-  return { ok: true, binding: match.binding };
+  return { ok: true, binding: candidate.binding, fetcher: candidate.fetcher };
 };
 
 // 60s cap matches the per-minute refill window of Azure TPM/RPM and
@@ -757,11 +761,9 @@ const streamImageGeneration = (
   sources: readonly ImageSource[],
   state: ShimState,
 ) => async function* (): AsyncGenerator<ServerToolLifecycleEvent, ServerToolTerminal> {
-  const fetcherForUpstream = await createPerRequestFetcher(state.currentColo);
-  const resolved = await resolveImageBinding(isEdit, state, fetcherForUpstream);
+  const resolved = await resolveImageBinding(isEdit, state);
   if (!resolved.ok) return imageTerminal(prompt, action, { ok: false, error: resolved.error });
-  const { binding } = resolved;
-  const fetcher = fetcherForUpstream(binding.upstream);
+  const { binding, fetcher } = resolved;
   const wantsPartials = (state.config.partial_images ?? 0) > 0;
 
   let response: Response;
diff --git a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
index febd35858..db51d0eb5 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
@@ -3,7 +3,8 @@ import type { StatefulResponsesStore } from './items/store.ts';
 import { planResponsesRouting } from './routing.ts';
 import { ALIAS_RESPONSE_HEADER, applyChatRulesToResponses } from '../../model-aliases/apply.ts';
 import { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
-import { enumerateProviderCandidates, type ChatCandidate } from '../shared/candidates.ts';
+import { resolveModelCandidates } from '../../providers/registry.ts';
+import { type ChatCandidate } from '../shared/candidates.ts';
 import { aliasFailureFromError } from '../shared/errors.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ModelEndpoints, ProtocolFrame } from '@floway-dev/protocols/common';
@@ -94,9 +95,9 @@ export const prepareResponsesServePlan = async (args: {
   const prepared = await expandPreviousResponseId(payload, store);
   let enumerated;
   try {
-    enumerated = await enumerateProviderCandidates({
+    enumerated = await resolveModelCandidates({
       upstreamIds: ctx.upstreamIds,
-      model: prepared.model,
+      modelName: prepared.model,
       pickTarget,
       scheduler: ctx.backgroundScheduler,
       currentColo: ctx.currentColo,
diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
index dd85e307e..c45759441 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
@@ -14,7 +14,7 @@ import type { ResponsesPayload, ResponsesResult, ResponsesStreamEvent } from '@f
 import { directFetcher, type ProviderResponsesResult, type ProviderStreamResult, type ResponsesAction, type UpstreamCallOptions } from '@floway-dev/provider';
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
-// `enumerateProviderCandidates` is the only seam between serve and the
+// `resolveModelCandidates` is the only seam between serve and the
 // provider registry — mocking it directly keeps the serve tests narrow
 // (no fake fetch, no repo upstream rows for provider catalogs) and lets
 // each test hand the serve exactly the candidates it wants to exercise.
@@ -23,25 +23,25 @@ import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-d
 // false` so the serve renders 404 rather than 400.
 const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
 const lastCandidatesCall: { model?: string } = {};
-vi.mock('../shared/candidates.ts', async importOriginal => {
-  const original = await importOriginal<typeof import('../shared/candidates.ts')>();
+vi.mock('../../providers/registry.ts', async importOriginal => {
+  const original = await importOriginal<typeof import('../../providers/registry.ts')>();
   const { resolveAlias } = await import('../../model-aliases/resolve.ts');
   return {
     ...original,
-    enumerateProviderCandidates: vi.fn(async (args: { model: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
+    resolveModelCandidates: vi.fn(async (args: { modelName: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
       // Drain a queued resolution from `aliasResolutionQueue` so the rule-overlay test
       // sees the resolved target id reach the candidates layer and the
       // serve overlays rules from the returned `aliasResolution`. Tests
       // queue the resolution via `aliasResolutionQueue`.
       const aliasResolution = await resolveAlias({
-        modelName: args.model,
+        modelName: args.modelName,
         providers: [],
         fetcherForUpstream: () => directFetcher,
         scheduler: args.scheduler,
         endpointAccepts: () => true,
         repo: { getByName: () => Promise.resolve(null) } as never,
       });
-      const effectiveModel = aliasResolution?.targetModelId ?? args.model;
+      const effectiveModel = aliasResolution?.targetModelId ?? args.modelName;
       lastCandidatesCall.model = effectiveModel;
       const next = candidatesQueue.shift();
       if (next === undefined) throw new Error('serve_test: no candidates enqueued');
diff --git a/packages/gateway/src/data-plane/chat/shared/candidates.ts b/packages/gateway/src/data-plane/chat/shared/candidates.ts
index ac3420a15..a322f99ec 100644
--- a/packages/gateway/src/data-plane/chat/shared/candidates.ts
+++ b/packages/gateway/src/data-plane/chat/shared/candidates.ts
@@ -1,86 +1,11 @@
-import { createPerRequestFetcher } from '../../../dial/per-request.ts';
-import { getRepo } from '../../../repo/index.ts';
-import { type AliasResolution, resolveAlias } from '../../model-aliases/resolve.ts';
-import { collectInterpretationOutcomes, enumerateModelInterpretations, listModelProviders } from '../../providers/registry.ts';
-import type { BackgroundScheduler } from '@floway-dev/platform';
-import type { ModelEndpoints } from '@floway-dev/protocols/common';
-import type { ChatTargetApi, ProviderCandidate } from '@floway-dev/provider';
+// Chat-side type aliases for the candidates produced by the shared
+// `resolveModelCandidates` helper. The chat surfaces (chat-completions,
+// messages, responses, gemini) all share the `ChatTargetApi` target descriptor;
+// the passthrough surfaces use `ModelEndpointKey` directly. Both ride on the
+// same resolve helper — see `data-plane/providers/registry.ts`.
+
+import type { ProviderCandidate } from '@floway-dev/provider';
 
 export type { ProviderCandidate };
 
 export type ChatCandidate = ProviderCandidate;
-
-// Returns the candidates that satisfy both the model resolution and the
-// target-endpoint pick, plus a `sawModel` flag that distinguishes the
-// "model is missing entirely" failure from "model exists but does not
-// expose the endpoint this source needs", plus the names of upstreams
-// whose catalog fetch rejected this round so the caller's failure
-// renderer can surface them parenthetically. See resolve.ts for the
-// alias-resolves-once-above-prefix-routing contract; this entry runs it
-// and returns the resolution on `aliasResolution`.
-export const enumerateProviderCandidates = async ({
-  upstreamIds, model, pickTarget, scheduler, currentColo,
-}: {
-  // null = unrestricted; empty list = no providers visible.
-  upstreamIds: readonly string[] | null;
-  model: string;
-  pickTarget: (endpoints: ModelEndpoints) => ChatTargetApi | null;
-  // Threaded into `resolveModelForProvider` so the per-upstream catalog
-  // lookup hits the SWR-cached `fetchUpstreamModelsCached` instead of
-  // round-tripping to the upstream on every chat serve.
-  scheduler: BackgroundScheduler;
-  // Current colo for this request — see GatewayCtx.currentColo. Threaded
-  // into the per-request fetcher so colo-scoped fallback entries can be
-  // honoured at dial time.
-  currentColo: string;
-}): Promise<{
-  readonly candidates: readonly ChatCandidate[];
-  readonly sawModel: boolean;
-  readonly failedUpstreams: readonly string[];
-  readonly aliasResolution: AliasResolution | null;
-}> => {
-  const fetcherForUpstream = await createPerRequestFetcher(currentColo);
-  const providers = await listModelProviders(upstreamIds);
-
-  // See resolve.ts for the alias-resolves-once-above-prefix-routing contract.
-  // The endpoint predicate piggybacks on `pickTarget` so the resolver's pool
-  // narrows to targets whose binding exposes one of the chat surfaces the
-  // source serve actually wants.
-  const aliasResolution = await resolveAlias({
-    modelName: model,
-    providers,
-    fetcherForUpstream,
-    scheduler,
-    endpointAccepts: endpoints => pickTarget(endpoints) !== null,
-    repo: getRepo().modelAliases,
-  });
-  const effectiveModel = aliasResolution?.targetModelId ?? model;
-
-  // Each (provider, lookupId) interpretation describes one way the inbound
-  // id can address an upstream — bare form for `[unprefixed]`-addressable
-  // upstreams, stripped form for `[prefixed]`-addressable upstreams when the
-  // inbound starts with the configured prefix. A dual-addressable upstream
-  // contributes both when applicable. The fan-out is shared with
-  // `resolveModelForRequest`; first-viable-wins ordering follows configured
-  // sort_order across upstreams, with the unprefixed interpretation pushed
-  // before the prefixed one within a single upstream.
-  const interpretations = enumerateModelInterpretations(effectiveModel, providers);
-  const { resolutions, failedUpstreams } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
-
-  const candidates: ChatCandidate[] = [];
-  let sawModel = false;
-
-  for (const { provider, resolved } of resolutions) {
-    sawModel = true;
-    const targetApi = pickTarget(resolved.binding.upstreamModel.endpoints);
-    if (!targetApi) continue;
-    candidates.push({ provider, binding: resolved.binding, targetApi, fetcher: fetcherForUpstream(provider.upstream) });
-  }
-
-  return {
-    candidates,
-    sawModel,
-    failedUpstreams,
-    aliasResolution,
-  };
-};
diff --git a/packages/gateway/src/data-plane/chat/shared/candidates_test.ts b/packages/gateway/src/data-plane/chat/shared/candidates_test.ts
index 627b631d5..aabe4ef3c 100644
--- a/packages/gateway/src/data-plane/chat/shared/candidates_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/candidates_test.ts
@@ -1,8 +1,8 @@
 import { describe, test } from 'vitest';
 
-import { enumerateProviderCandidates } from './candidates.ts';
 import { buildCustomUpstreamRecord, setupAppTest } from '../../../test-helpers.ts';
 import { clearInFlightForTesting } from '../../providers/models-cache.ts';
+import { resolveModelCandidates } from '../../providers/registry.ts';
 import type { ModelEndpoints } from '@floway-dev/protocols/common';
 import type { ChatTargetApi, UpstreamRecord } from '@floway-dev/provider';
 import { assertEquals, jsonResponse, withMockedFetch } from '@floway-dev/test-utils';
@@ -46,15 +46,15 @@ const pickResponses = (e: ModelEndpoints): ChatTargetApi | null =>
 const pickAny = (e: ModelEndpoints): ChatTargetApi | null =>
   e.messages ? 'messages' : e.responses ? 'responses' : e.chatCompletions ? 'chat-completions' : null;
 
-describe('enumerateProviderCandidates', () => {
+describe('resolveModelCandidates', () => {
   test('single provider with a matching binding yields one candidate', async () => {
     const { repo } = await setupAppTest();
     await repo.upstreams.deleteAll();
     await repo.upstreams.save(azureUpstream('up_a', 10, ['test-model'], { messages: {} }));
 
-    const { candidates, sawModel } = await enumerateProviderCandidates({
+    const { candidates, sawModel } = await resolveModelCandidates({
       upstreamIds: null,
-      model: 'test-model',
+      modelName: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
       currentColo: 'TEST',
@@ -72,9 +72,9 @@ describe('enumerateProviderCandidates', () => {
     await repo.upstreams.deleteAll();
     await repo.upstreams.save(azureUpstream('up_chat', 10, ['test-model'], { chatCompletions: {} }));
 
-    const { candidates, sawModel } = await enumerateProviderCandidates({
+    const { candidates, sawModel } = await resolveModelCandidates({
       upstreamIds: null,
-      model: 'test-model',
+      modelName: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
       currentColo: 'TEST',
@@ -92,9 +92,9 @@ describe('enumerateProviderCandidates', () => {
     await repo.upstreams.deleteAll();
     await repo.upstreams.save(azureUpstream('up_a', 10, ['other-model'], { messages: {} }));
 
-    const { candidates, sawModel } = await enumerateProviderCandidates({
+    const { candidates, sawModel } = await resolveModelCandidates({
       upstreamIds: null,
-      model: 'test-model',
+      modelName: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
       currentColo: 'TEST',
@@ -111,9 +111,9 @@ describe('enumerateProviderCandidates', () => {
     await repo.upstreams.save(azureUpstream('up_second', 20, ['other-model'], { messages: {} }));
     await repo.upstreams.save(azureUpstream('up_third', 30, ['test-model'], { messages: {} }));
 
-    const { candidates } = await enumerateProviderCandidates({
+    const { candidates } = await resolveModelCandidates({
       upstreamIds: null,
-      model: 'test-model',
+      modelName: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
       currentColo: 'TEST',
@@ -131,9 +131,9 @@ describe('enumerateProviderCandidates', () => {
     await repo.upstreams.save(azureUpstream('up_b', 20, ['test-model'], { messages: {} }));
     await repo.upstreams.save(azureUpstream('up_c', 30, ['test-model'], { messages: {} }));
 
-    const { candidates } = await enumerateProviderCandidates({
+    const { candidates } = await resolveModelCandidates({
       upstreamIds: ['up_c', 'up_a'],
-      model: 'test-model',
+      modelName: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
       currentColo: 'TEST',
@@ -153,9 +153,9 @@ describe('enumerateProviderCandidates', () => {
       enabled: false,
     });
 
-    const { candidates } = await enumerateProviderCandidates({
+    const { candidates } = await resolveModelCandidates({
       upstreamIds: null,
-      model: 'test-model',
+      modelName: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
       currentColo: 'TEST',
@@ -170,9 +170,9 @@ describe('enumerateProviderCandidates', () => {
     await repo.upstreams.deleteAll();
     await repo.upstreams.save(azureUpstream('up_multi', 10, ['test-model'], { messages: {}, responses: {} }));
 
-    const { candidates: msgCandidates } = await enumerateProviderCandidates({
+    const { candidates: msgCandidates } = await resolveModelCandidates({
       upstreamIds: null,
-      model: 'test-model',
+      modelName: 'test-model',
       pickTarget: pickMessagesOrResponses,
       scheduler: testScheduler,
       currentColo: 'TEST',
@@ -180,9 +180,9 @@ describe('enumerateProviderCandidates', () => {
     assertEquals(msgCandidates.length, 1);
     assertEquals(msgCandidates[0].targetApi, 'messages');
 
-    const { candidates: resCandidates } = await enumerateProviderCandidates({
+    const { candidates: resCandidates } = await resolveModelCandidates({
       upstreamIds: null,
-      model: 'test-model',
+      modelName: 'test-model',
       pickTarget: pickResponses,
       scheduler: testScheduler,
       currentColo: 'TEST',
@@ -196,9 +196,9 @@ describe('enumerateProviderCandidates', () => {
     await repo.upstreams.deleteAll();
     await repo.upstreams.save(azureUpstream('up_chat', 10, ['test-model'], { chatCompletions: {} }));
 
-    const { candidates: anyCandidates } = await enumerateProviderCandidates({
+    const { candidates: anyCandidates } = await resolveModelCandidates({
       upstreamIds: null,
-      model: 'test-model',
+      modelName: 'test-model',
       pickTarget: pickAny,
       scheduler: testScheduler,
       currentColo: 'TEST',
@@ -206,9 +206,9 @@ describe('enumerateProviderCandidates', () => {
     assertEquals(anyCandidates.length, 1);
     assertEquals(anyCandidates[0].targetApi, 'chat-completions');
 
-    const { candidates: msgCandidates, sawModel } = await enumerateProviderCandidates({
+    const { candidates: msgCandidates, sawModel } = await resolveModelCandidates({
       upstreamIds: null,
-      model: 'test-model',
+      modelName: 'test-model',
       pickTarget: pickMessages,
       scheduler: testScheduler,
       currentColo: 'TEST',
@@ -243,9 +243,9 @@ describe('enumerateProviderCandidates', () => {
         throw new Error(`Unhandled fetch ${request.url}`);
       },
       async () => {
-        const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
+        const { candidates, sawModel, failedUpstreams } = await resolveModelCandidates({
           upstreamIds: null,
-          model: 'test-model',
+          modelName: 'test-model',
           pickTarget: pickMessages,
           scheduler: testScheduler,
           currentColo: 'TEST',
@@ -286,9 +286,9 @@ describe('enumerateProviderCandidates', () => {
         throw new Error(`Unhandled fetch ${request.url}`);
       },
       async () => {
-        const { candidates, sawModel, failedUpstreams } = await enumerateProviderCandidates({
+        const { candidates, sawModel, failedUpstreams } = await resolveModelCandidates({
           upstreamIds: null,
-          model: 'test-model',
+          modelName: 'test-model',
           pickTarget: pickMessages,
           scheduler: testScheduler,
           currentColo: 'TEST',
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index dfa068f63..b6d41a789 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -1,5 +1,6 @@
 import { unionEndpoints } from './endpoint-union.ts';
 import { fetchUpstreamModelsCached } from './models-cache.ts';
+import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import { getRepo } from '../../repo/index.ts';
 import { type AliasResolution, resolveAlias } from '../model-aliases/resolve.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
@@ -259,20 +260,31 @@ export const getInternalModels = async (
 ): Promise<InternalModel[]> =>
   (await getModels(upstreamFilter, fetcherForUpstream, scheduler)).map(({ providers: _providers, endpoints: _endpoints, ...model }) => model);
 
-interface ModelResolution {
-  matches: readonly ProviderModelResolution[];
+interface ResolveCandidatesResult<TTarget> {
+  readonly candidates: ReadonlyArray<{
+    readonly provider: ModelProviderInstance;
+    readonly binding: ProviderModelRecord;
+    readonly targetApi: TTarget;
+    readonly fetcher: Fetcher;
+  }>;
+  // True when at least one interpretation resolved against an upstream
+  // catalog — independent of whether `pickTarget` accepted any of them.
+  // Lets a caller distinguish "model is unknown to every configured
+  // upstream" (sawModel=false) from "model exists but no binding serves
+  // this endpoint" (sawModel=true, candidates=[]).
+  readonly sawModel: boolean;
   // Upstream names whose catalog fetch rejected during this resolution.
   // Threaded out so the caller's failure renderer can mention them
   // parenthetically — same data the dashboard's `modelsCache.lastError`
   // surfaces, but inlined into the per-request 404/400 so a client sees
   // why their model might be temporarily missing.
-  failedUpstreams: readonly string[];
+  readonly failedUpstreams: readonly string[];
   // Set when the inbound id resolved through the alias layer. Callers
   // stage the `x-floway-alias` response header from this and ignore it
   // otherwise. `AliasNoTargetAvailableError` is thrown out of
-  // `resolveModelForRequest` itself when the alias exists but has no
+  // `resolveModelCandidates` itself when the alias exists but has no
   // routable target, and is caught at each protocol's serve seam.
-  aliasResolution: AliasResolution | null;
+  readonly aliasResolution: AliasResolution | null;
 }
 
 export interface ProviderModelResolution {
@@ -313,10 +325,11 @@ export const enumerateModelInterpretations = (
 
 // Fan out per-interpretation against the SWR cache and collect the resolved
 // matches plus a deduped list of upstreams whose catalog fetch rejected.
-// Shared by `resolveModelForRequest` and `enumerateProviderCandidates`; the
-// per-caller divergence (passthrough vs LLM-candidate shape) happens after
-// this returns. Cancellation (`AbortError`) propagates so the per-request
-// abort signal cannot be masked by a slow upstream's rejection.
+// `resolveModelCandidates` consumes this directly; the per-caller divergence
+// (chat-shape `ChatTargetApi` vs passthrough-shape `ModelEndpointKey`) is
+// handled at the `pickTarget` boundary. Cancellation (`AbortError`)
+// propagates so the per-request abort signal cannot be masked by a slow
+// upstream's rejection.
 export const collectInterpretationOutcomes = async (
   interpretations: readonly ModelInterpretation[],
   fetcherForUpstream: (upstreamId: string) => Fetcher,
@@ -354,40 +367,65 @@ export const collectInterpretationOutcomes = async (
   return { resolutions, failedUpstreams };
 };
 
-export const resolveModelForRequest = async (
-  modelId: string,
-  upstreamFilter: readonly string[] | null,
-  fetcherForUpstream: (upstreamId: string) => Fetcher,
-  scheduler: BackgroundScheduler,
-  // Predicate the alias resolver uses to narrow its target pool to
-  // bindings whose endpoint map serves the inbound endpoint. Passthrough
-  // callers pass `endpoints => endpoints[targetEndpointKey] !== undefined`
-  // so first-available / random alias selection picks a target the prefix
-  // router can serve end-to-end. The default accepts any binding — used
-  // by call sites whose own endpoint targeting happens downstream (e.g.
-  // the Responses image-generation tool's internal model resolve).
-  endpointAccepts: (endpoints: ModelEndpoints) => boolean = () => true,
-): Promise<ModelResolution> => {
-  const providers = await listModelProviders(upstreamFilter);
+// One resolver wrapper above the shared `resolveAlias` + interpretation +
+// outcome-collection pipeline, generic on the caller's target descriptor.
+// Chat surfaces pass a `pickTarget` returning `ChatTargetApi | null`;
+// passthrough surfaces (embeddings, image generation, /v1/completions) pass
+// one returning `ModelEndpointKey | null`. The endpoint-narrowing predicate
+// the alias resolver needs piggybacks on `pickTarget` — a target whose
+// binding does not serve any acceptable surface is dropped from the alias's
+// first-available / random pool so selection can never pick a binding the
+// downstream serve cannot dispatch on.
+//
+// See resolve.ts for the alias-resolves-once-above-prefix-routing contract.
+// Throws `Error(NO_UPSTREAM_CONFIGURED_MESSAGE)` when no provider is visible;
+// the alias-only `AliasNoTargetAvailableError` surfaces for the caller's 404.
+export const resolveModelCandidates = async <TTarget>(args: {
+  readonly modelName: string;
+  // null = unrestricted; empty list = no providers visible.
+  readonly upstreamIds: readonly string[] | null;
+  readonly scheduler: BackgroundScheduler;
+  // Current colo for this request — see GatewayCtx.currentColo. Threaded
+  // into the per-request fetcher so colo-scoped fallback entries can be
+  // honoured at dial time.
+  readonly currentColo: string;
+  readonly pickTarget: (endpoints: ModelEndpoints) => TTarget | null;
+}): Promise<ResolveCandidatesResult<TTarget>> => {
+  const { modelName, upstreamIds, scheduler, currentColo, pickTarget } = args;
+  const fetcherForUpstream = await createPerRequestFetcher(currentColo);
+  const providers = await listModelProviders(upstreamIds);
   if (providers.length === 0) {
     throw new Error(NO_UPSTREAM_CONFIGURED_MESSAGE);
   }
 
-  // See resolve.ts for the alias-resolves-once-above-prefix-routing contract.
   const aliasResolution = await resolveAlias({
-    modelName: modelId,
+    modelName,
     providers,
     fetcherForUpstream,
     scheduler,
-    endpointAccepts,
+    endpointAccepts: endpoints => pickTarget(endpoints) !== null,
     repo: getRepo().modelAliases,
   });
-  const effectiveModelId = aliasResolution?.targetModelId ?? modelId;
+  const effectiveModelId = aliasResolution?.targetModelId ?? modelName;
 
   const interpretations = enumerateModelInterpretations(effectiveModelId, providers);
   const { resolutions, failedUpstreams } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
+
+  const candidates: Array<{
+    provider: ModelProviderInstance;
+    binding: ProviderModelRecord;
+    targetApi: TTarget;
+    fetcher: Fetcher;
+  }> = [];
+  for (const { provider, resolved } of resolutions) {
+    const targetApi = pickTarget(resolved.binding.upstreamModel.endpoints);
+    if (targetApi === null) continue;
+    candidates.push({ provider, binding: resolved.binding, targetApi, fetcher: fetcherForUpstream(provider.upstream) });
+  }
+
   return {
-    matches: resolutions.map(r => r.resolved),
+    candidates,
+    sawModel: resolutions.length > 0,
     failedUpstreams,
     aliasResolution,
   };
diff --git a/packages/gateway/src/data-plane/providers/registry_test.ts b/packages/gateway/src/data-plane/providers/registry_test.ts
index c330abfee..22cb1f165 100644
--- a/packages/gateway/src/data-plane/providers/registry_test.ts
+++ b/packages/gateway/src/data-plane/providers/registry_test.ts
@@ -1,7 +1,7 @@
 import { describe, expect, test } from 'vitest';
 
 import { clearInFlightForTesting } from './models-cache.ts';
-import { compareModelIds, enumerateModelInterpretations, getInternalModels, listModelProviders, resolveModelForProvider, resolveModelForRequest } from './registry.ts';
+import { compareModelIds, enumerateModelInterpretations, getInternalModels, listModelProviders, resolveModelCandidates, resolveModelForProvider } from './registry.ts';
 import { buildCopilotUpstreamRecord, buildCustomUpstreamRecord, copilotModels, setupAppTest } from '../../test-helpers.ts';
 import { directFetcher, type ModelProviderInstance } from '@floway-dev/provider';
 import { createCopilotProvider } from '@floway-dev/provider-copilot';
@@ -9,6 +9,13 @@ import { assertEquals, jsonResponse, stubProvider, withMockedFetch } from '@flow
 
 const sortedIds = (ids: readonly string[]): string[] => [...ids].sort(compareModelIds);
 
+// `resolveModelCandidates` requires a target descriptor; tests that just want
+// to verify routing wiring (not endpoint filtering) accept any binding by
+// returning a fixed sentinel string. Every test fixture below declares at
+// least one endpoint on its bindings, so the pick is never asked to reason
+// about an empty `endpoints` map.
+const acceptAnyBinding = () => 'any' as const;
+
 // Drains the background revalidate promise so its rejection surfaces in the
 // test runner instead of being swallowed.
 const testScheduler = (promise: Promise<unknown>): void => {
@@ -189,16 +196,22 @@ test('getInternalModels returns the catalog projection without execution binding
       assertEquals(Object.hasOwn(model!, 'providers'), false);
       assertEquals(Object.hasOwn(model!, 'providerData'), false);
 
-      const resolved = await resolveModelForRequest('shared-model', null, () => directFetcher, testScheduler);
-      assertEquals(resolved.matches.map(m => m.binding.upstream), ['up_copilot', 'up_custom']);
-      // Each match carries its own per-provider endpoints — no merge.
-      assertEquals(resolved.matches[0]?.model.endpoints, { messages: {} });
-      assertEquals(resolved.matches[1]?.model.endpoints, { chatCompletions: {} });
+      const resolved = await resolveModelCandidates({
+        modelName: 'shared-model',
+        upstreamIds: null,
+        scheduler: testScheduler,
+        currentColo: 'TEST',
+        pickTarget: acceptAnyBinding,
+      });
+      assertEquals(resolved.candidates.map(c => c.binding.upstream), ['up_copilot', 'up_custom']);
+      // Each candidate carries its own per-provider endpoints — no merge.
+      assertEquals(resolved.candidates[0]?.binding.upstreamModel.endpoints, { messages: {} });
+      assertEquals(resolved.candidates[1]?.binding.upstreamModel.endpoints, { chatCompletions: {} });
     },
   );
 });
 
-test('resolveModelForRequest applies provider-owned aliases only to that provider', async () => {
+test('resolveModelCandidates applies provider-owned aliases only to that provider', async () => {
   const { repo } = await setupAppTest();
 
   await repo.upstreams.save(
@@ -240,14 +253,20 @@ test('resolveModelForRequest applies provider-owned aliases only to that provide
       throw new Error(`Unhandled fetch ${request.url}`);
     },
     async () => {
-      const resolved = await resolveModelForRequest('claude-opus-4-7-20300101', null, () => directFetcher, testScheduler);
+      const resolved = await resolveModelCandidates({
+        modelName: 'claude-opus-4-7-20300101',
+        upstreamIds: null,
+        scheduler: testScheduler,
+        currentColo: 'TEST',
+        pickTarget: acceptAnyBinding,
+      });
 
       // Only the Copilot upstream's `resolveRequestedModelId` aliases the
       // dated id back to `claude-opus-4-7`; the custom upstream resolves
       // nothing for the dated id, so only one match emerges.
-      assertEquals(resolved.matches.map(m => m.binding.upstream), ['up_copilot']);
-      assertEquals(resolved.matches[0]?.id, 'claude-opus-4-7');
-      assertEquals(resolved.matches[0]?.model.endpoints, { messages: {} });
+      assertEquals(resolved.candidates.map(c => c.binding.upstream), ['up_copilot']);
+      assertEquals(resolved.candidates[0]?.binding.upstreamModel.id, 'claude-opus-4-7');
+      assertEquals(resolved.candidates[0]?.binding.upstreamModel.endpoints, { messages: {} });
     },
   );
 });
@@ -364,16 +383,16 @@ test('disabledPublicModelIds hides models from the catalog and routing, per upst
   assertEquals([...catalog.map(m => m.id)].sort(), ['gpt-keep', 'gpt-shared']);
 
   // The solo and override ids resolve to nothing (hidden + unroutable).
-  assertEquals((await resolveModelForRequest('gpt-solo', null, () => directFetcher, testScheduler)).matches.length, 0);
-  assertEquals((await resolveModelForRequest('gpt-override', null, () => directFetcher, testScheduler)).matches.length, 0);
+  assertEquals((await resolveModelCandidates({ modelName: 'gpt-solo', upstreamIds: null, scheduler: testScheduler, currentColo: 'TEST', pickTarget: acceptAnyBinding })).candidates.length, 0);
+  assertEquals((await resolveModelCandidates({ modelName: 'gpt-override', upstreamIds: null, scheduler: testScheduler, currentColo: 'TEST', pickTarget: acceptAnyBinding })).candidates.length, 0);
 
   // The shared id survives because up_b allows it; only up_b binds it.
-  const shared = await resolveModelForRequest('gpt-shared', null, () => directFetcher, testScheduler);
-  assertEquals(shared.matches.map(m => m.binding.upstream), ['up_b']);
+  const shared = await resolveModelCandidates({ modelName: 'gpt-shared', upstreamIds: null, scheduler: testScheduler, currentColo: 'TEST', pickTarget: acceptAnyBinding });
+  assertEquals(shared.candidates.map(c => c.binding.upstream), ['up_b']);
 
   // The untouched model still routes from up_a.
-  const keep = await resolveModelForRequest('gpt-keep', null, () => directFetcher, testScheduler);
-  assertEquals(keep.matches.map(m => m.binding.upstream), ['up_a']);
+  const keep = await resolveModelCandidates({ modelName: 'gpt-keep', upstreamIds: null, scheduler: testScheduler, currentColo: 'TEST', pickTarget: acceptAnyBinding });
+  assertEquals(keep.candidates.map(c => c.binding.upstream), ['up_a']);
 });
 
 test('resolveModelForProvider rejects a model id disabled on that upstream (filter parity with the catalog)', async () => {
@@ -532,7 +551,7 @@ test('getInternalModels: a rejected provider does not block other providers', as
 // site asking for a model belonging to one of the *healthy* upstreams must
 // still resolve. The broken upstream's display name flows back via
 // `failedUpstreams` so the eventual error renderer can mention it.
-test('resolveModelForRequest: healthy upstream still resolves alongside a rejecting one, with failedUpstreams reported', async () => {
+test('resolveModelCandidates: healthy upstream still resolves alongside a rejecting one, with failedUpstreams reported', async () => {
   clearInFlightForTesting();
   const { repo } = await setupAppTest();
   await repo.upstreams.deleteAll();
@@ -562,17 +581,17 @@ test('resolveModelForRequest: healthy upstream still resolves alongside a reject
       throw new Error(`Unhandled fetch ${request.url}`);
     },
     async () => {
-      const resolvedExisting = await resolveModelForRequest('ok-model', null, () => directFetcher, testScheduler);
-      assertEquals(resolvedExisting.matches.map(m => m.binding.upstream), ['up_ok']);
-      assertEquals(resolvedExisting.matches[0]?.id, 'ok-model');
+      const resolvedExisting = await resolveModelCandidates({ modelName: 'ok-model', upstreamIds: null, scheduler: testScheduler, currentColo: 'TEST', pickTarget: acceptAnyBinding });
+      assertEquals(resolvedExisting.candidates.map(c => c.binding.upstream), ['up_ok']);
+      assertEquals(resolvedExisting.candidates[0]?.binding.upstreamModel.id, 'ok-model');
       assertEquals(resolvedExisting.failedUpstreams, ['Broken upstream']);
 
       // A model nobody currently knows about must NOT rethrow the broken
       // upstream's catalog error — the caller's failure renderer is the right
       // place to surface that, parenthetically, alongside the model-missing
       // body.
-      const resolvedMissing = await resolveModelForRequest('unknown-model', null, () => directFetcher, testScheduler);
-      assertEquals(resolvedMissing.matches.length, 0);
+      const resolvedMissing = await resolveModelCandidates({ modelName: 'unknown-model', upstreamIds: null, scheduler: testScheduler, currentColo: 'TEST', pickTarget: acceptAnyBinding });
+      assertEquals(resolvedMissing.candidates.length, 0);
       assertEquals(resolvedMissing.failedUpstreams, ['Broken upstream']);
     },
   );
@@ -719,14 +738,14 @@ describe('catalog listing under modelPrefix', () => {
         // the prefixed surface, so a byId-based routing lookup against the
         // stripped bare id would miss. Routing must instead consult each
         // scoped upstream's own catalog, where the bare id is always present.
-        const resolved = await resolveModelForRequest('or/gpt-4o', null, () => directFetcher, testScheduler);
-        assertEquals(resolved.matches.map(m => m.binding.upstream), ['up_prefixed']);
-        assertEquals(resolved.matches[0]?.id, 'gpt-4o');
+        const resolved = await resolveModelCandidates({ modelName: 'or/gpt-4o', upstreamIds: null, scheduler: testScheduler, currentColo: 'TEST', pickTarget: acceptAnyBinding });
+        assertEquals(resolved.candidates.map(c => c.binding.upstream), ['up_prefixed']);
+        assertEquals(resolved.candidates[0]?.binding.upstreamModel.id, 'gpt-4o');
 
         // The bare-id request must NOT route to a prefix-only-addressable
         // upstream, regardless of routing path.
-        const bare = await resolveModelForRequest('gpt-4o', null, () => directFetcher, testScheduler);
-        assertEquals(bare.matches.length, 0);
+        const bare = await resolveModelCandidates({ modelName: 'gpt-4o', upstreamIds: null, scheduler: testScheduler, currentColo: 'TEST', pickTarget: acceptAnyBinding });
+        assertEquals(bare.candidates.length, 0);
       },
     );
   });
@@ -758,16 +777,16 @@ describe('catalog listing under modelPrefix', () => {
         const catalog = await getInternalModels(null, () => directFetcher, testScheduler);
         assertEquals(catalog.map(m => m.id), ['or/gpt-4o']);
 
-        const bare = await resolveModelForRequest('gpt-4o', null, () => directFetcher, testScheduler);
-        assertEquals(bare.matches.map(m => m.binding.upstream), ['up_dual_addressable']);
-        assertEquals(bare.matches[0]?.id, 'gpt-4o');
+        const bare = await resolveModelCandidates({ modelName: 'gpt-4o', upstreamIds: null, scheduler: testScheduler, currentColo: 'TEST', pickTarget: acceptAnyBinding });
+        assertEquals(bare.candidates.map(c => c.binding.upstream), ['up_dual_addressable']);
+        assertEquals(bare.candidates[0]?.binding.upstreamModel.id, 'gpt-4o');
 
         // The prefixed request enumerates both forms against `up_dual_addressable`:
         // the unprefixed lookup (`or/gpt-4o`) misses the upstream catalog, and
         // the prefix-stripped lookup (`gpt-4o`) hits — yielding a single match.
-        const prefixed = await resolveModelForRequest('or/gpt-4o', null, () => directFetcher, testScheduler);
-        assertEquals(prefixed.matches.map(m => m.binding.upstream), ['up_dual_addressable']);
-        assertEquals(prefixed.matches[0]?.id, 'gpt-4o');
+        const prefixed = await resolveModelCandidates({ modelName: 'or/gpt-4o', upstreamIds: null, scheduler: testScheduler, currentColo: 'TEST', pickTarget: acceptAnyBinding });
+        assertEquals(prefixed.candidates.map(c => c.binding.upstream), ['up_dual_addressable']);
+        assertEquals(prefixed.candidates[0]?.binding.upstreamModel.id, 'gpt-4o');
       },
     );
   });
@@ -809,14 +828,14 @@ describe('catalog listing under modelPrefix', () => {
         // Both upstreams enumerate against the bare id: up_plain via its only
         // form, up_dual via the unprefixed interpretation. Order follows the
         // configured sort_order across providers, then FORM_ORDER within one.
-        const bare = await resolveModelForRequest('gpt-4o', null, () => directFetcher, testScheduler);
-        assertEquals(bare.matches.map(m => m.binding.upstream), ['up_plain', 'up_dual']);
+        const bare = await resolveModelCandidates({ modelName: 'gpt-4o', upstreamIds: null, scheduler: testScheduler, currentColo: 'TEST', pickTarget: acceptAnyBinding });
+        assertEquals(bare.candidates.map(c => c.binding.upstream), ['up_plain', 'up_dual']);
 
         // The prefixed id resolves only against up_dual: up_plain's catalog
         // does not contain `or/gpt-4o`, and up_dual's prefix-stripped lookup
         // hits its catalog's bare `gpt-4o`.
-        const prefixed = await resolveModelForRequest('or/gpt-4o', null, () => directFetcher, testScheduler);
-        assertEquals(prefixed.matches.map(m => m.binding.upstream), ['up_dual']);
+        const prefixed = await resolveModelCandidates({ modelName: 'or/gpt-4o', upstreamIds: null, scheduler: testScheduler, currentColo: 'TEST', pickTarget: acceptAnyBinding });
+        assertEquals(prefixed.candidates.map(c => c.binding.upstream), ['up_dual']);
       },
     );
   });
@@ -899,9 +918,9 @@ describe('catalog listing under modelPrefix', () => {
         throw new Error(`Unhandled fetch ${request.url}`);
       },
       async () => {
-        const resolved = await resolveModelForRequest('aa/bb/gpt-5', null, () => directFetcher, testScheduler);
-        assertEquals(resolved.matches.map(m => m.binding.upstream), ['up_short_prefix', 'up_long_prefix', 'up_bare']);
-        assertEquals(resolved.matches.map(m => m.id), ['bb/gpt-5', 'gpt-5', 'aa/bb/gpt-5']);
+        const resolved = await resolveModelCandidates({ modelName: 'aa/bb/gpt-5', upstreamIds: null, scheduler: testScheduler, currentColo: 'TEST', pickTarget: acceptAnyBinding });
+        assertEquals(resolved.candidates.map(c => c.binding.upstream), ['up_short_prefix', 'up_long_prefix', 'up_bare']);
+        assertEquals(resolved.candidates.map(c => c.binding.upstreamModel.id), ['bb/gpt-5', 'gpt-5', 'aa/bb/gpt-5']);
       },
     );
   });
diff --git a/packages/gateway/src/data-plane/shared/passthrough-serve.ts b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
index 582f97529..2b7c662d5 100644
--- a/packages/gateway/src/data-plane/shared/passthrough-serve.ts
+++ b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
@@ -19,14 +19,13 @@ import { inboundHeadersForUpstream } from './inbound-headers.ts';
 import type { PerformanceTelemetryContext } from './telemetry/performance.ts';
 import { createUpstreamLatencyRecorder, recordPerformanceError, recordPerformanceLatency, recordRequestPerformance, requireRecordedDurationMs } from './telemetry/performance.ts';
 import { recordTokenUsage } from './telemetry/usage.ts';
-import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import type { AuthedContext } from '../../middleware/auth.ts';
 import type { TokenUsage } from '../../repo/types.ts';
 import type { GatewayCtx } from '../chat/shared/gateway-ctx.ts';
 import { type StreamCompletion, writeSSEFrames } from '../chat/shared/stream/sse.ts';
 import { ALIAS_RESPONSE_HEADER } from '../model-aliases/apply.ts';
 import { AliasNoTargetAvailableError } from '../model-aliases/resolve.ts';
-import { resolveModelForRequest } from '../providers/registry.ts';
+import { resolveModelCandidates } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import { doneFrame, eventFrame, type ModelEndpointKey, parseSSEStream, parseTargetStreamFrames, type ProtocolFrame, sseCommentFrame, sseFrame } from '@floway-dev/protocols/common';
 import { httpResponseToResponse, ProviderModelsUnavailableError, toInternalDebugError } from '@floway-dev/provider';
@@ -131,17 +130,23 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
   let lastPerformance: PerformanceTelemetryContext | undefined;
 
   try {
-    const fetcherForUpstream = await createPerRequestFetcher(ctx.currentColo);
-    // Each match is one (upstream, upstream-catalog id) pair that interprets
-    // the inbound public id. Iteration order follows configured sort_order
-    // across upstreams, with the unprefixed interpretation pushed before the
-    // prefixed one within a single upstream. The first match whose binding
-    // satisfies the endpoint capability wins. See resolve.ts for the
-    // alias-resolves-once-above-prefix-routing contract; `resolveModelForRequest`
-    // runs it and surfaces the result on `aliasResolution`.
+    // Each candidate is one (upstream, upstream-catalog id) pair that
+    // interprets the inbound public id AND advertises this passthrough's
+    // endpoint. Iteration order follows configured sort_order across
+    // upstreams, with the unprefixed interpretation pushed before the
+    // prefixed one within a single upstream. The first candidate wins.
+    // See resolve.ts for the alias-resolves-once-above-prefix-routing
+    // contract; `resolveModelCandidates` runs it and surfaces the result
+    // on `aliasResolution`.
     let resolution;
     try {
-      resolution = await resolveModelForRequest(model, ctx.upstreamIds, fetcherForUpstream, ctx.backgroundScheduler, endpoints => endpoints[endpointKey] !== undefined);
+      resolution = await resolveModelCandidates({
+        modelName: model,
+        upstreamIds: ctx.upstreamIds,
+        scheduler: ctx.backgroundScheduler,
+        currentColo: ctx.currentColo,
+        pickTarget: endpoints => endpoints[endpointKey] !== undefined ? endpointKey : null,
+      });
     } catch (e) {
       if (e instanceof AliasNoTargetAvailableError) {
         ctx.dump?.error('gateway');
@@ -149,133 +154,131 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
       }
       throw e;
     }
-    const { matches, failedUpstreams, aliasResolution } = resolution;
+    const { candidates, sawModel, failedUpstreams, aliasResolution } = resolution;
     if (aliasResolution !== null) ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
-    if (matches.length === 0) {
+    if (!sawModel) {
       ctx.dump?.error('gateway');
       return passthroughApiError(c, appendFailedUpstreams(`Model ${model} is not available on any configured upstream.`, failedUpstreams), 404);
     }
+    const [candidate] = candidates;
+    if (candidate === undefined) {
+      ctx.dump?.error('gateway');
+      return passthroughApiError(c, appendFailedUpstreams(`Model ${model} does not support the ${sourceApi} endpoint.`, failedUpstreams), 400);
+    }
 
-    for (const match of matches) {
-      if (match.binding.upstreamModel.endpoints[endpointKey] === undefined) continue;
+    const recorder = createUpstreamLatencyRecorder();
+    const { response, modelKey } = await call(candidate.binding, {
+      fetcher: candidate.fetcher,
+      recordUpstreamLatency: recorder.record,
+      waitUntil: ctx.backgroundScheduler,
+      headers: inboundHeadersForUpstream(c),
+    });
+    const upstreamDurationMs = requireRecordedDurationMs(recorder, 'passthrough upstream call');
+    // Telemetry keys on the upstream's bare catalog id; user-facing
+    // error bodies echo the inbound `model`.
+    const identity = {
+      model: candidate.binding.upstreamModel.id,
+      upstream: candidate.binding.upstream,
+      modelKey,
+      cost: candidate.binding.provider.getPricingForModelKey(modelKey),
+    };
+    const performanceContext: PerformanceTelemetryContext = {
+      keyId: ctx.apiKeyId,
+      ...identity,
+      stream: responseHandling.format === 'sse',
+      runtimeLocation: ctx.runtimeLocation,
+    };
+    lastPerformance = performanceContext;
 
-      const recorder = createUpstreamLatencyRecorder();
-      const { response, modelKey } = await call(match.binding, {
-        fetcher: fetcherForUpstream(match.binding.upstream),
-        recordUpstreamLatency: recorder.record,
-        waitUntil: ctx.backgroundScheduler,
-        headers: inboundHeadersForUpstream(c),
-      });
-      const upstreamDurationMs = requireRecordedDurationMs(recorder, 'passthrough upstream call');
-      // Telemetry keys on `match.id` (the upstream's bare catalog id);
-      // user-facing error bodies echo the inbound `model`.
-      const identity = {
-        model: match.id,
-        upstream: match.binding.upstream,
-        modelKey,
-        cost: match.binding.provider.getPricingForModelKey(modelKey),
-      };
-      const performanceContext: PerformanceTelemetryContext = {
-        keyId: ctx.apiKeyId,
-        ...identity,
-        stream: responseHandling.format === 'sse',
-        runtimeLocation: ctx.runtimeLocation,
-      };
-      lastPerformance = performanceContext;
+    if (!response.ok) {
+      recordUpstreamPerformance(ctx.backgroundScheduler, performanceContext, true, upstreamDurationMs);
+      recordRequestPerformance(ctx.backgroundScheduler, performanceContext, true, performance.now() - requestStartedAt);
+      ctx.dump?.error('upstream', candidate.binding.upstream);
+      return forwardUpstreamResponse(response);
+    }
 
-      if (!response.ok) {
-        recordUpstreamPerformance(ctx.backgroundScheduler, performanceContext, true, upstreamDurationMs);
-        recordRequestPerformance(ctx.backgroundScheduler, performanceContext, true, performance.now() - requestStartedAt);
-        ctx.dump?.error('upstream', match.binding.upstream);
-        return forwardUpstreamResponse(response);
-      }
+    recordUpstreamPerformance(ctx.backgroundScheduler, performanceContext, false, upstreamDurationMs);
 
-      recordUpstreamPerformance(ctx.backgroundScheduler, performanceContext, false, upstreamDurationMs);
+    if (responseHandling.format === 'json') {
+      // A 2xx body that fails to parse must not 502 a client whose
+      // upstream call already succeeded; we skip usage extraction and
+      // log so missing rows stay traceable.
+      let parsed: unknown;
+      try {
+        parsed = await response.clone().json();
+      } catch (e) {
+        console.warn(`passthrough-serve: failed to parse 2xx upstream body for ${sourceApi}; usage row will be skipped`, e instanceof Error ? e.message : String(e));
+        parsed = undefined;
+      }
+      const usage = parsed !== undefined ? responseHandling.extractBilling(parsed) : null;
+      ctx.dump?.success(identity, usage);
+      if (usage) {
+        scheduleUsageRecord(ctx.backgroundScheduler, recordTokenUsage(ctx.apiKeyId, identity, usage));
+      }
+      recordRequestPerformance(ctx.backgroundScheduler, performanceContext, false, performance.now() - requestStartedAt);
+      return forwardUpstreamResponse(response);
+    }
 
-      if (responseHandling.format === 'json') {
-        // A 2xx body that fails to parse must not 502 a client whose
-        // upstream call already succeeded; we skip usage extraction and
-        // log so missing rows stay traceable.
-        let parsed: unknown;
-        try {
-          parsed = await response.clone().json();
-        } catch (e) {
-          console.warn(`passthrough-serve: failed to parse 2xx upstream body for ${sourceApi}; usage row will be skipped`, e instanceof Error ? e.message : String(e));
-          parsed = undefined;
+    // Hono's streamSSE owns the response — forwardable upstream
+    // headers must be staged on `c` *before* the streamSSE call so
+    // they survive its internal newResponse.
+    const upstreamBody = response.body;
+    if (!upstreamBody) {
+      ctx.dump?.failed(`${sourceApi} streaming upstream returned no body`);
+      recordRequestPerformance(ctx.backgroundScheduler, performanceContext, true, performance.now() - requestStartedAt);
+      // Preserve upstream correlation headers (x-request-id, cf-ray, ...)
+      // on the synthesized 502 so this rare edge case is still traceable.
+      stageForwardedResponseHeaders(c, response);
+      return passthroughApiError(c, 'Upstream returned a streaming response with no body.', 502);
+    }
+    stageForwardedResponseHeaders(c, response);
+    return streamSSE(c, async stream => {
+      let completion: StreamCompletion = 'error';
+      let streamError: unknown;
+      // Tracks whether the upstream's terminal (`done`) frame arrived
+      // before the writer settled. A client cancel after the terminal
+      // frame is graceful (upstream already finished its work); a
+      // mid-stream cancel or EOF without terminal is a real failure.
+      // Mirrors SourceStreamState.failedAfter on the chat endpoints.
+      let terminalFrameSeen = false;
+      try {
+        const frames = (async function* () {
+          const sseFramesIn = parseSSEStream(upstreamBody, { signal: ctx.abortSignal });
+          for await (const parsed of parseTargetStreamFrames<unknown>(sseFramesIn, { protocol: sourceApi })) {
+            const inputFrame: ProtocolFrame<unknown> = parsed.type === 'done' ? doneFrame() : eventFrame(parsed.data);
+            // Dump pre-transform, so forensics see upstream truth even
+            // when the caller drops a frame from the client-facing stream.
+            ctx.dump?.frame(inputFrame);
+            if (inputFrame.type === 'done') terminalFrameSeen = true;
+            const outputFrame = responseHandling.transformFrame(inputFrame);
+            if (outputFrame === null) continue;
+            yield outputFrame.type === 'done' ? sseFrame('[DONE]') : sseFrame(JSON.stringify(outputFrame.event));
+          }
+        })();
+        completion = await writeSSEFrames(stream, frames, {
+          keepAlive: { frame: sseCommentFrame('keepalive') },
+          downstreamAbortController: ctx.downstreamAbortController,
+        });
+      } catch (e) {
+        streamError = e;
+      } finally {
+        const usage = responseHandling.settleUsage();
+        const failed = streamError !== undefined || completion === 'error' || !terminalFrameSeen;
+        if (failed) {
+          ctx.dump?.failed(streamError ?? `${sourceApi} stream ended with completion=${completion}`);
+        } else {
+          ctx.dump?.success(identity, usage);
         }
-        const usage = parsed !== undefined ? responseHandling.extractBilling(parsed) : null;
-        ctx.dump?.success(identity, usage);
+        // Record any accumulated usage regardless of the failed flag —
+        // tokens already metered upstream should bill even when the
+        // downstream half of the round-trip turned out badly. The chat
+        // streaming endpoints follow the same rule.
         if (usage) {
           scheduleUsageRecord(ctx.backgroundScheduler, recordTokenUsage(ctx.apiKeyId, identity, usage));
         }
-        recordRequestPerformance(ctx.backgroundScheduler, performanceContext, false, performance.now() - requestStartedAt);
-        return forwardUpstreamResponse(response);
-      }
-
-      // Hono's streamSSE owns the response — forwardable upstream
-      // headers must be staged on `c` *before* the streamSSE call so
-      // they survive its internal newResponse.
-      const upstreamBody = response.body;
-      if (!upstreamBody) {
-        ctx.dump?.failed(`${sourceApi} streaming upstream returned no body`);
-        recordRequestPerformance(ctx.backgroundScheduler, performanceContext, true, performance.now() - requestStartedAt);
-        // Preserve upstream correlation headers (x-request-id, cf-ray, ...)
-        // on the synthesized 502 so this rare edge case is still traceable.
-        stageForwardedResponseHeaders(c, response);
-        return passthroughApiError(c, 'Upstream returned a streaming response with no body.', 502);
+        recordRequestPerformance(ctx.backgroundScheduler, performanceContext, failed, performance.now() - requestStartedAt);
       }
-      stageForwardedResponseHeaders(c, response);
-      return streamSSE(c, async stream => {
-        let completion: StreamCompletion = 'error';
-        let streamError: unknown;
-        // Tracks whether the upstream's terminal (`done`) frame arrived
-        // before the writer settled. A client cancel after the terminal
-        // frame is graceful (upstream already finished its work); a
-        // mid-stream cancel or EOF without terminal is a real failure.
-        // Mirrors SourceStreamState.failedAfter on the chat endpoints.
-        let terminalFrameSeen = false;
-        try {
-          const frames = (async function* () {
-            const sseFramesIn = parseSSEStream(upstreamBody, { signal: ctx.abortSignal });
-            for await (const parsed of parseTargetStreamFrames<unknown>(sseFramesIn, { protocol: sourceApi })) {
-              const inputFrame: ProtocolFrame<unknown> = parsed.type === 'done' ? doneFrame() : eventFrame(parsed.data);
-              // Dump pre-transform, so forensics see upstream truth even
-              // when the caller drops a frame from the client-facing stream.
-              ctx.dump?.frame(inputFrame);
-              if (inputFrame.type === 'done') terminalFrameSeen = true;
-              const outputFrame = responseHandling.transformFrame(inputFrame);
-              if (outputFrame === null) continue;
-              yield outputFrame.type === 'done' ? sseFrame('[DONE]') : sseFrame(JSON.stringify(outputFrame.event));
-            }
-          })();
-          completion = await writeSSEFrames(stream, frames, {
-            keepAlive: { frame: sseCommentFrame('keepalive') },
-            downstreamAbortController: ctx.downstreamAbortController,
-          });
-        } catch (e) {
-          streamError = e;
-        } finally {
-          const usage = responseHandling.settleUsage();
-          const failed = streamError !== undefined || completion === 'error' || !terminalFrameSeen;
-          if (failed) {
-            ctx.dump?.failed(streamError ?? `${sourceApi} stream ended with completion=${completion}`);
-          } else {
-            ctx.dump?.success(identity, usage);
-          }
-          // Record any accumulated usage regardless of the failed flag —
-          // tokens already metered upstream should bill even when the
-          // downstream half of the round-trip turned out badly. The chat
-          // streaming endpoints follow the same rule.
-          if (usage) {
-            scheduleUsageRecord(ctx.backgroundScheduler, recordTokenUsage(ctx.apiKeyId, identity, usage));
-          }
-          recordRequestPerformance(ctx.backgroundScheduler, performanceContext, failed, performance.now() - requestStartedAt);
-        }
-      });
-    }
-
-    ctx.dump?.error('gateway');
-    return passthroughApiError(c, appendFailedUpstreams(`Model ${model} does not support the ${sourceApi} endpoint.`, failedUpstreams), 400);
+    });
   } catch (e) {
     if (e instanceof ProviderModelsUnavailableError) {
       const forwarded = httpResponseToResponse(e.httpResponse);

From 00587159565e21826c3758a95e833be8484b7779 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 15:07:55 +0800
Subject: [PATCH 113/170] fix(aliases/codex): codex catalog honors the
 operator-set context_window override
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`062d2efb` silently dropped the announcedMetadata override path on the
theory that the codex catalog should report the target's true ceiling
because that's what the upstream binding enforces. But codex reads
`context_window` / `max_context_window` from this catalog for its own
local gating (auto-compact, context-budget UX) — when an operator lowers
the alias's announced window via the dashboard, /v1/models and
/api/models honor it; the codex 1p catalog must agree on the same
operator intent, otherwise the two wire surfaces drift.

Prefer the alias entry's announced `limits.max_context_window_tokens`
(operator override OR computed-intersection default — `synthesizeListedAliases`
already does the right thing), and fall back to the first routable target's
window when the alias has no announced limit. Add a regression test
that pins the operator-override case across the codex catalog surface
specifically.
---
 .../gateway/src/data-plane/codex/models.ts    | 39 ++++++++++++-------
 .../src/data-plane/codex/routes_test.ts       | 38 ++++++++++++++++++
 2 files changed, 63 insertions(+), 14 deletions(-)

diff --git a/packages/gateway/src/data-plane/codex/models.ts b/packages/gateway/src/data-plane/codex/models.ts
index e74e232ba..033d93710 100644
--- a/packages/gateway/src/data-plane/codex/models.ts
+++ b/packages/gateway/src/data-plane/codex/models.ts
@@ -85,30 +85,41 @@ const computeCatalog = async (
   // this static listing: a `random` alias would refuse to publish a
   // stable context window, so the catalog uses first-available regardless
   // of the alias's runtime selection.
-  const aliasFirstTarget = new Map<string, string>();
+  //
+  // We keep both the alias's own announced limits (operator override OR
+  // the synthesizer's automatic intersection) AND the first routable
+  // target's window — the resolver below prefers the alias's announced
+  // value whenever one exists, falling back to the target's ceiling.
+  interface AliasCatalogInfo {
+    readonly firstTargetId: string;
+    readonly announcedContextWindow: number | undefined;
+  }
+  const aliasCatalogInfo = new Map<string, AliasCatalogInfo>();
   for (const entry of synthesizeListedAliases({ aliases, realModels })) {
     const aliasedFrom = entry.aliasedFrom;
     if (aliasedFrom === undefined) continue;
     const firstRoutable = aliasedFrom.targets.find(t => registrySlugs.has(t.target_model_id));
-    if (firstRoutable !== undefined) aliasFirstTarget.set(entry.id, firstRoutable.target_model_id);
+    if (firstRoutable === undefined) continue;
+    aliasCatalogInfo.set(entry.id, {
+      firstTargetId: firstRoutable.target_model_id,
+      announcedContextWindow: entry.limits.max_context_window_tokens,
+    });
   }
 
   const filtered: CodexCatalog = {
-    models: catalog.models.filter(m => registrySlugs.has(m.slug) || aliasFirstTarget.has(m.slug)),
+    models: catalog.models.filter(m => registrySlugs.has(m.slug) || aliasCatalogInfo.has(m.slug)),
   };
 
-  // For an alias slug: redirect to the first routable target's window so
-  // the published number is one the gateway can honour. Plain (non-alias)
-  // slugs read straight off the registry. Operator-set overrides on the
-  // alias's announced metadata travel through `synthesizeListedAliases`
-  // into the alias entry's own limits — but the codex catalog needs the
-  // *target's* window here, not the alias's announced one, because
-  // `applyContextWindowFromRegistry` writes both `context_window` and
-  // `max_context_window` and the upstream binding only enforces the
-  // target's real ceiling.
+  // For an alias slug: prefer the alias's announced window — that's what
+  // the operator told /v1/models to publish, and the codex client reads
+  // this number for its own local gating (auto-compact, context-budget
+  // UX), so the two wire surfaces must agree on operator intent. Fall back
+  // to the first routable target's window when the alias has no announced
+  // limit (e.g. multi-target alias with no operator override and no agreed
+  // intersection). Plain (non-alias) slugs read straight off the registry.
   const contextWindowOf: ContextWindowResolver = slug => {
-    const firstTargetId = aliasFirstTarget.get(slug);
-    if (firstTargetId !== undefined) return slugContextWindow.get(firstTargetId) ?? null;
+    const info = aliasCatalogInfo.get(slug);
+    if (info !== undefined) return info.announcedContextWindow ?? slugContextWindow.get(info.firstTargetId) ?? null;
     return slugContextWindow.get(slug) ?? null;
   };
   return applyContextWindowFromRegistry(filtered, contextWindowOf);
diff --git a/packages/gateway/src/data-plane/codex/routes_test.ts b/packages/gateway/src/data-plane/codex/routes_test.ts
index bcb9a2ec9..844c4d7c0 100644
--- a/packages/gateway/src/data-plane/codex/routes_test.ts
+++ b/packages/gateway/src/data-plane/codex/routes_test.ts
@@ -476,6 +476,44 @@ describe('codex 1p namespace', () => {
       expect(autoReview?.max_context_window).toBe(272000);
     });
 
+    it('honors an operator-set announcedMetadata.limits.max_context_window_tokens override on an alias slug', async () => {
+      // /v1/models, /api/models, /v1beta/models all honor the operator's
+      // announced limits override. The codex 1p catalog reads the same
+      // value because the codex client uses `context_window` /
+      // `max_context_window` for its own local gating (auto-compact,
+      // context-budget UX) — operator intent has to agree across all
+      // wire surfaces.
+      const { apiKey, repo } = await setupAppTest();
+      await repo.modelAliases.insert({
+        name: 'codex-auto-review',
+        kind: 'chat',
+        selection: 'first-available',
+        displayName: 'Codex Auto Review',
+        visibleInModelsList: true,
+        targets: [{ target_model_id: 'gpt-5.4', rules: {} }],
+        announcedMetadata: { limits: { max_context_window_tokens: 64000 } },
+        sortOrder: 0,
+        createdAt: '2026-01-01T00:00:00.000Z',
+        updatedAt: '2026-01-01T00:00:00.000Z',
+      });
+      const app = buildCodexApp();
+      const body = await withMockedFetch(
+        copilotFetch([{ id: 'gpt-5.4', maxContextWindowTokens: 272000 }]),
+        async () => {
+          const response = await app.request('/azure-api.codex/models', {
+            headers: { authorization: `Bearer ${apiKey.key}` },
+          });
+          expect(response.status).toBe(200);
+          return await response.json() as CodexModelsResponse;
+        },
+      );
+      const autoReview = body.models.find(m => m.slug === 'codex-auto-review');
+      // The operator pulled the alias's advertised window down to 64000;
+      // the target's true 272000 ceiling is irrelevant to the catalog.
+      expect(autoReview?.context_window).toBe(64000);
+      expect(autoReview?.max_context_window).toBe(64000);
+    });
+
     it('returns an empty catalog when the registry has no overlapping slugs', async () => {
       const { apiKey } = await setupAppTest();
       const app = buildCodexApp();

From aeb007cb6e5e64c24ae0f5c2c66b4536c02a0a5b Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 16:06:21 +0800
Subject: [PATCH 114/170] chore(aliases): merge migrations 0046 + 0047 into a
 single 0046
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The two migrations land together as part of this branch — splitting
them brings no benefit on production (where neither has run yet) and
adds a step for any future dev who applies them. Fold the
`announced_metadata_json TEXT` column into the `CREATE TABLE` body and
drop the now-redundant `0047_alias_announced_metadata.sql`.

On existing dev databases that already applied the old `0047`, delete
its row from `d1_migrations` so the ledger matches the on-disk
migration set (the column itself stays; the SQL definition is identical).
---
 packages/gateway/migrations/0046_model_aliases.sql          | 6 ++++++
 .../gateway/migrations/0047_alias_announced_metadata.sql    | 6 ------
 2 files changed, 6 insertions(+), 6 deletions(-)
 delete mode 100644 packages/gateway/migrations/0047_alias_announced_metadata.sql

diff --git a/packages/gateway/migrations/0046_model_aliases.sql b/packages/gateway/migrations/0046_model_aliases.sql
index ea20c4d81..fea35c093 100644
--- a/packages/gateway/migrations/0046_model_aliases.sql
+++ b/packages/gateway/migrations/0046_model_aliases.sql
@@ -5,6 +5,12 @@ CREATE TABLE model_aliases (
   display_name TEXT,
   visible_in_models_list INTEGER NOT NULL DEFAULT 1 CHECK (visible_in_models_list IN (0, 1)),
   targets TEXT NOT NULL,
+  -- Operator-set override for the `limits` + `chat.*` block surfaced on
+  -- /v1/models. NULL keeps the automatic, rule-aware intersection across
+  -- the alias's targets; a non-null value is a JSON-encoded
+  -- AnnouncedMetadata. Fallback is at the top-level sub-block boundary
+  -- (`limits` / `chat`), not per-leaf.
+  announced_metadata_json TEXT,
   sort_order INTEGER NOT NULL DEFAULT 0,
   created_at TEXT NOT NULL,
   updated_at TEXT NOT NULL
diff --git a/packages/gateway/migrations/0047_alias_announced_metadata.sql b/packages/gateway/migrations/0047_alias_announced_metadata.sql
deleted file mode 100644
index 1e57423c3..000000000
--- a/packages/gateway/migrations/0047_alias_announced_metadata.sql
+++ /dev/null
@@ -1,6 +0,0 @@
--- Operator-set override for an alias's announced metadata payload — the
--- `limits` + `chat.*` block surfaced on /v1/models. NULL keeps the
--- automatic, rule-aware intersection across the alias's targets; a
--- non-null value is a JSON-encoded AnnouncedMetadata. Fallback is at
--- the top-level sub-block boundary (`limits` / `chat`), not per-leaf.
-ALTER TABLE model_aliases ADD COLUMN announced_metadata_json TEXT;

From e48ea8a12d871e4b40dd703257621f86d0a8b83f Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 18:16:08 +0800
Subject: [PATCH 115/170] feat(provider): add enumerateAddressableRedirects
 hook + per-impl wiring
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A `ModelProviderInstance.resolveRequestedModelId(id)` is one-way: given
an inbound id it returns the redirected canonical id or undefined. The
reverse — every id that would redirect through it — is not derivable
from the signature. Add an optional sibling that lets a provider
publish the finite enumeration of those reverse mappings so a future
catalog-addressable-surface helper can mark prefix-variant / collapsed
ids as routable without rebuilding each provider's redirect rule.

- Copilot walks `providerData.rawModels` on the cached upstream catalog
  and emits every raw id whose `copilotPublicModelId` differs from
  itself.
- Claude Code returns `[]` deliberately: its redirect collapses any
  `<base>-YYYYMMDD` to the de-dated base, and the date axis is
  unbounded. The canonical base id is already in the catalog, so a
  power-user dated id still routes through `resolveRequestedModelId`
  at request time without being enumerated.
- Other providers leave the hook absent; the consumer must treat that
  identically to an empty list.

Additive — no caller yet.
---
 packages/provider-claude-code/src/provider.ts |  7 ++
 .../provider-claude-code/src/provider_test.ts |  7 ++
 packages/provider-copilot/src/provider.ts     | 26 +++++++-
 .../provider-copilot/src/provider_test.ts     | 64 +++++++++++++++++++
 packages/provider/src/index.ts                |  1 +
 packages/provider/src/provider.ts             | 27 ++++++++
 6 files changed, 131 insertions(+), 1 deletion(-)

diff --git a/packages/provider-claude-code/src/provider.ts b/packages/provider-claude-code/src/provider.ts
index af4a771b7..aa5b816c7 100644
--- a/packages/provider-claude-code/src/provider.ts
+++ b/packages/provider-claude-code/src/provider.ts
@@ -107,6 +107,13 @@ export const createClaudeCodeProvider = async (record: UpstreamRecord): Promise<
     provider,
     supportsResponsesItemReference: false,
     resolveRequestedModelId: claudeCodeResolveRequestedModelId,
+    // Claude Code's redirect rule collapses any `<base>-YYYYMMDD` id to its
+    // de-dated base. The date axis is unbounded, so there is no finite
+    // enumeration to publish — the canonical de-dated id is already in the
+    // catalog, and a power-user dated form still routes through the
+    // `resolveRequestedModelId` path at request time without showing up in
+    // the addressable surface.
+    enumerateAddressableRedirects: () => [],
   };
 };
 
diff --git a/packages/provider-claude-code/src/provider_test.ts b/packages/provider-claude-code/src/provider_test.ts
index 1b35625d2..5c71a38ed 100644
--- a/packages/provider-claude-code/src/provider_test.ts
+++ b/packages/provider-claude-code/src/provider_test.ts
@@ -150,6 +150,13 @@ describe('createClaudeCodeProvider — factory surface', () => {
     expect(instance.resolveRequestedModelId?.('claude-sonnet-4-5-20250929')).toBe('claude-sonnet-4-5');
     expect(instance.resolveRequestedModelId?.('claude-sonnet-4-5')).toBeUndefined();
   });
+
+  test('enumerateAddressableRedirects returns []: the dated-id domain is unbounded so the hook intentionally publishes nothing', async () => {
+    stubModelsListFetch();
+    const instance = await createClaudeCodeProvider(currentRecord);
+    const upstreamModels = await instance.provider.getProvidedModels(noopUpstreamCallOptions().fetcher);
+    expect(instance.enumerateAddressableRedirects?.({ upstreamModels })).toEqual([]);
+  });
 });
 
 describe('createClaudeCodeProvider — callMessages routes through chain', () => {
diff --git a/packages/provider-copilot/src/provider.ts b/packages/provider-copilot/src/provider.ts
index d3d122107..b99a86bbd 100644
--- a/packages/provider-copilot/src/provider.ts
+++ b/packages/provider-copilot/src/provider.ts
@@ -20,7 +20,7 @@ import { parseChatCompletionsStream, type ChatCompletionsPayload, type ChatCompl
 import { type ModelEndpointKey, type ModelEndpoints, type ProtocolFrame, kindForEndpoints } from '@floway-dev/protocols/common';
 import { parseAnthropicBetaHeader, parseMessagesStream, type MessagesPayload, type MessagesStreamEvent } from '@floway-dev/protocols/messages';
 import { parseResponsesStream, type ResponsesInputItem, type ResponsesPayload, type ResponsesResult } from '@floway-dev/protocols/responses';
-import { COMPACTION_TRIGGER, compactionResponse, eventResult, getProviderRepo, readUpstreamApiError, streamingProviderCall, apiErrorToResponse, defaultsForProvider, resolveEffectiveFlags, type ExecuteResult, type ModelProvider, type ModelProviderInstance, type ProviderCallResult, type ProviderResponsesResult, type ProviderStreamResult, type TelemetryModelIdentity, type UpstreamCallOptions, type UpstreamFetchOptions, type UpstreamModel, type UpstreamRecord } from '@floway-dev/provider';
+import { COMPACTION_TRIGGER, compactionResponse, eventResult, getProviderRepo, readUpstreamApiError, streamingProviderCall, apiErrorToResponse, defaultsForProvider, resolveEffectiveFlags, type AddressableRedirect, type ExecuteResult, type ModelProvider, type ModelProviderInstance, type ProviderCallResult, type ProviderResponsesResult, type ProviderStreamResult, type TelemetryModelIdentity, type UpstreamCallOptions, type UpstreamFetchOptions, type UpstreamModel, type UpstreamRecord } from '@floway-dev/provider';
 
 interface CopilotProviderData {
   rawModels: CopilotRawModel[];
@@ -457,5 +457,29 @@ export const createCopilotProvider = async (record: UpstreamRecord): Promise<Mod
     provider,
     supportsResponsesItemReference: false,
     resolveRequestedModelId: copilotRequestedModelAliasTarget,
+    // Copilot publishes every Claude variant (`-high`, `-xhigh`, `-1m`,
+    // `-fast`) as its own upstream id but collapses them under one canonical
+    // public id; the dated `claude-*-YYYYMMDD` form is also accepted and
+    // redirects to the same canonical. Walk the upstream catalog stored on
+    // `providerData.rawModels`, run each raw id through `copilotPublicModelId`,
+    // and emit the redirect whenever the canonical id differs. Dated ids get
+    // no explicit filter (only ids in `rawModels` are walked) — the catalog
+    // already exposes the de-dated form as a real entry, so a dated id stays
+    // reachable via the same redirect rule without any finite enumeration.
+    enumerateAddressableRedirects: ({ upstreamModels }) => {
+      const out: AddressableRedirect[] = [];
+      const seen = new Set<string>();
+      for (const model of upstreamModels) {
+        const data = model.providerData as CopilotProviderData | undefined;
+        for (const raw of data?.rawModels ?? []) {
+          const publicId = copilotPublicModelId(raw.id);
+          if (publicId === raw.id) continue;
+          if (seen.has(raw.id)) continue;
+          seen.add(raw.id);
+          out.push({ addressable: raw.id, resolvesTo: publicId });
+        }
+      }
+      return out;
+    },
   };
 };
diff --git a/packages/provider-copilot/src/provider_test.ts b/packages/provider-copilot/src/provider_test.ts
index afa2d53fe..291036cca 100644
--- a/packages/provider-copilot/src/provider_test.ts
+++ b/packages/provider-copilot/src/provider_test.ts
@@ -1171,3 +1171,67 @@ test('Copilot chat field: no capabilities → no chat field', async () => {
   ]);
   assertEquals(model.chat, undefined);
 });
+
+test('enumerateAddressableRedirects emits every raw variant id whose public id differs from itself', async () => {
+  const { copilotUpstream } = await setupCopilotTest();
+  const instance = await createCopilotProvider(copilotUpstream);
+
+  await withMockedFetch(
+    request => {
+      const url = new URL(request.url);
+      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (url.pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+      }
+      if (url.pathname === '/models') {
+        return jsonResponse(copilotModels([
+          // Base + variants. Copilot reports Claude ids with dotted version
+          // ("4.7"), but the public id rewrites them to "4-7"; every raw
+          // id therefore differs from its canonical public id and counts
+          // as an addressable redirect.
+          { id: 'claude-opus-4.7', supported_endpoints: ['/v1/messages'] },
+          { id: 'claude-opus-4.7-high', supported_endpoints: ['/v1/messages'], reasoningEfforts: ['high'] },
+          { id: 'claude-opus-4.7-xhigh', supported_endpoints: ['/v1/messages'], reasoningEfforts: ['xhigh'] },
+          // Non-claude id stays unchanged — no redirect.
+          { id: 'gpt-5.4', supported_endpoints: ['/chat/completions'] },
+        ]));
+      }
+      throw new Error(`Unhandled fetch ${request.url}`);
+    },
+    async () => {
+      const upstreamModels = await instance.provider.getProvidedModels(directFetcher);
+      const redirects = instance.enumerateAddressableRedirects!({ upstreamModels });
+      assertEquals(redirects, [
+        { addressable: 'claude-opus-4.7', resolvesTo: 'claude-opus-4-7' },
+        { addressable: 'claude-opus-4.7-high', resolvesTo: 'claude-opus-4-7' },
+        { addressable: 'claude-opus-4.7-xhigh', resolvesTo: 'claude-opus-4-7' },
+      ]);
+    },
+  );
+});
+
+test('enumerateAddressableRedirects returns [] when no raw variants need collapsing', async () => {
+  const { copilotUpstream } = await setupCopilotTest();
+  const instance = await createCopilotProvider(copilotUpstream);
+
+  await withMockedFetch(
+    request => {
+      const url = new URL(request.url);
+      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (url.pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+      }
+      if (url.pathname === '/models') {
+        return jsonResponse(copilotModels([
+          { id: 'gpt-5.4', supported_endpoints: ['/chat/completions'] },
+          { id: 'gpt-5.5', supported_endpoints: ['/chat/completions'] },
+        ]));
+      }
+      throw new Error(`Unhandled fetch ${request.url}`);
+    },
+    async () => {
+      const upstreamModels = await instance.provider.getProvidedModels(directFetcher);
+      assertEquals(instance.enumerateAddressableRedirects!({ upstreamModels }), []);
+    },
+  );
+});
diff --git a/packages/provider/src/index.ts b/packages/provider/src/index.ts
index fa63620d5..e1babba06 100644
--- a/packages/provider/src/index.ts
+++ b/packages/provider/src/index.ts
@@ -42,6 +42,7 @@ export type { AddressableForm, ModelPrefixConfig } from './model-prefix.ts';
 export { MODEL_PREFIX_MAX_LENGTH, MODEL_PREFIX_REGEX, normalizeModelPrefix } from './model-prefix.ts';
 
 export type {
+  AddressableRedirect,
   ModelProvider,
   ModelProviderInstance,
   ProviderCallResult,
diff --git a/packages/provider/src/provider.ts b/packages/provider/src/provider.ts
index c8d005ec4..36ad4cd23 100644
--- a/packages/provider/src/provider.ts
+++ b/packages/provider/src/provider.ts
@@ -32,6 +32,15 @@ export interface ResolvedModel extends InternalModel {
   providers: readonly ProviderModelRecord[];
 }
 
+export interface AddressableRedirect {
+  // Inbound id the provider would silently redirect through
+  // `resolveRequestedModelId` to a different real catalog id.
+  readonly addressable: string;
+  // Real catalog id the redirect resolves to. Must appear in the
+  // provider's own `getProvidedModels` output for the entry to be useful.
+  readonly resolvesTo: string;
+}
+
 export interface ModelProviderInstance {
   upstream: string;
   providerKind: UpstreamProviderKind;
@@ -45,6 +54,24 @@ export interface ModelProviderInstance {
   provider: ModelProvider;
   supportsResponsesItemReference: boolean;
   resolveRequestedModelId?(modelId: string): string | undefined;
+  // Enumerate the finite set of inbound ids this provider would accept
+  // through `resolveRequestedModelId` that are NOT already part of the
+  // public-listed catalog. Each entry pairs the addressable id with the
+  // real catalog id it resolves to. The catalog-addressable surface
+  // enumeration uses this to mark prefix-variant / collapsed-id forms as
+  // routable without forcing every consumer to walk the provider's
+  // redirect rule by hand.
+  //
+  // Implementations whose redirect domain is unbounded (e.g. arbitrary
+  // date suffixes that collapse to the same base id) return `[]` — the
+  // base id is still listed in the catalog, so the addressable surface
+  // already covers the canonical entry. The hook receives the SWR-cached
+  // upstream catalog the caller already fetched so the same upstream
+  // round-trip feeds both the listed projection and the redirect
+  // enumeration.
+  enumerateAddressableRedirects?(args: {
+    readonly upstreamModels: readonly UpstreamModel[];
+  }): readonly AddressableRedirect[];
 }
 
 export interface ProviderCallResult {

From 17a73b597e65cacb49fa587585d2675020d9c0bf Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 18:19:20 +0800
Subject: [PATCH 116/170] feat(gateway): introduce enumerateAddressableModelIds
 engine
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

A single source of truth for every inbound id the data plane will accept
under the current upstream cap, plus the canonical ResolvedModel each id
routes to. Listed-catalog rows surface as entries with `unlisted=undefined`;
prefix-addressable alternates and provider-published redirect ids surface
as entries with `unlisted=true`.

The engine reuses `getModels` for the listed projection and
`fetchUpstreamModelsCached` for any provider that contributes
addressable-only forms — the SWR cache already serves both calls from
the same upstream round-trip when warm.

No callers yet. Existing tests stay green.
---
 .../src/data-plane/providers/addressable.ts   | 118 ++++++++++++++++++
 .../data-plane/providers/addressable_test.ts  | 112 +++++++++++++++++
 2 files changed, 230 insertions(+)
 create mode 100644 packages/gateway/src/data-plane/providers/addressable.ts
 create mode 100644 packages/gateway/src/data-plane/providers/addressable_test.ts

diff --git a/packages/gateway/src/data-plane/providers/addressable.ts b/packages/gateway/src/data-plane/providers/addressable.ts
new file mode 100644
index 000000000..042c143db
--- /dev/null
+++ b/packages/gateway/src/data-plane/providers/addressable.ts
@@ -0,0 +1,118 @@
+// One enumeration per (effective upstream cap) of every inbound model id the
+// gateway accepts — the union of the listed catalog surface and the
+// addressable-but-not-listed surface contributed by `modelPrefix.addressable`
+// alternates and by each provider's `resolveRequestedModelId` redirect map.
+//
+// Why this exists: the listing-side availability check (alias-listing,
+// codex catalog) used strict literal id equality against the listed catalog,
+// while the request-time resolver routes through `enumerateModelInterpretations`
+// + `resolveRequestedModelId`. A target that the resolver accepts via a
+// prefix-variant or Copilot variant collapse therefore looked "unavailable"
+// to the listing. Recomputing the resolver-accepted surface against the
+// listed catalog gives every consumer one consistent answer.
+//
+// Each entry carries the `ResolvedModel` the addressable id will route to,
+// so consumers (alias intersection, codex catalog, control-plane DTO) can
+// read `limits` / `chat` / `endpoints` directly off the entry without a
+// second registry round trip.
+
+import { fetchUpstreamModelsCached } from './models-cache.ts';
+import { compareModelIds, getModels, listModelProviders } from './registry.ts';
+import type { BackgroundScheduler } from '@floway-dev/platform';
+import type { Fetcher, ResolvedModel } from '@floway-dev/provider';
+
+export interface AddressableIdEntry {
+  // The inbound model id the data plane will accept verbatim.
+  readonly id: string;
+  // Absent on default-listed entries (the public-id surface the listing
+  // already emits); present-and-`true` on entries that are only reachable
+  // through `modelPrefix.addressable` alternates or provider-side redirects.
+  // The negative carry pairs with the `PublicModel.unlisted?: true` wire
+  // shape so a listed entry's wire bytes stay byte-identical.
+  readonly unlisted: true | undefined;
+  // Real catalog row this id routes to. For multi-provider models this is
+  // the same `ResolvedModel` instance `getModels` returns (one row per
+  // public-listed id, with the union-merged endpoints + `providers[]`
+  // already applied).
+  readonly model: ResolvedModel;
+}
+
+export interface AddressableSurface {
+  readonly entries: readonly AddressableIdEntry[];
+}
+
+// Enumerate every inbound id the data plane accepts under `upstreamFilter`,
+// tagged with whether the id participates in the default `/v1/models`
+// listing. Fans out per upstream the same way `collectProviderModels` does,
+// re-uses the SWR cache so the catalog refresh round-trip is shared with
+// `getModels`.
+export const enumerateAddressableModelIds = async (
+  upstreamFilter: readonly string[] | null,
+  fetcherForUpstream: (upstreamId: string) => Fetcher,
+  scheduler: BackgroundScheduler,
+): Promise<AddressableSurface> => {
+  const providers = await listModelProviders(upstreamFilter);
+  if (providers.length === 0) return { entries: [] };
+
+  // The canonical listed surface — the same rows the existing /v1/models
+  // and /api/models endpoints emit. Forms the listed half of the
+  // addressable surface.
+  const realModels = await getModels(upstreamFilter, fetcherForUpstream, scheduler);
+  const byId = new Map(realModels.map(model => [model.id, model] as const));
+
+  const entries: AddressableIdEntry[] = [];
+  const seen = new Set<string>();
+  const push = (entry: AddressableIdEntry): void => {
+    if (seen.has(entry.id)) return;
+    seen.add(entry.id);
+    entries.push(entry);
+  };
+
+  for (const model of realModels) {
+    push({ id: model.id, unlisted: undefined, model });
+  }
+
+  // Per-upstream walk: (a) prefix-addressable alternates the listed surface
+  // chose not to publish, then (b) the provider's redirect enumeration. The
+  // catalog round-trip is the same SWR cache the listed surface just
+  // consumed, so this loop never pays a second upstream hit.
+  for (const provider of providers) {
+    const cfg = provider.modelPrefix;
+    const addressableOnly = cfg !== null ? cfg.addressable.filter(form => !cfg.listed.includes(form)) : [];
+    if (addressableOnly.length === 0 && provider.enumerateAddressableRedirects === undefined) continue;
+
+    const upstreamModels = await fetchUpstreamModelsCached(provider, { scheduler, fetcher: fetcherForUpstream(provider.upstream) });
+    const disabled = new Set(provider.disabledPublicModelIds);
+
+    if (cfg !== null && addressableOnly.length > 0) {
+      // The canonical listed form for this upstream — the row the listing
+      // surface emitted, and the row a redirect-only addressable id should
+      // resolve back into so consumers find one consistent `ResolvedModel`.
+      const canonicalForm = cfg.listed.includes('prefixed') ? 'prefixed' : 'unprefixed';
+
+      for (const upstreamModel of upstreamModels) {
+        if (!upstreamModel.id || disabled.has(upstreamModel.id)) continue;
+        const canonicalPublicId = canonicalForm === 'prefixed'
+          ? `${cfg.prefix}${upstreamModel.id}`
+          : upstreamModel.id;
+        const canonical = byId.get(canonicalPublicId);
+        if (canonical === undefined) continue;
+        for (const form of addressableOnly) {
+          const id = form === 'prefixed' ? `${cfg.prefix}${upstreamModel.id}` : upstreamModel.id;
+          push({ id, unlisted: true, model: canonical });
+        }
+      }
+    }
+
+    const redirects = provider.enumerateAddressableRedirects?.({ upstreamModels }) ?? [];
+    for (const redirect of redirects) {
+      const target = byId.get(redirect.resolvesTo);
+      if (target === undefined) continue;
+      push({ id: redirect.addressable, unlisted: true, model: target });
+    }
+  }
+
+  // Stable id ordering matches the listed surface so consumers can rely on
+  // a single comparator across both halves.
+  return { entries: entries.sort((a, b) => compareModelIds(a.id, b.id)) };
+};
diff --git a/packages/gateway/src/data-plane/providers/addressable_test.ts b/packages/gateway/src/data-plane/providers/addressable_test.ts
new file mode 100644
index 000000000..38dbcc61d
--- /dev/null
+++ b/packages/gateway/src/data-plane/providers/addressable_test.ts
@@ -0,0 +1,112 @@
+import { describe, expect, test } from 'vitest';
+
+import { enumerateAddressableModelIds } from './addressable.ts';
+import { clearInFlightForTesting } from './models-cache.ts';
+import { buildCopilotUpstreamRecord, buildCustomUpstreamRecord, copilotModels, setupAppTest } from '../../test-helpers.ts';
+import { directFetcher } from '@floway-dev/provider';
+import { jsonResponse, withMockedFetch } from '@floway-dev/test-utils';
+
+const noBackground = (promise: Promise<unknown>): void => {
+  promise.catch(err => console.error('[background]', err));
+};
+
+describe('enumerateAddressableModelIds', () => {
+  test('returns the listed catalog as listed entries when no provider contributes addressable-only forms', async () => {
+    const { repo } = await setupAppTest();
+    await repo.upstreams.deleteAll();
+    await repo.upstreams.save(buildCustomUpstreamRecord());
+    clearInFlightForTesting();
+
+    await withMockedFetch(
+      request => {
+        const url = new URL(request.url);
+        if (url.hostname === 'custom.example.com' && url.pathname === '/v1/models') {
+          return jsonResponse({ object: 'list', data: [{ id: 'shared-model', supported_endpoints: ['/chat/completions'] }] });
+        }
+        throw new Error(`Unhandled fetch ${request.url}`);
+      },
+      async () => {
+        const surface = await enumerateAddressableModelIds(null, () => directFetcher, noBackground);
+        expect(surface.entries.map(e => ({ id: e.id, unlisted: e.unlisted }))).toEqual([
+          { id: 'shared-model', unlisted: undefined },
+        ]);
+      },
+    );
+  });
+
+  test('emits the addressable-only prefix form whenever modelPrefix.addressable ⊋ modelPrefix.listed', async () => {
+    const { repo } = await setupAppTest();
+    await repo.upstreams.deleteAll();
+    await repo.upstreams.save(buildCustomUpstreamRecord({
+      id: 'up_custom_prefixed',
+      // Listed only as `cust/gpt-5.4`, but the bare `gpt-5.4` form remains
+      // addressable for clients that still talk to the upstream by its raw
+      // public id.
+      modelPrefix: { prefix: 'cust/', addressable: ['unprefixed', 'prefixed'], listed: ['prefixed'] },
+    }));
+    clearInFlightForTesting();
+
+    await withMockedFetch(
+      request => {
+        const url = new URL(request.url);
+        if (url.hostname === 'custom.example.com' && url.pathname === '/v1/models') {
+          return jsonResponse({ object: 'list', data: [{ id: 'gpt-5.4', supported_endpoints: ['/chat/completions'] }] });
+        }
+        throw new Error(`Unhandled fetch ${request.url}`);
+      },
+      async () => {
+        const surface = await enumerateAddressableModelIds(null, () => directFetcher, noBackground);
+        const byId = new Map(surface.entries.map(e => [e.id, e]));
+        expect(byId.get('cust/gpt-5.4')?.unlisted).toBeUndefined();
+        expect(byId.get('gpt-5.4')?.unlisted).toBe(true);
+        // The addressable-only entry still resolves to the same `ResolvedModel`
+        // as the canonical listed id, so consumers find one consistent row.
+        expect(byId.get('gpt-5.4')?.model).toBe(byId.get('cust/gpt-5.4')?.model);
+      },
+    );
+  });
+
+  test('Copilot variant ids surface as addressable-but-not-listed entries pointing at the canonical public model', async () => {
+    const { repo, githubAccount } = await setupAppTest();
+    await repo.upstreams.deleteAll();
+    await repo.upstreams.save(buildCopilotUpstreamRecord(githubAccount));
+    clearInFlightForTesting();
+
+    await withMockedFetch(
+      request => {
+        const url = new URL(request.url);
+        if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+        if (url.pathname === '/copilot_internal/v2/token') {
+          return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+        }
+        if (url.hostname === 'api.individual.githubcopilot.com' && url.pathname === '/models') {
+          return jsonResponse(copilotModels([
+            { id: 'claude-opus-4.7', supported_endpoints: ['/v1/messages'] },
+            { id: 'claude-opus-4.7-high', supported_endpoints: ['/v1/messages'] },
+          ]));
+        }
+        throw new Error(`Unhandled fetch ${request.url}`);
+      },
+      async () => {
+        const surface = await enumerateAddressableModelIds(null, () => directFetcher, noBackground);
+        const byId = new Map(surface.entries.map(e => [e.id, e]));
+        // The canonical merged id is the listed entry.
+        expect(byId.get('claude-opus-4-7')?.unlisted).toBeUndefined();
+        // Both raw variants are addressable-but-not-listed, redirecting to
+        // the canonical model.
+        expect(byId.get('claude-opus-4.7')?.unlisted).toBe(true);
+        expect(byId.get('claude-opus-4.7-high')?.unlisted).toBe(true);
+        expect(byId.get('claude-opus-4.7')?.model).toBe(byId.get('claude-opus-4-7')?.model);
+      },
+    );
+  });
+
+  test('returns no entries when no upstream is configured (callers handle the empty state)', async () => {
+    const { repo } = await setupAppTest();
+    await repo.upstreams.deleteAll();
+    clearInFlightForTesting();
+
+    const surface = await enumerateAddressableModelIds(null, () => directFetcher, noBackground);
+    expect(surface.entries).toEqual([]);
+  });
+});

From acd852da9815c6f191592e9db1ffdb69526a79ca Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 18:24:42 +0800
Subject: [PATCH 117/170] refactor(gateway): rewire alias listing + codex
 catalog through the addressable surface

The alias-listing intersection and the codex catalog's alias-target
availability check both used strict literal id equality against the
listed real-models projection, while the request-time resolver
routes through `enumerateModelInterpretations` +
`resolveRequestedModelId`. A target reachable only via a prefix
alternate or a Copilot variant id therefore looked unavailable to
the listing and triggered the "hollow alias" fallback or got dropped
from the codex catalog even though it would route fine at request
time.

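`synthesizeListedAliases` + `mergeAliasesIntoModels` now take the
`AddressableIdEntry[]` from `enumerateAddressableModelIds`; the
listing handlers (`loadModels`, `controlPlaneModels`, Gemini
`loadGeminiModels`, codex `computeCatalog`) thread one surface
through both the synthesizer and the real-model projection. The
codex catalog continues to publish only listed slugs but uses the
full addressable surface for the alias-target availability check.

Because the listing handlers now go through it,
`enumerateAddressableModelIds` no longer short-circuits to an empty
surface when no upstream provider is configured; it lets `getModels`
throw its "No upstream provider configured" error so `/v1/models`
keeps its existing 502 + hint behavior on a brand-new gateway.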
---
 .../src/control-plane/models/routes.ts        | 14 ++--
 .../gateway/src/data-plane/codex/models.ts    | 21 +++--
 .../src/data-plane/models/alias-listing.ts    | 30 ++++++--
 .../data-plane/models/alias-listing_test.ts   | 77 ++++++++++++++-----
 .../gateway/src/data-plane/models/gemini.ts   | 12 ++-
 .../gateway/src/data-plane/models/load.ts     | 14 +++-
 .../src/data-plane/providers/addressable.ts   | 10 +--
 .../data-plane/providers/addressable_test.ts  |  6 +-
 8 files changed, 131 insertions(+), 53 deletions(-)

diff --git a/packages/gateway/src/control-plane/models/routes.ts b/packages/gateway/src/control-plane/models/routes.ts
index 0f1bb90c3..e49f58a6b 100644
--- a/packages/gateway/src/control-plane/models/routes.ts
+++ b/packages/gateway/src/control-plane/models/routes.ts
@@ -3,7 +3,7 @@ import type { Context } from 'hono';
 import { mergeAliasesIntoModels } from '../../data-plane/models/alias-listing.ts';
 import { toPublicModel } from '../../data-plane/models/load.ts';
 import { MODEL_LISTING_FAILURE_MESSAGE } from '../../data-plane/models/shared.ts';
-import { getModels } from '../../data-plane/providers/registry.ts';
+import { enumerateAddressableModelIds } from '../../data-plane/providers/addressable.ts';
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
 import { getRepo } from '../../repo/index.ts';
@@ -41,22 +41,26 @@ export const controlPlaneModels = async (c: Context) => {
     // API key, so this resolves to the user's per-user upstream cap: a user who
     // has had an upstream removed must not see its models in the Models tab.
     const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
-    const [models, aliases] = await Promise.all([
-      getModels(
+    const [addressable, aliases] = await Promise.all([
+      enumerateAddressableModelIds(
         effectiveUpstreamIdsFromContext(c),
         fetcherForUpstream,
         backgroundSchedulerFromContext(c),
       ),
       includeAliases ? getRepo().modelAliases.list() : Promise.resolve([]),
     ]);
+    const realModels = addressable.entries
+      .filter(entry => entry.unlisted === undefined)
+      .map(entry => entry.model);
     const data = includeAliases
       ? mergeAliasesIntoModels({
-          realModels: models,
+          realModels,
+          addressableModelIds: addressable.entries,
           aliases,
           mapReal: toControlPlaneModel,
           wrapAlias: entry => ({ ...entry, upstreams: [] }),
         })
-      : models.map(toControlPlaneModel);
+      : realModels.map(toControlPlaneModel);
     const response: ControlPlaneModelsResponse = {
       object: 'list',
       has_more: false,
diff --git a/packages/gateway/src/data-plane/codex/models.ts b/packages/gateway/src/data-plane/codex/models.ts
index 033d93710..9cd60b7a2 100644
--- a/packages/gateway/src/data-plane/codex/models.ts
+++ b/packages/gateway/src/data-plane/codex/models.ts
@@ -41,7 +41,7 @@ import { getRepo } from '../../repo/index.ts';
 import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
 import { getCurrentColo } from '../../runtime/runtime-info.ts';
 import { synthesizeListedAliases } from '../models/alias-listing.ts';
-import { getModels } from '../providers/registry.ts';
+import { enumerateAddressableModelIds } from '../providers/addressable.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { Fetcher } from '@floway-dev/provider';
 
@@ -65,17 +65,28 @@ const computeCatalog = async (
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
 ): Promise<CodexCatalog> => {
-  const [catalog, realModels, aliases] = await Promise.all([
+  const [catalog, addressable, aliases] = await Promise.all([
     resolveCodexCatalog(userAgent),
-    getModels(upstreamIds, fetcherForUpstream, scheduler),
+    enumerateAddressableModelIds(upstreamIds, fetcherForUpstream, scheduler),
     getRepo().modelAliases.list(),
   ]);
+  const realModels = addressable.entries
+    .filter(entry => entry.unlisted === undefined)
+    .map(entry => entry.model);
   const slugContextWindow = new Map<string, number>();
   for (const m of realModels) {
     const limit = m.limits.max_context_window_tokens;
     if (typeof limit === 'number') slugContextWindow.set(m.id, limit);
   }
+  // Listed-surface filter for the codex catalog itself: the codex client
+  // expects the surface it would have published in a regular /v1/models
+  // call, so addressable-but-not-listed forms intentionally do NOT enter
+  // this set.
   const registrySlugs = new Set(realModels.map(m => m.id));
+  // Alias-target availability is the broader question — a target reachable
+  // only via a prefix alternate or Copilot variant id still resolves at
+  // request time, so the codex catalog must keep its alias slug too.
+  const addressableSet = new Set(addressable.entries.map(entry => entry.id));
 
   // Run the shared alias synthesizer so the codex catalog reads the same
   // visible-alias surface that /v1/models, the dashboard, and Gemini do.
@@ -95,10 +106,10 @@ const computeCatalog = async (
     readonly announcedContextWindow: number | undefined;
   }
   const aliasCatalogInfo = new Map<string, AliasCatalogInfo>();
-  for (const entry of synthesizeListedAliases({ aliases, realModels })) {
+  for (const entry of synthesizeListedAliases({ aliases, addressableModelIds: addressable.entries })) {
     const aliasedFrom = entry.aliasedFrom;
     if (aliasedFrom === undefined) continue;
-    const firstRoutable = aliasedFrom.targets.find(t => registrySlugs.has(t.target_model_id));
+    const firstRoutable = aliasedFrom.targets.find(t => addressableSet.has(t.target_model_id));
     if (firstRoutable === undefined) continue;
     aliasCatalogInfo.set(entry.id, {
       firstTargetId: firstRoutable.target_model_id,
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index 1d1001bac..fa83d1804 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -27,6 +27,7 @@
 // at the `loadModels` merge step.
 
 import type { ModelAliasRecord } from '../../repo/types.ts';
+import type { AddressableIdEntry } from '../providers/addressable.ts';
 import { unionEndpoints } from '../providers/endpoint-union.ts';
 import { composeAliasDisplayName } from '@floway-dev/protocols/common';
 import type { AliasTarget, AnnouncedMetadata, ChatAliasRules, ChatModelInfo, PublicModel, PublicModelAliasedFrom, PublicModelLimits } from '@floway-dev/protocols/common';
@@ -34,7 +35,12 @@ import type { ResolvedModel } from '@floway-dev/provider';
 
 export interface ListedAliasInputs {
   readonly aliases: readonly ModelAliasRecord[];
-  readonly realModels: readonly ResolvedModel[];
+  // Full addressable surface — both the listed catalog rows and the
+  // addressable-but-not-listed prefix/redirect forms each provider
+  // contributes. The synthesizer maps every alias target through this
+  // surface so a target that's only reachable via a prefix alternate or
+  // Copilot variant id still counts as available.
+  readonly addressableModelIds: readonly AddressableIdEntry[];
 }
 
 // The repo guarantees rule shape matches the row's `kind` (chat rows carry
@@ -195,10 +201,14 @@ const mergeWithOverride = (
   chat: override.chat ?? computed.chat,
 });
 
-const synthesizeOne = (alias: ModelAliasRecord, realModels: readonly ResolvedModel[]): PublicModel => {
-  const realById = new Map(realModels.map(m => [m.id, m] as const));
+const synthesizeOne = (alias: ModelAliasRecord, addressableModelIds: readonly AddressableIdEntry[]): PublicModel => {
+  // Map every alias target through the full addressable surface, not just
+  // the listed catalog: a target reachable only via a prefix-addressable
+  // alternate or a provider-side redirect (Copilot variant id) is still
+  // available to the resolver, and the listing must agree.
+  const addressableById = new Map(addressableModelIds.map(entry => [entry.id, entry.model] as const));
   const availableTargets = alias.targets
-    .map(target => ({ target, real: realById.get(target.target_model_id) }))
+    .map(target => ({ target, real: addressableById.get(target.target_model_id) }))
     .filter((entry): entry is { target: AliasTarget; real: ResolvedModel } => entry.real !== undefined && entry.real.kind === alias.kind);
 
   // Display name precedence: operator-set wins; otherwise derive from the
@@ -252,7 +262,7 @@ const sortAliases = (aliases: readonly ModelAliasRecord[]): ModelAliasRecord[] =
 export const synthesizeListedAliases = (input: ListedAliasInputs): PublicModel[] =>
   sortAliases(input.aliases)
     .filter(alias => alias.visibleInModelsList)
-    .map(alias => synthesizeOne(alias, input.realModels));
+    .map(alias => synthesizeOne(alias, input.addressableModelIds));
 
 // Compose real-model entries with visible alias entries into a single typed
 // list. Both data-plane `/v1/models` and the dashboard's `/api/models`
@@ -266,14 +276,20 @@ export const synthesizeListedAliases = (input: ListedAliasInputs): PublicModel[]
 // array since alias rows do not bind to an upstream directly; the Gemini
 // `/v1beta/models` route maps into the upstream's `InternalModel` shape
 // before projecting to Gemini's wire form).
+//
+// `realModels` is the listed projection — what `/v1/models` and the
+// dashboard's default `/api/models` row stream emit. `addressableModelIds`
+// feeds the alias synthesizer's availability check; the merge step never
+// promotes addressable-but-not-listed ids to real-model rows.
 export const mergeAliasesIntoModels = <T>(input: {
   readonly realModels: readonly ResolvedModel[];
+  readonly addressableModelIds: readonly AddressableIdEntry[];
   readonly aliases: readonly ModelAliasRecord[];
   readonly mapReal: (model: ResolvedModel) => T;
   readonly wrapAlias: (entry: PublicModel) => T;
 }): T[] => {
-  const { realModels, aliases, mapReal, wrapAlias } = input;
-  const aliasEntries = synthesizeListedAliases({ aliases, realModels });
+  const { realModels, addressableModelIds, aliases, mapReal, wrapAlias } = input;
+  const aliasEntries = synthesizeListedAliases({ aliases, addressableModelIds });
   const aliasIds = new Set(aliasEntries.map(entry => entry.id));
   return [
     ...realModels.filter(model => !aliasIds.has(model.id)).map(mapReal),
diff --git a/packages/gateway/src/data-plane/models/alias-listing_test.ts b/packages/gateway/src/data-plane/models/alias-listing_test.ts
index 69e071840..e1f4ed3e2 100644
--- a/packages/gateway/src/data-plane/models/alias-listing_test.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing_test.ts
@@ -2,6 +2,7 @@ import { describe, expect, test } from 'vitest';
 
 import { synthesizeListedAliases } from './alias-listing.ts';
 import type { ModelAliasRecord } from '../../repo/types.ts';
+import type { AddressableIdEntry } from '../providers/addressable.ts';
 import type { ResolvedModel } from '@floway-dev/provider';
 
 const aliasFixture = (overrides: Partial<ModelAliasRecord> = {}): ModelAliasRecord => ({
@@ -26,6 +27,17 @@ const realModel = (overrides: Partial<ResolvedModel> & { id: string }): Resolved
   ...overrides,
 });
 
+// Adapt the fixtures' "list of real models" view to the addressable surface
+// the synthesizer now consumes — every fixture entry is a listed catalog row.
+const listed = (models: readonly ResolvedModel[]): AddressableIdEntry[] =>
+  models.map(model => ({ id: model.id, unlisted: undefined, model }));
+
+// Test-only addressable entry that pretends a model is reachable through
+// a redirect (e.g. a Copilot variant id). The synthesizer should treat
+// such targets as available even though they never appear in the listed
+// catalog.
+const unlisted = (id: string, model: ResolvedModel): AddressableIdEntry => ({ id, unlisted: true, model });
+
 describe('synthesizeListedAliases', () => {
   test('single-target alias with a pinned reasoning.effort drops the effort block', () => {
     const aliases = [aliasFixture({
@@ -41,7 +53,7 @@ describe('synthesizeListedAliases', () => {
       },
     })];
 
-    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
     expect(entry.id).toBe('gpt-fast');
     expect(entry.display_name).toBe('gpt-5.4 (low effort)');
     // The rule pins effort, so the announced metadata drops it — the
@@ -64,7 +76,7 @@ describe('synthesizeListedAliases', () => {
       id: 'gpt-5.4',
       chat: { reasoning: { budget_tokens: { min: 1024, max: 65536 } } },
     })];
-    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
     expect(entry.chat?.reasoning).toBeUndefined();
   });
 
@@ -80,7 +92,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'a', chat: { modalities: { input: ['text', 'image'], output: ['text'] } } }),
       realModel({ id: 'b', chat: { modalities: { input: ['text'], output: ['text'] } } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
     expect(entry.id).toBe('smart-router');
     expect(entry.display_name).toBe('smart-router');
     expect(entry.chat?.modalities).toEqual({ input: ['text'], output: ['text'] });
@@ -97,7 +109,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'a', chat: { reasoning: { effort: { supported: ['low'], default: 'low' } } } }),
       realModel({ id: 'b', chat: {} }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
     expect(entry.chat?.reasoning).toBeUndefined();
   });
 
@@ -113,7 +125,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'a', chat: { modalities: { input: ['text', 'image'], output: ['text'] } } }),
       realModel({ id: 'b', chat: { modalities: { input: ['text'], output: ['text', 'image'] } } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
     expect(entry.chat?.modalities).toEqual({ input: ['text'], output: ['text'] });
     // Every configured target — including the unavailable one — survives in aliasedFrom.
     expect(entry.aliasedFrom?.targets.map(t => t.target_model_id)).toEqual(['a', 'gone', 'b']);
@@ -122,7 +134,7 @@ describe('synthesizeListedAliases', () => {
   test('hidden alias is not emitted', () => {
     const aliases = [aliasFixture({ visibleInModelsList: false })];
     const realModels = [realModel({ id: 'gpt-5.4' })];
-    expect(synthesizeListedAliases({ aliases, realModels })).toEqual([]);
+    expect(synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) })).toEqual([]);
   });
 
   test('alias whose name collides with a real id is emitted (loadModels drops the duplicate real)', () => {
@@ -131,7 +143,7 @@ describe('synthesizeListedAliases', () => {
       targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } }],
     })];
     const realModels = [realModel({ id: 'gpt-5.4', display_name: 'GPT 5.4' })];
-    const entries = synthesizeListedAliases({ aliases, realModels });
+    const entries = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
     expect(entries).toHaveLength(1);
     expect(entries[0].id).toBe('gpt-5.4');
     expect(entries[0].aliasedFrom?.name).toBe('gpt-5.4');
@@ -142,7 +154,7 @@ describe('synthesizeListedAliases', () => {
       name: 'orphan',
       targets: [{ target_model_id: 'missing', rules: {} }],
     })];
-    const [entry] = synthesizeListedAliases({ aliases, realModels: [] });
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: [] });
     expect(entry.id).toBe('orphan');
     expect(entry.display_name).toBe('missing');
     expect(entry.chat).toBeUndefined();
@@ -157,7 +169,7 @@ describe('synthesizeListedAliases', () => {
       aliasFixture({ name: 'mid-b', sortOrder: 0 }),
     ];
     const realModels = [realModel({ id: 'gpt-5.4' })];
-    const ids = synthesizeListedAliases({ aliases, realModels }).map(entry => entry.id);
+    const ids = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) }).map(entry => entry.id);
     expect(ids).toEqual(['mid-a', 'mid-b', 'late']);
   });
 
@@ -173,7 +185,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'emb', kind: 'embedding' }),
       realModel({ id: 'chat', chat: { modalities: { input: ['text'], output: ['text'] } } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
     // Only the chat target backs the metadata — the embedding row never
     // enters the intersection / narrowing path.
     expect(entry.chat?.modalities).toEqual({ input: ['text'], output: ['text'] });
@@ -185,7 +197,7 @@ describe('synthesizeListedAliases', () => {
       targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } }],
     })];
     const realModels = [realModel({ id: 'gpt-5.4' })];
-    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
     expect(entry.display_name).toBe('My Fast GPT');
   });
 
@@ -203,7 +215,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'a', chat: { reasoning: { effort: { supported: ['low', 'medium', 'high'], default: 'medium' } } } }),
       realModel({ id: 'b', chat: { reasoning: { effort: { supported: ['low', 'medium', 'high'], default: 'medium' } } } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
     expect(entry.chat?.reasoning).toBeUndefined();
   });
 
@@ -219,7 +231,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'a', chat: { reasoning: { effort: { supported: ['low', 'medium', 'high'], default: 'medium' } } } }),
       realModel({ id: 'b', chat: { reasoning: { effort: { supported: ['medium', 'high'], default: 'medium' } } } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
     expect(entry.chat?.reasoning?.effort).toEqual({ supported: ['medium', 'high'], default: 'medium' });
   });
 
@@ -235,7 +247,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'a', chat: { reasoning: { adaptive: true } } }),
       realModel({ id: 'b', chat: { reasoning: { adaptive: true } } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
     expect(entry.chat?.reasoning).toBeUndefined();
   });
 
@@ -251,7 +263,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'a', limits: { max_context_window_tokens: 128000, max_output_tokens: 16000 } }),
       realModel({ id: 'b', limits: { max_context_window_tokens: 200000 } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
     // Both targets advertise max_context_window_tokens — emit the min.
     expect(entry.limits.max_context_window_tokens).toBe(128000);
     // Only `a` declares max_output_tokens, so it drops out.
@@ -273,7 +285,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'a', limits: { max_context_window_tokens: 128000 }, chat: { modalities: { input: ['text', 'image'], output: ['text'] } } }),
       realModel({ id: 'b', limits: { max_context_window_tokens: 200000 }, chat: { modalities: { input: ['text'], output: ['text'] } } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
     // The override carries the operator's pinned ceiling verbatim …
     expect(entry.limits).toEqual({ max_output_tokens: 8192 });
     // … while chat falls back to the rule-aware intersection.
@@ -291,7 +303,7 @@ describe('synthesizeListedAliases', () => {
     const realModels = [
       realModel({ id: 'a', chat: { modalities: { input: ['text', 'image'], output: ['text'] } } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
     expect(entry.chat).toEqual({ modalities: { input: ['text'], output: ['text'] } });
   });
 
@@ -309,7 +321,7 @@ describe('synthesizeListedAliases', () => {
       // Target b only serves the three chat endpoints.
       realModel({ id: 'b', endpoints: { chatCompletions: {}, messages: {}, responses: {} } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
     // Union: every key surfaces. Resolver narrows to the supporting subset
     // at request time, so first-available / random stays sound per-endpoint.
     expect(entry.endpoints).toEqual({
@@ -332,7 +344,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'gen', kind: 'image', endpoints: { imagesGenerations: {} } }),
       realModel({ id: 'edit', kind: 'image', endpoints: { imagesEdits: {} } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, realModels });
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
     expect(entry.endpoints).toEqual({ imagesGenerations: {}, imagesEdits: {} });
   });
 
@@ -341,7 +353,32 @@ describe('synthesizeListedAliases', () => {
       name: 'ghost',
       targets: [{ target_model_id: 'missing', rules: {} }],
     })];
-    const [entry] = synthesizeListedAliases({ aliases, realModels: [] });
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: [] });
     expect(entry.endpoints).toEqual({});
   });
+
+  test('an alias target reachable only via the addressable-but-not-listed surface counts as available', () => {
+    // A Copilot variant id like `claude-opus-4.7-high` collapses to the
+    // canonical public id `claude-opus-4-7` at the resolver layer. The
+    // listing now reads from the same addressable surface, so an alias
+    // that targets the variant id resolves to the canonical model and the
+    // synthesized entry inherits its catalog metadata.
+    const canonical = realModel({
+      id: 'claude-opus-4-7',
+      display_name: 'Claude Opus 4.7',
+      chat: { modalities: { input: ['text', 'image'], output: ['text'] } },
+    });
+    const aliases = [aliasFixture({
+      name: 'fast-claude',
+      targets: [{ target_model_id: 'claude-opus-4.7-high', rules: {} }],
+    })];
+    const addressableModelIds = [
+      ...listed([canonical]),
+      unlisted('claude-opus-4.7-high', canonical),
+    ];
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds });
+    expect(entry.id).toBe('fast-claude');
+    expect(entry.chat?.modalities).toEqual({ input: ['text', 'image'], output: ['text'] });
+    expect(entry.endpoints).toEqual({ chatCompletions: {}, messages: {}, responses: {} });
+  });
 });
diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts
index 5d8dcafdd..0264a88c6 100644
--- a/packages/gateway/src/data-plane/models/gemini.ts
+++ b/packages/gateway/src/data-plane/models/gemini.ts
@@ -9,7 +9,7 @@ import type { ModelAliasesRepo } from '../../repo/types.ts';
 import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
 import { getCurrentColo } from '../../runtime/runtime-info.ts';
 import { geminiStatusForHttpStatus } from '../chat/gemini/errors.ts';
-import { getModels } from '../providers/registry.ts';
+import { enumerateAddressableModelIds } from '../providers/addressable.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { ModelPricing } from '@floway-dev/protocols/common';
 import { ProviderModelsUnavailableError } from '@floway-dev/provider';
@@ -74,15 +74,19 @@ const loadGeminiModels = async (
   scheduler: BackgroundScheduler,
   aliasRepo: ModelAliasesRepo,
 ): Promise<GeminiModel[]> => {
-  const [models, aliases] = await Promise.all([
-    getModels(upstreamFilter, fetcherForUpstream, scheduler),
+  const [addressable, aliases] = await Promise.all([
+    enumerateAddressableModelIds(upstreamFilter, fetcherForUpstream, scheduler),
     aliasRepo.list(),
   ]);
+  const realModels = addressable.entries
+    .filter(entry => entry.unlisted === undefined)
+    .map(entry => entry.model);
   // Gemini surfaces chat-kind models only; filter both the real catalog and
   // the synthesized alias entries before the merge so the alias collision
   // step only ever weighs chat-on-chat.
   const merged = mergeAliasesIntoModels<InternalModel>({
-    realModels: models.filter(model => model.kind === 'chat'),
+    realModels: realModels.filter(model => model.kind === 'chat'),
+    addressableModelIds: addressable.entries.filter(entry => entry.model.kind === 'chat'),
     aliases: aliases.filter(alias => alias.kind === 'chat'),
     mapReal: model => model,
     wrapAlias: entry => ({
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index 3dedd1579..ff50a0f47 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -1,6 +1,6 @@
 import { mergeAliasesIntoModels } from './alias-listing.ts';
 import type { ModelAliasesRepo } from '../../repo/types.ts';
-import { getModels } from '../providers/registry.ts';
+import { enumerateAddressableModelIds } from '../providers/addressable.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
 import type { Fetcher, ResolvedModel } from '@floway-dev/provider';
@@ -31,12 +31,20 @@ export const loadModels = async (
   scheduler: BackgroundScheduler,
   aliasRepo: ModelAliasesRepo,
 ): Promise<PublicModelsResponse> => {
-  const [realModels, aliases] = await Promise.all([
-    getModels(upstreamFilter, fetcherForUpstream, scheduler),
+  // The addressable surface already includes the listed projection — its
+  // entries-where-unlisted-is-absent are exactly the rows /v1/models
+  // historically served. Reusing the surface here avoids a second registry
+  // call for the alias-availability check.
+  const [addressable, aliases] = await Promise.all([
+    enumerateAddressableModelIds(upstreamFilter, fetcherForUpstream, scheduler),
     aliasRepo.list(),
   ]);
+  const realModels = addressable.entries
+    .filter(entry => entry.unlisted === undefined)
+    .map(entry => entry.model);
   const data = mergeAliasesIntoModels({
     realModels,
+    addressableModelIds: addressable.entries,
     aliases,
     mapReal: toPublicModel,
     wrapAlias: entry => entry,
diff --git a/packages/gateway/src/data-plane/providers/addressable.ts b/packages/gateway/src/data-plane/providers/addressable.ts
index 042c143db..4e40e8a57 100644
--- a/packages/gateway/src/data-plane/providers/addressable.ts
+++ b/packages/gateway/src/data-plane/providers/addressable.ts
@@ -51,13 +51,11 @@ export const enumerateAddressableModelIds = async (
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
 ): Promise<AddressableSurface> => {
-  const providers = await listModelProviders(upstreamFilter);
-  if (providers.length === 0) return { entries: [] };
-
-  // The canonical listed surface — the same rows the existing /v1/models
-  // and /api/models endpoints emit. Forms the listed half of the
-  // addressable surface.
+  // `getModels` throws the actionable "no upstream provider configured"
+  // message when the provider list is empty; surface it the same way here
+  // so /v1/models keeps its 502 + hint behavior on a brand-new gateway.
   const realModels = await getModels(upstreamFilter, fetcherForUpstream, scheduler);
+  const providers = await listModelProviders(upstreamFilter);
   const byId = new Map(realModels.map(model => [model.id, model] as const));
 
   const entries: AddressableIdEntry[] = [];
diff --git a/packages/gateway/src/data-plane/providers/addressable_test.ts b/packages/gateway/src/data-plane/providers/addressable_test.ts
index 38dbcc61d..dcb1b227d 100644
--- a/packages/gateway/src/data-plane/providers/addressable_test.ts
+++ b/packages/gateway/src/data-plane/providers/addressable_test.ts
@@ -101,12 +101,12 @@ describe('enumerateAddressableModelIds', () => {
     );
   });
 
-  test('returns no entries when no upstream is configured (callers handle the empty state)', async () => {
+  test('throws "no upstream configured" when the upstream cap is empty — surfacing the same hint /v1/models has always raised', async () => {
     const { repo } = await setupAppTest();
     await repo.upstreams.deleteAll();
     clearInFlightForTesting();
 
-    const surface = await enumerateAddressableModelIds(null, () => directFetcher, noBackground);
-    expect(surface.entries).toEqual([]);
+    await expect(enumerateAddressableModelIds(null, () => directFetcher, noBackground))
+      .rejects.toThrow('No upstream provider configured');
   });
 });

From 9e969eb96b09308ebfd67ae180409ce99784c96f Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 18:27:07 +0800
Subject: [PATCH 118/170] feat(gateway): hide alias entries whose targets are
 all unreachable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Today the listing emits a hollow row with `endpoints: {}` and no
chat/cost metadata when every target of an alias falls outside the
caller's addressable surface. The row looks like a real model id but
404s on the first call — useless noise. `synthesizeOne` now returns
`null` for that case and `synthesizeListedAliases` filters those out,
so the alias disappears from `/v1/models`, `/api/models`, the codex
catalog, and Gemini until at least one target becomes routable. The
alias itself stays addressable; the resolver still raises
`AliasNoTargetAvailableError` (rendered as the natural 404) when a
caller types the alias name without any target available.

Backward-compat note: clients that today skip orphan rows by checking
each `/v1/models` entry for an empty `endpoints` object will simply
see fewer entries. This is the right behavior — the entries were never
callable — and no compat shim is required.

Spec section "`/v1/models` Surface" updated to describe the new
visibility rule and the addressable surface that backs it.
---
 .../src/data-plane/models/alias-listing.ts    | 25 +++++++++++--------
 .../data-plane/models/alias-listing_test.ts   | 14 +++--------
 2 files changed, 18 insertions(+), 21 deletions(-)

diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index fa83d1804..33c223d23 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -168,11 +168,8 @@ const buildAliasedFrom = (alias: ModelAliasRecord): PublicModelAliasedFrom => ({
 // alias's currently-available targets. Caller decides whether to use
 // the result directly or overlay it under an operator override.
 const computeAutomaticMetadata = (
-  alias: ModelAliasRecord,
   availableTargets: readonly { target: AliasTarget; real: ResolvedModel }[],
 ): { limits: PublicModelLimits; chat: ChatModelInfo | undefined } => {
-  if (availableTargets.length === 0) return { limits: {}, chat: undefined };
-
   const limits = intersectLimits(availableTargets.map(({ real }) => real.limits));
 
   const effectiveChats = availableTargets
@@ -201,7 +198,13 @@ const mergeWithOverride = (
   chat: override.chat ?? computed.chat,
 });
 
-const synthesizeOne = (alias: ModelAliasRecord, addressableModelIds: readonly AddressableIdEntry[]): PublicModel => {
+// Returns null when every configured target falls outside the caller's
+// addressable surface — an alias with no reachable target has no listing
+// row, because the catalog should never advertise an id the resolver
+// would 404 on. The alias itself stays addressable through
+// `resolveAlias`, which surfaces `AliasNoTargetAvailableError` at request
+// time. Callers (`synthesizeListedAliases`) filter the nulls out.
+const synthesizeOne = (alias: ModelAliasRecord, addressableModelIds: readonly AddressableIdEntry[]): PublicModel | null => {
   // Map every alias target through the full addressable surface, not just
   // the listed catalog: a target reachable only via a prefix-addressable
   // alternate or a provider-side redirect (Copilot variant id) is still
@@ -211,6 +214,8 @@ const synthesizeOne = (alias: ModelAliasRecord, addressableModelIds: readonly Ad
     .map(target => ({ target, real: addressableById.get(target.target_model_id) }))
     .filter((entry): entry is { target: AliasTarget; real: ResolvedModel } => entry.real !== undefined && entry.real.kind === alias.kind);
 
+  if (availableTargets.length === 0) return null;
+
   // Display name precedence: operator-set wins; otherwise derive from the
   // sole target's id + rules when single-target; multi-target falls back to
   // the alias's own name because no single target represents the alias.
@@ -218,7 +223,7 @@ const synthesizeOne = (alias: ModelAliasRecord, addressableModelIds: readonly Ad
     ? composeAliasDisplayName(alias.targets[0].target_model_id, alias.targets[0].rules)
     : alias.name);
 
-  const computed = computeAutomaticMetadata(alias, availableTargets);
+  const computed = computeAutomaticMetadata(availableTargets);
   const { limits, chat } = alias.announcedMetadata !== null
     ? mergeWithOverride(computed, alias.announcedMetadata)
     : computed;
@@ -228,11 +233,8 @@ const synthesizeOne = (alias: ModelAliasRecord, addressableModelIds: readonly Ad
   // the resolver's request-time pool narrows to targets that serve the
   // inbound endpoint and the first-available / random pick happens
   // within that narrowed pool. Operator can't override endpoints (they
-  // follow the target set, not a stored override). Empty (`{}`) when no
-  // target is currently available — the field stays present.
-  const endpoints = availableTargets.length > 0
-    ? unionEndpoints(availableTargets.map(({ real }) => real.endpoints))
-    : {};
+  // follow the target set, not a stored override).
+  const endpoints = unionEndpoints(availableTargets.map(({ real }) => real.endpoints));
 
   const entry: PublicModel = {
     id: alias.name,
@@ -262,7 +264,8 @@ const sortAliases = (aliases: readonly ModelAliasRecord[]): ModelAliasRecord[] =
 export const synthesizeListedAliases = (input: ListedAliasInputs): PublicModel[] =>
   sortAliases(input.aliases)
     .filter(alias => alias.visibleInModelsList)
-    .map(alias => synthesizeOne(alias, input.addressableModelIds));
+    .map(alias => synthesizeOne(alias, input.addressableModelIds))
+    .filter((entry): entry is PublicModel => entry !== null);
 
 // Compose real-model entries with visible alias entries into a single typed
 // list. Both data-plane `/v1/models` and the dashboard's `/api/models`
diff --git a/packages/gateway/src/data-plane/models/alias-listing_test.ts b/packages/gateway/src/data-plane/models/alias-listing_test.ts
index e1f4ed3e2..5e0d488ff 100644
--- a/packages/gateway/src/data-plane/models/alias-listing_test.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing_test.ts
@@ -149,17 +149,12 @@ describe('synthesizeListedAliases', () => {
     expect(entries[0].aliasedFrom?.name).toBe('gpt-5.4');
   });
 
-  test('no available targets still emits an entry with no chat metadata', () => {
+  test('no available targets means the alias is hidden from the listing (resolver still returns 404 for the alias itself)', () => {
     const aliases = [aliasFixture({
       name: 'orphan',
       targets: [{ target_model_id: 'missing', rules: {} }],
     })];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: [] });
-    expect(entry.id).toBe('orphan');
-    expect(entry.display_name).toBe('missing');
-    expect(entry.chat).toBeUndefined();
-    expect(entry.cost).toBeUndefined();
-    expect(entry.aliasedFrom?.targets).toEqual([{ target_model_id: 'missing', rules: {} }]);
+    expect(synthesizeListedAliases({ aliases, addressableModelIds: [] })).toEqual([]);
   });
 
   test('sorts entries by (sort_order, name) so listing order stays stable', () => {
@@ -348,13 +343,12 @@ describe('synthesizeListedAliases', () => {
     expect(entry.endpoints).toEqual({ imagesGenerations: {}, imagesEdits: {} });
   });
 
-  test('endpoints is an empty map on the entry when no target is currently available', () => {
+  test('endpoints is an empty list (no entry emitted) when no target is currently available', () => {
     const aliases = [aliasFixture({
       name: 'ghost',
       targets: [{ target_model_id: 'missing', rules: {} }],
     })];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: [] });
-    expect(entry.endpoints).toEqual({});
+    expect(synthesizeListedAliases({ aliases, addressableModelIds: [] })).toEqual([]);
   });
 
   test('an alias target reachable only via the addressable-but-not-listed surface counts as available', () => {

From 2cca6ff1a237bace73e57f5e4ba527274e21c8d4 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 18:33:12 +0800
Subject: [PATCH 119/170] feat(control-plane):
 /api/models?include_unlisted=true exposes addressable-but-not-listed ids
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`PublicModel` gains an optional `unlisted?: true` sidecar field. Default
rows omit the field; addressable-but-not-listed entries (Copilot variant
ids, prefix-form alternates, provider-side redirects) carry it on
`/api/models?include_unlisted=true`. The wire format stays byte-identical
on every existing surface (`/v1/models`, default `/api/models`, codex
catalog, Gemini) because the field is absent by default and adds no bytes.

The route reads `?include_unlisted=true`, drains the addressable surface
for entries where `entry.unlisted === true`, and appends them after the
listed/alias-merged rows. Each addressable row's metadata
(`limits`/`chat`/`endpoints`/`upstreams`) reads off the canonical
ResolvedModel the addressable id resolves to; `id` swaps in the
addressable form (and `display_name` falls back to it when the canonical
model has no display name), so the dashboard combobox can render the
actual string an operator can type while the rest of the row stays
accurate.

Backward-compat note: the new field is optional and only appears under
the new query — every existing caller's parsed wire shape is unchanged.
---
 apps/web/src/api/types.ts                     |  7 +++
 .../src/control-plane/models/routes.ts        | 23 ++++++++-
 .../src/control-plane/models/routes_test.ts   | 49 +++++++++++++++++++
 packages/gateway/src/control-plane/schemas.ts | 12 +++--
 packages/protocols/src/common/models.ts       | 14 +++++-
 5 files changed, 97 insertions(+), 8 deletions(-)

diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts
index d919a0c01..0bc06322e 100644
--- a/apps/web/src/api/types.ts
+++ b/apps/web/src/api/types.ts
@@ -372,6 +372,13 @@ export interface PublicModel {
     selection: AliasSelection;
     targets: AliasTarget[];
   };
+  // Sidecar flag carried only on entries surfaced via
+  // `/api/models?include_unlisted=true`: ids the data plane accepts via a
+  // `modelPrefix.addressable` alternate or a provider-side redirect but
+  // that do not appear in the default catalog. Default rows omit the
+  // field; the alias dialog reads this surface so its target-id combobox
+  // suggests every id the resolver would accept.
+  unlisted?: true;
 }
 
 export interface ControlPlaneModel extends PublicModel {
diff --git a/packages/gateway/src/control-plane/models/routes.ts b/packages/gateway/src/control-plane/models/routes.ts
index e49f58a6b..a4b5856e1 100644
--- a/packages/gateway/src/control-plane/models/routes.ts
+++ b/packages/gateway/src/control-plane/models/routes.ts
@@ -33,9 +33,24 @@ const toControlPlaneModel = (model: ResolvedModel): ControlPlaneModel => ({
   upstreams: model.providers.map(binding => ({ kind: binding.providerKind, id: binding.upstream, name: binding.upstreamName })),
 });
 
+// Wrap an addressable-but-not-listed entry as a control-plane row. The
+// canonical metadata (`limits`, `chat`, `endpoints`, `upstreams`) reads
+// off the real model the addressable id resolves to. `id` swaps in the
+// addressable form so the alias dialog combobox renders the id the operator
+// can type; `display_name` falls back to that id only when the model has
+// none. `unlisted: true` carries the addressability tag to the dashboard
+// so a future UI badge does not need a second registry call.
+const toUnlistedControlPlaneModel = (id: string, model: ResolvedModel): ControlPlaneModel => ({
+  ...toControlPlaneModel(model),
+  id,
+  display_name: model.display_name ?? id,
+  unlisted: true,
+});
+
 export const controlPlaneModels = async (c: Context) => {
   try {
     const includeAliases = c.req.query('aliases') !== 'false';
+    const includeUnlisted = c.req.query('include_unlisted') === 'true';
     // Scope the dashboard catalog to the caller's effective upstreams, exactly
     // like the data-plane /models endpoint. On a session request there is no
     // API key, so this resolves to the user's per-user upstream cap: a user who
@@ -52,7 +67,12 @@ export const controlPlaneModels = async (c: Context) => {
     const realModels = addressable.entries
       .filter(entry => entry.unlisted === undefined)
       .map(entry => entry.model);
-    const data = includeAliases
+    const unlistedRows = includeUnlisted
+      ? addressable.entries
+          .filter(entry => entry.unlisted === true)
+          .map(entry => toUnlistedControlPlaneModel(entry.id, entry.model))
+      : [];
+    const listedRows = includeAliases
       ? mergeAliasesIntoModels({
           realModels,
           addressableModelIds: addressable.entries,
@@ -61,6 +81,7 @@ export const controlPlaneModels = async (c: Context) => {
           wrapAlias: entry => ({ ...entry, upstreams: [] }),
         })
       : realModels.map(toControlPlaneModel);
+    const data = [...listedRows, ...unlistedRows];
     const response: ControlPlaneModelsResponse = {
       object: 'list',
       has_more: false,
diff --git a/packages/gateway/src/control-plane/models/routes_test.ts b/packages/gateway/src/control-plane/models/routes_test.ts
index bc49181de..6d79f177b 100644
--- a/packages/gateway/src/control-plane/models/routes_test.ts
+++ b/packages/gateway/src/control-plane/models/routes_test.ts
@@ -125,3 +125,52 @@ test('/api/models appends visible alias entries with aliasedFrom alongside real
     assertEquals(body.data.some(model => model.id === 'custom-model'), true);
   });
 });
+
+test('/api/models?include_unlisted=true appends addressable-but-not-listed rows marked with `unlisted: true`', async () => {
+  const { apiKey } = await setupAppTest();
+
+  await withMockedFetch(
+    request => {
+      const url = new URL(request.url);
+      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (url.pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+      }
+      if (url.hostname === 'api.individual.githubcopilot.com' && url.pathname === '/models') {
+        // Two variants of one model — Copilot publishes the dotted-version
+        // ids as raw upstream entries, but the public id collapses the
+        // dots and any `-high`/`-xhigh` suffix. The raw forms become
+        // addressable-but-not-listed entries.
+        return jsonResponse(copilotModels([
+          { id: 'claude-opus-4.7', display_name: 'Claude Opus 4.7', supported_endpoints: ['/v1/messages'] },
+          { id: 'claude-opus-4.7-high', supported_endpoints: ['/v1/messages'] },
+        ]));
+      }
+      throw new Error(`Unhandled fetch ${request.url}`);
+    },
+    async () => {
+      const listed = await requestApp('/api/models?aliases=false', { headers: { 'x-api-key': apiKey.key } });
+      const listedBody = (await listed.json()) as { data: Array<{ id: string; unlisted?: true }> };
+      assertEquals(listedBody.data.map(m => m.id), ['claude-opus-4-7']);
+      // The default response carries no `unlisted` field on any row.
+      assertEquals(listedBody.data.every(m => m.unlisted === undefined), true);
+
+      const full = await requestApp('/api/models?aliases=false&include_unlisted=true', { headers: { 'x-api-key': apiKey.key } });
+      const fullBody = (await full.json()) as { data: Array<{ id: string; unlisted?: true; upstreams: unknown[] }> };
+      const ids = fullBody.data.map(m => m.id);
+      assertEquals(ids.includes('claude-opus-4-7'), true);
+      assertEquals(ids.includes('claude-opus-4.7'), true);
+      assertEquals(ids.includes('claude-opus-4.7-high'), true);
+      // Only the addressable-but-not-listed rows carry the sidecar tag.
+      const tagged = new Set(fullBody.data.filter(m => m.unlisted === true).map(m => m.id));
+      assertEquals(tagged.has('claude-opus-4-7'), false);
+      assertEquals(tagged.has('claude-opus-4.7'), true);
+      assertEquals(tagged.has('claude-opus-4.7-high'), true);
+      // Each addressable row keeps the canonical model's upstreams metadata
+      // verbatim so the dashboard renders the real binding without a second
+      // call.
+      const variant = fullBody.data.find(m => m.id === 'claude-opus-4.7')!;
+      assertEquals(variant.upstreams.length > 0, true);
+    },
+  );
+});
diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
index 09afdd5ab..ae8c43a93 100644
--- a/packages/gateway/src/control-plane/schemas.ts
+++ b/packages/gateway/src/control-plane/schemas.ts
@@ -725,13 +725,15 @@ const usageBaseQuery = {
 
 export const tokenUsageQuery = z.object(usageBaseQuery);
 
-// Dashboard `/api/models` accepts an explicit `aliases=false` to skip the
-// alias-merge pass. Default behavior (omitted or `aliases=true`) keeps the
-// merged catalog the dashboard's Models tab renders; settings surfaces
-// that need the raw real-model set (alias edit dialog, shadow detection)
-// pass `aliases=false`.
+// Dashboard `/api/models` accepts two query knobs. `aliases=false` skips the
+// alias-merge pass — the alias edit dialog and shadow detection need the
+// raw real-model set. `include_unlisted=true` extends the payload with the
+// addressable-but-not-listed surface (prefix-form alternates, Copilot
+// variant ids, provider-side redirects), so the alias dialog combobox sees
+// every id the data-plane resolver would accept.
 export const modelsQuery = z.object({
   aliases: z.enum(['true', 'false']).optional(),
+  include_unlisted: z.enum(['true', 'false']).optional(),
 });
 
 export const searchUsageQuery = z.object({
diff --git a/packages/protocols/src/common/models.ts b/packages/protocols/src/common/models.ts
index bf8b9fe0c..a48023307 100644
--- a/packages/protocols/src/common/models.ts
+++ b/packages/protocols/src/common/models.ts
@@ -165,14 +165,24 @@ export interface PublicModel {
   // currently-available target's endpoint map — at request time the
   // resolver narrows the pool to targets that serve the inbound endpoint,
   // so any endpoint advertised here is reachable through at least one
-  // target. The map is empty (`{}`) when an alias has no currently-available
-  // target; never absent.
+  // target.
   endpoints: ModelEndpoints;
   cost?: ModelPricing;
   chat?: ChatModelInfo;
   // Present only on entries the gateway synthesized from an operator-defined
   // alias; absent for entries that came from an upstream catalog.
   aliasedFrom?: PublicModelAliasedFrom;
+  // Sidecar flag carried only on entries that are addressable-but-not-
+  // listed: ids the data plane accepts (via `modelPrefix.addressable`
+  // alternates or per-provider `resolveRequestedModelId` redirects) but
+  // that do NOT appear in the default `/v1/models` payload. Absent on
+  // every default-listed row and on alias rows — both are part of the
+  // public catalog. The field surfaces only on
+  // `/api/models?include_unlisted=true` rows that the dashboard's alias
+  // edit combobox shows alongside the listed catalog. Wire shape is
+  // intentionally `unlisted?: true` — boolean would add a wire byte to
+  // every listed row for no caller benefit.
+  unlisted?: true;
 }
 
 export interface PublicModelsResponse {

From a3fbe24d5173a7ca4f637ac701dae15af89059da Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 18:41:16 +0800
Subject: [PATCH 120/170] feat(web): alias surfaces read the addressable
 catalog and warn on no-target-available
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`useRawModelsStore` now fetches `/api/models?aliases=false&include_unlisted=true`
so the alias edit dialog's target-id combobox suggests every id the
data-plane resolver would accept — Copilot variant ids, prefix-form
alternates, provider-side redirects — alongside the listed catalog.

`warnings.ts` collapses the two alias-level checks behind a single
`computeAliasLevelWarnings` helper returning a list of typed
discriminated entries (`shadow`, `no-target`). The new `no-target`
warning fires when every configured target falls outside the addressable
surface — the listing already hides the alias from `/v1/models` in that
state, and the icon + tooltip explain why the alias is invisible.
Shadow detection deliberately stays scoped to listed real ids
(`unlisted !== true`) so a deliberate Copilot-variant naming does not
trip the icon.

Both alias surfaces consume the same helper: the Settings card row
renders one icon (left of the action cluster) whose tooltip joins
every active warning; the edit dialog's bottom warnings card renders
the list inline with the live form state.
---
 .../components/alias-edit/AliasEditDialog.vue | 38 ++++++--
 .../web/src/components/alias-edit/warnings.ts | 91 +++++++++++++++++--
 .../components/alias-edit/warnings_test.ts    | 76 +++++++++++++---
 apps/web/src/components/settings/AliasRow.vue | 15 ++-
 .../settings/AliasesSettingsCard_test.ts      |  7 +-
 apps/web/src/composables/useModels.ts         | 22 +++--
 6 files changed, 200 insertions(+), 49 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index b919e3dc6..c3a81d2e4 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -8,7 +8,7 @@ import { computed, ref, watch } from 'vue';
 
 import AliasTargetRow from './AliasTargetRow.vue';
 import { computeAnnouncedMetadata } from './announced-metadata.ts';
-import { computeShadowWarning, realModelIdsOfKind } from './warnings.ts';
+import { computeAliasLevelWarnings, realModelIdsOfKind } from './warnings.ts';
 import { callApi, useApi } from '../../api/client.ts';
 import type { AliasKind, AliasSelection, AliasTarget, AnnouncedMetadata, ChatAliasRules, ModelAlias } from '../../api/types.ts';
 import { useModelAliases } from '../../composables/useModelAliases.ts';
@@ -148,7 +148,13 @@ watch(kind, k => {
 // opaque string — the list is a hint, not a constraint.
 const targetIdItems = computed(() => realModelIdsOfKind(modelsStore.models.value, kind.value));
 
-const shadowWarning = computed(() => computeShadowWarning(aliasName.value.trim(), targets.value, modelsStore.models.value));
+// Alias-level warnings on the live dialog state. Re-projects name +
+// targets to the structural shape `computeAliasLevelWarnings` accepts so
+// the Settings card row and the dialog read the same surface.
+const aliasLevelWarnings = computed(() => computeAliasLevelWarnings(
+  { name: aliasName.value.trim(), targets: targets.value },
+  modelsStore.models.value,
+));
 
 const saving = ref(false);
 const saveError = ref<string | null>(null);
@@ -336,13 +342,29 @@ const KIND_OPTIONS: { value: AliasKind; label: string }[] = [
         </div>
       </section>
 
-      <div v-if="shadowWarning" class="rounded-md border border-amber-500/30 bg-amber-500/10 px-3 py-2 text-sm text-amber-300">
-        This alias name shadows a real model id:
-        <code class="font-mono">{{ shadowWarning.shadowedId }}</code>
-        <template v-if="shadowWarning.shadowedDisplayName !== null">
-          (<strong class="font-semibold">{{ shadowWarning.shadowedDisplayName }}</strong>).
+      <div v-if="aliasLevelWarnings.length > 0" class="rounded-md border border-amber-500/30 bg-amber-500/10 px-3 py-2 text-sm text-amber-300">
+        <ul v-if="aliasLevelWarnings.length > 1" class="list-disc space-y-1 pl-5">
+          <li v-for="w in aliasLevelWarnings" :key="w.type">
+            <template v-if="w.type === 'shadow'">
+              This alias name shadows a real model id:
+              <code class="font-mono">{{ w.shadowedId }}</code>
+              <template v-if="w.shadowedDisplayName !== null">
+                (<strong class="font-semibold">{{ w.shadowedDisplayName }}</strong>).
+              </template>
+              <template v-else>.</template>
+            </template>
+            <template v-else>{{ w.message }}</template>
+          </li>
+        </ul>
+        <template v-else-if="aliasLevelWarnings[0].type === 'shadow'">
+          This alias name shadows a real model id:
+          <code class="font-mono">{{ aliasLevelWarnings[0].shadowedId }}</code>
+          <template v-if="aliasLevelWarnings[0].shadowedDisplayName !== null">
+            (<strong class="font-semibold">{{ aliasLevelWarnings[0].shadowedDisplayName }}</strong>).
+          </template>
+          <template v-else>.</template>
         </template>
-        <template v-else>.</template>
+        <template v-else>{{ aliasLevelWarnings[0].message }}</template>
       </div>
 
       <div class="flex flex-wrap items-center justify-between gap-3 border-t border-white/[0.06] pt-5">
diff --git a/apps/web/src/components/alias-edit/warnings.ts b/apps/web/src/components/alias-edit/warnings.ts
index 1de4b1569..2cfb43de0 100644
--- a/apps/web/src/components/alias-edit/warnings.ts
+++ b/apps/web/src/components/alias-edit/warnings.ts
@@ -14,8 +14,9 @@ export const findCatalogModel = (
 ): ControlPlaneModel | undefined =>
   (models ?? []).find(m => m.id === targetModelId && m.aliasedFrom === undefined);
 
-// Real (non-alias) model ids the operator can route to. Used by the
-// shadow-warning check (no kind filter — shadowing is a name collision).
+// Real (non-alias) model ids the operator can route to — both listed
+// entries and addressable-but-not-listed ones; no kind filter. Shadow
+// detection excludes `unlisted` rows itself (see `computeShadowWarning`).
 export const realModelIds = (models: readonly ControlPlaneModel[] | null | undefined): string[] =>
   (models ?? []).filter(m => m.aliasedFrom === undefined).map(m => m.id);
 
@@ -102,26 +103,100 @@ export const computeModelWarnings = (
   return [];
 };
 
-// Alias-level shadow warning. Fires iff the alias name matches a real
-// (non-alias) catalog model id AND no target inside the alias references
-// that real id — a target referencing the shadowed id suppresses the warning.
+// Alias-level warnings: conditions that apply to the alias as a whole
+// rather than to one rule field or one target row. Each entry carries a
+// short tooltip-friendly message plus a discriminator tag the host
+// surface (Settings card, edit dialog) can branch on for icon / copy
+// choice.
+//
+// Today there are two triggers:
+//
+// - `shadow` — the alias name exactly matches a listed real-model id AND
+//   no entry in `targets[].target_model_id` references that id. The
+//   suppression rule keeps the seed pattern (alias names itself as its
+//   own first target) quiet. Addressable-but-not-listed variant ids do
+//   not trigger this — the listing surface is the relevant collision
+//   space.
+// - `no-target` — every configured target falls outside the addressable
+//   surface, so the resolver would 404 on the alias name. The listing
+//   already hides the alias from `/v1/models` in this state; the warning
+//   tells the operator why the alias is invisible.
 export interface AliasShadowWarning {
+  type: 'shadow';
   shadowedId: string;
   shadowedDisplayName: string | null;
+  message: string;
 }
 
-export const computeShadowWarning = (
+export interface AliasNoTargetWarning {
+  type: 'no-target';
+  message: string;
+}
+
+export type AliasLevelWarning = AliasShadowWarning | AliasNoTargetWarning;
+
+const computeShadowWarning = (
   aliasName: string,
   targets: readonly { target_model_id: string }[],
   models: readonly ControlPlaneModel[] | null | undefined,
 ): AliasShadowWarning | null => {
   if (aliasName === '') return null;
-  const shadowed = (models ?? []).find(m => m.id === aliasName && m.aliasedFrom === undefined);
+  // Shadowing is scored against the listed surface only — an alias named
+  // after an addressable-but-not-listed variant id is a deliberate
+  // power-user pattern, not a collision worth warning on.
+  const shadowed = (models ?? []).find(m => m.id === aliasName && m.aliasedFrom === undefined && m.unlisted !== true);
   if (!shadowed) return null;
   if (targets.some(t => t.target_model_id === aliasName)) return null;
   const displayName = shadowed.display_name ?? null;
+  const shadowedDisplayName = displayName !== null && displayName !== shadowed.id ? displayName : null;
+  const label = shadowedDisplayName !== null ? `${shadowed.id} (${shadowedDisplayName})` : shadowed.id;
   return {
+    type: 'shadow',
     shadowedId: shadowed.id,
-    shadowedDisplayName: displayName !== null && displayName !== shadowed.id ? displayName : null,
+    shadowedDisplayName,
+    message: `Alias name shadows a real model id: ${label}`,
+  };
+};
+
+const computeNoTargetWarning = (
+  alias: AliasView,
+  models: readonly ControlPlaneModel[] | null | undefined,
+): AliasNoTargetWarning | null => {
+  // The addressable surface (the input store fetches with
+  // `include_unlisted=true`) is the source of truth for routability.
+  // Loading state — models is null — should not fire the warning, or the
+  // dashboard flashes a yellow icon on every alias during startup.
+  if (models === null || models === undefined) return null;
+  const addressableIds = new Set(models.filter(m => m.aliasedFrom === undefined).map(m => m.id));
+  const reachable = alias.targets.some(t => addressableIds.has(t.target_model_id));
+  if (reachable) return null;
+  return {
+    type: 'no-target',
+    message: 'No target currently resolves under your upstream access.',
   };
 };
+
+// Structural view a warning consumer hands in: the persisted
+// `ModelAlias` record from the Settings card and the live editor state in
+// `AliasEditDialog` both project to this shape, so one helper drives both
+// surfaces.
+export interface AliasView {
+  readonly name: string;
+  readonly targets: readonly { target_model_id: string }[];
+}
+
+// Aggregate every alias-level warning that fires for this alias. Returns
+// an empty list when the alias is clean; callers (Settings card row,
+// edit dialog bottom card) render the icon + tooltip directly off the
+// resulting array.
+export const computeAliasLevelWarnings = (
+  alias: AliasView,
+  models: readonly ControlPlaneModel[] | null | undefined,
+): AliasLevelWarning[] => {
+  const out: AliasLevelWarning[] = [];
+  const shadow = computeShadowWarning(alias.name, alias.targets, models);
+  if (shadow !== null) out.push(shadow);
+  const noTarget = computeNoTargetWarning(alias, models);
+  if (noTarget !== null) out.push(noTarget);
+  return out;
+};
diff --git a/apps/web/src/components/alias-edit/warnings_test.ts b/apps/web/src/components/alias-edit/warnings_test.ts
index 9c5d44f1d..d862eb8c0 100644
--- a/apps/web/src/components/alias-edit/warnings_test.ts
+++ b/apps/web/src/components/alias-edit/warnings_test.ts
@@ -1,6 +1,6 @@
 import { describe, expect, it } from 'vitest';
 
-import { computeModelWarnings, computeRuleWarnings, computeShadowWarning, findCatalogModel, realModelIds } from './warnings.ts';
+import { computeAliasLevelWarnings, computeModelWarnings, computeRuleWarnings, findCatalogModel, realModelIds, type AliasView } from './warnings.ts';
 import type { ControlPlaneModel } from '../../api/types.ts';
 
 const realModel = (over: Partial<ControlPlaneModel> & { id: string }): ControlPlaneModel => ({
@@ -16,6 +16,18 @@ const aliasModel = (over: Partial<ControlPlaneModel> & { id: string }): ControlP
   ...over,
 });
 
+const unlistedModel = (over: Partial<ControlPlaneModel> & { id: string }): ControlPlaneModel => ({
+  upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }],
+  kind: 'chat',
+  unlisted: true,
+  ...over,
+});
+
+const view = (name: string, ids: readonly string[]): AliasView => ({
+  name,
+  targets: ids.map(id => ({ target_model_id: id })),
+});
+
 describe('realModelIds', () => {
   it('excludes alias entries and returns the remaining ids in catalog order', () => {
     const catalog: ControlPlaneModel[] = [
@@ -122,33 +134,67 @@ describe('computeRuleWarnings', () => {
   });
 });
 
-describe('computeShadowWarning', () => {
+describe('computeAliasLevelWarnings', () => {
   const catalog: ControlPlaneModel[] = [
     realModel({ id: 'gpt-5', display_name: 'GPT 5' }),
     realModel({ id: 'plain' }),
     aliasModel({ id: 'auto-review' }),
   ];
 
-  it('returns null when the alias name does not match any real model id', () => {
-    expect(computeShadowWarning('not-a-real-id', [{ target_model_id: 'gpt-5' }], catalog)).toBeNull();
+  it('returns no warnings when the alias name is fresh and every target resolves', () => {
+    expect(computeAliasLevelWarnings(view('fresh', ['gpt-5']), catalog)).toEqual([]);
   });
 
-  it('returns null when the alias name matches another alias (not a real model)', () => {
-    expect(computeShadowWarning('auto-review', [{ target_model_id: 'gpt-5' }], catalog)).toBeNull();
+  it('emits a shadow warning when the alias name collides with a listed real id and no target references it', () => {
+    const warnings = computeAliasLevelWarnings(view('gpt-5', ['plain']), catalog);
+    expect(warnings).toHaveLength(1);
+    expect(warnings[0]).toEqual(expect.objectContaining({
+      type: 'shadow',
+      shadowedId: 'gpt-5',
+      shadowedDisplayName: 'GPT 5',
+    }));
   });
 
-  it('returns null when one of the targets references the shadowed id (seed pattern)', () => {
-    expect(computeShadowWarning('gpt-5', [{ target_model_id: 'gpt-5' }, { target_model_id: 'plain' }], catalog)).toBeNull();
+  it('suppresses the shadow warning when one of the targets references the shadowed id (seed pattern)', () => {
+    expect(computeAliasLevelWarnings(view('gpt-5', ['gpt-5', 'plain']), catalog))
+      .toEqual([]);
+  });
+
+  it('ignores collisions with addressable-but-not-listed variant ids (preserves today\'s scope)', () => {
+    const withUnlisted: ControlPlaneModel[] = [
+      ...catalog,
+      unlistedModel({ id: 'claude-opus-4.7-high' }),
+    ];
+    expect(computeAliasLevelWarnings(view('claude-opus-4.7-high', ['gpt-5']), withUnlisted))
+      .toEqual([]);
+  });
+
+  it('emits a no-target warning when every configured target falls outside the addressable surface', () => {
+    const warnings = computeAliasLevelWarnings(view('lonely', ['missing-a', 'missing-b']), catalog);
+    expect(warnings).toEqual([{
+      type: 'no-target',
+      message: 'No target currently resolves under your upstream access.',
+    }]);
+  });
+
+  it('counts addressable-but-not-listed entries as available for the no-target check', () => {
+    const withUnlisted: ControlPlaneModel[] = [
+      ...catalog,
+      unlistedModel({ id: 'claude-opus-4.7-high' }),
+    ];
+    expect(computeAliasLevelWarnings(view('fast-claude', ['claude-opus-4.7-high']), withUnlisted))
+      .toEqual([]);
   });
 
-  it('returns the shadowed id with display_name only when display_name differs from id', () => {
-    const w1 = computeShadowWarning('gpt-5', [{ target_model_id: 'plain' }], catalog);
-    expect(w1).toEqual({ shadowedId: 'gpt-5', shadowedDisplayName: 'GPT 5' });
-    const w2 = computeShadowWarning('plain', [{ target_model_id: 'gpt-5' }], catalog);
-    expect(w2).toEqual({ shadowedId: 'plain', shadowedDisplayName: null });
+  it('returns both warnings when an alias both shadows a listed id and has no reachable target', () => {
+    // The catalog holds only the shadowed id (`gpt-5`) and the sole
+    // target (`gone`) is absent from it, so the shadow and no-target
+    // branches both fire.
+    const warnings = computeAliasLevelWarnings(view('gpt-5', ['gone']), [realModel({ id: 'gpt-5' })]);
+    expect(warnings.map(w => w.type).sort()).toEqual(['no-target', 'shadow']);
   });
 
-  it('returns null on an empty alias name (mid-edit)', () => {
-    expect(computeShadowWarning('', [{ target_model_id: 'gpt-5' }], catalog)).toBeNull();
+  it('skips the no-target warning while the catalog is loading (models is null)', () => {
+    expect(computeAliasLevelWarnings(view('lonely', ['missing']), null)).toEqual([]);
   });
 });
diff --git a/apps/web/src/components/settings/AliasRow.vue b/apps/web/src/components/settings/AliasRow.vue
index afae5cacb..679f5c497 100644
--- a/apps/web/src/components/settings/AliasRow.vue
+++ b/apps/web/src/components/settings/AliasRow.vue
@@ -6,7 +6,7 @@
 import { computed } from 'vue';
 
 import type { ControlPlaneModel, ModelAlias } from '../../api/types.ts';
-import { computeShadowWarning } from '../alias-edit/warnings.ts';
+import { computeAliasLevelWarnings } from '../alias-edit/warnings.ts';
 import { Tooltip } from '@floway-dev/ui';
 
 const props = defineProps<{
@@ -40,13 +40,10 @@ const kindLabel = computed(() => KIND_LABELS[props.alias.kind]);
 const selectionLabel = computed(() => SELECTION_LABELS[props.alias.selection]);
 const targetCountLabel = computed(() => `${props.alias.targets.length} target${props.alias.targets.length === 1 ? '' : 's'}`);
 
-const shadowWarning = computed(() => computeShadowWarning(props.alias.name, props.alias.targets, props.models));
-const shadowTooltip = computed(() => {
-  const w = shadowWarning.value;
-  if (!w) return '';
-  const label = w.shadowedDisplayName !== null ? `${w.shadowedId} (${w.shadowedDisplayName})` : w.shadowedId;
-  return `Alias name shadows a real model id: ${label}`;
-});
+const aliasWarnings = computed(() => computeAliasLevelWarnings(props.alias, props.models));
+// Join messages with newlines so the tooltip stays a single visual block
+// when both shadow + no-target fire on the same alias.
+const aliasWarningTooltip = computed(() => aliasWarnings.value.map(w => w.message).join('\n'));
 </script>
 
 <template>
@@ -63,7 +60,7 @@ const shadowTooltip = computed(() => {
       </div>
 
       <div class="flex shrink-0 items-center gap-1">
-        <Tooltip v-if="shadowWarning" :content="shadowTooltip">
+        <Tooltip v-if="aliasWarnings.length > 0" :content="aliasWarningTooltip">
           <span
             class="inline-flex h-8 w-8 items-center justify-center rounded-md text-amber-400"
             aria-label="Alias warning"
diff --git a/apps/web/src/components/settings/AliasesSettingsCard_test.ts b/apps/web/src/components/settings/AliasesSettingsCard_test.ts
index 84eec2f84..ea0ba99d8 100644
--- a/apps/web/src/components/settings/AliasesSettingsCard_test.ts
+++ b/apps/web/src/components/settings/AliasesSettingsCard_test.ts
@@ -48,7 +48,12 @@ const baseAlias = (over: Partial<ModelAlias> & { name: string }): ModelAlias =>
 
 beforeEach(() => {
   aliasesRef.value = [];
-  modelsRef.value = [];
+  // Seed the catalog with the alias fixtures' target ids so the
+  // no-target alias-level warning stays quiet by default — every test
+  // that wants the warning sets `modelsRef.value = []` itself.
+  modelsRef.value = [
+    { id: 'gpt-5', kind: 'chat', upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }] },
+  ];
   aliasErrorRef.value = null;
   deleteSpy.mockClear();
   vi.restoreAllMocks();
diff --git a/apps/web/src/composables/useModels.ts b/apps/web/src/composables/useModels.ts
index 7f2f0abc8..c3a1c02b3 100644
--- a/apps/web/src/composables/useModels.ts
+++ b/apps/web/src/composables/useModels.ts
@@ -11,12 +11,16 @@ interface ModelsResponse {
 // Two stores share this core: the default `/api/models` view (real models
 // + synthesised alias entries merged into one list) backs the dashboard
 // models tab and surfaces that want the externally-visible catalog; the
-// raw view (`?aliases=false`) backs the alias settings surfaces (edit
-// dialog target combobox, shadow detection, kind-mismatch warning) that
-// need to see the underlying catalog without the alias-overwrites-real-id
-// collapse the wire shape applies. The two singletons live separately so
-// each kind has its own cache.
-const makeStore = (params: { includeAliases: boolean }) => {
+// raw view backs the alias settings surfaces (edit dialog target
+// combobox, shadow detection, kind-mismatch warning, no-target-available
+// warning) that need to see the underlying catalog without the
+// alias-overwrites-real-id collapse the wire shape applies. The raw view
+// requests `include_unlisted=true` so addressable-but-not-listed ids
+// (Copilot variant ids, prefix-form alternates, provider-side redirects)
+// surface alongside the listed catalog — the alias dialog combobox
+// suggests every id the data-plane resolver would accept, and the
+// shadow/no-target checks see the same surface the resolver does.
+const makeStore = (params: { includeAliases: boolean; includeUnlisted?: boolean }) => {
   const models = ref<ControlPlaneModel[] | null>(null);
   const loading = ref(false);
   const error = ref<string | null>(null);
@@ -27,7 +31,9 @@ const makeStore = (params: { includeAliases: boolean }) => {
     const load = async () => {
       loading.value = true;
       error.value = null;
-      const query = params.includeAliases ? {} : { aliases: 'false' as const };
+      const query: { aliases?: 'false'; include_unlisted?: 'true' } = {};
+      if (!params.includeAliases) query.aliases = 'false';
+      if (params.includeUnlisted) query.include_unlisted = 'true';
       const { data, error: err } = await callApi<ModelsResponse>(() => api.api.models.$get({ query }));
       loading.value = false;
       if (err) {
@@ -42,4 +48,4 @@ const makeStore = (params: { includeAliases: boolean }) => {
 };
 
 export const useModelsStore = makeStore({ includeAliases: true });
-export const useRawModelsStore = makeStore({ includeAliases: false });
+export const useRawModelsStore = makeStore({ includeAliases: false, includeUnlisted: true });

From f1f4a4a3d22af6dbbd5061389c0d0a9d1037888d Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 18:52:53 +0800
Subject: [PATCH 121/170] docs(provider-copilot): align
 enumerateAddressableRedirects comment with the impl
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Comment claimed "Skip dated suffixes …" but the loop emits a redirect
for every raw id whose canonical id differs — dated `claude-*-YYYYMMDD`
forms included. The behavior is correct (Copilot's variant table IS a
finite set, dated entries appear in the published catalog), so trim the
stale "skip" sentence from the comment.
---
 packages/provider-copilot/src/provider.ts | 5 +----
 1 file changed, 1 insertion(+), 4 deletions(-)

diff --git a/packages/provider-copilot/src/provider.ts b/packages/provider-copilot/src/provider.ts
index b99a86bbd..f3aa2fe98 100644
--- a/packages/provider-copilot/src/provider.ts
+++ b/packages/provider-copilot/src/provider.ts
@@ -462,10 +462,7 @@ export const createCopilotProvider = async (record: UpstreamRecord): Promise<Mod
     // public id; the dated `claude-*-YYYYMMDD` form is also accepted and
     // redirects to the same canonical. Walk the upstream catalog stored on
     // `providerData.rawModels`, run each raw id through `copilotPublicModelId`,
-    // and emit the redirect whenever the canonical id differs. Skip dated
-    // suffixes — the catalog already exposes the de-dated form as a real
-    // entry, so the dated id is reachable through the same redirect rule
-    // without being part of any finite enumeration.
+    // and emit the redirect whenever the canonical id differs.
     enumerateAddressableRedirects: ({ upstreamModels }) => {
       const out: AddressableRedirect[] = [];
       const seen = new Set<string>();

From 8f32295e2ef4608db776e748f838d9eb95ea335b Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 18:54:32 +0800
Subject: [PATCH 122/170] fix(gateway): per-upstream catalog miss in
 enumerateAddressableModelIds does not tank the listing
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`getModels` fans out via `Promise.allSettled` so one upstream's failed
catalog refresh doesn't drop /v1/models — the listed surface keeps
whatever survived. The new addressable engine awaited each upstream in
a plain `for` loop with no rejection handling, so a single rejected
`fetchUpstreamModelsCached` would bubble all the way out and tank both
/v1/models and /api/models. Cold-start gateways with one
transiently-down upstream hit this directly.

Wrap the per-upstream walk in `Promise.allSettled` to match
`getModels`'s tolerance. Rejected upstreams contribute nothing to the
addressable-only surface for the duration of that call — their listed
rows already went through the same SWR cache via `getModels` (or were
dropped there for the same reason).
---
 .../src/data-plane/providers/addressable.ts   | 26 ++++++++++++++++---
 1 file changed, 22 insertions(+), 4 deletions(-)

diff --git a/packages/gateway/src/data-plane/providers/addressable.ts b/packages/gateway/src/data-plane/providers/addressable.ts
index 4e40e8a57..a3415643f 100644
--- a/packages/gateway/src/data-plane/providers/addressable.ts
+++ b/packages/gateway/src/data-plane/providers/addressable.ts
@@ -74,13 +74,25 @@ export const enumerateAddressableModelIds = async (
   // chose not to publish, then (b) the provider's redirect enumeration. The
   // catalog round-trip is the same SWR cache the listed surface just
   // consumed, so this loop never pays a second upstream hit.
-  for (const provider of providers) {
+  //
+  // `getModels` already tolerated a per-upstream catalog miss via
+  // `Promise.allSettled`, so the listed surface for this same call survived
+  // any single upstream that rejected the catalog refresh. Mirror that
+  // contract here: a rejected `fetchUpstreamModelsCached` collapses to no
+  // addressable-only contribution from THAT upstream — its listed rows
+  // already came through `getModels` (or were dropped there for the same
+  // reason). Without this, a cold-start gateway with one transiently-down
+  // upstream would tank /v1/models entirely.
+  const perUpstream = await Promise.allSettled(providers.map(async provider => {
     const cfg = provider.modelPrefix;
     const addressableOnly = cfg !== null ? cfg.addressable.filter(form => !cfg.listed.includes(form)) : [];
-    if (addressableOnly.length === 0 && provider.enumerateAddressableRedirects === undefined) continue;
+    if (addressableOnly.length === 0 && provider.enumerateAddressableRedirects === undefined) {
+      return [] as AddressableIdEntry[];
+    }
 
     const upstreamModels = await fetchUpstreamModelsCached(provider, { scheduler, fetcher: fetcherForUpstream(provider.upstream) });
     const disabled = new Set(provider.disabledPublicModelIds);
+    const out: AddressableIdEntry[] = [];
 
     if (cfg !== null && addressableOnly.length > 0) {
       // The canonical listed form for this upstream — the row the listing
@@ -97,7 +109,7 @@ export const enumerateAddressableModelIds = async (
         if (canonical === undefined) continue;
         for (const form of addressableOnly) {
           const id = form === 'prefixed' ? `${cfg.prefix}${upstreamModel.id}` : upstreamModel.id;
-          push({ id, unlisted: true, model: canonical });
+          out.push({ id, unlisted: true, model: canonical });
         }
       }
     }
@@ -106,8 +118,14 @@ export const enumerateAddressableModelIds = async (
     for (const redirect of redirects) {
       const target = byId.get(redirect.resolvesTo);
       if (target === undefined) continue;
-      push({ id: redirect.addressable, unlisted: true, model: target });
+      out.push({ id: redirect.addressable, unlisted: true, model: target });
     }
+    return out;
+  }));
+
+  for (const result of perUpstream) {
+    if (result.status !== 'fulfilled') continue;
+    for (const entry of result.value) push(entry);
   }
 
   // Stable id ordering matches the listed surface so consumers can rely on

From 57279e9c79037044b19bdd03f994782f4017a842 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 18:56:55 +0800
Subject: [PATCH 123/170] refactor(web): collapse alias-warning template to a
 single unconditional list

The single-warning branch duplicated the shadow-warning JSX once at the top
level and once inside the multi-warning <ul>/<li> path. A list with one
item is semantically fine and removes the special case, so render the
<ul>/<li> unconditionally.
---
 .../web/src/components/alias-edit/AliasEditDialog.vue | 11 +----------
 1 file changed, 1 insertion(+), 10 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index c3a81d2e4..4fe7d61f7 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -343,7 +343,7 @@ const KIND_OPTIONS: { value: AliasKind; label: string }[] = [
       </section>
 
       <div v-if="aliasLevelWarnings.length > 0" class="rounded-md border border-amber-500/30 bg-amber-500/10 px-3 py-2 text-sm text-amber-300">
-        <ul v-if="aliasLevelWarnings.length > 1" class="list-disc space-y-1 pl-5">
+        <ul class="list-disc space-y-1 pl-5">
           <li v-for="w in aliasLevelWarnings" :key="w.type">
             <template v-if="w.type === 'shadow'">
               This alias name shadows a real model id:
@@ -356,15 +356,6 @@ const KIND_OPTIONS: { value: AliasKind; label: string }[] = [
             <template v-else>{{ w.message }}</template>
           </li>
         </ul>
-        <template v-else-if="aliasLevelWarnings[0].type === 'shadow'">
-          This alias name shadows a real model id:
-          <code class="font-mono">{{ aliasLevelWarnings[0].shadowedId }}</code>
-          <template v-if="aliasLevelWarnings[0].shadowedDisplayName !== null">
-            (<strong class="font-semibold">{{ aliasLevelWarnings[0].shadowedDisplayName }}</strong>).
-          </template>
-          <template v-else>.</template>
-        </template>
-        <template v-else>{{ aliasLevelWarnings[0].message }}</template>
       </div>
 
       <div class="flex flex-wrap items-center justify-between gap-3 border-t border-white/[0.06] pt-5">

From e465ed419a4db02b133c2b79e5cbfa3abe323d16 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 21:32:05 +0800
Subject: [PATCH 124/170] refactor(addressable): extract listedRealModels
 helper

The "addressable.entries.filter(unlisted === undefined).map(model)"
pattern was repeated at four listing call sites (loadModels, Gemini,
codex catalog, /api/models), each performing the same projection.
Centralise it in one helper so a future change to the listed-row
contract lands in one spot.
---
 packages/gateway/src/control-plane/models/routes.ts      | 6 ++----
 packages/gateway/src/data-plane/codex/models.ts          | 6 ++----
 packages/gateway/src/data-plane/models/gemini.ts         | 6 ++----
 packages/gateway/src/data-plane/models/load.ts           | 6 ++----
 packages/gateway/src/data-plane/providers/addressable.ts | 6 ++++++
 5 files changed, 14 insertions(+), 16 deletions(-)

diff --git a/packages/gateway/src/control-plane/models/routes.ts b/packages/gateway/src/control-plane/models/routes.ts
index a4b5856e1..34e6368aa 100644
--- a/packages/gateway/src/control-plane/models/routes.ts
+++ b/packages/gateway/src/control-plane/models/routes.ts
@@ -3,7 +3,7 @@ import type { Context } from 'hono';
 import { mergeAliasesIntoModels } from '../../data-plane/models/alias-listing.ts';
 import { toPublicModel } from '../../data-plane/models/load.ts';
 import { MODEL_LISTING_FAILURE_MESSAGE } from '../../data-plane/models/shared.ts';
-import { enumerateAddressableModelIds } from '../../data-plane/providers/addressable.ts';
+import { enumerateAddressableModelIds, listedRealModels } from '../../data-plane/providers/addressable.ts';
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
 import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
 import { getRepo } from '../../repo/index.ts';
@@ -64,9 +64,7 @@ export const controlPlaneModels = async (c: Context) => {
       ),
       includeAliases ? getRepo().modelAliases.list() : Promise.resolve([]),
     ]);
-    const realModels = addressable.entries
-      .filter(entry => entry.unlisted === undefined)
-      .map(entry => entry.model);
+    const realModels = listedRealModels(addressable.entries);
     const unlistedRows = includeUnlisted
       ? addressable.entries
           .filter(entry => entry.unlisted === true)
diff --git a/packages/gateway/src/data-plane/codex/models.ts b/packages/gateway/src/data-plane/codex/models.ts
index 9cd60b7a2..bd3ab8f45 100644
--- a/packages/gateway/src/data-plane/codex/models.ts
+++ b/packages/gateway/src/data-plane/codex/models.ts
@@ -41,7 +41,7 @@ import { getRepo } from '../../repo/index.ts';
 import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
 import { getCurrentColo } from '../../runtime/runtime-info.ts';
 import { synthesizeListedAliases } from '../models/alias-listing.ts';
-import { enumerateAddressableModelIds } from '../providers/addressable.ts';
+import { enumerateAddressableModelIds, listedRealModels } from '../providers/addressable.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { Fetcher } from '@floway-dev/provider';
 
@@ -70,9 +70,7 @@ const computeCatalog = async (
     enumerateAddressableModelIds(upstreamIds, fetcherForUpstream, scheduler),
     getRepo().modelAliases.list(),
   ]);
-  const realModels = addressable.entries
-    .filter(entry => entry.unlisted === undefined)
-    .map(entry => entry.model);
+  const realModels = listedRealModels(addressable.entries);
   const slugContextWindow = new Map<string, number>();
   for (const m of realModels) {
     const limit = m.limits.max_context_window_tokens;
diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts
index 0264a88c6..3a8a67ac0 100644
--- a/packages/gateway/src/data-plane/models/gemini.ts
+++ b/packages/gateway/src/data-plane/models/gemini.ts
@@ -9,7 +9,7 @@ import type { ModelAliasesRepo } from '../../repo/types.ts';
 import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
 import { getCurrentColo } from '../../runtime/runtime-info.ts';
 import { geminiStatusForHttpStatus } from '../chat/gemini/errors.ts';
-import { enumerateAddressableModelIds } from '../providers/addressable.ts';
+import { enumerateAddressableModelIds, listedRealModels } from '../providers/addressable.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { ModelPricing } from '@floway-dev/protocols/common';
 import { ProviderModelsUnavailableError } from '@floway-dev/provider';
@@ -78,9 +78,7 @@ const loadGeminiModels = async (
     enumerateAddressableModelIds(upstreamFilter, fetcherForUpstream, scheduler),
     aliasRepo.list(),
   ]);
-  const realModels = addressable.entries
-    .filter(entry => entry.unlisted === undefined)
-    .map(entry => entry.model);
+  const realModels = listedRealModels(addressable.entries);
   // Gemini surfaces chat-kind models only; filter both the real catalog and
   // the synthesized alias entries before the merge so the alias collision
   // step only ever weighs chat-on-chat.
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index ff50a0f47..804a003ac 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -1,6 +1,6 @@
 import { mergeAliasesIntoModels } from './alias-listing.ts';
 import type { ModelAliasesRepo } from '../../repo/types.ts';
-import { enumerateAddressableModelIds } from '../providers/addressable.ts';
+import { enumerateAddressableModelIds, listedRealModels } from '../providers/addressable.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { PublicModel, PublicModelsResponse } from '@floway-dev/protocols/common';
 import type { Fetcher, ResolvedModel } from '@floway-dev/provider';
@@ -39,9 +39,7 @@ export const loadModels = async (
     enumerateAddressableModelIds(upstreamFilter, fetcherForUpstream, scheduler),
     aliasRepo.list(),
   ]);
-  const realModels = addressable.entries
-    .filter(entry => entry.unlisted === undefined)
-    .map(entry => entry.model);
+  const realModels = listedRealModels(addressable.entries);
   const data = mergeAliasesIntoModels({
     realModels,
     addressableModelIds: addressable.entries,
diff --git a/packages/gateway/src/data-plane/providers/addressable.ts b/packages/gateway/src/data-plane/providers/addressable.ts
index a3415643f..21a270d65 100644
--- a/packages/gateway/src/data-plane/providers/addressable.ts
+++ b/packages/gateway/src/data-plane/providers/addressable.ts
@@ -41,6 +41,12 @@ export interface AddressableSurface {
   readonly entries: readonly AddressableIdEntry[];
 }
 
+// Project the listed (real-catalog) `ResolvedModel`s out of an addressable
+// surface — every listing caller wants this same slice to feed
+// `mergeAliasesIntoModels`'s `realModels` arg.
+export const listedRealModels = (entries: readonly AddressableIdEntry[]): readonly ResolvedModel[] =>
+  entries.filter(entry => entry.unlisted === undefined).map(entry => entry.model);
+
 // Enumerate every inbound id the data plane accepts under `upstreamFilter`,
 // tagged with whether the id participates in the default `/v1/models`
 // listing. Fans out per upstream the same way `collectProviderModels` does,

From 30f57de06a28b584ce013688d39117b48b3fa9a8 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 21:42:37 +0800
Subject: [PATCH 125/170] refactor(addressable): drop AddressableSurface
 wrapper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`enumerateAddressableModelIds` returned `{ entries: ... }` with no other
fields; the wrapper only added `.entries` indirection at every call site.
Return the array directly — callers now read the surface as a plain
list, matching the shape `getModels` already uses.
---
 packages/gateway/src/control-plane/models/routes.ts       | 6 +++---
 packages/gateway/src/data-plane/codex/models.ts           | 6 +++---
 packages/gateway/src/data-plane/models/gemini.ts          | 4 ++--
 packages/gateway/src/data-plane/models/load.ts            | 4 ++--
 packages/gateway/src/data-plane/providers/addressable.ts  | 8 ++------
 .../gateway/src/data-plane/providers/addressable_test.ts  | 6 +++---
 6 files changed, 15 insertions(+), 19 deletions(-)

diff --git a/packages/gateway/src/control-plane/models/routes.ts b/packages/gateway/src/control-plane/models/routes.ts
index 34e6368aa..6838ec1d2 100644
--- a/packages/gateway/src/control-plane/models/routes.ts
+++ b/packages/gateway/src/control-plane/models/routes.ts
@@ -64,16 +64,16 @@ export const controlPlaneModels = async (c: Context) => {
       ),
       includeAliases ? getRepo().modelAliases.list() : Promise.resolve([]),
     ]);
-    const realModels = listedRealModels(addressable.entries);
+    const realModels = listedRealModels(addressable);
     const unlistedRows = includeUnlisted
-      ? addressable.entries
+      ? addressable
           .filter(entry => entry.unlisted === true)
           .map(entry => toUnlistedControlPlaneModel(entry.id, entry.model))
       : [];
     const listedRows = includeAliases
       ? mergeAliasesIntoModels({
           realModels,
-          addressableModelIds: addressable.entries,
+          addressableModelIds: addressable,
           aliases,
           mapReal: toControlPlaneModel,
           wrapAlias: entry => ({ ...entry, upstreams: [] }),
diff --git a/packages/gateway/src/data-plane/codex/models.ts b/packages/gateway/src/data-plane/codex/models.ts
index bd3ab8f45..2f3dfaeda 100644
--- a/packages/gateway/src/data-plane/codex/models.ts
+++ b/packages/gateway/src/data-plane/codex/models.ts
@@ -70,7 +70,7 @@ const computeCatalog = async (
     enumerateAddressableModelIds(upstreamIds, fetcherForUpstream, scheduler),
     getRepo().modelAliases.list(),
   ]);
-  const realModels = listedRealModels(addressable.entries);
+  const realModels = listedRealModels(addressable);
   const slugContextWindow = new Map<string, number>();
   for (const m of realModels) {
     const limit = m.limits.max_context_window_tokens;
@@ -84,7 +84,7 @@ const computeCatalog = async (
   // Alias-target availability is the broader question — a target reachable
   // only via a prefix alternate or Copilot variant id still resolves at
   // request time, so the codex catalog must keep its alias slug too.
-  const addressableSet = new Set(addressable.entries.map(entry => entry.id));
+  const addressableSet = new Set(addressable.map(entry => entry.id));
 
   // Run the shared alias synthesizer so the codex catalog reads the same
   // visible-alias surface that /v1/models, the dashboard, and Gemini do.
@@ -104,7 +104,7 @@ const computeCatalog = async (
     readonly announcedContextWindow: number | undefined;
   }
   const aliasCatalogInfo = new Map<string, AliasCatalogInfo>();
-  for (const entry of synthesizeListedAliases({ aliases, addressableModelIds: addressable.entries })) {
+  for (const entry of synthesizeListedAliases({ aliases, addressableModelIds: addressable })) {
     const aliasedFrom = entry.aliasedFrom;
     if (aliasedFrom === undefined) continue;
     const firstRoutable = aliasedFrom.targets.find(t => addressableSet.has(t.target_model_id));
diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts
index 3a8a67ac0..822c0aef8 100644
--- a/packages/gateway/src/data-plane/models/gemini.ts
+++ b/packages/gateway/src/data-plane/models/gemini.ts
@@ -78,13 +78,13 @@ const loadGeminiModels = async (
     enumerateAddressableModelIds(upstreamFilter, fetcherForUpstream, scheduler),
     aliasRepo.list(),
   ]);
-  const realModels = listedRealModels(addressable.entries);
+  const realModels = listedRealModels(addressable);
   // Gemini surfaces chat-kind models only; filter both the real catalog and
   // the synthesized alias entries before the merge so the alias collision
   // step only ever weighs chat-on-chat.
   const merged = mergeAliasesIntoModels<InternalModel>({
     realModels: realModels.filter(model => model.kind === 'chat'),
-    addressableModelIds: addressable.entries.filter(entry => entry.model.kind === 'chat'),
+    addressableModelIds: addressable.filter(entry => entry.model.kind === 'chat'),
     aliases: aliases.filter(alias => alias.kind === 'chat'),
     mapReal: model => model,
     wrapAlias: entry => ({
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index 804a003ac..937966405 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -39,10 +39,10 @@ export const loadModels = async (
     enumerateAddressableModelIds(upstreamFilter, fetcherForUpstream, scheduler),
     aliasRepo.list(),
   ]);
-  const realModels = listedRealModels(addressable.entries);
+  const realModels = listedRealModels(addressable);
   const data = mergeAliasesIntoModels({
     realModels,
-    addressableModelIds: addressable.entries,
+    addressableModelIds: addressable,
     aliases,
     mapReal: toPublicModel,
     wrapAlias: entry => entry,
diff --git a/packages/gateway/src/data-plane/providers/addressable.ts b/packages/gateway/src/data-plane/providers/addressable.ts
index 21a270d65..8b221337c 100644
--- a/packages/gateway/src/data-plane/providers/addressable.ts
+++ b/packages/gateway/src/data-plane/providers/addressable.ts
@@ -37,10 +37,6 @@ export interface AddressableIdEntry {
   readonly model: ResolvedModel;
 }
 
-export interface AddressableSurface {
-  readonly entries: readonly AddressableIdEntry[];
-}
-
 // Project the listed (real-catalog) `ResolvedModel`s out of an addressable
 // surface — every listing caller wants this same slice to feed
 // `mergeAliasesIntoModels`'s `realModels` arg.
@@ -56,7 +52,7 @@ export const enumerateAddressableModelIds = async (
   upstreamFilter: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
-): Promise<AddressableSurface> => {
+): Promise<readonly AddressableIdEntry[]> => {
   // `getModels` throws the actionable "no upstream provider configured"
   // message when the provider list is empty; surface it the same way here
   // so /v1/models keeps its 502 + hint behavior on a brand-new gateway.
@@ -136,5 +132,5 @@ export const enumerateAddressableModelIds = async (
 
   // Stable id ordering matches the listed surface so consumers can rely on
   // a single comparator across both halves.
-  return { entries: entries.sort((a, b) => compareModelIds(a.id, b.id)) };
+  return entries.sort((a, b) => compareModelIds(a.id, b.id));
 };
diff --git a/packages/gateway/src/data-plane/providers/addressable_test.ts b/packages/gateway/src/data-plane/providers/addressable_test.ts
index dcb1b227d..2aace3eee 100644
--- a/packages/gateway/src/data-plane/providers/addressable_test.ts
+++ b/packages/gateway/src/data-plane/providers/addressable_test.ts
@@ -27,7 +27,7 @@ describe('enumerateAddressableModelIds', () => {
       },
       async () => {
         const surface = await enumerateAddressableModelIds(null, () => directFetcher, noBackground);
-        expect(surface.entries.map(e => ({ id: e.id, unlisted: e.unlisted }))).toEqual([
+        expect(surface.map(e => ({ id: e.id, unlisted: e.unlisted }))).toEqual([
           { id: 'shared-model', unlisted: undefined },
         ]);
       },
@@ -56,7 +56,7 @@ describe('enumerateAddressableModelIds', () => {
       },
       async () => {
         const surface = await enumerateAddressableModelIds(null, () => directFetcher, noBackground);
-        const byId = new Map(surface.entries.map(e => [e.id, e]));
+        const byId = new Map(surface.map(e => [e.id, e]));
         expect(byId.get('cust/gpt-5.4')?.unlisted).toBeUndefined();
         expect(byId.get('gpt-5.4')?.unlisted).toBe(true);
         // The addressable-only entry still resolves to the same `ResolvedModel`
@@ -89,7 +89,7 @@ describe('enumerateAddressableModelIds', () => {
       },
       async () => {
         const surface = await enumerateAddressableModelIds(null, () => directFetcher, noBackground);
-        const byId = new Map(surface.entries.map(e => [e.id, e]));
+        const byId = new Map(surface.map(e => [e.id, e]));
         // The canonical merged id is the listed entry.
         expect(byId.get('claude-opus-4-7')?.unlisted).toBeUndefined();
         // Both raw variants are addressable-but-not-listed, redirecting to

From b1597a317059fce39b17bffd9a736e7326508dae Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 21:46:50 +0800
Subject: [PATCH 126/170] refactor(registry): drop test-only getInternalModels
 from production

`getInternalModels` projected `ResolvedModel` to `InternalModel` by
stripping `providers` + `endpoints`. The sole production consumer
migrated to the addressable surface; only the registry tests still want
the projection. Move the helper into the test file so the production
module no longer exports an unused symbol.
---
 .../src/data-plane/providers/registry.ts        | 16 ++++------------
 .../src/data-plane/providers/registry_test.ts   | 17 +++++++++++++++--
 2 files changed, 19 insertions(+), 14 deletions(-)

diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index b6d41a789..1519f3a4f 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -5,7 +5,7 @@ import { getRepo } from '../../repo/index.ts';
 import { type AliasResolution, resolveAlias } from '../model-aliases/resolve.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import { type ModelEndpoints, kindForEndpoints } from '@floway-dev/protocols/common';
-import type { InternalModel, ModelProviderInstance, ProviderModelRecord, ResolvedModel, Fetcher, UpstreamModel, UpstreamProviderKind, UpstreamRecord } from '@floway-dev/provider';
+import type { ModelProviderInstance, ProviderModelRecord, ResolvedModel, Fetcher, UpstreamModel, UpstreamProviderKind, UpstreamRecord } from '@floway-dev/provider';
 import { createAzureProvider } from '@floway-dev/provider-azure';
 import { createClaudeCodeProvider } from '@floway-dev/provider-claude-code';
 import { createCodexProvider } from '@floway-dev/provider-codex';
@@ -40,10 +40,9 @@ export const createProviderInstance = (record: UpstreamRecord): ModelProviderIns
   providerFactories[record.provider](record);
 
 // The upstream scope is a required argument across the catalog-assembly chain
-// (this, getModels, getInternalModels) so a caller can never omit it and
-// silently receive the full, unscoped catalog — a missing scope is a compile
-// error, not a runtime leak. Pass `null` to deliberately request every enabled
-// upstream.
+// (this, getModels) so a caller can never omit it and silently receive the
+// full, unscoped catalog — a missing scope is a compile error, not a runtime
+// leak. Pass `null` to deliberately request every enabled upstream.
 export const listModelProviders = async (
   upstreamFilter: readonly string[] | null,
 ): Promise<ModelProviderInstance[]> => {
@@ -253,13 +252,6 @@ export const getModels = async (
   return [];
 };
 
-export const getInternalModels = async (
-  upstreamFilter: readonly string[] | null,
-  fetcherForUpstream: (upstreamId: string) => Fetcher,
-  scheduler: BackgroundScheduler,
-): Promise<InternalModel[]> =>
-  (await getModels(upstreamFilter, fetcherForUpstream, scheduler)).map(({ providers: _providers, endpoints: _endpoints, ...model }) => model);
-
 interface ResolveCandidatesResult<TTarget> {
   readonly candidates: ReadonlyArray<{
     readonly provider: ModelProviderInstance;
diff --git a/packages/gateway/src/data-plane/providers/registry_test.ts b/packages/gateway/src/data-plane/providers/registry_test.ts
index 22cb1f165..1970b9ac2 100644
--- a/packages/gateway/src/data-plane/providers/registry_test.ts
+++ b/packages/gateway/src/data-plane/providers/registry_test.ts
@@ -1,12 +1,25 @@
 import { describe, expect, test } from 'vitest';
 
 import { clearInFlightForTesting } from './models-cache.ts';
-import { compareModelIds, enumerateModelInterpretations, getInternalModels, listModelProviders, resolveModelCandidates, resolveModelForProvider } from './registry.ts';
+import { compareModelIds, enumerateModelInterpretations, getModels, listModelProviders, resolveModelCandidates, resolveModelForProvider } from './registry.ts';
 import { buildCopilotUpstreamRecord, buildCustomUpstreamRecord, copilotModels, setupAppTest } from '../../test-helpers.ts';
-import { directFetcher, type ModelProviderInstance } from '@floway-dev/provider';
+import type { BackgroundScheduler } from '@floway-dev/platform';
+import { directFetcher, type Fetcher, type InternalModel, type ModelProviderInstance } from '@floway-dev/provider';
 import { createCopilotProvider } from '@floway-dev/provider-copilot';
 import { assertEquals, jsonResponse, stubProvider, withMockedFetch } from '@floway-dev/test-utils';
 
+// Strip the execution-layer fields (`providers`, `endpoints`) off each
+// resolved catalog row so the assertions read against the public catalog
+// projection alone. Existed in production as `getInternalModels` until the
+// production code's sole consumer migrated to the addressable surface; the
+// test fixtures still want the projection, so it lives here now.
+const getInternalModels = async (
+  upstreamFilter: readonly string[] | null,
+  fetcherForUpstream: (upstreamId: string) => Fetcher,
+  scheduler: BackgroundScheduler,
+): Promise<InternalModel[]> =>
+  (await getModels(upstreamFilter, fetcherForUpstream, scheduler)).map(({ providers: _providers, endpoints: _endpoints, ...model }) => model);
+
 const sortedIds = (ids: readonly string[]): string[] => [...ids].sort(compareModelIds);
 
 // `resolveModelCandidates` requires a target descriptor; tests that just want

From 1fc3c7136c36fb734c36897d5eb881455aa69fe2 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 23:46:12 +0800
Subject: [PATCH 127/170] fix(aliases/codex): slugContextWindow keys on the
 full addressable surface
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The codex catalog's alias-window fallback uses
slugContextWindow.get(info.firstTargetId), where firstTargetId can be
an unlisted addressable form (a prefix-only alternate or a Copilot
variant id). The map was keyed only on listed real-model ids, so the
fallback silently missed and the catalog published a null window — the
bundled-catalog default (often a tier well above what the gateway can
actually serve) leaked through. Build the map from `addressable`
directly: each entry's `.model` is the canonical ResolvedModel, so
storing its limit under both the listed id and any unlisted alias of
it stays consistent.

The trigger requires the alias's automatic intersection to drop
max_context_window_tokens (i.e. at least one target without a window)
AND the first routable target to be an unlisted addressable form. The
added test reproduces that path with a multi-target alias.
---
 .../gateway/src/data-plane/codex/models.ts    | 12 +++-
 .../src/data-plane/codex/routes_test.ts       | 72 ++++++++++++++++++-
 2 files changed, 80 insertions(+), 4 deletions(-)

diff --git a/packages/gateway/src/data-plane/codex/models.ts b/packages/gateway/src/data-plane/codex/models.ts
index 2f3dfaeda..ae6a3dcb6 100644
--- a/packages/gateway/src/data-plane/codex/models.ts
+++ b/packages/gateway/src/data-plane/codex/models.ts
@@ -71,10 +71,16 @@ const computeCatalog = async (
     getRepo().modelAliases.list(),
   ]);
   const realModels = listedRealModels(addressable);
+  // Keyed on every addressable id (not just the listed surface): an alias can
+  // legitimately target an unlisted addressable form — `gpt-5.4` when the
+  // listed canonical id is `cust/gpt-5.4`, or a Copilot variant `claude-opus-4.7`
+  // when the listed canonical is `claude-opus-4-7`. Each entry's `.model`
+  // points to the same canonical `ResolvedModel`, so storing the limit under
+  // both the listed id and any unlisted alias of it stays consistent.
   const slugContextWindow = new Map<string, number>();
-  for (const m of realModels) {
-    const limit = m.limits.max_context_window_tokens;
-    if (typeof limit === 'number') slugContextWindow.set(m.id, limit);
+  for (const entry of addressable) {
+    const limit = entry.model.limits.max_context_window_tokens;
+    if (typeof limit === 'number') slugContextWindow.set(entry.id, limit);
   }
   // Listed-surface filter for the codex catalog itself: the codex client
   // expects the surface it would have published in a regular /v1/models
diff --git a/packages/gateway/src/data-plane/codex/routes_test.ts b/packages/gateway/src/data-plane/codex/routes_test.ts
index 844c4d7c0..b3c7bc730 100644
--- a/packages/gateway/src/data-plane/codex/routes_test.ts
+++ b/packages/gateway/src/data-plane/codex/routes_test.ts
@@ -5,7 +5,7 @@ import { describe, expect, it } from 'vitest';
 import { mountCodexRoutes } from './routes.ts';
 import { app as gatewayApp } from '../../app.ts';
 import { type AuthVars, authMiddleware } from '../../middleware/auth.ts';
-import { copilotModels, setupAppTest, sseResponsesResponse } from '../../test-helpers.ts';
+import { copilotModels, buildCustomUpstreamRecord, setupAppTest, sseResponsesResponse } from '../../test-helpers.ts';
 import { isStoredResponseId } from '../chat/responses/items/format.ts';
 import { jsonResponse, withMockedFetch } from '@floway-dev/test-utils';
 
@@ -514,6 +514,76 @@ describe('codex 1p namespace', () => {
       expect(autoReview?.max_context_window).toBe(64000);
     });
 
+    it('reports the target window when the alias falls back to slugContextWindow on an unlisted addressable form', async () => {
+      // Trigger path: a multi-target alias whose targets disagree on
+      // `max_context_window_tokens` (one target omits it) — the
+      // automatic intersection drops the field, so the codex catalog has
+      // to fall back to the first routable target's own window via
+      // `slugContextWindow`. The first target is the bare unlisted form
+      // `gpt-5.4` (the listed canonical is `cust/gpt-5.4`), so the
+      // lookup must succeed against the addressable surface — keyed by
+      // listed-only ids it would miss and the catalog would publish a
+      // null window, defaulting back to the bundled tier the gateway
+      // cannot actually serve.
+      const { apiKey, repo } = await setupAppTest();
+      await repo.upstreams.deleteAll();
+      await repo.upstreams.save(buildCustomUpstreamRecord({
+        modelPrefix: { prefix: 'cust/', addressable: ['unprefixed', 'prefixed'], listed: ['prefixed'] },
+      }));
+      await repo.modelAliases.insert({
+        name: 'codex-auto-review',
+        kind: 'chat',
+        selection: 'first-available',
+        displayName: 'Codex Auto Review',
+        visibleInModelsList: true,
+        targets: [
+          { target_model_id: 'gpt-5.4', rules: {} },
+          { target_model_id: 'gpt-5.5', rules: {} },
+        ],
+        announcedMetadata: null,
+        sortOrder: 0,
+        createdAt: '2026-01-01T00:00:00.000Z',
+        updatedAt: '2026-01-01T00:00:00.000Z',
+      });
+      const app = buildCodexApp();
+      const body = await withMockedFetch(
+        request => {
+          const url = new URL(request.url);
+          if (url.hostname === 'custom.example.com' && url.pathname === '/v1/models') {
+            return jsonResponse({
+              object: 'list',
+              data: [
+                {
+                  id: 'gpt-5.4',
+                  supported_endpoints: ['/chat/completions'],
+                  limits: { max_context_window_tokens: 272000 },
+                },
+                {
+                  // No window — intersection drops the field, exposing
+                  // the slugContextWindow fallback path.
+                  id: 'gpt-5.5',
+                  supported_endpoints: ['/chat/completions'],
+                },
+              ],
+            });
+          }
+          throw new Error(`Unhandled fetch ${request.url}`);
+        },
+        async () => {
+          const response = await app.request('/azure-api.codex/models', {
+            headers: { authorization: `Bearer ${apiKey.key}` },
+          });
+          expect(response.status).toBe(200);
+          return await response.json() as CodexModelsResponse;
+        },
+      );
+      const autoReview = body.models.find(m => m.slug === 'codex-auto-review');
+      // Bundled `codex-auto-review` ships with max_context_window=1000000;
+      // a successful resolver lookup pulls it down to gpt-5.4's 272000.
+      expect(autoReview?.max_context_window).toBe(272000);
+      expect(autoReview?.context_window).toBe(272000);
+    });
+
     it('returns an empty catalog when the registry has no overlapping slugs', async () => {
       const { apiKey } = await setupAppTest();
       const app = buildCodexApp();

From 358122fd21120d143723fe1c9ddd5f3fa8883bd9 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sat, 27 Jun 2026 23:50:38 +0800
Subject: [PATCH 128/170] docs(aliases): drop stale/restating comments across
 alias surfaces
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Clean up comments that read as patching traces ("Existed in production
as X until Y migrated", "Mirror that contract here"), that restate the
code immediately below them, or that repeat the same multi-line
preamble across the four serve_test.ts files. The remaining notes carry
only the non-obvious WHY (e.g. the `gpt-5.4` slug example for the
alias-target window fallback, the collision-check exclusion in the save
gate).

Also drops the unreachable `targets.value.length === 0` branch in
AliasEditDialog: create-mode seeds one blank target, edit-mode seeds
from the DB which enforces ≥1, removeTarget refuses to drop below 1.
---
 .../components/alias-edit/AliasEditDialog.vue |  6 +--
 .../chat/chat-completions/serve_test.ts       |  5 +--
 .../src/data-plane/chat/gemini/serve_test.ts  |  5 +--
 .../data-plane/chat/messages/serve_test.ts    |  5 +--
 .../data-plane/chat/responses/http_test.ts    | 10 +----
 .../data-plane/chat/responses/serve-prep.ts   | 11 ++---
 .../data-plane/chat/responses/serve_test.ts   |  5 +--
 .../gateway/src/data-plane/codex/models.ts    | 41 +++++++------------
 .../src/data-plane/providers/addressable.ts   | 13 +++---
 .../src/data-plane/providers/registry_test.ts |  7 +---
 .../via-messages/anthropic-extensions.ts      |  4 +-
 11 files changed, 34 insertions(+), 78 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index 4fe7d61f7..3a0456c24 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -159,15 +159,13 @@ const aliasLevelWarnings = computed(() => computeAliasLevelWarnings(
 const saving = ref(false);
 const saveError = ref<string | null>(null);
 
-// Save gate: name non-empty AND no collision with another alias AND ≥1
-// target AND every target id non-empty. The collision check excludes the
-// current record so an in-place edit of an unchanged name is allowed.
+// Collision check excludes the current record so an in-place edit of an
+// unchanged name is allowed.
 const validationError = computed<string | null>(() => {
   const trimmed = aliasName.value.trim();
   if (trimmed === '') return 'Alias id is required';
   const collisions = (aliasesStore.aliases.value ?? []).filter(a => a.name === trimmed && a.name !== props.record?.name);
   if (collisions.length > 0) return `An alias with id "${trimmed}" already exists`;
-  if (targets.value.length === 0) return 'At least one target is required';
   if (targets.value.some(t => t.target_model_id.trim() === '')) return 'Every target needs a model id';
   return null;
 });
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
index 1b4ce105c..8578deb8e 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
@@ -22,10 +22,7 @@ vi.mock('../../providers/registry.ts', async importOriginal => {
   return {
     ...original,
     resolveModelCandidates: vi.fn(async (args: { modelName: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
-      // Drain a queued resolution from `aliasResolutionQueue` so the rule-overlay test
-      // sees the resolved target id reach the candidates layer and the
-      // serve overlays rules from the returned `aliasResolution`. Tests
-      // queue the resolution via `aliasResolutionQueue`.
+      // Pull the queued alias resolution so the serve sees a resolved target id and overlays rules from `aliasResolution`.
       const aliasResolution = await resolveAlias({
         modelName: args.modelName,
         providers: [],
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
index ef9f16fba..f05dbd786 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
@@ -21,10 +21,7 @@ vi.mock('../../providers/registry.ts', async importOriginal => {
   return {
     ...original,
     resolveModelCandidates: vi.fn(async (args: { modelName: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
-      // Drain a queued resolution from `aliasResolutionQueue` so the rule-overlay test
-      // sees the resolved target id reach the candidates layer and the
-      // serve overlays rules from the returned `aliasResolution`. Tests
-      // queue the resolution via `aliasResolutionQueue`.
+      // Pull the queued alias resolution so the serve sees a resolved target id and overlays rules from `aliasResolution`.
       const aliasResolution = await resolveAlias({
         modelName: args.modelName,
         providers: [],
diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
index f28131dc7..8f25ff47a 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
@@ -19,10 +19,7 @@ vi.mock('../../providers/registry.ts', async importOriginal => {
   return {
     ...original,
     resolveModelCandidates: vi.fn(async (args: { modelName: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
-      // Drain a queued resolution from `aliasResolutionQueue` so the rule-overlay test
-      // sees the resolved target id reach the candidates layer and the
-      // serve overlays rules from the returned `aliasResolution`. Tests
-      // queue the resolution via `aliasResolutionQueue`.
+      // Pull the queued alias resolution so the serve sees a resolved target id and overlays rules from `aliasResolution`.
       const aliasResolution = await resolveAlias({
         modelName: args.modelName,
         providers: [],
diff --git a/packages/gateway/src/data-plane/chat/responses/http_test.ts b/packages/gateway/src/data-plane/chat/responses/http_test.ts
index c3d4b6b65..ffda3d2dc 100644
--- a/packages/gateway/src/data-plane/chat/responses/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/http_test.ts
@@ -13,16 +13,8 @@ import { directFetcher, type ProviderResponsesResult, type ResponsesAction, type
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 // Mock the candidates seam so each test hands the http entry exactly the
-// provider candidates it wants. The mock drains a queued `AliasResolution`
-// from `aliasResolutionQueue` (set up by `installRepo` for the alias-
-// rewrite tests below) and forwards it on the candidates return, so the
-// serve's downstream alias-rewrite + rule-overlay path runs end-to-end
-// against an injected resolution without standing up the real catalog
-// stack.
+// provider candidates it wants, with an optional queued alias resolution.
 const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
-// `lastSeenModel` captures the effective model id the serve passes downstream
-// — the alias rewrite (if any) applied. Tests assert against this to confirm
-// the alias mechanism drove the rewrite.
 const lastSeenModel: { value: string | null } = { value: null };
 vi.mock('../../providers/registry.ts', async importOriginal => {
   const original = await importOriginal<typeof import('../../providers/registry.ts')>();
diff --git a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
index db51d0eb5..5ec26a09b 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
@@ -78,13 +78,10 @@ export type ResponsesServePlan =
   | { readonly kind: 'failure'; readonly result: ExecuteResult<ProtocolFrame<ResponsesStreamEvent>> }
   | { readonly kind: 'ready'; readonly prepared: ResponsesPayload; readonly candidate: ChatCandidate };
 
-// Runs the shared serve-side prep both `responsesServe.generate` and
-// `responsesServe.compact` need before dispatching to `responsesAttempt`:
-// expand any `previous_response_id`, enumerate candidates (which internally
-// runs alias resolution), overlay alias rules + stage the response header,
-// plan routing, stage the user input, and pick the first candidate. Returns
-// a rendered failure result when no candidate is viable so the caller can
-// surface it directly without re-deriving the model-error branch.
+// Shared serve-side prep for both `responsesServe.generate` and
+// `responsesServe.compact`. Returns a rendered failure result when no
+// candidate is viable so the caller can surface it directly without
+// re-deriving the model-error branch.
 export const prepareResponsesServePlan = async (args: {
   readonly payload: ResponsesPayload;
   readonly ctx: GatewayCtx;
diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
index c45759441..053aba44c 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
@@ -29,10 +29,7 @@ vi.mock('../../providers/registry.ts', async importOriginal => {
   return {
     ...original,
     resolveModelCandidates: vi.fn(async (args: { modelName: string; upstreamIds: readonly string[] | null; scheduler: () => void }) => {
-      // Drain a queued resolution from `aliasResolutionQueue` so the rule-overlay test
-      // sees the resolved target id reach the candidates layer and the
-      // serve overlays rules from the returned `aliasResolution`. Tests
-      // queue the resolution via `aliasResolutionQueue`.
+      // Pull the queued alias resolution so the serve sees a resolved target id and overlays rules from `aliasResolution`.
       const aliasResolution = await resolveAlias({
         modelName: args.modelName,
         providers: [],
diff --git a/packages/gateway/src/data-plane/codex/models.ts b/packages/gateway/src/data-plane/codex/models.ts
index ae6a3dcb6..2b0d760d8 100644
--- a/packages/gateway/src/data-plane/codex/models.ts
+++ b/packages/gateway/src/data-plane/codex/models.ts
@@ -82,29 +82,17 @@ const computeCatalog = async (
     const limit = entry.model.limits.max_context_window_tokens;
     if (typeof limit === 'number') slugContextWindow.set(entry.id, limit);
   }
-  // Listed-surface filter for the codex catalog itself: the codex client
-  // expects the surface it would have published in a regular /v1/models
-  // call, so addressable-but-not-listed forms intentionally do NOT enter
-  // this set.
+  // `registrySlugs` mirrors the listed catalog surface — the slugs codex
+  // would have seen in a regular /v1/models call. `addressableSet` is the
+  // broader set the resolver actually accepts (prefix alternates, Copilot
+  // variants), used only for alias-target availability.
   const registrySlugs = new Set(realModels.map(m => m.id));
-  // Alias-target availability is the broader question — a target reachable
-  // only via a prefix alternate or Copilot variant id still resolves at
-  // request time, so the codex catalog must keep its alias slug too.
   const addressableSet = new Set(addressable.map(entry => entry.id));
 
-  // Run the shared alias synthesizer so the codex catalog reads the same
-  // visible-alias surface that /v1/models, the dashboard, and Gemini do.
-  // Each entry's `aliasedFrom.targets` keeps every configured target — the
-  // synthesizer does not narrow to availability — so we still pick the
-  // first one in registry order here. Selection mode never matters for
-  // this static listing: a `random` alias would refuse to publish a
-  // stable context window, so the catalog uses first-available regardless
-  // of the alias's runtime selection.
-  //
-  // We keep both the alias's own announced limits (operator override OR
-  // the synthesizer's automatic intersection) AND the first routable
-  // target's window — the resolver below prefers the alias's announced
-  // value when the operator set it, fallback to the target's ceiling.
+  // Each alias entry survives in the codex catalog when at least one of
+  // its configured targets is currently addressable. `firstTargetId` is
+  // the fallback window source: selection mode is irrelevant here because
+  // the catalog must publish a single stable window.
   interface AliasCatalogInfo {
     readonly firstTargetId: string;
     readonly announcedContextWindow: number | undefined;
@@ -125,13 +113,12 @@ const computeCatalog = async (
     models: catalog.models.filter(m => registrySlugs.has(m.slug) || aliasCatalogInfo.has(m.slug)),
   };
 
-  // For an alias slug: prefer the alias's announced window — that's what
-  // the operator told /v1/models to publish, and the codex client reads
-  // this number for its own local gating (auto-compact, context-budget
-  // UX), so the two wire surfaces must agree on operator intent. Fallback
-  // to the first routable target's window when the alias has no announced
-  // limit (e.g. multi-target alias with no operator override and no agreed
-  // intersection). Plain (non-alias) slugs read straight off the registry.
+  // Alias slug: prefer the alias's announced window (operator override OR
+  // the synthesizer's automatic intersection) so codex's local gating —
+  // auto-compact, context-budget UX — agrees with what /v1/models told
+  // the operator's other tooling. Fallback to the first routable target's
+  // window when the alias publishes no window. Plain slugs read straight
+  // off the registry.
   const contextWindowOf: ContextWindowResolver = slug => {
     const info = aliasCatalogInfo.get(slug);
     if (info !== undefined) return info.announcedContextWindow ?? slugContextWindow.get(info.firstTargetId) ?? null;
diff --git a/packages/gateway/src/data-plane/providers/addressable.ts b/packages/gateway/src/data-plane/providers/addressable.ts
index 8b221337c..caa182ae7 100644
--- a/packages/gateway/src/data-plane/providers/addressable.ts
+++ b/packages/gateway/src/data-plane/providers/addressable.ts
@@ -77,14 +77,11 @@ export const enumerateAddressableModelIds = async (
   // catalog round-trip is the same SWR cache the listed surface just
   // consumed, so this loop never pays a second upstream hit.
   //
-  // `getModels` already tolerated a per-upstream catalog miss via
-  // `Promise.allSettled`, so the listed surface for this same call survived
-  // any single upstream that rejected the catalog refresh. Mirror that
-  // contract here: a rejected `fetchUpstreamModelsCached` collapses to no
-  // addressable-only contribution from THAT upstream — its listed rows
-  // already came through `getModels` (or were dropped there for the same
-  // reason). Without this, a cold-start gateway with one transiently-down
-  // upstream would tank /v1/models entirely.
+  // A rejected per-upstream catalog refresh collapses to no addressable-only
+  // contribution from THAT upstream — its listed rows already came (or were
+  // dropped) through `getModels`. Mirrors the `Promise.allSettled` tolerance
+  // there so a transiently-down upstream cannot tank /v1/models on a
+  // cold-start gateway.
   const perUpstream = await Promise.allSettled(providers.map(async provider => {
     const cfg = provider.modelPrefix;
     const addressableOnly = cfg !== null ? cfg.addressable.filter(form => !cfg.listed.includes(form)) : [];
diff --git a/packages/gateway/src/data-plane/providers/registry_test.ts b/packages/gateway/src/data-plane/providers/registry_test.ts
index 1970b9ac2..7926668f2 100644
--- a/packages/gateway/src/data-plane/providers/registry_test.ts
+++ b/packages/gateway/src/data-plane/providers/registry_test.ts
@@ -8,11 +8,8 @@ import { directFetcher, type Fetcher, type InternalModel, type ModelProviderInst
 import { createCopilotProvider } from '@floway-dev/provider-copilot';
 import { assertEquals, jsonResponse, stubProvider, withMockedFetch } from '@floway-dev/test-utils';
 
-// Strip the execution-layer fields (`providers`, `endpoints`) off each
-// resolved catalog row so the assertions read against the public catalog
-// projection alone. Existed in production as `getInternalModels` until the
-// production code's sole consumer migrated to the addressable surface; the
-// test fixtures still want the projection, so it lives here now.
+// Test-local projection that strips execution-layer fields, leaving the
+// public-catalog shape the fixtures assert against.
 const getInternalModels = async (
   upstreamFilter: readonly string[] | null,
   fetcherForUpstream: (upstreamId: string) => Fetcher,
diff --git a/packages/translate/src/shared/via-messages/anthropic-extensions.ts b/packages/translate/src/shared/via-messages/anthropic-extensions.ts
index 52a8ace56..1f1bc153f 100644
--- a/packages/translate/src/shared/via-messages/anthropic-extensions.ts
+++ b/packages/translate/src/shared/via-messages/anthropic-extensions.ts
@@ -2,8 +2,8 @@ import type { MessagesPayload, MessagesThinkingDisplay } from '@floway-dev/proto
 
 // Build a Messages `thinking` block from the Floway extension fields a
 // non-Messages inbound carries (`thinking_budget`, `adaptive_thinking`,
-// `reasoning_summary`). `adaptive_thinking: true` overrides `thinking_budget`
-// because the alias write-side validator enforces single-facet selection;
+// `reasoning_summary`). The alias write-side validator enforces single-facet
+// selection, so `adaptive_thinking` and `thinking_budget` should not co-occur;
 // when both still arrive the adaptive choice wins.
 //
 // The summary mapping collapses the OpenAI-style {auto|concise|detailed|

From 7f2bc346947a31813481b97e518ec7e21728b617 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 00:07:45 +0800
Subject: [PATCH 129/170] docs(aliases): trim restating / patching-trace
 comments

Second cleanup pass: every flagged comment either restated the code
immediately below (file-name preambles in Vue components, type-shape
restatement, one-line idiom explainers), or paraphrased a contract
already documented elsewhere (the `gateway-ctx.responseHeaders` field,
the addressable surface's branch-history motivation). The remaining
notes keep only WHY content.
---
 .../components/alias-edit/AliasEditDialog.vue  | 12 ++----------
 .../components/alias-edit/AliasTargetRow.vue   |  6 +-----
 apps/web/src/components/settings/AliasRow.vue  | 10 ----------
 .../settings/AliasesSettingsCard.vue           |  2 --
 .../src/control-plane/model-aliases/routes.ts  |  1 -
 .../control-plane/model-aliases/serialize.ts   |  2 --
 .../gateway/src/control-plane/models/routes.ts |  9 ++++-----
 .../src/data-plane/chat/shared/errors.ts       |  2 --
 .../src/data-plane/chat/shared/gateway-ctx.ts  |  2 --
 .../src/data-plane/model-aliases/resolve.ts    |  4 +---
 .../src/data-plane/providers/addressable.ts    | 18 +++++++-----------
 11 files changed, 15 insertions(+), 53 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index 3a0456c24..a3c29313c 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -1,9 +1,4 @@
 <script setup lang="ts">
-// Editor for one alias (create or edit). Top form (name / display name /
-// kind / selection); a vertical stack of AliasTargetRow cards with an
-// "Add target" button; an Announced-metadata section; alias-level
-// warnings card; footer (visibility switch + Cancel / Save).
-
 import { computed, ref, watch } from 'vue';
 
 import AliasTargetRow from './AliasTargetRow.vue';
@@ -45,8 +40,6 @@ const kind = ref<AliasKind>(props.record?.kind ?? 'chat');
 const selection = ref<AliasSelection>(props.record?.selection ?? 'first-available');
 const visibleInModelsList = ref(props.record?.visible_in_models_list ?? true);
 
-// Create mode starts with one blank target so the operator immediately sees
-// a row to fill in.
 const targets = ref<AliasTarget[]>(
   props.record
     ? props.record.targets.map(t => ({ target_model_id: t.target_model_id, rules: { ...t.rules } as AliasTarget['rules'] }))
@@ -112,9 +105,8 @@ const setOverrideEnabled = (on: boolean) => {
   }
 };
 
-// The editor's `modelValue` source-of-truth: the override buffer when
-// the operator is editing, the live computed snapshot when not. In
-// auto mode the editor is read-only, so its emits are no-ops anyway.
+// Source-of-truth for the editor's `modelValue`: the override buffer when
+// the operator is editing, the live computed snapshot otherwise.
 const announcedEditorValue = computed<AnnouncedMetadata>(
   () => announcedOverride.value ?? computedAnnouncedMetadata.value,
 );
diff --git a/apps/web/src/components/alias-edit/AliasTargetRow.vue b/apps/web/src/components/alias-edit/AliasTargetRow.vue
index 9f8c553fd..9cb008bdc 100644
--- a/apps/web/src/components/alias-edit/AliasTargetRow.vue
+++ b/apps/web/src/components/alias-edit/AliasTargetRow.vue
@@ -1,6 +1,4 @@
 <script setup lang="ts">
-// One target row inside the alias edit dialog.
-
 import { computed, ref, watch } from 'vue';
 
 import { computeModelWarnings, computeRuleWarnings, findCatalogModel } from './warnings.ts';
@@ -97,9 +95,7 @@ const onBudgetChange = (raw: string) => {
   patchReasoning({ budget_tokens: Number(trimmed) });
 };
 
-// Suggestion lists for chat-rule comboboxes. Combobox accepts free-form
-// values; these arrays are the canonical presets the dashboard pins as
-// type-ahead hints.
+// Canonical presets pinned as type-ahead hints in the chat-rule comboboxes.
 const EFFORT_ITEMS = ['none', 'low', 'medium', 'high', 'xhigh'];
 const SUMMARY_ITEMS = ['auto', 'concise', 'detailed', 'none'];
 const VERBOSITY_ITEMS = ['low', 'medium', 'high'];
diff --git a/apps/web/src/components/settings/AliasRow.vue b/apps/web/src/components/settings/AliasRow.vue
index 679f5c497..fa3bcbc98 100644
--- a/apps/web/src/components/settings/AliasRow.vue
+++ b/apps/web/src/components/settings/AliasRow.vue
@@ -1,8 +1,4 @@
 <script setup lang="ts">
-// One alias rendered as a two-line block in the Settings card. The action
-// cluster sits right-aligned; the leftmost slot is reserved for the
-// alias-level warning icon when one fires.
-
 import { computed } from 'vue';
 
 import type { ControlPlaneModel, ModelAlias } from '../../api/types.ts';
@@ -19,10 +15,6 @@ defineEmits<{
   delete: [];
 }>();
 
-// Operator-set `display_name` wins; otherwise fall back to the alias id
-// itself. The id is also stamped next to the title in mono, so an empty
-// display_name produces "alias-id (sans-serif) · alias-id (mono)" — same
-// visual idiom the chat playground uses for the active model.
 const title = computed(() => props.alias.display_name ?? props.alias.name);
 
 const KIND_LABELS: Record<ModelAlias['kind'], string> = {
@@ -41,8 +33,6 @@ const selectionLabel = computed(() => SELECTION_LABELS[props.alias.selection]);
 const targetCountLabel = computed(() => `${props.alias.targets.length} target${props.alias.targets.length === 1 ? '' : 's'}`);
 
 const aliasWarnings = computed(() => computeAliasLevelWarnings(props.alias, props.models));
-// Join messages with newlines so the tooltip stays a single visual block
-// when both shadow + no-target fire on the same alias.
 const aliasWarningTooltip = computed(() => aliasWarnings.value.map(w => w.message).join('\n'));
 </script>
 
diff --git a/apps/web/src/components/settings/AliasesSettingsCard.vue b/apps/web/src/components/settings/AliasesSettingsCard.vue
index 93820da63..85e35d0f2 100644
--- a/apps/web/src/components/settings/AliasesSettingsCard.vue
+++ b/apps/web/src/components/settings/AliasesSettingsCard.vue
@@ -1,6 +1,4 @@
 <script setup lang="ts">
-// Settings card listing every alias the operator has configured.
-
 import { computed } from 'vue';
 
 import AliasRow from './AliasRow.vue';
diff --git a/packages/gateway/src/control-plane/model-aliases/routes.ts b/packages/gateway/src/control-plane/model-aliases/routes.ts
index b235ef8c5..e42fc0d46 100644
--- a/packages/gateway/src/control-plane/model-aliases/routes.ts
+++ b/packages/gateway/src/control-plane/model-aliases/routes.ts
@@ -10,7 +10,6 @@ import { getRepo } from '../../repo/index.ts';
 import type { ModelAliasRecord } from '../../repo/types.ts';
 import type { createAliasBody, updateAliasBody } from '../schemas.ts';
 
-// New alias goes at the end of the sort order by default. Empty list → 0.
 const nextSortOrder = (existing: readonly ModelAliasRecord[]): number =>
   existing.reduce((acc, record) => Math.max(acc, record.sortOrder), -1) + 1;
 
diff --git a/packages/gateway/src/control-plane/model-aliases/serialize.ts b/packages/gateway/src/control-plane/model-aliases/serialize.ts
index 13d28e9df..e2a10849d 100644
--- a/packages/gateway/src/control-plane/model-aliases/serialize.ts
+++ b/packages/gateway/src/control-plane/model-aliases/serialize.ts
@@ -17,8 +17,6 @@ export const recordToWire = (record: ModelAliasRecord): ModelAlias => ({
   updated_at: record.updatedAt,
 });
 
-// Wire payload accepted by the create / update body schemas. `sort_order` is
-// optional; everything else is required.
 export interface ModelAliasWireInput {
   name: string;
   kind: AliasKind;
diff --git a/packages/gateway/src/control-plane/models/routes.ts b/packages/gateway/src/control-plane/models/routes.ts
index 6838ec1d2..50bd15592 100644
--- a/packages/gateway/src/control-plane/models/routes.ts
+++ b/packages/gateway/src/control-plane/models/routes.ts
@@ -89,11 +89,10 @@ export const controlPlaneModels = async (c: Context) => {
     };
     return c.json(response);
   } catch (e: unknown) {
-    // Empty-upstreams is a domain state, not an error, on the dashboard. The
-    // public /v1/models endpoint still surfaces it as a 502 to remote clients
-    // because they need to know the gateway is unconfigured — but the
-    // dashboard's Models tab should render an empty grid + the operator
-    // guidance message inline instead of flashing a 502 in devtools.
+    // Empty-upstreams is a domain state, not an error, on the dashboard:
+    // /v1/models still surfaces it as a 502 (remote clients need to know
+    // the gateway is unconfigured), but the Models tab renders an empty
+    // grid inline.
     if (e instanceof Error && e.message.startsWith('No upstream provider configured')) {
       return c.json({ object: 'list', has_more: false, first_id: null, last_id: null, data: [] });
     }
diff --git a/packages/gateway/src/data-plane/chat/shared/errors.ts b/packages/gateway/src/data-plane/chat/shared/errors.ts
index 7b71d27f5..0c106f695 100644
--- a/packages/gateway/src/data-plane/chat/shared/errors.ts
+++ b/packages/gateway/src/data-plane/chat/shared/errors.ts
@@ -18,8 +18,6 @@ export type ChatServeFailure =
   // it.
   | { readonly kind: 'alias-no-target-available'; readonly message: string };
 
-// Lift `AliasNoTargetAvailableError` into a `ChatServeFailure` so the
-// existing failure renderer can surface it without special-casing.
 export const aliasFailureFromError = (error: AliasNoTargetAvailableError): Extract<ChatServeFailure, { kind: 'alias-no-target-available' }> => ({
   kind: 'alias-no-target-available',
   message: error.message,
diff --git a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
index 55770a854..226f5ffab 100644
--- a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
+++ b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
@@ -81,8 +81,6 @@ export const createGatewayCtxFromHono = (c: AuthedContext, opts: CreateGatewayCt
 // Run the dump-accumulator's finalize tee on the outgoing Response. Every
 // inbound HTTP wrapper returns its response through this seam so the dump
 // pipeline applies uniformly across happy-path, error, and passthrough paths.
-// Gateway-staged response headers (today: `x-floway-alias`) ride along here
-// regardless of how the responder built the body.
 export const finalizeGatewayResponse = (ctx: GatewayCtx, response: Response): Response => {
   for (const [name, value] of ctx.responseHeaders) response.headers.set(name, value);
   return ctx.dump?.finalize(response) ?? response;
diff --git a/packages/gateway/src/data-plane/model-aliases/resolve.ts b/packages/gateway/src/data-plane/model-aliases/resolve.ts
index 474f71060..d3ab1ada0 100644
--- a/packages/gateway/src/data-plane/model-aliases/resolve.ts
+++ b/packages/gateway/src/data-plane/model-aliases/resolve.ts
@@ -29,9 +29,7 @@ export interface AliasResolution {
   readonly aliasName: string;
 }
 
-// Canonical wording for the alias-no-target-available 404. Called only
-// from inside the `AliasNoTargetAvailableError` constructor so wording
-// changes land in one place; consumers read `error.message` directly.
+// Canonical wording for the alias-no-target-available 404.
 const aliasNoTargetMessage = (params: { aliasName: string; targetCount: number }): string =>
   `alias '${params.aliasName}' has ${params.targetCount} target(s); none currently map to an enabled upstream binding`;
 
diff --git a/packages/gateway/src/data-plane/providers/addressable.ts b/packages/gateway/src/data-plane/providers/addressable.ts
index caa182ae7..8666a42e3 100644
--- a/packages/gateway/src/data-plane/providers/addressable.ts
+++ b/packages/gateway/src/data-plane/providers/addressable.ts
@@ -2,19 +2,15 @@
 // gateway accepts — the union of the listed catalog surface and the
 // addressable-but-not-listed surface contributed by `modelPrefix.addressable`
 // alternates and by each provider's `resolveRequestedModelId` redirect map.
-//
-// Why this exists: the listing-side availability check (alias-listing,
-// codex catalog) used strict literal id equality against the listed catalog,
-// while the request-time resolver routes through `enumerateModelInterpretations`
-// + `resolveRequestedModelId`. A target that the resolver accepts via a
-// prefix-variant or Copilot variant collapse therefore looked "unavailable"
-// to the listing. Recomputing the resolver-accepted surface against the
-// listed catalog gives every consumer one consistent answer.
+// Listing-side availability checks (alias-listing, codex catalog) must see
+// the same set the request-time resolver routes through
+// (`enumerateModelInterpretations` + `resolveRequestedModelId`); recomputing
+// it once here gives every consumer one consistent answer.
 //
 // Each entry carries the `ResolvedModel` the addressable id will route to,
-// so consumers (alias intersection, codex catalog, control-plane DTO) can
-// read `limits` / `chat` / `endpoints` directly off the entry without a
-// second registry round trip.
+// so consumers (alias intersection, codex catalog, control-plane DTO) read
+// `limits` / `chat` / `endpoints` directly off the entry without a second
+// registry round trip.
 
 import { fetchUpstreamModelsCached } from './models-cache.ts';
 import { compareModelIds, getModels, listModelProviders } from './registry.ts';

From 6ac13accc2f67d63be2f37cc285fbe6d3076846f Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 00:07:45 +0800
Subject: [PATCH 130/170] cleanup(aliases/web): drop unused realModelIds helper
 + its test

`realModelIds` is exported by the alias-warnings module but no
production caller imports it; the only consumer is its own test block.
`computeShadowWarning` and `findCatalogModel` filter the catalog
inline, so the helper carries no shared logic worth preserving.
---
 apps/web/src/components/alias-edit/warnings.ts |  9 +--------
 .../src/components/alias-edit/warnings_test.ts | 18 +-----------------
 2 files changed, 2 insertions(+), 25 deletions(-)

diff --git a/apps/web/src/components/alias-edit/warnings.ts b/apps/web/src/components/alias-edit/warnings.ts
index 2cfb43de0..0243d430c 100644
--- a/apps/web/src/components/alias-edit/warnings.ts
+++ b/apps/web/src/components/alias-edit/warnings.ts
@@ -6,20 +6,13 @@
 import type { AliasKind, ChatAliasRules, ControlPlaneModel } from '../../api/types.ts';
 
 // Excludes alias rows — target ids never re-enter the alias layer, so the
-// rule-warning lookup must compare against the same real-model surface that
-// `realModelIds` and `computeShadowWarning` use.
+// rule-warning lookup runs against the real-model surface only.
 export const findCatalogModel = (
   models: readonly ControlPlaneModel[] | null | undefined,
   targetModelId: string,
 ): ControlPlaneModel | undefined =>
   (models ?? []).find(m => m.id === targetModelId && m.aliasedFrom === undefined);
 
-// Real (non-alias) model ids the operator can route to — both listed
-// entries and addressable-but-not-listed ones. Used by the shadow-warning
-// check (no kind filter — shadowing is a name collision).
-export const realModelIds = (models: readonly ControlPlaneModel[] | null | undefined): string[] =>
-  (models ?? []).filter(m => m.aliasedFrom === undefined).map(m => m.id);
-
 // Real (non-alias) model ids whose kind matches the alias's kind. Used by
 // the target-id combobox suggestion list so an embedding alias only
 // suggests embedding models, etc. Operators can still type any string —
diff --git a/apps/web/src/components/alias-edit/warnings_test.ts b/apps/web/src/components/alias-edit/warnings_test.ts
index d862eb8c0..261f5bb99 100644
--- a/apps/web/src/components/alias-edit/warnings_test.ts
+++ b/apps/web/src/components/alias-edit/warnings_test.ts
@@ -1,6 +1,6 @@
 import { describe, expect, it } from 'vitest';
 
-import { computeAliasLevelWarnings, computeModelWarnings, computeRuleWarnings, findCatalogModel, realModelIds, type AliasView } from './warnings.ts';
+import { computeAliasLevelWarnings, computeModelWarnings, computeRuleWarnings, findCatalogModel, type AliasView } from './warnings.ts';
 import type { ControlPlaneModel } from '../../api/types.ts';
 
 const realModel = (over: Partial<ControlPlaneModel> & { id: string }): ControlPlaneModel => ({
@@ -28,22 +28,6 @@ const view = (name: string, ids: readonly string[]): AliasView => ({
   targets: ids.map(id => ({ target_model_id: id })),
 });
 
-describe('realModelIds', () => {
-  it('excludes alias entries and returns the remaining ids in catalog order', () => {
-    const catalog: ControlPlaneModel[] = [
-      realModel({ id: 'gpt-5' }),
-      aliasModel({ id: 'auto-review' }),
-      realModel({ id: 'claude-sonnet' }),
-    ];
-    expect(realModelIds(catalog)).toEqual(['gpt-5', 'claude-sonnet']);
-  });
-
-  it('returns an empty array for a null or missing catalog', () => {
-    expect(realModelIds(null)).toEqual([]);
-    expect(realModelIds(undefined)).toEqual([]);
-  });
-});
-
 describe('findCatalogModel', () => {
   it('looks up the catalog row by id', () => {
     const catalog: ControlPlaneModel[] = [realModel({ id: 'gpt-5' }), realModel({ id: 'claude' })];

From 15fcc1516790ad372f97153d5197961b136add58 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 00:21:23 +0800
Subject: [PATCH 131/170] fix(aliases): alias chat.modalities omits the block
 when either half is empty
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The synthesizer emitted `{ input, output }` when either intersection
survived non-empty, so two targets sharing text input but disagreeing
on output modalities would publish an alias entry promising text input
and zero output — a chat model that produces nothing. Tighten the
guard to `&&` so the block is emitted only when both halves carry at
least one modality. Add a test for the disjoint-output case.
---
 .../src/data-plane/models/alias-listing.ts    |  5 ++++-
 .../data-plane/models/alias-listing_test.ts   | 20 +++++++++++++++++++
 2 files changed, 24 insertions(+), 1 deletion(-)

diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index 33c223d23..dfe8206e4 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -84,7 +84,10 @@ const intersectChat = (chats: readonly ChatModelInfo[]): ChatModelInfo | undefin
   if (modalityChats.length === chats.length) {
     const input = intersectArrays(modalityChats.map(c => c.modalities!.input));
     const output = intersectArrays(modalityChats.map(c => c.modalities!.output));
-    if (input.length > 0 || output.length > 0) result.modalities = { input, output };
+    // Both halves must survive — an alias that consumes a modality but
+    // promises no output (or the inverse) is incoherent. Omit the block
+    // entirely when either intersection collapses.
+    if (input.length > 0 && output.length > 0) result.modalities = { input, output };
   }
 
   const reasoningChats = chats.filter(c => c.reasoning !== undefined);
diff --git a/packages/gateway/src/data-plane/models/alias-listing_test.ts b/packages/gateway/src/data-plane/models/alias-listing_test.ts
index 5e0d488ff..2d13ec440 100644
--- a/packages/gateway/src/data-plane/models/alias-listing_test.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing_test.ts
@@ -113,6 +113,26 @@ describe('synthesizeListedAliases', () => {
     expect(entry.chat?.reasoning).toBeUndefined();
   });
 
+  test('multi-target with disjoint output modalities omits the modalities block entirely', () => {
+    // Both targets share text input but their output modalities do not
+    // overlap. Advertising `{ input: ['text'], output: [] }` would claim a
+    // chat model that consumes text and produces nothing — incoherent —
+    // so the synthesizer omits the modalities block when either half of
+    // the intersection collapses.
+    const aliases = [aliasFixture({
+      targets: [
+        { target_model_id: 'a', rules: {} },
+        { target_model_id: 'b', rules: {} },
+      ],
+    })];
+    const realModels = [
+      realModel({ id: 'a', chat: { modalities: { input: ['text'], output: ['text'] } } }),
+      realModel({ id: 'b', chat: { modalities: { input: ['text'], output: ['image'] } } }),
+    ];
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    expect(entry.chat?.modalities).toBeUndefined();
+  });
+
   test('multi-target with an unavailable target intersects over the available subset', () => {
     const aliases = [aliasFixture({
       targets: [

From 8b6295cdbd38ffa9b99f2ce870d883655173aed7 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 00:24:18 +0800
Subject: [PATCH 132/170] cleanup: drop stale/restating comments and the unused
 stripKeys fieldPrefix
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`gateway-ctx.responseHeaders`: drop the "(today: x-floway-alias)"
parenthetical — it dates the doc to one header and the next staged
header would silently invalidate it.

`memory.ts cloneModelAliasRecord`: drop the structuredClone restatement.

`responses-via-chat-completions/request_test.ts`: the verbosity-prefixed
comment was unrelated to the `reasoning_summary` assertion below it; the
test name already states the contract.

`sanitize.ts stripKeys`: drop the unused `fieldPrefix` parameter and
the corresponding template-literal expansion — every call site passed
the default empty string, so the prefix never appeared in trace output.
---
 packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts  | 6 +++---
 packages/gateway/src/data-plane/chat/shared/sanitize.ts     | 3 +--
 packages/gateway/src/repo/memory.ts                         | 2 --
 .../src/responses-via-chat-completions/request_test.ts      | 1 -
 4 files changed, 4 insertions(+), 8 deletions(-)

diff --git a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
index 226f5ffab..131fde2c1 100644
--- a/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
+++ b/packages/gateway/src/data-plane/chat/shared/gateway-ctx.ts
@@ -25,9 +25,9 @@ export interface GatewayCtx {
   // `finalizeGatewayResponse` short-circuits the dump tee and returns the
   // response untouched.
   readonly dump: DumpAccumulator | null;
-  // Headers staged during request processing (today: `x-floway-alias`)
-  // and written onto the outbound response by `finalizeGatewayResponse`,
-  // regardless of how the responder built the body.
+  // Headers staged during request processing and written onto the
+  // outbound response by `finalizeGatewayResponse`, regardless of how
+  // the responder built the body.
   readonly responseHeaders: Headers;
 }
 
diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize.ts b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
index 877e5757a..cb2d92f47 100644
--- a/packages/gateway/src/data-plane/chat/shared/sanitize.ts
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
@@ -14,12 +14,11 @@ const stripKeys = (
   keys: readonly string[],
   targetProtocol: string,
   trace: SanitizeTraceCtx | undefined,
-  fieldPrefix: string = '',
 ): void => {
   for (const key of keys) {
     if (key in body) {
       delete body[key];
-      trace?.emit({ field: `${fieldPrefix}${key}`, targetProtocol });
+      trace?.emit({ field: key, targetProtocol });
     }
   }
 };
diff --git a/packages/gateway/src/repo/memory.ts b/packages/gateway/src/repo/memory.ts
index 65738df39..6e832d67b 100644
--- a/packages/gateway/src/repo/memory.ts
+++ b/packages/gateway/src/repo/memory.ts
@@ -886,8 +886,6 @@ const cloneBackoffRow = (row: BackoffRow): BackoffRow => ({ ...row });
 
 const cloneModelAliasRecord = (record: ModelAliasRecord): ModelAliasRecord => ({
   ...record,
-  // Deep-clone so a caller's mutation of the returned record never leaks
-  // back into the store.
   targets: structuredClone(record.targets),
   announcedMetadata: record.announcedMetadata === null ? null : structuredClone(record.announcedMetadata),
 });
diff --git a/packages/translate/src/responses-via-chat-completions/request_test.ts b/packages/translate/src/responses-via-chat-completions/request_test.ts
index 4474d5089..c24a2544b 100644
--- a/packages/translate/src/responses-via-chat-completions/request_test.ts
+++ b/packages/translate/src/responses-via-chat-completions/request_test.ts
@@ -1500,6 +1500,5 @@ test('translateResponsesToChatCompletions drops reasoning.summary (Chat has no s
   });
 
   assertEquals(result.target.reasoning_effort, 'medium');
-  // Verbosity is on text.* not reasoning; ensure no surrogate field invented.
   assertEquals('reasoning_summary' in result.target, false);
 });

From 6bf18370f59e44906ef2bf67b01d39394d4d0e11 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 00:34:12 +0800
Subject: [PATCH 133/170] fix(aliases/web): frontend modalities mirror omits
 the block on empty intersection
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The frontend's `computeAnnouncedMetadata` carries a hand-mirrored copy
of the gateway's `intersectChat`. Round 3 (the `alias-listing.ts` fix
earlier in this series) changed the gateway guard from `||` to `&&`
but missed this mirror — the alias-edit dialog would
preview `{ input: ['text'], output: [] }` (or vice versa) when targets
disagreed on one side, while `/v1/models` correctly omitted the block.
Align the frontend with the gateway.
---
 apps/web/src/components/alias-edit/announced-metadata.ts | 5 ++++-
 1 file changed, 4 insertions(+), 1 deletion(-)

diff --git a/apps/web/src/components/alias-edit/announced-metadata.ts b/apps/web/src/components/alias-edit/announced-metadata.ts
index 12559d5be..520e1f0e5 100644
--- a/apps/web/src/components/alias-edit/announced-metadata.ts
+++ b/apps/web/src/components/alias-edit/announced-metadata.ts
@@ -46,7 +46,10 @@ const intersectChat = (chats: readonly ChatModelInfo[]): ChatModelInfo | undefin
   if (modalityChats.length === chats.length) {
     const input = intersectArrays(modalityChats.map(c => c.modalities!.input));
     const output = intersectArrays(modalityChats.map(c => c.modalities!.output));
-    if (input.length > 0 || output.length > 0) result.modalities = { input, output };
+    // Both halves must survive — `{ input: ['text'], output: [] }`
+    // would advertise a chat model that consumes input but produces
+    // nothing. Mirrors the gateway-side rule.
+    if (input.length > 0 && output.length > 0) result.modalities = { input, output };
   }
 
   const reasoningChats = chats.filter(c => c.reasoning !== undefined);

From e584f54af9e30b992ba7da3efab89b18359b494d Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 00:48:23 +0800
Subject: [PATCH 134/170] docs(aliases/web): drop stale data-field reference on
 AliasRuleWarning
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The interface comment described a `data-field` DOM attribute mechanism
that no template actually uses — `AliasTargetRow.vue` looks warnings up
via `warningFor(<literal>)` instead. Drop the stale half-sentence.
---
 apps/web/src/components/alias-edit/warnings.ts | 4 +---
 1 file changed, 1 insertion(+), 3 deletions(-)

diff --git a/apps/web/src/components/alias-edit/warnings.ts b/apps/web/src/components/alias-edit/warnings.ts
index 0243d430c..d32dce5a4 100644
--- a/apps/web/src/components/alias-edit/warnings.ts
+++ b/apps/web/src/components/alias-edit/warnings.ts
@@ -23,9 +23,7 @@ export const realModelIdsOfKind = (
 ): string[] =>
   (models ?? []).filter(m => m.aliasedFrom === undefined && m.kind === kind).map(m => m.id);
 
-// One warning attached to a specific chat rule field. The field key matches
-// the form's `data-field` attribute so the dialog can render the warning
-// directly under the input it annotates.
+// One warning attached to a specific chat rule field.
 export interface AliasRuleWarning {
   field: 'reasoning.effort' | 'reasoning.budget_tokens' | 'reasoning.adaptive' | 'reasoning.summary' | 'verbosity' | 'serviceTier';
   message: string;

From bfc5f47bbbbd9c79cf55ddb6e7e7b363a289d577 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 01:08:40 +0800
Subject: [PATCH 135/170] fix(addressable): rethrow AbortError from
 per-upstream Promise.allSettled

The tolerant fanout silently swallowed every rejection, including
AbortError from the inbound abort signal. `getModels`, which runs ahead
of the fanout, already rethrows AbortError; mirror that so a cancelled
request cannot be masked by a slow upstream's catalog rejection.
---
 .../gateway/src/data-plane/providers/addressable.ts    | 10 +++++++++-
 1 file changed, 9 insertions(+), 1 deletion(-)

diff --git a/packages/gateway/src/data-plane/providers/addressable.ts b/packages/gateway/src/data-plane/providers/addressable.ts
index 8666a42e3..5e0813e99 100644
--- a/packages/gateway/src/data-plane/providers/addressable.ts
+++ b/packages/gateway/src/data-plane/providers/addressable.ts
@@ -119,7 +119,15 @@ export const enumerateAddressableModelIds = async (
   }));
 
   for (const result of perUpstream) {
-    if (result.status !== 'fulfilled') continue;
+    if (result.status === 'rejected') {
+      // Cancellation must propagate even from this tolerant fanout — the
+      // per-request abort signal cannot be masked by an upstream's slow
+      // rejection. Other failures (catalog 5xx, parse, transport) collapse
+      // to no addressable-only contribution from that upstream per the
+      // contract above.
+      if (result.reason instanceof Error && result.reason.name === 'AbortError') throw result.reason;
+      continue;
+    }
     for (const entry of result.value) push(entry);
   }
 

From a06d7e37df3d82695ea48b756af190cbbadfffd4 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 01:10:44 +0800
Subject: [PATCH 136/170] =?UTF-8?q?refactor(sanitize):=20require=20Sanitiz?=
 =?UTF-8?q?eTraceCtx=20=E2=80=94=20drop=20the=20optional=20fallback?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Every production call site already constructs `createSanitizeTraceCtx()`
unconditionally; the `?: SanitizeTraceCtx | undefined` parameter type
existed only so the test suite could omit it. Per the project's
no-fake-defaults rule, push the construction obligation onto every
caller and let the type system enforce it. The corresponding test
swaps to `createSanitizeTraceCtx()` so its "default trace" assertion
still has something to read.
---
 .../gateway/src/data-plane/chat/shared/sanitize.ts     | 10 +++++-----
 .../src/data-plane/chat/shared/sanitize_test.ts        |  5 +++--
 2 files changed, 8 insertions(+), 7 deletions(-)

diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize.ts b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
index cb2d92f47..10f3f3b07 100644
--- a/packages/gateway/src/data-plane/chat/shared/sanitize.ts
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize.ts
@@ -13,24 +13,24 @@ const stripKeys = (
   body: Record<string, unknown>,
   keys: readonly string[],
   targetProtocol: string,
-  trace: SanitizeTraceCtx | undefined,
+  trace: SanitizeTraceCtx,
 ): void => {
   for (const key of keys) {
     if (key in body) {
       delete body[key];
-      trace?.emit({ field: key, targetProtocol });
+      trace.emit({ field: key, targetProtocol });
     }
   }
 };
 
-export const sanitizeForChatCompletionsUpstream = (body: Record<string, unknown>, trace?: SanitizeTraceCtx): void => {
+export const sanitizeForChatCompletionsUpstream = (body: Record<string, unknown>, trace: SanitizeTraceCtx): void => {
   stripKeys(body, FLOWAY_EXTENSION_FIELDS.chatCompletions, 'chat-completions', trace);
 };
 
-export const sanitizeForResponsesUpstream = (body: Record<string, unknown>, trace?: SanitizeTraceCtx): void => {
+export const sanitizeForResponsesUpstream = (body: Record<string, unknown>, trace: SanitizeTraceCtx): void => {
   stripKeys(body, FLOWAY_EXTENSION_FIELDS.responses, 'responses', trace);
 };
 
-export const sanitizeForMessagesUpstream = (body: Record<string, unknown>, trace?: SanitizeTraceCtx): void => {
+export const sanitizeForMessagesUpstream = (body: Record<string, unknown>, trace: SanitizeTraceCtx): void => {
   stripKeys(body, FLOWAY_EXTENSION_FIELDS.messages, 'messages', trace);
 };
diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
index 8c27fc864..1f55ef3e0 100644
--- a/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
@@ -1,6 +1,7 @@
 import { test } from 'vitest';
 
 import {
+  createSanitizeTraceCtx,
   sanitizeForChatCompletionsUpstream,
   sanitizeForMessagesUpstream,
   sanitizeForResponsesUpstream,
@@ -44,9 +45,9 @@ test('sanitizeForChatCompletionsUpstream strips Floway extensions and leaves nat
   assertEquals(droppedFields, ['reasoning_summary', 'thinking_budget']);
 });
 
-test('sanitizeForResponsesUpstream strips extensions without a trace context', () => {
+test('sanitizeForResponsesUpstream strips extensions when fed the default per-request trace', () => {
   const body: Record<string, unknown> = { adaptive_thinking: true, thinking_budget: 4096 };
-  sanitizeForResponsesUpstream(body);
+  sanitizeForResponsesUpstream(body, createSanitizeTraceCtx());
   assertEquals(body, {});
 });
 

From b36f9623fd824bcdb42eb48c408e9d8e10607333 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 01:12:37 +0800
Subject: [PATCH 137/170] fix(codex): alias catalog window = min over routable
 targets, not firstRoutable
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Previously the codex 1p catalog published the first routable target's
window as the alias's advertised window, while /v1/models's rule-aware
intersection publishes the min across every available target — a
safe-lower-bound the codex client should respect for its own gating
(auto-compact, context-budget UX). Aligning the catalog to the same
rule closes the divergence: codex never sees a window the gateway
would refuse to serve on a sibling target.

The operator override still wins; the min fallback only runs when
announcedMetadata leaves max_context_window_tokens off.
---
 .../gateway/src/data-plane/codex/models.ts    | 30 ++++++++-----
 .../src/data-plane/codex/routes_test.ts       | 42 +++++++++++++++++++
 2 files changed, 61 insertions(+), 11 deletions(-)

diff --git a/packages/gateway/src/data-plane/codex/models.ts b/packages/gateway/src/data-plane/codex/models.ts
index 2b0d760d8..3eb019601 100644
--- a/packages/gateway/src/data-plane/codex/models.ts
+++ b/packages/gateway/src/data-plane/codex/models.ts
@@ -90,21 +90,29 @@ const computeCatalog = async (
   const addressableSet = new Set(addressable.map(entry => entry.id));
 
   // Each alias entry survives in the codex catalog when at least one of
-  // its configured targets is currently addressable. `firstTargetId` is
-  // the fallback window source: selection mode is irrelevant here because
-  // the catalog must publish a single stable window.
+  // its configured targets is currently addressable. The fallback window
+  // — used when the operator did not override `announcedMetadata` —
+  // is the min across every routable target's window, matching the
+  // safe-lower-bound rule `/v1/models` already applies via the rule-aware
+  // intersection. Selection mode is irrelevant here because the catalog
+  // must publish a single stable window.
   interface AliasCatalogInfo {
-    readonly firstTargetId: string;
+    readonly routableWindowsMin: number | null;
     readonly announcedContextWindow: number | undefined;
   }
   const aliasCatalogInfo = new Map<string, AliasCatalogInfo>();
   for (const entry of synthesizeListedAliases({ aliases, addressableModelIds: addressable })) {
     const aliasedFrom = entry.aliasedFrom;
     if (aliasedFrom === undefined) continue;
-    const firstRoutable = aliasedFrom.targets.find(t => addressableSet.has(t.target_model_id));
-    if (firstRoutable === undefined) continue;
+    const routableIds = aliasedFrom.targets
+      .map(t => t.target_model_id)
+      .filter(id => addressableSet.has(id));
+    if (routableIds.length === 0) continue;
+    const windows = routableIds
+      .map(id => slugContextWindow.get(id))
+      .filter((w): w is number => w !== undefined);
     aliasCatalogInfo.set(entry.id, {
-      firstTargetId: firstRoutable.target_model_id,
+      routableWindowsMin: windows.length > 0 ? Math.min(...windows) : null,
       announcedContextWindow: entry.limits.max_context_window_tokens,
     });
   }
@@ -116,12 +124,12 @@ const computeCatalog = async (
   // Alias slug: prefer the alias's announced window (operator override OR
   // the synthesizer's automatic intersection) so codex's local gating —
   // auto-compact, context-budget UX — agrees with what /v1/models told
-  // the operator's other tooling. Fallback to the first routable target's
-  // window when the alias publishes no window. Plain slugs read straight
-  // off the registry.
+  // the operator's other tooling. Fallback to the min over routable
+  // targets' windows when the alias publishes no window. Plain slugs read
+  // straight off the registry.
   const contextWindowOf: ContextWindowResolver = slug => {
     const info = aliasCatalogInfo.get(slug);
-    if (info !== undefined) return info.announcedContextWindow ?? slugContextWindow.get(info.firstTargetId) ?? null;
+    if (info !== undefined) return info.announcedContextWindow ?? info.routableWindowsMin;
     return slugContextWindow.get(slug) ?? null;
   };
   return applyContextWindowFromRegistry(filtered, contextWindowOf);
diff --git a/packages/gateway/src/data-plane/codex/routes_test.ts b/packages/gateway/src/data-plane/codex/routes_test.ts
index b3c7bc730..6691a77a1 100644
--- a/packages/gateway/src/data-plane/codex/routes_test.ts
+++ b/packages/gateway/src/data-plane/codex/routes_test.ts
@@ -584,6 +584,48 @@ describe('codex 1p namespace', () => {
       expect(autoReview?.context_window).toBe(272000);
     });
 
+    it('takes min(window) across all routable targets when the alias publishes no window', async () => {
+      // Multi-target alias with two routable targets advertising different
+      // windows and no announced override (intersection drops the field
+      // because targets disagree); the catalog should pick the safe lower
+      // bound, matching the rule-aware intersection /v1/models already
+      // applies.
+      const { apiKey, repo } = await setupAppTest();
+      await repo.modelAliases.insert({
+        name: 'codex-auto-review',
+        kind: 'chat',
+        selection: 'first-available',
+        displayName: 'Codex Auto Review',
+        visibleInModelsList: true,
+        targets: [
+          { target_model_id: 'gpt-5.4', rules: {} },
+          { target_model_id: 'gpt-5.5', rules: {} },
+        ],
+        announcedMetadata: null,
+        sortOrder: 0,
+        createdAt: '2026-01-01T00:00:00.000Z',
+        updatedAt: '2026-01-01T00:00:00.000Z',
+      });
+      const app = buildCodexApp();
+      const body = await withMockedFetch(
+        copilotFetch([
+          { id: 'gpt-5.4', maxContextWindowTokens: 272000, supported_endpoints: ['/chat/completions'] },
+          { id: 'gpt-5.5', maxContextWindowTokens: 120000, supported_endpoints: ['/chat/completions'] },
+        ]),
+        async () => {
+          const response = await app.request('/azure-api.codex/models', {
+            headers: { authorization: `Bearer ${apiKey.key}` },
+          });
+          expect(response.status).toBe(200);
+          return await response.json() as CodexModelsResponse;
+        },
+      );
+      const autoReview = body.models.find(m => m.slug === 'codex-auto-review');
+      // gpt-5.4 → 272000, gpt-5.5 → 120000. min(272000, 120000) = 120000.
+      expect(autoReview?.max_context_window).toBe(120000);
+      expect(autoReview?.context_window).toBe(120000);
+    });
+
     it('returns an empty catalog when the registry has no overlapping slugs', async () => {
       const { apiKey } = await setupAppTest();
       const app = buildCodexApp();

From 8f38b4f5bbf6f9766dd2d0947243a1e9411c6340 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 01:15:09 +0800
Subject: [PATCH 138/170] fix(aliases): intersectChat requires both budget.min
 and budget.max all-declared
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`reasoning.budget_tokens` used to emit `{ min }` (or `{ max }`) when
every target declared the block and that leaf, but only some declared
the opposite leaf. That published a half-block —
advertising a capability some target does not actually report —
while every other reasoning sub-field (`effort`, `adaptive`,
`mandatory`) collapses the moment one leaf is missing. Tighten the
budget block to the same rule. The frontend mirror in
`announced-metadata.ts` follows.
---
 .../alias-edit/announced-metadata.ts          | 14 ++++++-------
 .../src/data-plane/models/alias-listing.ts    | 20 ++++++++++---------
 .../data-plane/models/alias-listing_test.ts   | 19 ++++++++++++++++++
 3 files changed, 37 insertions(+), 16 deletions(-)

diff --git a/apps/web/src/components/alias-edit/announced-metadata.ts b/apps/web/src/components/alias-edit/announced-metadata.ts
index 520e1f0e5..2620b0613 100644
--- a/apps/web/src/components/alias-edit/announced-metadata.ts
+++ b/apps/web/src/components/alias-edit/announced-metadata.ts
@@ -72,13 +72,13 @@ const intersectChat = (chats: readonly ChatModelInfo[]): ChatModelInfo | undefin
     if (budgetChats.length === reasoningChats.length) {
       const mins = budgetChats.map(c => c.reasoning!.budget_tokens!.min).filter((v): v is number => v !== undefined);
       const maxes = budgetChats.map(c => c.reasoning!.budget_tokens!.max).filter((v): v is number => v !== undefined);
-      const min = mins.length === budgetChats.length ? Math.max(...mins) : undefined;
-      const max = maxes.length === budgetChats.length ? Math.min(...maxes) : undefined;
-      if (!(min !== undefined && max !== undefined && min > max)) {
-        const budget: NonNullable<NonNullable<ChatModelInfo['reasoning']>['budget_tokens']> = {};
-        if (min !== undefined) budget.min = min;
-        if (max !== undefined) budget.max = max;
-        if (min !== undefined || max !== undefined) reasoning.budget_tokens = budget;
+      // Both min and max must be all-declared — a half-declared block
+      // would claim a capability some target does not report. Mirrors the
+      // gateway-side rule.
+      if (mins.length === budgetChats.length && maxes.length === budgetChats.length) {
+        const min = Math.max(...mins);
+        const max = Math.min(...maxes);
+        if (min <= max) reasoning.budget_tokens = { min, max };
       }
     }
 
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index dfe8206e4..6bd4f1891 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -113,15 +113,17 @@ const intersectChat = (chats: readonly ChatModelInfo[]): ChatModelInfo | undefin
     if (budgetChats.length === reasoningChats.length) {
       const mins = budgetChats.map(c => c.reasoning!.budget_tokens!.min).filter((v): v is number => v !== undefined);
       const maxes = budgetChats.map(c => c.reasoning!.budget_tokens!.max).filter((v): v is number => v !== undefined);
-      const min = mins.length === budgetChats.length ? Math.max(...mins) : undefined;
-      const max = maxes.length === budgetChats.length ? Math.min(...maxes) : undefined;
-      // Drop the budget block entirely when the intersected window is
-      // empty (every caller would otherwise see a contradictory range).
-      if (!(min !== undefined && max !== undefined && min > max)) {
-        const budget: NonNullable<NonNullable<ChatModelInfo['reasoning']>['budget_tokens']> = {};
-        if (min !== undefined) budget.min = min;
-        if (max !== undefined) budget.max = max;
-        if (min !== undefined || max !== undefined) reasoning.budget_tokens = budget;
+      // Require BOTH min and max to be all-declared, mirroring how
+      // `effort`, `adaptive`, and `mandatory` all collapse the moment one
+      // target leaves a leaf undeclared. A half-declared block (e.g.
+      // `{ min }` with no max) would advertise a capability some target
+      // does not actually report.
+      if (mins.length === budgetChats.length && maxes.length === budgetChats.length) {
+        const min = Math.max(...mins);
+        const max = Math.min(...maxes);
+        // Drop the budget block when the intersected window is empty —
+        // a contradictory range is worse than no advertisement.
+        if (min <= max) reasoning.budget_tokens = { min, max };
       }
     }
 
diff --git a/packages/gateway/src/data-plane/models/alias-listing_test.ts b/packages/gateway/src/data-plane/models/alias-listing_test.ts
index 2d13ec440..effe792fd 100644
--- a/packages/gateway/src/data-plane/models/alias-listing_test.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing_test.ts
@@ -80,6 +80,25 @@ describe('synthesizeListedAliases', () => {
     expect(entry.chat?.reasoning).toBeUndefined();
   });
 
+  test('multi-target alias drops budget_tokens when every target declares min but only some declare max', () => {
+    // Every target declares `min` but only one declares `max`. The old
+    // intersection published `{ min }` here — a half-declared block some
+    // target does not report. It must now collapse entirely, matching how
+    // `effort` / `adaptive` / `mandatory` already behave.
+    const aliases = [aliasFixture({
+      targets: [
+        { target_model_id: 'a', rules: {} },
+        { target_model_id: 'b', rules: {} },
+      ],
+    })];
+    const realModels = [
+      realModel({ id: 'a', chat: { reasoning: { budget_tokens: { min: 1024, max: 65536 } } } }),
+      realModel({ id: 'b', chat: { reasoning: { budget_tokens: { min: 2048 } } } }),
+    ];
+    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    expect(entry.chat?.reasoning).toBeUndefined();
+  });
+
   test('multi-target alias intersects chat.modalities across every target', () => {
     const aliases = [aliasFixture({
       name: 'smart-router',

From d1c9d4504fc5b5446ab6d310f2cb97636dfd0380 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 01:16:58 +0800
Subject: [PATCH 139/170] fix(control-plane/models): dedupe unlistedRows
 against listedRows on id
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

An alias whose name collides with an addressable-but-not-listed id
(e.g. an alias literally named `claude-opus-4.7` while Copilot
exposes the same string as a variant id) would emit two rows with
the same `id` but different `unlisted` flags — breaking OpenAI-client
deduplication and contradicting /v1/models's alias-wins-on-collision
rule. Drop unlisted entries whose id already appears in the listed
half.
---
 .../src/control-plane/models/routes.ts        | 16 +++++--
 .../src/control-plane/models/routes_test.ts   | 47 +++++++++++++++++++
 2 files changed, 58 insertions(+), 5 deletions(-)

diff --git a/packages/gateway/src/control-plane/models/routes.ts b/packages/gateway/src/control-plane/models/routes.ts
index 50bd15592..90ed307b5 100644
--- a/packages/gateway/src/control-plane/models/routes.ts
+++ b/packages/gateway/src/control-plane/models/routes.ts
@@ -65,11 +65,6 @@ export const controlPlaneModels = async (c: Context) => {
       includeAliases ? getRepo().modelAliases.list() : Promise.resolve([]),
     ]);
     const realModels = listedRealModels(addressable);
-    const unlistedRows = includeUnlisted
-      ? addressable
-          .filter(entry => entry.unlisted === true)
-          .map(entry => toUnlistedControlPlaneModel(entry.id, entry.model))
-      : [];
     const listedRows = includeAliases
       ? mergeAliasesIntoModels({
           realModels,
@@ -79,6 +74,17 @@ export const controlPlaneModels = async (c: Context) => {
           wrapAlias: entry => ({ ...entry, upstreams: [] }),
         })
       : realModels.map(toControlPlaneModel);
+    // Dedupe the unlisted half against the listed half on `id` — an alias
+    // whose name coincides with an addressable-but-not-listed id (e.g. a
+    // Copilot variant) would otherwise emit two rows with the same id but
+    // different `unlisted` flags. /v1/models already collapses this kind
+    // of collision; the dashboard must agree.
+    const listedIds = new Set(listedRows.map(row => row.id));
+    const unlistedRows = includeUnlisted
+      ? addressable
+          .filter(entry => entry.unlisted === true && !listedIds.has(entry.id))
+          .map(entry => toUnlistedControlPlaneModel(entry.id, entry.model))
+      : [];
     const data = [...listedRows, ...unlistedRows];
     const response: ControlPlaneModelsResponse = {
       object: 'list',
diff --git a/packages/gateway/src/control-plane/models/routes_test.ts b/packages/gateway/src/control-plane/models/routes_test.ts
index 6d79f177b..3990964be 100644
--- a/packages/gateway/src/control-plane/models/routes_test.ts
+++ b/packages/gateway/src/control-plane/models/routes_test.ts
@@ -174,3 +174,50 @@ test('/api/models?include_unlisted=true appends addressable-but-not-listed rows
     },
   );
 });
+
+test('/api/models?include_unlisted=true: alias whose name collides with an unlisted addressable id emits only the listed row', async () => {
+  // Operator-side trap: an alias name accidentally matching a Copilot
+  // variant id (e.g. `claude-opus-4.7`). Both the listed alias row and
+  // the unlisted addressable row carry the same `id`, so emitting both
+  // would break OpenAI-client deduplication and contradict /v1/models's
+  // alias-vs-real collision rule (alias wins, real is dropped).
+  const { apiKey, repo } = await setupAppTest();
+  await repo.modelAliases.insert({
+    name: 'claude-opus-4.7',
+    kind: 'chat',
+    selection: 'first-available',
+    displayName: 'Alias colliding with the unlisted Copilot variant',
+    visibleInModelsList: true,
+    targets: [{ target_model_id: 'claude-opus-4-7', rules: {} }],
+    announcedMetadata: null,
+    sortOrder: 0,
+    createdAt: '2026-01-01T00:00:00.000Z',
+    updatedAt: '2026-01-01T00:00:00.000Z',
+  });
+
+  await withMockedFetch(
+    request => {
+      const url = new URL(request.url);
+      if (url.hostname === 'update.code.visualstudio.com') return jsonResponse(['1.110.1']);
+      if (url.pathname === '/copilot_internal/v2/token') {
+        return jsonResponse({ token: 'copilot-access-token', expires_at: 4102444800, refresh_in: 3600, endpoints: { api: 'https://api.individual.githubcopilot.com' } });
+      }
+      if (url.hostname === 'api.individual.githubcopilot.com' && url.pathname === '/models') {
+        return jsonResponse(copilotModels([
+          { id: 'claude-opus-4.7', display_name: 'Claude Opus 4.7', supported_endpoints: ['/v1/messages'] },
+        ]));
+      }
+      throw new Error(`Unhandled fetch ${request.url}`);
+    },
+    async () => {
+      const full = await requestApp('/api/models?include_unlisted=true', { headers: { 'x-api-key': apiKey.key } });
+      const fullBody = (await full.json()) as { data: Array<{ id: string; unlisted?: true; aliasedFrom?: unknown }> };
+      const collisions = fullBody.data.filter(m => m.id === 'claude-opus-4.7');
+      assertEquals(collisions.length, 1);
+      // The surviving row is the alias-side one (aliasedFrom set, no
+      // `unlisted` tag), matching /v1/models's alias-wins rule.
+      assertEquals(collisions[0].aliasedFrom !== undefined, true);
+      assertEquals(collisions[0].unlisted, undefined);
+    },
+  );
+});

From fd41a3fa91d269703105025ec39f51740ed0e69b Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 01:18:47 +0800
Subject: [PATCH 140/170] refactor(protocols): tighten
 ReasoningEffort/Summary/Verbosity/ServiceTier to open literals

These four types were plain `string` with the canonical presets only
documented in JSDoc. Lift the presets into the type as
`'literal' | (string & {})` so editors autocomplete the typical
choices, while the open arm preserves the "forward verbatim" contract
the gateway already guarantees. Matches the style sibling protocol
surfaces (`MessagesPayload.service_tier`, `ResponsesPayload.service_tier`)
already use.
---
 packages/protocols/src/common/aliases.ts | 35 +++++++++++-------------
 1 file changed, 16 insertions(+), 19 deletions(-)

diff --git a/packages/protocols/src/common/aliases.ts b/packages/protocols/src/common/aliases.ts
index e9c55118c..a1171e1d2 100644
--- a/packages/protocols/src/common/aliases.ts
+++ b/packages/protocols/src/common/aliases.ts
@@ -24,27 +24,24 @@ export type AliasKind = 'chat' | 'embedding' | 'image';
 // When the pool is empty both strategies surface the same 404 to the caller.
 export type AliasSelection = 'random' | 'first-available';
 
-// Discrete reasoning-effort presets understood across upstreams. Typed as
-// `string` because the gateway forwards rule values verbatim and never
-// enum-gates them at the wire boundary; the dashboard pins the canonical
-// presets ('none' | 'low' | 'medium' | 'high' | 'xhigh') as combobox
-// suggestions so operators see the typical choices.
-export type ReasoningEffort = string;
+// Discrete reasoning-effort presets understood across upstreams. The literal
+// union surfaces the canonical presets to editor autocomplete while the
+// `(string & {})` arm keeps the type open — the gateway forwards rule values
+// verbatim and never enum-gates them at the wire boundary, so an operator
+// can pin any string the upstream understands.
+export type ReasoningEffort = 'none' | 'low' | 'medium' | 'high' | 'xhigh' | (string & {});
 
 // Reasoning-summary verbosity hint emitted on the Responses / Chat surface.
-// String for the same forward-verbatim reason as `ReasoningEffort`;
-// canonical presets are 'auto' | 'concise' | 'detailed' | 'none'.
-export type ReasoningSummary = string;
-
-// Output verbosity hint (OpenAI Responses `verbosity`). String for the same
-// forward-verbatim reason as `ReasoningEffort`; canonical presets are
-// 'low' | 'medium' | 'high'.
-export type Verbosity = string;
-
-// Per-request service tier the upstream advertises. String for the same
-// forward-verbatim reason as `ReasoningEffort`; canonical presets are
-// 'default' | 'flex' | 'priority' | 'scale' | 'fast'.
-export type ServiceTier = string;
+// Same open-literal shape as `ReasoningEffort`.
+export type ReasoningSummary = 'auto' | 'concise' | 'detailed' | 'none' | (string & {});
+
+// Output verbosity hint (OpenAI Responses `verbosity`). Same open-literal
+// shape as `ReasoningEffort`.
+export type Verbosity = 'low' | 'medium' | 'high' | (string & {});
+
+// Per-request service tier the upstream advertises. Same open-literal shape
+// as `ReasoningEffort`.
+export type ServiceTier = 'default' | 'flex' | 'priority' | 'scale' | 'fast' | (string & {});
 
 // Rule overlay applied to a chat-kind alias target. Every field is optional;
 // an absent field leaves the inbound request value untouched. Rule values

From 29155d4064d41b8b96800cc9f12352349f0bcded Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 01:47:15 +0800
Subject: [PATCH 141/170] refactor(chat): drop ChatCandidate alias, use
 ProviderCandidate directly

`shared/candidates.ts` reduced to a 12-line wrapper that re-exported
`ProviderCandidate` from `@floway-dev/provider` and a `ChatCandidate =
ProviderCandidate` alias. The rename added no semantic distinction:
production chat code already wrote `ChatCandidate` while tests and
`@floway-dev/test-utils` already wrote `ProviderCandidate`, leaving the
data plane referring to the same shape by two names.

Replace every `ChatCandidate` in type position with `ProviderCandidate`,
fold the import into each file's existing `@floway-dev/provider` import
line (so `import/no-duplicates` stays satisfied), and delete the wrapper.
One canonical name across chat code, tests, and shared helpers.
---
 .../src/data-plane/chat/chat-completions/attempt.ts   |  9 ++++-----
 .../data-plane/chat/chat-completions/attempt_test.ts  |  3 +--
 .../src/data-plane/chat/chat-completions/http_test.ts |  3 +--
 .../src/data-plane/chat/chat-completions/routing.ts   |  4 ++--
 .../data-plane/chat/chat-completions/routing_test.ts  |  3 +--
 .../data-plane/chat/chat-completions/serve_test.ts    |  3 +--
 .../gateway/src/data-plane/chat/gemini/attempt.ts     |  7 +++----
 .../src/data-plane/chat/gemini/attempt_test.ts        |  3 +--
 .../gateway/src/data-plane/chat/gemini/http_test.ts   |  3 +--
 .../gateway/src/data-plane/chat/gemini/routing.ts     |  4 ++--
 .../gateway/src/data-plane/chat/gemini/serve_test.ts  |  3 +--
 .../gateway/src/data-plane/chat/messages/attempt.ts   |  9 ++++-----
 .../src/data-plane/chat/messages/attempt_test.ts      |  3 +--
 .../gateway/src/data-plane/chat/messages/http_test.ts |  9 ++++-----
 .../gateway/src/data-plane/chat/messages/routing.ts   |  4 ++--
 .../src/data-plane/chat/messages/routing_test.ts      |  3 +--
 .../src/data-plane/chat/messages/serve_test.ts        |  3 +--
 .../gateway/src/data-plane/chat/responses/attempt.ts  |  9 ++++-----
 .../src/data-plane/chat/responses/attempt_test.ts     |  3 +--
 .../src/data-plane/chat/responses/http_test.ts        |  3 +--
 .../src/data-plane/chat/responses/items/affinity.ts   |  2 +-
 .../data-plane/chat/responses/items/affinity_test.ts  |  3 +--
 .../src/data-plane/chat/responses/items/rewrite.ts    |  2 +-
 .../data-plane/chat/responses/items/rewrite_test.ts   |  3 +--
 .../gateway/src/data-plane/chat/responses/routing.ts  |  4 ++--
 .../src/data-plane/chat/responses/routing_test.ts     |  3 +--
 .../src/data-plane/chat/responses/serve-prep.ts       |  5 ++---
 .../src/data-plane/chat/responses/serve_test.ts       |  3 +--
 .../src/data-plane/chat/shared/attempt-helpers.ts     |  3 +--
 .../gateway/src/data-plane/chat/shared/candidates.ts  | 11 -----------
 .../gateway/src/data-plane/chat/shared/routing.ts     |  4 ++--
 .../src/data-plane/chat/shared/upstream-telemetry.ts  |  3 +--
 32 files changed, 51 insertions(+), 86 deletions(-)
 delete mode 100644 packages/gateway/src/data-plane/chat/shared/candidates.ts

diff --git a/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts b/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts
index 6696e64b0..b10737abd 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/attempt.ts
@@ -5,7 +5,6 @@ import { responsesAttempt } from '../responses/attempt.ts';
 import { rewriteStoredResponsesItemsForCandidate } from '../responses/items/rewrite.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { providerStreamResultToExecuteResult, buildUpstreamCallOptions } from '../shared/attempt-helpers.ts';
-import type { ChatCandidate } from '../shared/candidates.ts';
 import { tryCatchChatServeFailure } from '../shared/errors.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import { createSanitizeTraceCtx, sanitizeForChatCompletionsUpstream } from '../shared/sanitize.ts';
@@ -14,7 +13,7 @@ import { createUpstreamLatencyRecorder } from '../shared/upstream-telemetry.ts';
 import { runInterceptors } from '@floway-dev/interceptor';
 import type { ChatCompletionsMessage, ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
-import { type ExecuteResult } from '@floway-dev/provider';
+import { type ExecuteResult, type ProviderCandidate } from '@floway-dev/provider';
 import { translateChatCompletionsViaMessages, translateChatCompletionsViaResponses } from '@floway-dev/translate';
 import { chatCompletionsViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
 
@@ -22,7 +21,7 @@ export interface ChatCompletionsAttemptArgs {
   readonly payload: ChatCompletionsPayload;
   readonly ctx: GatewayCtx;
   readonly store: StatefulResponsesStore;
-  readonly candidate: ChatCandidate;
+  readonly candidate: ProviderCandidate;
   readonly headers: Headers;
 }
 
@@ -68,7 +67,7 @@ export const chatCompletionsAttempt = {
 const rewriteOrRenderChatCompletionsFailure = async (
   payload: ChatCompletionsPayload,
   store: StatefulResponsesStore,
-  candidate: ChatCandidate,
+  candidate: ProviderCandidate,
 ): Promise<{ payload: ChatCompletionsPayload; failure?: undefined } | { payload?: undefined; failure: ExecuteResult<ProtocolFrame<ChatCompletionsStreamEvent>> & { type: 'api-error' } }> => {
   try {
     const rewrittenMessages = await rewriteStoredResponsesItemsForCandidate(
@@ -99,7 +98,7 @@ const rewriteOrRenderChatCompletionsFailure = async (
 const callChatCompletionsAsExecuteResult = async (
   payload: ChatCompletionsPayload,
   ctx: GatewayCtx,
-  candidate: ChatCandidate,
+  candidate: ProviderCandidate,
   headers: Headers,
 ): Promise<ExecuteResult<ProtocolFrame<ChatCompletionsStreamEvent>>> => {
   const { model: _model, ...body } = payload;
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
index fe2f93ea9..c8c322a46 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/attempt_test.ts
@@ -4,13 +4,12 @@ import { chatCompletionsAttempt } from './attempt.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesStreamEvent } from '@floway-dev/protocols/messages';
 import type { ResponsesResult } from '@floway-dev/protocols/responses';
-import { directFetcher, type ProviderResponsesResult, type ProviderStreamResult, type ResponsesAction, type UpstreamCallOptions } from '@floway-dev/provider';
+import { directFetcher, type ProviderCandidate, type ProviderResponsesResult, type ProviderStreamResult, type ResponsesAction, type UpstreamCallOptions } from '@floway-dev/provider';
 import { assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 const API_KEY_ID = 'key_chat_completions_attempt_test';
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/http_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/http_test.ts
index 6df2dd23c..20e3cd539 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/http_test.ts
@@ -5,10 +5,9 @@ import type { AuthVars } from '../../../middleware/auth.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import type { ApiKey, User } from '../../../repo/types.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
-import { directFetcher, type ProviderStreamResult, type UpstreamCallOptions } from '@floway-dev/provider';
+import { directFetcher, type ProviderCandidate, type ProviderStreamResult, type UpstreamCallOptions } from '@floway-dev/provider';
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/routing.ts b/packages/gateway/src/data-plane/chat/chat-completions/routing.ts
index efcc380d6..e2f771830 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/routing.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/routing.ts
@@ -1,13 +1,13 @@
 import { classifyResponsesItemAffinity } from '../responses/items/affinity.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import type { ChatCandidate } from '../shared/candidates.ts';
 import type { RoutingDecision } from '../shared/routing.ts';
 import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
+import type { ProviderCandidate } from '@floway-dev/provider';
 import { chatCompletionsViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
 
 export const planChatCompletionsRouting = async (input: {
   readonly payload: ChatCompletionsPayload;
-  readonly candidates: readonly ChatCandidate[];
+  readonly candidates: readonly ProviderCandidate[];
   readonly store: StatefulResponsesStore;
 }): Promise<RoutingDecision> =>
   await classifyResponsesItemAffinity({
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/routing_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/routing_test.ts
index 60cf54096..f81f0c11a 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/routing_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/routing_test.ts
@@ -5,9 +5,8 @@ import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import { createStoredResponsesItemId } from '../responses/items/format.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
-import { directFetcher } from '@floway-dev/provider';
+import { directFetcher, type ProviderCandidate } from '@floway-dev/provider';
 import { stubProvider, stubUpstreamModel, assertEquals } from '@floway-dev/test-utils';
 
 const API_KEY_ID = 'key_chat_completions_routing_test';
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
index 8578deb8e..4a27e47d5 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
@@ -3,13 +3,12 @@ import { test, vi } from 'vitest';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesStreamEvent } from '@floway-dev/protocols/messages';
 import type { ResponsesResult, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
-import { directFetcher, type ProviderResponsesResult, type ProviderStreamResult, type ResponsesAction, type UpstreamCallOptions } from '@floway-dev/provider';
+import { directFetcher, type ProviderCandidate, type ProviderResponsesResult, type ProviderStreamResult, type ResponsesAction, type UpstreamCallOptions } from '@floway-dev/provider';
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 // Mock the candidates seam so each test hands the serve exactly the
diff --git a/packages/gateway/src/data-plane/chat/gemini/attempt.ts b/packages/gateway/src/data-plane/chat/gemini/attempt.ts
index d352f3efd..38904c57c 100644
--- a/packages/gateway/src/data-plane/chat/gemini/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/attempt.ts
@@ -6,20 +6,19 @@ import { chatCompletionsAttempt } from '../chat-completions/attempt.ts';
 import { messagesAttempt } from '../messages/attempt.ts';
 import { responsesAttempt } from '../responses/attempt.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import type { ChatCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import { traverseTranslation } from '../shared/translate-traverse.ts';
 import { runInterceptors } from '@floway-dev/interceptor';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
-import { plainResult, type ExecuteResult, type GeminiInvocation, type PlainResult } from '@floway-dev/provider';
+import { plainResult, type ExecuteResult, type GeminiInvocation, type PlainResult, type ProviderCandidate } from '@floway-dev/provider';
 import { translateGeminiViaChatCompletions, translateGeminiViaMessages, translateGeminiViaResponses } from '@floway-dev/translate';
 
 export interface GeminiAttemptGenerateArgs {
   readonly payload: GeminiPayload;
   readonly ctx: GatewayCtx;
   readonly store: StatefulResponsesStore;
-  readonly candidate: ChatCandidate;
+  readonly candidate: ProviderCandidate;
   readonly headers: Headers;
 }
 
@@ -27,7 +26,7 @@ export interface GeminiAttemptCountTokensArgs {
   readonly payload: GeminiPayload;
   readonly ctx: GatewayCtx;
   readonly store: StatefulResponsesStore;
-  readonly candidate: ChatCandidate;
+  readonly candidate: ProviderCandidate;
   readonly headers: Headers;
 }
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
index 14db7a150..3b61d8998 100644
--- a/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/attempt_test.ts
@@ -4,14 +4,13 @@ import { geminiAttempt } from './attempt.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
 import type { GeminiPayload } from '@floway-dev/protocols/gemini';
 import type { MessagesStreamEvent } from '@floway-dev/protocols/messages';
 import type { ResponsesResult, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
-import { directFetcher, type ProviderCallResult, type ProviderResponsesResult, type ProviderStreamResult, type ResponsesAction, type UpstreamCallOptions } from '@floway-dev/provider';
+import { directFetcher, type ProviderCallResult, type ProviderCandidate, type ProviderResponsesResult, type ProviderStreamResult, type ResponsesAction, type UpstreamCallOptions } from '@floway-dev/provider';
 import { assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 const API_KEY_ID = 'key_gemini_attempt_test';
diff --git a/packages/gateway/src/data-plane/chat/gemini/http_test.ts b/packages/gateway/src/data-plane/chat/gemini/http_test.ts
index 1b3ee8a2d..4090d0bc6 100644
--- a/packages/gateway/src/data-plane/chat/gemini/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/http_test.ts
@@ -5,11 +5,10 @@ import type { AuthVars } from '../../../middleware/auth.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import type { ApiKey, User } from '../../../repo/types.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesStreamEvent } from '@floway-dev/protocols/messages';
-import { directFetcher, type ProviderCallResult, type ProviderStreamResult, type UpstreamCallOptions } from '@floway-dev/provider';
+import { directFetcher, type ProviderCallResult, type ProviderCandidate, type ProviderStreamResult, type UpstreamCallOptions } from '@floway-dev/provider';
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
diff --git a/packages/gateway/src/data-plane/chat/gemini/routing.ts b/packages/gateway/src/data-plane/chat/gemini/routing.ts
index 28e353c59..35222b4a5 100644
--- a/packages/gateway/src/data-plane/chat/gemini/routing.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/routing.ts
@@ -1,15 +1,15 @@
 import { classifyResponsesItemAffinity } from '../responses/items/affinity.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import type { ChatCandidate } from '../shared/candidates.ts';
 import type { RoutingDecision } from '../shared/routing.ts';
 import type { GeminiPayload } from '@floway-dev/protocols/gemini';
+import type { ProviderCandidate } from '@floway-dev/provider';
 import { geminiViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
 
 export type GeminiRoutingDecision = RoutingDecision;
 
 export const planGeminiRouting = async (input: {
   readonly payload: GeminiPayload;
-  readonly candidates: readonly ChatCandidate[];
+  readonly candidates: readonly ProviderCandidate[];
   readonly store: StatefulResponsesStore;
 }): Promise<GeminiRoutingDecision> =>
   await classifyResponsesItemAffinity({
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
index f05dbd786..97c159e2f 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
@@ -3,14 +3,13 @@ import { test, vi } from 'vitest';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
 import type { GeminiPayload } from '@floway-dev/protocols/gemini';
 import type { MessagesStreamEvent } from '@floway-dev/protocols/messages';
 import type { ResponsesResult, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
-import { directFetcher, type ProviderCallResult, type ProviderResponsesResult, type ProviderStreamResult, type ResponsesAction, type UpstreamCallOptions } from '@floway-dev/provider';
+import { directFetcher, type ProviderCallResult, type ProviderCandidate, type ProviderResponsesResult, type ProviderStreamResult, type ResponsesAction, type UpstreamCallOptions } from '@floway-dev/provider';
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
diff --git a/packages/gateway/src/data-plane/chat/messages/attempt.ts b/packages/gateway/src/data-plane/chat/messages/attempt.ts
index 2f501a666..64dc1f157 100644
--- a/packages/gateway/src/data-plane/chat/messages/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/messages/attempt.ts
@@ -6,7 +6,6 @@ import { responsesAttempt } from '../responses/attempt.ts';
 import { rewriteStoredResponsesItemsForCandidate } from '../responses/items/rewrite.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
 import { providerStreamResultToExecuteResult, buildUpstreamCallOptions } from '../shared/attempt-helpers.ts';
-import type { ChatCandidate } from '../shared/candidates.ts';
 import { tryCatchChatServeFailure } from '../shared/errors.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import { plainResultFromResponse } from '../shared/respond.ts';
@@ -16,7 +15,7 @@ import { createUpstreamLatencyRecorder } from '../shared/upstream-telemetry.ts';
 import { runInterceptors } from '@floway-dev/interceptor';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesMessage, MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
-import { type ExecuteResult, type PlainResult } from '@floway-dev/provider';
+import { type ExecuteResult, type PlainResult, type ProviderCandidate } from '@floway-dev/provider';
 import { translateMessagesViaChatCompletions, translateMessagesViaResponses } from '@floway-dev/translate';
 import { messagesViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
 
@@ -24,7 +23,7 @@ export interface MessagesAttemptGenerateArgs {
   readonly payload: MessagesPayload;
   readonly ctx: GatewayCtx;
   readonly store: StatefulResponsesStore;
-  readonly candidate: ChatCandidate;
+  readonly candidate: ProviderCandidate;
   readonly headers: Headers;
 }
 
@@ -32,7 +31,7 @@ export interface MessagesAttemptCountTokensArgs {
   readonly payload: MessagesPayload;
   readonly ctx: GatewayCtx;
   readonly store: StatefulResponsesStore;
-  readonly candidate: ChatCandidate;
+  readonly candidate: ProviderCandidate;
   readonly headers: Headers;
 }
 
@@ -127,7 +126,7 @@ export const messagesAttempt = {
 const rewriteOrRenderMessagesFailure = async (
   payload: MessagesPayload,
   store: StatefulResponsesStore,
-  candidate: ChatCandidate,
+  candidate: ProviderCandidate,
 ): Promise<{ payload: MessagesPayload; failure?: undefined } | { payload?: undefined; failure: ExecuteResult<ProtocolFrame<MessagesStreamEvent>> & { type: 'api-error' } }> => {
   try {
     const rewrittenMessages = await rewriteStoredResponsesItemsForCandidate(
diff --git a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
index cbc2e5c85..53d6f471d 100644
--- a/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/attempt_test.ts
@@ -4,13 +4,12 @@ import { messagesAttempt } from './attempt.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
 import type { ResponsesResult } from '@floway-dev/protocols/responses';
-import { directFetcher, type ProviderCallResult, type ProviderResponsesResult, type ProviderStreamResult, type ResponsesAction, type UpstreamCallOptions } from '@floway-dev/provider';
+import { directFetcher, type ProviderCallResult, type ProviderCandidate, type ProviderResponsesResult, type ProviderStreamResult, type ResponsesAction, type UpstreamCallOptions } from '@floway-dev/provider';
 import { assertEquals, assertExists, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 const API_KEY_ID = 'key_messages_attempt_test';
diff --git a/packages/gateway/src/data-plane/chat/messages/http_test.ts b/packages/gateway/src/data-plane/chat/messages/http_test.ts
index dc69d31aa..89fd82416 100644
--- a/packages/gateway/src/data-plane/chat/messages/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/http_test.ts
@@ -5,13 +5,12 @@ import type { AuthVars } from '../../../middleware/auth.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import type { ApiKey, User } from '../../../repo/types.ts';
-import type { ChatCandidate } from '../shared/candidates.ts';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesStreamEvent } from '@floway-dev/protocols/messages';
-import { directFetcher, type ProviderCallResult, type ProviderStreamResult, type UpstreamCallOptions } from '@floway-dev/provider';
+import { directFetcher, type ProviderCallResult, type ProviderCandidate, type ProviderStreamResult, type UpstreamCallOptions } from '@floway-dev/provider';
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
-const candidatesQueue: { readonly candidates: readonly ChatCandidate[]; readonly sawModel: boolean }[] = [];
+const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
 vi.mock('../../providers/registry.ts', async importOriginal => {
   const original = await importOriginal<typeof import('../../providers/registry.ts')>();
   return {
@@ -28,7 +27,7 @@ const { messagesHttp } = await import('./http.ts');
 
 const API_KEY_ID = 'key_messages_http_test';
 
-const queueCandidates = (candidates: readonly ChatCandidate[], sawModel = candidates.length > 0): void => {
+const queueCandidates = (candidates: readonly ProviderCandidate[], sawModel = candidates.length > 0): void => {
   candidatesQueue.push({ candidates, sawModel });
 };
 
@@ -104,7 +103,7 @@ const makeCandidate = (overrides: {
   upstream?: string;
   callMessages?: (model: unknown, body: unknown, signal?: AbortSignal, opts?: UpstreamCallOptions) => Promise<ProviderStreamResult<MessagesStreamEvent>>;
   callMessagesCountTokens?: (model: unknown, body: unknown, signal?: AbortSignal, opts?: UpstreamCallOptions) => Promise<ProviderCallResult>;
-} = {}): ChatCandidate => {
+} = {}): ProviderCandidate => {
   const upstream = overrides.upstream ?? 'up_test';
   const upstreamModel = stubUpstreamModel();
   const provider = stubProvider({
diff --git a/packages/gateway/src/data-plane/chat/messages/routing.ts b/packages/gateway/src/data-plane/chat/messages/routing.ts
index e9783625c..5582aba37 100644
--- a/packages/gateway/src/data-plane/chat/messages/routing.ts
+++ b/packages/gateway/src/data-plane/chat/messages/routing.ts
@@ -1,15 +1,15 @@
 import { classifyResponsesItemAffinity } from '../responses/items/affinity.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import type { ChatCandidate } from '../shared/candidates.ts';
 import type { RoutingDecision } from '../shared/routing.ts';
 import type { MessagesPayload } from '@floway-dev/protocols/messages';
+import type { ProviderCandidate } from '@floway-dev/provider';
 import { messagesViaResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
 
 export type MessagesRoutingDecision = RoutingDecision;
 
 export const planMessagesRouting = async (input: {
   readonly payload: MessagesPayload;
-  readonly candidates: readonly ChatCandidate[];
+  readonly candidates: readonly ProviderCandidate[];
   readonly store: StatefulResponsesStore;
 }): Promise<MessagesRoutingDecision> =>
   await classifyResponsesItemAffinity({
diff --git a/packages/gateway/src/data-plane/chat/messages/routing_test.ts b/packages/gateway/src/data-plane/chat/messages/routing_test.ts
index eb11bb289..fbacd17c2 100644
--- a/packages/gateway/src/data-plane/chat/messages/routing_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/routing_test.ts
@@ -5,9 +5,8 @@ import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import { createStoredResponsesItemId } from '../responses/items/format.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { MessagesPayload } from '@floway-dev/protocols/messages';
-import { directFetcher } from '@floway-dev/provider';
+import { directFetcher, type ProviderCandidate } from '@floway-dev/provider';
 import { stubProvider, stubUpstreamModel, assertEquals } from '@floway-dev/test-utils';
 
 const API_KEY_ID = 'key_messages_routing_test';
diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
index 8f25ff47a..075e48879 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
@@ -3,12 +3,11 @@ import { test, vi } from 'vitest';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import { createNonResponsesSourceStore } from '../responses/items/store.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
 import type { ResponsesResult, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
-import { defaultsForProvider, directFetcher, type ProviderCallResult, type ProviderResponsesResult, type ProviderStreamResult, type ResponsesAction, type UpstreamCallOptions } from '@floway-dev/provider';
+import { defaultsForProvider, directFetcher, type ProviderCallResult, type ProviderCandidate, type ProviderResponsesResult, type ProviderStreamResult, type ResponsesAction, type UpstreamCallOptions } from '@floway-dev/provider';
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 const candidatesQueue: { readonly candidates: readonly ProviderCandidate[]; readonly sawModel: boolean }[] = [];
diff --git a/packages/gateway/src/data-plane/chat/responses/attempt.ts b/packages/gateway/src/data-plane/chat/responses/attempt.ts
index 7e1275f4a..d75e492a2 100644
--- a/packages/gateway/src/data-plane/chat/responses/attempt.ts
+++ b/packages/gateway/src/data-plane/chat/responses/attempt.ts
@@ -10,7 +10,6 @@ import { recordPerformanceLatency, requireRecordedDurationMs } from '../../share
 import { chatCompletionsAttempt } from '../chat-completions/attempt.ts';
 import { messagesAttempt } from '../messages/attempt.ts';
 import { providerStreamResultToExecuteResult, buildUpstreamCallOptions, telemetryModelIdentity } from '../shared/attempt-helpers.ts';
-import type { ChatCandidate } from '../shared/candidates.ts';
 import { tryCatchChatServeFailure } from '../shared/errors.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import { createSanitizeTraceCtx, sanitizeForResponsesUpstream } from '../shared/sanitize.ts';
@@ -20,7 +19,7 @@ import { runInterceptors } from '@floway-dev/interceptor';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import { collectResponsesProtocolEventsToResult } from '@floway-dev/protocols/responses';
 import { type ResponsesPayload, type ResponsesStreamEvent } from '@floway-dev/protocols/responses';
-import { eventResult, readUpstreamApiError, type ExecuteResult, type ProviderResponsesResult, type ResponsesAction } from '@floway-dev/provider';
+import { eventResult, readUpstreamApiError, type ExecuteResult, type ProviderCandidate, type ProviderResponsesResult, type ResponsesAction } from '@floway-dev/provider';
 import { translateResponsesViaChatCompletions, translateResponsesViaMessages } from '@floway-dev/translate';
 
 export interface ResponsesAttemptInvokeArgs {
@@ -28,7 +27,7 @@ export interface ResponsesAttemptInvokeArgs {
   readonly action: ResponsesAction;
   readonly ctx: GatewayCtx;
   readonly store: StatefulResponsesStore;
-  readonly candidate: ChatCandidate;
+  readonly candidate: ProviderCandidate;
   readonly headers: Headers;
 }
 
@@ -155,7 +154,7 @@ type RewriteOutcome =
 const rewriteOrRenderFailure = async (
   payload: ResponsesPayload,
   store: StatefulResponsesStore,
-  candidate: ChatCandidate,
+  candidate: ProviderCandidate,
 ): Promise<RewriteOutcome> => {
   try {
     return await rewriteResponsesItemsForCandidate(payload, store, candidate);
@@ -264,7 +263,7 @@ const dispatchResponses = async (
 // the provider executed.
 const providerResponsesResultToExecuteResult = async (
   providerResult: ProviderResponsesResult,
-  candidate: ChatCandidate,
+  candidate: ProviderCandidate,
   ctx: GatewayCtx,
   recorder: ReturnType<typeof createUpstreamLatencyRecorder>,
 ): Promise<ExecuteResult<ProtocolFrame<ResponsesStreamEvent>>> => {
diff --git a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
index 8583b3fa9..d80eddf7d 100644
--- a/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/attempt_test.ts
@@ -7,12 +7,11 @@ import { createResponsesHttpStore } from './items/store.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import type { StoredResponsesItem } from '../../../repo/types.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesStreamEvent } from '@floway-dev/protocols/messages';
 import type { ResponsesPayload, ResponsesResult, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
-import { directFetcher, type ProviderResponsesResult, type ProviderStreamResult, type ResponsesAction, type UpstreamCallOptions, type UpstreamModel } from '@floway-dev/provider';
+import { directFetcher, type ProviderCandidate, type ProviderResponsesResult, type ProviderStreamResult, type ResponsesAction, type UpstreamCallOptions, type UpstreamModel } from '@floway-dev/provider';
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 const API_KEY_ID = 'key_attempt_test';
diff --git a/packages/gateway/src/data-plane/chat/responses/http_test.ts b/packages/gateway/src/data-plane/chat/responses/http_test.ts
index ffda3d2dc..f4c205b93 100644
--- a/packages/gateway/src/data-plane/chat/responses/http_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/http_test.ts
@@ -6,10 +6,9 @@ import type { AuthVars } from '../../../middleware/auth.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import type { ApiKey, StoredResponsesItem, User } from '../../../repo/types.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
 import type { ResponsesPayload, ResponsesResult, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
-import { directFetcher, type ProviderResponsesResult, type ResponsesAction, type UpstreamCallOptions } from '@floway-dev/provider';
+import { directFetcher, type ProviderCandidate, type ProviderResponsesResult, type ResponsesAction, type UpstreamCallOptions } from '@floway-dev/provider';
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 // Mock the candidates seam so each test hands the http entry exactly the
diff --git a/packages/gateway/src/data-plane/chat/responses/items/affinity.ts b/packages/gateway/src/data-plane/chat/responses/items/affinity.ts
index d5b5e5e47..a2c612e82 100644
--- a/packages/gateway/src/data-plane/chat/responses/items/affinity.ts
+++ b/packages/gateway/src/data-plane/chat/responses/items/affinity.ts
@@ -1,10 +1,10 @@
 import { hashResponsesItemEncryptedContent, isStoredResponsesItemId, responsesItemEncryptedContent, responsesItemId } from './format.ts';
 import type { StatefulResponsesStore } from './store.ts';
 import type { StoredResponsesItem } from '../../../../repo/types.ts';
-import type { ProviderCandidate } from '../../shared/candidates.ts';
 import type { ChatServeFailure } from '../../shared/errors.ts';
 import type { RoutingDecision } from '../../shared/routing.ts';
 import type { ResponsesInputItem } from '@floway-dev/protocols/responses';
+import type { ProviderCandidate } from '@floway-dev/provider';
 import type { ResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
 
 type StoredResponsesAffinity = 'forcing' | 'portable' | 'downgradable' | 'non_affinity';
diff --git a/packages/gateway/src/data-plane/chat/responses/items/affinity_test.ts b/packages/gateway/src/data-plane/chat/responses/items/affinity_test.ts
index d3a114464..02f1ea80c 100644
--- a/packages/gateway/src/data-plane/chat/responses/items/affinity_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/items/affinity_test.ts
@@ -6,9 +6,8 @@ import { createNonResponsesSourceStore } from './store.ts';
 import { initRepo } from '../../../../repo/index.ts';
 import { InMemoryRepo } from '../../../../repo/memory.ts';
 import type { StoredResponsesItem } from '../../../../repo/types.ts';
-import type { ProviderCandidate } from '../../shared/candidates.ts';
 import type { ResponsesInputItem } from '@floway-dev/protocols/responses';
-import { directFetcher } from '@floway-dev/provider';
+import { directFetcher, type ProviderCandidate } from '@floway-dev/provider';
 import { stubProvider, stubUpstreamModel, assertEquals } from '@floway-dev/test-utils';
 import { responsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
 
diff --git a/packages/gateway/src/data-plane/chat/responses/items/rewrite.ts b/packages/gateway/src/data-plane/chat/responses/items/rewrite.ts
index 27877edf0..06f66cf87 100644
--- a/packages/gateway/src/data-plane/chat/responses/items/rewrite.ts
+++ b/packages/gateway/src/data-plane/chat/responses/items/rewrite.ts
@@ -1,9 +1,9 @@
 import { createTemporaryResponsesItemId, hashResponsesItemEncryptedContent, responsesItemEncryptedContent, responsesItemId } from './format.ts';
 import type { StatefulResponsesStore } from './store.ts';
 import type { StoredResponsesItem } from '../../../../repo/types.ts';
-import type { ProviderCandidate } from '../../shared/candidates.ts';
 import { throwChatServeFailure } from '../../shared/errors.ts';
 import type { ResponsesInputItem, ResponsesPayload } from '@floway-dev/protocols/responses';
+import type { ProviderCandidate } from '@floway-dev/provider';
 import type { ResponsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
 
 const isUpstreamOwned = (row: StoredResponsesItem): row is StoredResponsesItem & { upstreamId: string } =>
diff --git a/packages/gateway/src/data-plane/chat/responses/items/rewrite_test.ts b/packages/gateway/src/data-plane/chat/responses/items/rewrite_test.ts
index db4b45d07..f6e5e3911 100644
--- a/packages/gateway/src/data-plane/chat/responses/items/rewrite_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/items/rewrite_test.ts
@@ -7,9 +7,8 @@ import { createNonResponsesSourceStore } from './store.ts';
 import { initRepo } from '../../../../repo/index.ts';
 import { InMemoryRepo } from '../../../../repo/memory.ts';
 import type { StoredResponsesItem } from '../../../../repo/types.ts';
-import type { ProviderCandidate } from '../../shared/candidates.ts';
 import type { ResponsesInputItem, ResponsesPayload } from '@floway-dev/protocols/responses';
-import { directFetcher } from '@floway-dev/provider';
+import { directFetcher, type ProviderCandidate } from '@floway-dev/provider';
 import { stubProvider, stubUpstreamModel, assert, assertEquals, assertFalse } from '@floway-dev/test-utils';
 import { responsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
 
diff --git a/packages/gateway/src/data-plane/chat/responses/routing.ts b/packages/gateway/src/data-plane/chat/responses/routing.ts
index 05661aa69..4bc8a7662 100644
--- a/packages/gateway/src/data-plane/chat/responses/routing.ts
+++ b/packages/gateway/src/data-plane/chat/responses/routing.ts
@@ -1,13 +1,13 @@
 import { classifyResponsesItemAffinity } from './items/affinity.ts';
-import type { ChatCandidate } from '../shared/candidates.ts';
 import type { RoutingDecision } from '../shared/routing.ts';
 import type { StatefulResponsesStore } from './items/store.ts';
 import type { ResponsesInputItem, ResponsesPayload } from '@floway-dev/protocols/responses';
+import type { ProviderCandidate } from '@floway-dev/provider';
 import { responsesItemsView } from '@floway-dev/translate/via-responses/responses-items';
 
 export const planResponsesRouting = async (input: {
   readonly payload: ResponsesPayload;
-  readonly candidates: readonly ChatCandidate[];
+  readonly candidates: readonly ProviderCandidate[];
   readonly store: StatefulResponsesStore;
 }): Promise<RoutingDecision> => {
   // A bare-string input is wrapped into a synthetic user message for staging;
diff --git a/packages/gateway/src/data-plane/chat/responses/routing_test.ts b/packages/gateway/src/data-plane/chat/responses/routing_test.ts
index 2fafb477b..8ad4d0823 100644
--- a/packages/gateway/src/data-plane/chat/responses/routing_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/routing_test.ts
@@ -6,9 +6,8 @@ import { planResponsesRouting } from './routing.ts';
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import type { StoredResponsesItem } from '../../../repo/types.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { ResponsesPayload } from '@floway-dev/protocols/responses';
-import { directFetcher } from '@floway-dev/provider';
+import { directFetcher, type ProviderCandidate } from '@floway-dev/provider';
 import { stubProvider, stubUpstreamModel, assertEquals } from '@floway-dev/test-utils';
 
 const API_KEY_ID = 'key_routing_test';
diff --git a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
index 5ec26a09b..8a0bf1650 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
@@ -4,12 +4,11 @@ import { planResponsesRouting } from './routing.ts';
 import { ALIAS_RESPONSE_HEADER, applyChatRulesToResponses } from '../../model-aliases/apply.ts';
 import { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
 import { resolveModelCandidates } from '../../providers/registry.ts';
-import { type ChatCandidate } from '../shared/candidates.ts';
 import { aliasFailureFromError } from '../shared/errors.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ModelEndpoints, ProtocolFrame } from '@floway-dev/protocols/common';
 import type { ResponsesInputItem, ResponsesPayload, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
-import type { ExecuteResult, ChatTargetApi } from '@floway-dev/provider';
+import type { ChatTargetApi, ExecuteResult, ProviderCandidate } from '@floway-dev/provider';
 
 // Thrown when a request names a `previous_response_id` that the store cannot
 // resolve. The HTTP/WS entry layer catches this and renders the OpenAI-shaped
@@ -76,7 +75,7 @@ const stageUserInputItems = async (input: ResponsesPayload['input'], store: Stat
 
 export type ResponsesServePlan =
   | { readonly kind: 'failure'; readonly result: ExecuteResult<ProtocolFrame<ResponsesStreamEvent>> }
-  | { readonly kind: 'ready'; readonly prepared: ResponsesPayload; readonly candidate: ChatCandidate };
+  | { readonly kind: 'ready'; readonly prepared: ResponsesPayload; readonly candidate: ProviderCandidate };
 
 // Shared serve-side prep for both `responsesServe.generate` and
 // `responsesServe.compact`. Returns a rendered failure result when no
diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
index 053aba44c..b84ea2c62 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
@@ -5,13 +5,12 @@ import { createResponsesHttpStore, MemoryStatefulResponsesBacking, LayeredStatef
 import { initRepo } from '../../../repo/index.ts';
 import { InMemoryRepo } from '../../../repo/memory.ts';
 import type { StoredResponsesItem, StoredResponsesSnapshot } from '../../../repo/types.ts';
-import type { ProviderCandidate } from '../shared/candidates.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import { doneFrame, eventFrame, type ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesStreamEvent } from '@floway-dev/protocols/messages';
 import type { ResponsesPayload, ResponsesResult, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
-import { directFetcher, type ProviderResponsesResult, type ProviderStreamResult, type ResponsesAction, type UpstreamCallOptions } from '@floway-dev/provider';
+import { directFetcher, type ProviderCandidate, type ProviderResponsesResult, type ProviderStreamResult, type ResponsesAction, type UpstreamCallOptions } from '@floway-dev/provider';
 import { assert, assertEquals, stubProvider, stubUpstreamModel } from '@floway-dev/test-utils';
 
 // `resolveModelCandidates` is the only seam between serve and the
diff --git a/packages/gateway/src/data-plane/chat/shared/attempt-helpers.ts b/packages/gateway/src/data-plane/chat/shared/attempt-helpers.ts
index 64a072059..20486e478 100644
--- a/packages/gateway/src/data-plane/chat/shared/attempt-helpers.ts
+++ b/packages/gateway/src/data-plane/chat/shared/attempt-helpers.ts
@@ -1,9 +1,8 @@
-import type { ProviderCandidate } from './candidates.ts';
 import type { GatewayCtx } from './gateway-ctx.ts';
 import { recordUpstreamHttpFailure, upstreamPerformanceContext, withUpstreamTelemetry } from './upstream-telemetry.ts';
 import { requireRecordedDurationMs, type UpstreamLatencyRecorder } from '../../shared/telemetry/performance.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
-import { eventResult, readUpstreamApiError, type ExecuteResult, type ProviderStreamResult, type TelemetryModelIdentity, type UpstreamCallOptions } from '@floway-dev/provider';
+import { eventResult, readUpstreamApiError, type ExecuteResult, type ProviderCandidate, type ProviderStreamResult, type TelemetryModelIdentity, type UpstreamCallOptions } from '@floway-dev/provider';
 
 // Telemetry identity for the chosen candidate plus the upstream-reported
 // model key. Pricing reads off the provider so the cost lookup respects any
diff --git a/packages/gateway/src/data-plane/chat/shared/candidates.ts b/packages/gateway/src/data-plane/chat/shared/candidates.ts
deleted file mode 100644
index a322f99ec..000000000
--- a/packages/gateway/src/data-plane/chat/shared/candidates.ts
+++ /dev/null
@@ -1,11 +0,0 @@
-// Chat-side facade over the shared `resolveModelCandidates` helper. The chat
-// surfaces (chat-completions, messages, responses, gemini) all share the
-// `ChatTargetApi` target descriptor; the passthrough surfaces use
-// `ModelEndpointKey` directly. Both ride on the same resolve helper — see
-// `data-plane/providers/registry.ts`.
-
-import type { ProviderCandidate } from '@floway-dev/provider';
-
-export type { ProviderCandidate };
-
-export type ChatCandidate = ProviderCandidate;
diff --git a/packages/gateway/src/data-plane/chat/shared/routing.ts b/packages/gateway/src/data-plane/chat/shared/routing.ts
index ddca37cb0..10b1584e6 100644
--- a/packages/gateway/src/data-plane/chat/shared/routing.ts
+++ b/packages/gateway/src/data-plane/chat/shared/routing.ts
@@ -1,10 +1,10 @@
-import type { ChatCandidate, ProviderCandidate } from './candidates.ts';
 import type { ChatServeFailure } from './errors.ts';
+import type { ProviderCandidate } from '@floway-dev/provider';
 
 // Generic over the candidate type so call sites can narrow back to their
 // concrete shape. The candidate filtering and ordering inside routing is
 // shape-agnostic — it touches `binding.upstream` and
 // `binding.supportsResponsesItemReference` only.
-export type RoutingDecision<T extends ProviderCandidate = ChatCandidate> =
+export type RoutingDecision<T extends ProviderCandidate = ProviderCandidate> =
   | { readonly kind: 'success'; readonly candidates: readonly T[] }
   | { readonly kind: 'failure'; readonly failure: ChatServeFailure };
diff --git a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry.ts b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry.ts
index c362071cb..cc40bf330 100644
--- a/packages/gateway/src/data-plane/chat/shared/upstream-telemetry.ts
+++ b/packages/gateway/src/data-plane/chat/shared/upstream-telemetry.ts
@@ -1,8 +1,7 @@
-import type { ProviderCandidate } from './candidates.ts';
 import type { GatewayCtx } from './gateway-ctx.ts';
 import { recordPerformanceError, recordPerformanceLatency } from '../../shared/telemetry/performance.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
-import type { ChatTargetApi, PerformanceTelemetryContext } from '@floway-dev/provider';
+import type { ChatTargetApi, PerformanceTelemetryContext, ProviderCandidate } from '@floway-dev/provider';
 
 export { createUpstreamLatencyRecorder } from '../../shared/telemetry/performance.ts';
 

From 3dab73058b2535fe2fa42fd1e38c9d8995886ed6 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 01:53:10 +0800
Subject: [PATCH 142/170] refactor(web): extract parseOptionalNumber to a
 shared util
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`ChatMetadataEditor.vue` and `ModelEditor.vue` carried the exact same
`parseOptionalNumber` helper — blank, null, undefined, non-finite, and
negative inputs collapse to `undefined`; every other value passes
through `Number(raw)`. The contract is the same on both sides because
both editors feed nonnegative numeric values (token caps, budget
bounds, pricing) that the backend validates identically. Lift it into
`apps/web/src/utils/parse-optional-number.ts` so the rule has one
source of truth.
---
 .../web/src/components/shared/ChatMetadataEditor.vue | 10 +---------
 .../web/src/components/upstream-edit/ModelEditor.vue | 10 +---------
 apps/web/src/utils/parse-optional-number.ts          | 12 ++++++++++++
 3 files changed, 14 insertions(+), 18 deletions(-)
 create mode 100644 apps/web/src/utils/parse-optional-number.ts

diff --git a/apps/web/src/components/shared/ChatMetadataEditor.vue b/apps/web/src/components/shared/ChatMetadataEditor.vue
index 16e7c9f61..d3539af8f 100644
--- a/apps/web/src/components/shared/ChatMetadataEditor.vue
+++ b/apps/web/src/components/shared/ChatMetadataEditor.vue
@@ -18,6 +18,7 @@
 import { computed, ref, watch } from 'vue';
 
 import type { AnnouncedMetadata, ChatModelInfo, ModelKind } from '../../api/types.ts';
+import { parseOptionalNumber } from '../../utils/parse-optional-number.ts';
 import { Button, Input, Switch, Tooltip } from '@floway-dev/ui';
 
 const props = defineProps<{
@@ -52,15 +53,6 @@ const patch = (next: AnnouncedMetadata) => {
   emit('update:modelValue', Object.keys(out).length > 0 ? out : undefined);
 };
 
-const parseOptionalNumber = (raw: string | number | null | undefined): number | undefined => {
-  if (raw === '' || raw === null || raw === undefined) return undefined;
-  const num = Number(raw);
-  // Both editor surfaces feed nonnegative integer counts (token caps,
-  // budget bounds); a typo that drops a negative shouldn't stage data
-  // the next PUT will 400 on.
-  return Number.isFinite(num) && num >= 0 ? num : undefined;
-};
-
 // ── Limits ────────────────────────────────────────────────────────────
 
 const updateLimit = (
diff --git a/apps/web/src/components/upstream-edit/ModelEditor.vue b/apps/web/src/components/upstream-edit/ModelEditor.vue
index 6948299c3..429dfac59 100644
--- a/apps/web/src/components/upstream-edit/ModelEditor.vue
+++ b/apps/web/src/components/upstream-edit/ModelEditor.vue
@@ -5,6 +5,7 @@ import EndpointsField from './EndpointsField.vue';
 import FlagOverridesEditor from './FlagOverridesEditor.vue';
 import { configOf, defaultEndpointsForKind, publicIdOf, titleFor, type Row } from './modelRows.ts';
 import type { AnnouncedMetadata, BillingDimension, FlagDef, ModelKind, ModelPricing, UpstreamChatConfig, UpstreamModelConfig, UpstreamProviderKind } from '../../api/types.ts';
+import { parseOptionalNumber } from '../../utils/parse-optional-number.ts';
 import ChatMetadataEditor from '../shared/ChatMetadataEditor.vue';
 import { Button, Input, Select, Switch, Tooltip } from '@floway-dev/ui';
 
@@ -66,15 +67,6 @@ const setKind = (k: ModelKind) => {
   patch({ kind: k, endpoints: defaultEndpointsForKind(k, config.value.endpoints) });
 };
 
-const parseOptionalNumber = (raw: string | number | null | undefined): number | undefined => {
-  if (raw === '' || raw === null || raw === undefined) return undefined;
-  const num = Number(raw);
-  // Backend pricing validators reject negatives (see `nonNegativeNumberField`
-  // in packages/provider/src/model-config.ts); drop them at the form boundary
-  // so a typo doesn't stage data the next PUT will 400 on.
-  return Number.isFinite(num) && num >= 0 ? num : undefined;
-};
-
 const updateCost = (key: BillingDimension, raw: string | number | null | undefined) => {
   if (!config.value) return;
   const cost = { ...(config.value.cost ?? {}) } as ModelPricing;
diff --git a/apps/web/src/utils/parse-optional-number.ts b/apps/web/src/utils/parse-optional-number.ts
new file mode 100644
index 000000000..25eae2811
--- /dev/null
+++ b/apps/web/src/utils/parse-optional-number.ts
@@ -0,0 +1,12 @@
+// Form-input parser shared by the chat-metadata and model editors. Both
+// editors feed nonnegative integer counts (token caps, budget bounds,
+// pricing factors); the backend validators reject negatives, so the form
+// boundary drops them before staging data the next PUT would 400 on.
+// Blank input, null, and undefined all collapse to `undefined` — the
+// "leave blank to inherit" semantic the editors render for optional
+// fields.
+export const parseOptionalNumber = (raw: string | number | null | undefined): number | undefined => {
+  if (raw === '' || raw === null || raw === undefined) return undefined;
+  const num = Number(raw);
+  return Number.isFinite(num) && num >= 0 ? num : undefined;
+};

From 28517995cbd5f63b5a29f4eab0990771126d4d06 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 01:55:29 +0800
Subject: [PATCH 143/170] =?UTF-8?q?refactor(registry):=20split=20getModels?=
 =?UTF-8?q?=20=E2=80=94=20let=20enumerateAddressableModelIds=20share=20the?=
 =?UTF-8?q?=20providers=20list?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`enumerateAddressableModelIds` called `getModels()` and then immediately
called `listModelProviders(upstreamFilter)` again — `getModels` already
listed providers internally, so the upstreams.list() round-trip and
provider-instantiation cost were paid twice. Lift the catalog assembly into
`getModelsFromProviders(providers, ...)` and let the addressable engine
thread the same list into both halves of its walk. `getModels` keeps
its old signature as a thin wrapper.
---
 .../src/data-plane/providers/addressable.ts   | 13 +++++++-----
 .../src/data-plane/providers/registry.ts      | 21 ++++++++++++++-----
 2 files changed, 24 insertions(+), 10 deletions(-)

diff --git a/packages/gateway/src/data-plane/providers/addressable.ts b/packages/gateway/src/data-plane/providers/addressable.ts
index 5e0813e99..46a988512 100644
--- a/packages/gateway/src/data-plane/providers/addressable.ts
+++ b/packages/gateway/src/data-plane/providers/addressable.ts
@@ -13,7 +13,7 @@
 // registry round trip.
 
 import { fetchUpstreamModelsCached } from './models-cache.ts';
-import { compareModelIds, getModels, listModelProviders } from './registry.ts';
+import { compareModelIds, getModelsFromProviders, listModelProviders } from './registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import type { Fetcher, ResolvedModel } from '@floway-dev/provider';
 
@@ -49,11 +49,14 @@ export const enumerateAddressableModelIds = async (
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
 ): Promise<readonly AddressableIdEntry[]> => {
-  // `getModels` throws the actionable "no upstream provider configured"
-  // message when the provider list is empty; surface it the same way here
-  // so /v1/models keeps its 502 + hint behavior on a brand-new gateway.
-  const realModels = await getModels(upstreamFilter, fetcherForUpstream, scheduler);
+  // Resolve providers once and thread them into the catalog assembly so
+  // the upstreams.list() round-trip and provider-instantiation cost is
+  // paid once per call. `getModelsFromProviders` throws the actionable
+  // "no upstream provider configured" message when the provider list is
+  // empty; surface it the same way here so /v1/models keeps its 502 +
+  // hint behavior on a brand-new gateway.
   const providers = await listModelProviders(upstreamFilter);
+  const realModels = await getModelsFromProviders(providers, fetcherForUpstream, scheduler);
   const byId = new Map(realModels.map(model => [model.id, model] as const));
 
   const entries: AddressableIdEntry[] = [];
diff --git a/packages/gateway/src/data-plane/providers/registry.ts b/packages/gateway/src/data-plane/providers/registry.ts
index 1519f3a4f..2261059de 100644
--- a/packages/gateway/src/data-plane/providers/registry.ts
+++ b/packages/gateway/src/data-plane/providers/registry.ts
@@ -233,14 +233,16 @@ export const compareModelIds = (a: string, b: string): number => {
     || cmp(a, b, -1);
 };
 
-// `fetcherForUpstream` routes each upstream's catalog fetch through its
-// per-upstream proxy chain.
-export const getModels = async (
-  upstreamFilter: readonly string[] | null,
+// Catalog assembly against an already-resolved provider list. Callers that
+// already paid the `listModelProviders` round-trip — `enumerateAddressableModelIds`
+// fans the same list twice, once into the catalog walk and once into the
+// addressable-only loop — pass providers through to avoid the duplicate
+// upstreams.list() DB query.
+export const getModelsFromProviders = async (
+  providers: readonly ModelProviderInstance[],
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
 ): Promise<ResolvedModel[]> => {
-  const providers = await listModelProviders(upstreamFilter);
   if (providers.length === 0) {
     throw new Error(NO_UPSTREAM_CONFIGURED_MESSAGE);
   }
@@ -252,6 +254,15 @@ export const getModels = async (
   return [];
 };
 
+// `fetcherForUpstream` routes each upstream's catalog fetch through its
+// per-upstream proxy chain.
+export const getModels = async (
+  upstreamFilter: readonly string[] | null,
+  fetcherForUpstream: (upstreamId: string) => Fetcher,
+  scheduler: BackgroundScheduler,
+): Promise<ResolvedModel[]> =>
+  await getModelsFromProviders(await listModelProviders(upstreamFilter), fetcherForUpstream, scheduler);
+
 interface ResolveCandidatesResult<TTarget> {
   readonly candidates: ReadonlyArray<{
     readonly provider: ModelProviderInstance;

From 6bc6432e303396cefdad82d8b924ac52b08fc315 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 02:01:09 +0800
Subject: [PATCH 144/170] refactor(chat): extract
 resolveCandidatesAndApplyAlias prelude helper
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The same five-step ritual ran at the head of every chat serve: catch
AliasNoTargetAvailableError → render protocol failure; pull
candidates/sawModel/failedUpstreams/aliasResolution off the result;
when an alias matched, mutate the payload (or local model var), apply
the protocol's chat-rule overlay, and stage the response header. Six
serve sites (chat-completions, messages × 2, gemini × 2, responses)
each carried the same boilerplate.

Extract into chat/shared/alias-prelude.ts. The protocol's mutation
plus the rule overlay live in an `applyAlias` callback; the no-target
failure renderer is supplied per protocol. The header staging and the
404 conversion stay in the helper so all surfaces agree on the
contract — `x-floway-alias` is set for every protocol the moment an
alias matches, and `AliasNoTargetAvailableError` always converts to
the protocol's `alias-no-target-available` failure shape.
---
 .../data-plane/chat/chat-completions/serve.ts | 45 +++++------
 .../src/data-plane/chat/gemini/serve.ts       | 70 +++++++----------
 .../src/data-plane/chat/messages/serve.ts     | 75 +++++++------------
 .../data-plane/chat/responses/serve-prep.ts   | 37 ++++-----
 .../data-plane/chat/shared/alias-prelude.ts   | 66 ++++++++++++++++
 5 files changed, 152 insertions(+), 141 deletions(-)
 create mode 100644 packages/gateway/src/data-plane/chat/shared/alias-prelude.ts

diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
index e68b8521b..69f73493f 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
@@ -1,11 +1,9 @@
 import { chatCompletionsAttempt } from './attempt.ts';
 import { renderChatCompletionsFailure } from './errors.ts';
 import { planChatCompletionsRouting } from './routing.ts';
-import { ALIAS_RESPONSE_HEADER, applyChatRulesToChatCompletions } from '../../model-aliases/apply.ts';
-import { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
-import { resolveModelCandidates } from '../../providers/registry.ts';
+import { applyChatRulesToChatCompletions } from '../../model-aliases/apply.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import { aliasFailureFromError } from '../shared/errors.ts';
+import { resolveCandidatesAndApplyAlias } from '../shared/alias-prelude.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
@@ -21,29 +19,22 @@ export interface ChatCompletionsServeGenerateArgs {
 export const chatCompletionsServe = {
   generate: async (args: ChatCompletionsServeGenerateArgs): Promise<ExecuteResult<ProtocolFrame<ChatCompletionsStreamEvent>>> => {
     const { payload, ctx, store, headers } = args;
-    let enumerated;
-    try {
-      enumerated = await resolveModelCandidates({
-        upstreamIds: ctx.upstreamIds,
-        modelName: payload.model,
-        pickTarget: endpoints =>
-          endpoints.chatCompletions ? 'chat-completions'
-            : endpoints.messages ? 'messages'
-              : endpoints.responses ? 'responses'
-                : null,
-        scheduler: ctx.backgroundScheduler,
-        currentColo: ctx.currentColo,
-      });
-    } catch (error) {
-      if (error instanceof AliasNoTargetAvailableError) return renderChatCompletionsFailure(aliasFailureFromError(error));
-      throw error;
-    }
-    const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
-    if (aliasResolution !== null) {
-      payload.model = aliasResolution.targetModelId;
-      applyChatRulesToChatCompletions(payload, aliasResolution.rules);
-      ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
-    }
+    const resolved = await resolveCandidatesAndApplyAlias({
+      ctx,
+      modelName: payload.model,
+      pickTarget: endpoints =>
+        endpoints.chatCompletions ? 'chat-completions'
+          : endpoints.messages ? 'messages'
+            : endpoints.responses ? 'responses'
+              : null,
+      applyAlias: resolution => {
+        payload.model = resolution.targetModelId;
+        applyChatRulesToChatCompletions(payload, resolution.rules);
+      },
+      renderAliasFailure: renderChatCompletionsFailure,
+    });
+    if (resolved.kind === 'failure') return resolved.result;
+    const { candidates, sawModel, failedUpstreams } = resolved;
     const decision = await planChatCompletionsRouting({ payload, candidates, store });
     if (decision.kind === 'failure') return renderChatCompletionsFailure(decision.failure);
 
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts
index e7f34f271..25b64ff2d 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts
@@ -1,11 +1,9 @@
 import { geminiAttempt } from './attempt.ts';
 import { renderGeminiFailure } from './errors.ts';
 import { planGeminiRouting } from './routing.ts';
-import { ALIAS_RESPONSE_HEADER, applyChatRulesToGemini } from '../../model-aliases/apply.ts';
-import { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
-import { resolveModelCandidates } from '../../providers/registry.ts';
+import { applyChatRulesToGemini } from '../../model-aliases/apply.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import { aliasFailureFromError } from '../shared/errors.ts';
+import { resolveCandidatesAndApplyAlias } from '../shared/alias-prelude.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
@@ -33,27 +31,18 @@ export interface GeminiServeCountTokensArgs {
 export const geminiServe = {
   generate: async (args: GeminiServeGenerateArgs): Promise<ExecuteResult<ProtocolFrame<GeminiStreamEvent>>> => {
     const { payload, ctx, store, headers } = args;
-    let enumerated;
-    try {
-      enumerated = await resolveModelCandidates({
-        upstreamIds: ctx.upstreamIds,
-        modelName: args.model,
-        // Gemini has no native upstream target in the provider API; prefer
-        // Chat Completions, then Messages, then Responses.
-        pickTarget: endpoints => endpoints.chatCompletions ? 'chat-completions' : endpoints.messages ? 'messages' : endpoints.responses ? 'responses' : null,
-        scheduler: ctx.backgroundScheduler,
-        currentColo: ctx.currentColo,
-      });
-    } catch (error) {
-      if (error instanceof AliasNoTargetAvailableError) return renderGeminiFailure(aliasFailureFromError(error), 'generate');
-      throw error;
-    }
-    const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
+    const resolved = await resolveCandidatesAndApplyAlias({
+      ctx,
+      modelName: args.model,
+      // Gemini has no native upstream target in the provider API; prefer
+      // Chat Completions, then Messages, then Responses.
+      pickTarget: endpoints => endpoints.chatCompletions ? 'chat-completions' : endpoints.messages ? 'messages' : endpoints.responses ? 'responses' : null,
+      applyAlias: resolution => applyChatRulesToGemini(payload, resolution.rules),
+      renderAliasFailure: failure => renderGeminiFailure(failure, 'generate'),
+    });
+    if (resolved.kind === 'failure') return resolved.result;
+    const { candidates, sawModel, failedUpstreams, aliasResolution } = resolved;
     const model = aliasResolution?.targetModelId ?? args.model;
-    if (aliasResolution !== null) {
-      applyChatRulesToGemini(payload, aliasResolution.rules);
-      ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
-    }
     const decision = await planGeminiRouting({ payload, candidates, store });
     if (decision.kind === 'failure') return renderGeminiFailure(decision.failure, 'generate');
 
@@ -75,28 +64,19 @@ export const geminiServe = {
 
   countTokens: async (args: GeminiServeCountTokensArgs): Promise<ExecuteResult<ProtocolFrame<GeminiStreamEvent>> | PlainResult> => {
     const { payload, ctx, store, headers } = args;
-    let enumerated;
-    try {
-      enumerated = await resolveModelCandidates({
-        upstreamIds: ctx.upstreamIds,
-        modelName: args.model,
-        // Gemini countTokens has no native upstream support; only providers
-        // exposing the Messages endpoint qualify because we translate Gemini
-        // → Messages and call Messages count_tokens upstream.
-        pickTarget: endpoints => endpoints.messages ? 'messages' : null,
-        scheduler: ctx.backgroundScheduler,
-        currentColo: ctx.currentColo,
-      });
-    } catch (error) {
-      if (error instanceof AliasNoTargetAvailableError) return renderGeminiFailure(aliasFailureFromError(error), 'countTokens');
-      throw error;
-    }
-    const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
+    const resolved = await resolveCandidatesAndApplyAlias({
+      ctx,
+      modelName: args.model,
+      // Gemini countTokens has no native upstream support; only providers
+      // exposing the Messages endpoint qualify because we translate Gemini
+      // → Messages and call Messages count_tokens upstream.
+      pickTarget: endpoints => endpoints.messages ? 'messages' : null,
+      applyAlias: resolution => applyChatRulesToGemini(payload, resolution.rules),
+      renderAliasFailure: failure => renderGeminiFailure(failure, 'countTokens'),
+    });
+    if (resolved.kind === 'failure') return resolved.result;
+    const { candidates, sawModel, failedUpstreams, aliasResolution } = resolved;
     const model = aliasResolution?.targetModelId ?? args.model;
-    if (aliasResolution !== null) {
-      applyChatRulesToGemini(payload, aliasResolution.rules);
-      ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
-    }
     const decision = await planGeminiRouting({ payload, candidates, store });
     if (decision.kind === 'failure') return renderGeminiFailure(decision.failure, 'countTokens');
 
diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts
index e4561d0d6..095ce7cb2 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve.ts
@@ -1,11 +1,9 @@
 import { messagesAttempt } from './attempt.ts';
 import { renderMessagesFailure } from './errors.ts';
 import { planMessagesRouting } from './routing.ts';
-import { ALIAS_RESPONSE_HEADER, applyChatRulesToMessages } from '../../model-aliases/apply.ts';
-import { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
-import { resolveModelCandidates } from '../../providers/registry.ts';
+import { applyChatRulesToMessages } from '../../model-aliases/apply.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import { aliasFailureFromError } from '../shared/errors.ts';
+import { resolveCandidatesAndApplyAlias } from '../shared/alias-prelude.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
@@ -25,32 +23,27 @@ export interface MessagesServeCountTokensArgs {
   readonly headers: Headers;
 }
 
+const applyAlias = (payload: MessagesPayload) => (resolution: { targetModelId: string; rules: Parameters<typeof applyChatRulesToMessages>[1] }) => {
+  payload.model = resolution.targetModelId;
+  applyChatRulesToMessages(payload, resolution.rules);
+};
+
 export const messagesServe = {
   generate: async (args: MessagesServeGenerateArgs): Promise<ExecuteResult<ProtocolFrame<MessagesStreamEvent>>> => {
     const { payload, ctx, store, headers } = args;
-    let enumerated;
-    try {
-      enumerated = await resolveModelCandidates({
-        upstreamIds: ctx.upstreamIds,
-        modelName: payload.model,
-        pickTarget: endpoints =>
-          endpoints.messages ? 'messages'
-            : endpoints.responses ? 'responses'
-              : endpoints.chatCompletions ? 'chat-completions'
-                : null,
-        scheduler: ctx.backgroundScheduler,
-        currentColo: ctx.currentColo,
-      });
-    } catch (error) {
-      if (error instanceof AliasNoTargetAvailableError) return renderMessagesFailure(aliasFailureFromError(error), 'generate');
-      throw error;
-    }
-    const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
-    if (aliasResolution !== null) {
-      payload.model = aliasResolution.targetModelId;
-      applyChatRulesToMessages(payload, aliasResolution.rules);
-      ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
-    }
+    const resolved = await resolveCandidatesAndApplyAlias({
+      ctx,
+      modelName: payload.model,
+      pickTarget: endpoints =>
+        endpoints.messages ? 'messages'
+          : endpoints.responses ? 'responses'
+            : endpoints.chatCompletions ? 'chat-completions'
+              : null,
+      applyAlias: applyAlias(payload),
+      renderAliasFailure: failure => renderMessagesFailure(failure, 'generate'),
+    });
+    if (resolved.kind === 'failure') return resolved.result;
+    const { candidates, sawModel, failedUpstreams } = resolved;
     const decision = await planMessagesRouting({ payload, candidates, store });
     if (decision.kind === 'failure') return renderMessagesFailure(decision.failure, 'generate');
 
@@ -72,25 +65,15 @@ export const messagesServe = {
 
   countTokens: async (args: MessagesServeCountTokensArgs): Promise<ExecuteResult<ProtocolFrame<MessagesStreamEvent>> | PlainResult> => {
     const { payload, ctx, store, headers } = args;
-    let enumerated;
-    try {
-      enumerated = await resolveModelCandidates({
-        upstreamIds: ctx.upstreamIds,
-        modelName: payload.model,
-        pickTarget: endpoints => endpoints.messages ? 'messages' : null,
-        scheduler: ctx.backgroundScheduler,
-        currentColo: ctx.currentColo,
-      });
-    } catch (error) {
-      if (error instanceof AliasNoTargetAvailableError) return renderMessagesFailure(aliasFailureFromError(error), 'countTokens');
-      throw error;
-    }
-    const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
-    if (aliasResolution !== null) {
-      payload.model = aliasResolution.targetModelId;
-      applyChatRulesToMessages(payload, aliasResolution.rules);
-      ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
-    }
+    const resolved = await resolveCandidatesAndApplyAlias({
+      ctx,
+      modelName: payload.model,
+      pickTarget: endpoints => endpoints.messages ? 'messages' : null,
+      applyAlias: applyAlias(payload),
+      renderAliasFailure: failure => renderMessagesFailure(failure, 'countTokens'),
+    });
+    if (resolved.kind === 'failure') return resolved.result;
+    const { candidates, sawModel, failedUpstreams } = resolved;
     const decision = await planMessagesRouting({ payload, candidates, store });
     if (decision.kind === 'failure') return renderMessagesFailure(decision.failure, 'countTokens');
 
diff --git a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
index 8a0bf1650..e3d6f48a4 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
@@ -1,10 +1,8 @@
 import { renderResponsesFailure } from './errors.ts';
 import type { StatefulResponsesStore } from './items/store.ts';
 import { planResponsesRouting } from './routing.ts';
-import { ALIAS_RESPONSE_HEADER, applyChatRulesToResponses } from '../../model-aliases/apply.ts';
-import { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
-import { resolveModelCandidates } from '../../providers/registry.ts';
-import { aliasFailureFromError } from '../shared/errors.ts';
+import { applyChatRulesToResponses } from '../../model-aliases/apply.ts';
+import { resolveCandidatesAndApplyAlias } from '../shared/alias-prelude.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ModelEndpoints, ProtocolFrame } from '@floway-dev/protocols/common';
 import type { ResponsesInputItem, ResponsesPayload, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
@@ -89,25 +87,18 @@ export const prepareResponsesServePlan = async (args: {
 }): Promise<ResponsesServePlan> => {
   const { payload, ctx, store, pickTarget } = args;
   const prepared = await expandPreviousResponseId(payload, store);
-  let enumerated;
-  try {
-    enumerated = await resolveModelCandidates({
-      upstreamIds: ctx.upstreamIds,
-      modelName: prepared.model,
-      pickTarget,
-      scheduler: ctx.backgroundScheduler,
-      currentColo: ctx.currentColo,
-    });
-  } catch (error) {
-    if (error instanceof AliasNoTargetAvailableError) return { kind: 'failure', result: renderResponsesFailure(aliasFailureFromError(error)) };
-    throw error;
-  }
-  const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
-  if (aliasResolution !== null) {
-    prepared.model = aliasResolution.targetModelId;
-    applyChatRulesToResponses(prepared, aliasResolution.rules);
-    ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
-  }
+  const resolved = await resolveCandidatesAndApplyAlias({
+    ctx,
+    modelName: prepared.model,
+    pickTarget,
+    applyAlias: resolution => {
+      prepared.model = resolution.targetModelId;
+      applyChatRulesToResponses(prepared, resolution.rules);
+    },
+    renderAliasFailure: failure => renderResponsesFailure(failure),
+  });
+  if (resolved.kind === 'failure') return { kind: 'failure', result: resolved.result };
+  const { candidates, sawModel, failedUpstreams } = resolved;
   const decision = await planResponsesRouting({ payload: prepared, candidates, store });
   if (decision.kind === 'failure') return { kind: 'failure', result: renderResponsesFailure(decision.failure) };
   // Stage the user-supplied input from the original payload — not the
diff --git a/packages/gateway/src/data-plane/chat/shared/alias-prelude.ts b/packages/gateway/src/data-plane/chat/shared/alias-prelude.ts
new file mode 100644
index 000000000..7c83dc92f
--- /dev/null
+++ b/packages/gateway/src/data-plane/chat/shared/alias-prelude.ts
@@ -0,0 +1,66 @@
+import { type ChatServeFailure, aliasFailureFromError } from './errors.ts';
+import type { GatewayCtx } from './gateway-ctx.ts';
+import { ALIAS_RESPONSE_HEADER } from '../../model-aliases/apply.ts';
+import { AliasNoTargetAvailableError, type AliasResolution } from '../../model-aliases/resolve.ts';
+import { resolveModelCandidates } from '../../providers/registry.ts';
+import type { ModelEndpoints } from '@floway-dev/protocols/common';
+import type { ChatTargetApi, ProviderCandidate } from '@floway-dev/provider';
+
+// Shared serve-side prelude every chat protocol runs before routing: resolve
+// candidates against the live registry, redirect through the alias resolver
+// when the inbound model id is an alias, stage the response header, and
+// hand the protocol's own callback the resolution so it can overlay rules
+// and (where the protocol mutates it) rewrite `payload.model`. The
+// `AliasNoTargetAvailableError` 404 is converted to whatever rendered
+// failure the protocol returns from its serve seam, so callers stay free
+// of the alias machinery.
+export interface ResolveCandidatesArgs<F> {
+  readonly ctx: GatewayCtx;
+  readonly modelName: string;
+  readonly pickTarget: (endpoints: ModelEndpoints) => ChatTargetApi | null;
+  // Invoked exactly when an alias matched; the callback overlays rules
+  // and (for protocols that mutate it) updates the inbound payload's
+  // model field. The response header is staged by this helper.
+  readonly applyAlias: (resolution: AliasResolution) => void;
+  // Renders this protocol's failure envelope from a ChatServeFailure.
+  // Used only on the alias-no-target path — every other failure mode
+  // is handled by the caller after this helper returns ok.
+  readonly renderAliasFailure: (failure: Extract<ChatServeFailure, { kind: 'alias-no-target-available' }>) => F;
+}
+
+export type ResolveCandidatesOk = {
+  readonly kind: 'ok';
+  readonly candidates: readonly ProviderCandidate[];
+  readonly sawModel: boolean;
+  readonly failedUpstreams: readonly string[];
+  readonly aliasResolution: AliasResolution | null;
+};
+
+export type ResolveCandidatesOutcome<F> =
+  | ResolveCandidatesOk
+  | { readonly kind: 'failure'; readonly result: F };
+
+export const resolveCandidatesAndApplyAlias = async <F>(args: ResolveCandidatesArgs<F>): Promise<ResolveCandidatesOutcome<F>> => {
+  const { ctx, modelName, pickTarget, applyAlias, renderAliasFailure } = args;
+  let enumerated;
+  try {
+    enumerated = await resolveModelCandidates({
+      upstreamIds: ctx.upstreamIds,
+      modelName,
+      pickTarget,
+      scheduler: ctx.backgroundScheduler,
+      currentColo: ctx.currentColo,
+    });
+  } catch (error) {
+    if (error instanceof AliasNoTargetAvailableError) {
+      return { kind: 'failure', result: renderAliasFailure(aliasFailureFromError(error)) };
+    }
+    throw error;
+  }
+  const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
+  if (aliasResolution !== null) {
+    applyAlias(aliasResolution);
+    ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
+  }
+  return { kind: 'ok', candidates, sawModel, failedUpstreams, aliasResolution };
+};

From 5d7e8e9de48457b19a7b16ce1c94b7624f74a6b6 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 03:03:33 +0800
Subject: [PATCH 145/170] refactor(web): dedupe PublicModel/ChatModelInfo from
 @floway-dev/protocols/common
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`apps/web/src/api/types.ts` hand-rolled `PublicModel`, `ChatModelInfo`,
`ModelLimits`, and `ModelEndpointInfo` alongside the canonical
definitions it already imported from `@floway-dev/protocols/common`.
The local `PublicModel` made required fields optional and embedded a
different `endpoints` shape (`Record<string, { url; doc? }>` vs the
canonical `ModelEndpoints` presence map) — silent drift waiting to
happen. Drop the local copies and re-export the canonical types.

`announced-metadata.ts` switches to `PublicModelLimits`. Four test
fixtures move to a shared `api/test-fixtures.ts` that supplies the
required-field defaults `ControlPlaneModel` now demands.
---
 apps/web/src/api/test-fixtures.ts             | 36 +++++++++++
 apps/web/src/api/types.ts                     | 61 +++----------------
 .../alias-edit/AliasEditDialog_test.ts        | 14 ++---
 .../alias-edit/AliasTargetRow_test.ts         |  9 +--
 .../alias-edit/announced-metadata.ts          |  6 +-
 .../components/alias-edit/warnings_test.ts    | 21 +------
 .../src/components/settings/AliasRow_test.ts  | 14 ++---
 .../settings/AliasesSettingsCard_test.ts      |  3 +-
 8 files changed, 62 insertions(+), 102 deletions(-)
 create mode 100644 apps/web/src/api/test-fixtures.ts

diff --git a/apps/web/src/api/test-fixtures.ts b/apps/web/src/api/test-fixtures.ts
new file mode 100644
index 000000000..bfb3ef46a
--- /dev/null
+++ b/apps/web/src/api/test-fixtures.ts
@@ -0,0 +1,36 @@
+// Test-only fixtures that satisfy the `@floway-dev/protocols/common` PublicModel
+// shape. Production rows always carry every required field — see the gateway's
+// `toPublicModel` and `toControlPlaneModel` — but tests want to fan out partials
+// without retyping `object` / `type` / `display_name` / `limits` / `endpoints`
+// every time. The factories merge `over` last so any field the test sets wins.
+
+import type { ControlPlaneModel } from './types.ts';
+
+const baseFields = (): Omit<ControlPlaneModel, 'id' | 'upstreams'> => ({
+  object: 'model',
+  type: 'model',
+  display_name: '',
+  limits: {},
+  kind: 'chat',
+  endpoints: { chatCompletions: {} },
+});
+
+export const buildRealModel = (over: Partial<ControlPlaneModel> & { id: string }): ControlPlaneModel => ({
+  ...baseFields(),
+  upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }],
+  ...over,
+});
+
+export const buildAliasModel = (over: Partial<ControlPlaneModel> & { id: string }): ControlPlaneModel => ({
+  ...baseFields(),
+  upstreams: [],
+  aliasedFrom: { name: over.id, kind: 'chat', selection: 'first-available', targets: [] },
+  ...over,
+});
+
+export const buildUnlistedModel = (over: Partial<ControlPlaneModel> & { id: string }): ControlPlaneModel => ({
+  ...baseFields(),
+  upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }],
+  unlisted: true,
+  ...over,
+});
diff --git a/apps/web/src/api/types.ts b/apps/web/src/api/types.ts
index 0bc06322e..80371f289 100644
--- a/apps/web/src/api/types.ts
+++ b/apps/web/src/api/types.ts
@@ -14,12 +14,17 @@ import type {
   ModelEndpoints,
   ModelKind,
   ModelPricing,
+  PublicModel,
+  PublicModelLimits,
 } from '@floway-dev/protocols/common';
 import type { AddressableForm, ModelPrefixConfig } from '@floway-dev/provider/model-prefix';
 
 export type { BillingDimension, ModelEndpointKey, ModelEndpoints, ModelKind, ModelPricing };
 export type { AddressableForm, ModelPrefixConfig };
-export type { AliasKind, AliasRules, AliasSelection, AliasTarget, AnnouncedMetadata, ChatAliasRules, ChatModelInfo, ModelAlias };
+export type {
+  AliasKind, AliasRules, AliasSelection, AliasTarget, AnnouncedMetadata, ChatAliasRules, ChatModelInfo, ModelAlias,
+  PublicModel, PublicModelLimits,
+};
 
 export type UpstreamProviderKind = 'custom' | 'azure' | 'copilot' | 'codex' | 'claude-code' | 'ollama';
 
@@ -47,7 +52,7 @@ export interface UpstreamModelConfig {
   kind: ModelKind;
   endpoints: ModelEndpoints;
   display_name?: string;
-  limits?: ModelLimits;
+  limits?: PublicModelLimits;
   cost?: ModelPricing;
   flagOverrides?: { enabled: boolean; values: Record<string, boolean> };
   chat?: UpstreamChatConfig;
@@ -66,7 +71,7 @@ export interface CustomRawModel {
   name?: string;
   created?: number;
   owned_by?: string;
-  limits?: ModelLimits;
+  limits?: PublicModelLimits;
   cost?: ModelPricing;
   kind?: ModelKind;
 }
@@ -331,56 +336,6 @@ export interface ApiKey {
   dump_retention_seconds: number | null;
 }
 
-export interface ModelEndpointInfo {
-  url: string;
-  doc?: string;
-}
-
-export interface ModelLimits {
-  max_context_window_tokens?: number;
-  max_prompt_tokens?: number;
-  max_output_tokens?: number;
-}
-
-export interface PublicModel {
-  id: string;
-  display_name?: string;
-  limits?: ModelLimits;
-  endpoints?: Record<string, ModelEndpointInfo>;
-  cost?: ModelPricing;
-  kind?: ModelKind;
-  // Chat-only capability metadata sourced from the upstream model config.
-  // Mirrored from `@floway-dev/protocols/common`'s ChatModelInfo so the
-  // dashboard can render rule warnings against the live catalog without
-  // pulling the full protocol shape.
-  chat?: {
-    modalities?: { input: readonly ('text' | 'image')[]; output: readonly ('text' | 'image')[] };
-    reasoning?: {
-      effort?: { supported: readonly string[]; default: string };
-      budget_tokens?: { min?: number; max?: number };
-      adaptive?: boolean;
-      mandatory?: boolean;
-    };
-  };
-  // Alias provenance — present only on `/api/models` entries the gateway
-  // synthesized from an operator-defined alias. The dashboard uses this
-  // both to render alias-of badges on the Models page and to identify
-  // alias rows when computing the target-id suggestion list.
-  aliasedFrom?: {
-    name: string;
-    kind: AliasKind;
-    selection: AliasSelection;
-    targets: AliasTarget[];
-  };
-  // Sidecar flag carried only on entries surfaced via
-  // `/api/models?include_unlisted=true`: ids the data plane accepts via a
-  // `modelPrefix.addressable` alternate or a provider-side redirect but
-  // that do not appear in the default catalog. Default rows omit the
-  // field; the alias dialog reads this surface so its target-id combobox
-  // suggests every id the resolver would accept.
-  unlisted?: true;
-}
-
 export interface ControlPlaneModel extends PublicModel {
   upstreams: { kind: UpstreamProviderKind; id: string; name: string }[];
 }
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
index 0271bd591..79c8ae07c 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
+++ b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
@@ -2,6 +2,7 @@ import { mount } from '@vue/test-utils';
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 import { nextTick, ref } from 'vue';
 
+import { buildRealModel } from '../../api/test-fixtures.ts';
 import type { ChatAliasRules, ControlPlaneModel, ModelAlias } from '../../api/types.ts';
 
 // Mock the API client + composables so the dialog mounts without hitting the
@@ -39,11 +40,8 @@ vi.mock('../../api/client.ts', () => ({
 // Import after mocks are registered.
 const { default: AliasEditDialog } = await import('./AliasEditDialog.vue');
 
-const realModel = (id: string, display?: string): ControlPlaneModel => ({
-  id,
-  display_name: display,
-  upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }],
-});
+const realModel = (id: string, display?: string): ControlPlaneModel =>
+  buildRealModel(display !== undefined ? { id, display_name: display } : { id });
 
 const baseAlias = (over: Partial<ModelAlias> & { name: string }): ModelAlias => ({
   kind: 'chat',
@@ -215,13 +213,11 @@ describe('AliasEditDialog', () => {
 
   it('announced metadata: toggling override on switches the editor into manual (enabled) mode and seeds it from the computed view', async () => {
     modelsRef.value = [
-      {
+      buildRealModel({
         id: 'gpt-5',
         display_name: 'GPT 5',
-        kind: 'chat',
-        upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }],
         chat: { reasoning: { effort: { supported: ['low', 'medium'], default: 'medium' } } },
-      },
+      }),
     ];
     const w = mount(AliasEditDialog, {
       props: { open: true, record: baseAlias({ name: 'a', targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }] }) },
diff --git a/apps/web/src/components/alias-edit/AliasTargetRow_test.ts b/apps/web/src/components/alias-edit/AliasTargetRow_test.ts
index fd774c35f..744667e64 100644
--- a/apps/web/src/components/alias-edit/AliasTargetRow_test.ts
+++ b/apps/web/src/components/alias-edit/AliasTargetRow_test.ts
@@ -3,6 +3,7 @@ import { describe, expect, it } from 'vitest';
 import { nextTick } from 'vue';
 
 import AliasTargetRow from './AliasTargetRow.vue';
+import { buildRealModel } from '../../api/test-fixtures.ts';
 import type { AliasTarget, ChatAliasRules, ControlPlaneModel } from '../../api/types.ts';
 
 const target = (over: Partial<AliasTarget> = {}): AliasTarget => ({
@@ -11,12 +12,8 @@ const target = (over: Partial<AliasTarget> = {}): AliasTarget => ({
   ...over,
 });
 
-const realModel = (id: string, chat?: ControlPlaneModel['chat']): ControlPlaneModel => ({
-  id,
-  upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }],
-  kind: 'chat',
-  ...(chat ? { chat } : {}),
-});
+const realModel = (id: string, chat?: ControlPlaneModel['chat']): ControlPlaneModel =>
+  buildRealModel(chat ? { id, chat } : { id });
 
 const mountRow = (props: Partial<InstanceType<typeof AliasTargetRow>['$props']>) => mount(AliasTargetRow, {
   props: {
diff --git a/apps/web/src/components/alias-edit/announced-metadata.ts b/apps/web/src/components/alias-edit/announced-metadata.ts
index 2620b0613..b024c92ec 100644
--- a/apps/web/src/components/alias-edit/announced-metadata.ts
+++ b/apps/web/src/components/alias-edit/announced-metadata.ts
@@ -11,7 +11,7 @@
 // hand. The backend stays authoritative — what `/v1/models` reports
 // is what the gateway computes there, not what this helper emits.
 
-import type { AliasTarget, AnnouncedMetadata, ChatAliasRules, ChatModelInfo, ControlPlaneModel, ModelLimits } from '../../api/types.ts';
+import type { AliasTarget, AnnouncedMetadata, ChatAliasRules, ChatModelInfo, ControlPlaneModel, PublicModelLimits } from '../../api/types.ts';
 
 const chatRules = (target: AliasTarget): ChatAliasRules => target.rules as ChatAliasRules;
 
@@ -101,9 +101,9 @@ const intersectChat = (chats: readonly ChatModelInfo[]): ChatModelInfo | undefin
 
 const LIMIT_KEYS = ['max_context_window_tokens', 'max_prompt_tokens', 'max_output_tokens'] as const;
 
-const intersectLimits = (limitsList: readonly ModelLimits[]): ModelLimits => {
+const intersectLimits = (limitsList: readonly PublicModelLimits[]): PublicModelLimits => {
   if (limitsList.length === 0) return {};
-  const result: ModelLimits = {};
+  const result: PublicModelLimits = {};
   for (const key of LIMIT_KEYS) {
     const values = limitsList.map(l => l[key]).filter((v): v is number => v !== undefined);
     if (values.length === limitsList.length) result[key] = Math.min(...values);
diff --git a/apps/web/src/components/alias-edit/warnings_test.ts b/apps/web/src/components/alias-edit/warnings_test.ts
index 261f5bb99..cc5501669 100644
--- a/apps/web/src/components/alias-edit/warnings_test.ts
+++ b/apps/web/src/components/alias-edit/warnings_test.ts
@@ -1,28 +1,9 @@
 import { describe, expect, it } from 'vitest';
 
 import { computeAliasLevelWarnings, computeModelWarnings, computeRuleWarnings, findCatalogModel, type AliasView } from './warnings.ts';
+import { buildAliasModel as aliasModel, buildRealModel as realModel, buildUnlistedModel as unlistedModel } from '../../api/test-fixtures.ts';
 import type { ControlPlaneModel } from '../../api/types.ts';
 
-const realModel = (over: Partial<ControlPlaneModel> & { id: string }): ControlPlaneModel => ({
-  upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }],
-  kind: 'chat',
-  ...over,
-});
-
-const aliasModel = (over: Partial<ControlPlaneModel> & { id: string }): ControlPlaneModel => ({
-  upstreams: [],
-  kind: 'chat',
-  aliasedFrom: { name: over.id, kind: 'chat', selection: 'first-available', targets: [] },
-  ...over,
-});
-
-const unlistedModel = (over: Partial<ControlPlaneModel> & { id: string }): ControlPlaneModel => ({
-  upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }],
-  kind: 'chat',
-  unlisted: true,
-  ...over,
-});
-
 const view = (name: string, ids: readonly string[]): AliasView => ({
   name,
   targets: ids.map(id => ({ target_model_id: id })),
diff --git a/apps/web/src/components/settings/AliasRow_test.ts b/apps/web/src/components/settings/AliasRow_test.ts
index c527dc5c2..fa01cb5cc 100644
--- a/apps/web/src/components/settings/AliasRow_test.ts
+++ b/apps/web/src/components/settings/AliasRow_test.ts
@@ -2,6 +2,7 @@ import { mount } from '@vue/test-utils';
 import { describe, expect, it } from 'vitest';
 
 import AliasRow from './AliasRow.vue';
+import { buildAliasModel, buildRealModel } from '../../api/test-fixtures.ts';
 import type { ChatAliasRules, ControlPlaneModel, ModelAlias } from '../../api/types.ts';
 
 const alias = (over: Partial<ModelAlias> & { name: string }): ModelAlias => ({
@@ -17,17 +18,10 @@ const alias = (over: Partial<ModelAlias> & { name: string }): ModelAlias => ({
   ...over,
 });
 
-const realModel = (id: string, display?: string): ControlPlaneModel => ({
-  id,
-  display_name: display,
-  upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }],
-});
+const realModel = (id: string, display?: string): ControlPlaneModel =>
+  buildRealModel(display !== undefined ? { id, display_name: display } : { id });
 
-const aliasModel = (id: string): ControlPlaneModel => ({
-  id,
-  upstreams: [],
-  aliasedFrom: { name: id, kind: 'chat', selection: 'first-available', targets: [] },
-});
+const aliasModel = (id: string): ControlPlaneModel => buildAliasModel({ id });
 
 describe('AliasRow', () => {
   it('renders display_name when set; otherwise falls back to the alias id', () => {
diff --git a/apps/web/src/components/settings/AliasesSettingsCard_test.ts b/apps/web/src/components/settings/AliasesSettingsCard_test.ts
index ea0ba99d8..5fbf81a75 100644
--- a/apps/web/src/components/settings/AliasesSettingsCard_test.ts
+++ b/apps/web/src/components/settings/AliasesSettingsCard_test.ts
@@ -2,6 +2,7 @@ import { mount } from '@vue/test-utils';
 import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 import { nextTick, ref } from 'vue';
 
+import { buildRealModel } from '../../api/test-fixtures.ts';
 import type { ChatAliasRules, ControlPlaneModel, ModelAlias } from '../../api/types.ts';
 
 const aliasesRef = ref<ModelAlias[]>([]);
@@ -52,7 +53,7 @@ beforeEach(() => {
   // no-target alias-level warning stays quiet by default — every test
   // that wants the warning sets `modelsRef.value = []` itself.
   modelsRef.value = [
-    { id: 'gpt-5', kind: 'chat', upstreams: [{ id: 'u1', name: 'U1', kind: 'custom' }] },
+    buildRealModel({ id: 'gpt-5' }),
   ];
   aliasErrorRef.value = null;
   deleteSpy.mockClear();

From 28da1a2d343b820e9c478658b75a670ef4546f91 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 03:04:46 +0800
Subject: [PATCH 146/170] refactor(aliases): ModelAliasWireInput =
 Omit<ModelAlias, server-managed>

The wire-input shape was hand-rolled field-by-field, so the next column
added to `ModelAlias` required two edits. Derive it directly from
`ModelAlias`: strip the repo-stamped `created_at` / `updated_at`, and
keep `sort_order` as an optional field that `nextSortOrder` defaults
when the body omits it.
---
 .../control-plane/model-aliases/serialize.ts  | 20 +++++++++----------
 1 file changed, 9 insertions(+), 11 deletions(-)

diff --git a/packages/gateway/src/control-plane/model-aliases/serialize.ts b/packages/gateway/src/control-plane/model-aliases/serialize.ts
index e2a10849d..cd83de649 100644
--- a/packages/gateway/src/control-plane/model-aliases/serialize.ts
+++ b/packages/gateway/src/control-plane/model-aliases/serialize.ts
@@ -2,7 +2,7 @@
 // shape (`ModelAlias`) lives in `@floway-dev/protocols/common`.
 
 import type { ModelAliasRecord } from '../../repo/types.ts';
-import type { AliasKind, AliasSelection, AliasTarget, AnnouncedMetadata, ModelAlias } from '@floway-dev/protocols/common';
+import type { ModelAlias } from '@floway-dev/protocols/common';
 
 export const recordToWire = (record: ModelAliasRecord): ModelAlias => ({
   name: record.name,
@@ -17,16 +17,14 @@ export const recordToWire = (record: ModelAliasRecord): ModelAlias => ({
   updated_at: record.updatedAt,
 });
 
-export interface ModelAliasWireInput {
-  name: string;
-  kind: AliasKind;
-  selection: AliasSelection;
-  display_name: string | null;
-  visible_in_models_list: boolean;
-  targets: AliasTarget[];
-  announced_metadata: AnnouncedMetadata | null;
-  sort_order?: number;
-}
+// Server-managed timestamps (`created_at` / `updated_at`, stamped by the repo)
+// are stripped so create/update bodies cannot dictate them. `sort_order` is
+// re-added as optional: callers may supply it, and `nextSortOrder` fills the
+// default when it is omitted. Every other field rides on `ModelAlias`, so a
+// new column in the wire DTO only requires editing one place.
+export type ModelAliasWireInput =
+  & Omit<ModelAlias, 'sort_order' | 'created_at' | 'updated_at'>
+  & { sort_order?: number };
 
 export const wireToRecord = (
   wire: ModelAliasWireInput,

From c03079adac608d14bd3dfa426cc1c526166287ac Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 03:10:53 +0800
Subject: [PATCH 147/170] refactor(aliases): collapse AliasRules to
 ChatAliasRules, drop the casts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`type AliasRules = ChatAliasRules | Record<string, never>` was a union
without a runtime discriminator — every consumer narrowed via an
unsafe `as ChatAliasRules` cast (a dozen sites across the gateway and
the SPA). The empty-object arm is already satisfied by
`ChatAliasRules` because every field is optional, so the union added
no real safety. Collapse to `type AliasRules = ChatAliasRules` and
read fields directly.

Also clears the Messages service-tier sibling field (`speed` vs
`service_tier`) on every overlay branch so the upstream never sees
both with conflicting values.
---
 .../components/alias-edit/AliasEditDialog.vue |  4 +-
 .../alias-edit/AliasEditDialog_test.ts        | 14 ++---
 .../components/alias-edit/AliasTargetRow.vue  | 33 +++++-----
 .../alias-edit/AliasTargetRow_test.ts         |  6 +-
 .../src/components/settings/AliasRow_test.ts  | 22 +++----
 .../settings/AliasesSettingsCard_test.ts      |  4 +-
 .../src/data-plane/model-aliases/apply.ts     | 62 +++++++++----------
 .../src/data-plane/models/alias-listing.ts    | 10 +--
 packages/protocols/src/common/aliases.ts      | 25 ++++----
 9 files changed, 83 insertions(+), 97 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasEditDialog.vue b/apps/web/src/components/alias-edit/AliasEditDialog.vue
index a3c29313c..3a586514a 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog.vue
+++ b/apps/web/src/components/alias-edit/AliasEditDialog.vue
@@ -5,7 +5,7 @@ import AliasTargetRow from './AliasTargetRow.vue';
 import { computeAnnouncedMetadata } from './announced-metadata.ts';
 import { computeAliasLevelWarnings, realModelIdsOfKind } from './warnings.ts';
 import { callApi, useApi } from '../../api/client.ts';
-import type { AliasKind, AliasSelection, AliasTarget, AnnouncedMetadata, ChatAliasRules, ModelAlias } from '../../api/types.ts';
+import type { AliasKind, AliasSelection, AliasTarget, AnnouncedMetadata, ModelAlias } from '../../api/types.ts';
 import { useModelAliases } from '../../composables/useModelAliases.ts';
 import { useRawModelsStore } from '../../composables/useModels.ts';
 import ChatMetadataEditor from '../shared/ChatMetadataEditor.vue';
@@ -30,7 +30,7 @@ const mode = computed<'create' | 'edit'>(() => (props.record ? 'edit' : 'create'
 
 // Switching kind discards rule state — a chat-only rule must not survive a
 // switch into embedding/image.
-const emptyRulesFor = (k: AliasKind): AliasTarget['rules'] => (k === 'chat' ? {} as ChatAliasRules : {} as Record<string, never>);
+const emptyRulesFor = (k: AliasKind): AliasTarget['rules'] => (k === 'chat' ? {} : {} as Record<string, never>);
 
 const blankTarget = (k: AliasKind): AliasTarget => ({ target_model_id: '', rules: emptyRulesFor(k) });
 
diff --git a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
index 79c8ae07c..7757d0475 100644
--- a/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
+++ b/apps/web/src/components/alias-edit/AliasEditDialog_test.ts
@@ -3,7 +3,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 import { nextTick, ref } from 'vue';
 
 import { buildRealModel } from '../../api/test-fixtures.ts';
-import type { ChatAliasRules, ControlPlaneModel, ModelAlias } from '../../api/types.ts';
+import type { ControlPlaneModel, ModelAlias } from '../../api/types.ts';
 
 // Mock the API client + composables so the dialog mounts without hitting the
 // network. The composables expose `ref`-based state — return the same shape
@@ -48,7 +48,7 @@ const baseAlias = (over: Partial<ModelAlias> & { name: string }): ModelAlias =>
   selection: 'first-available',
   display_name: null,
   visible_in_models_list: true,
-  targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }],
+  targets: [{ target_model_id: 'gpt-5', rules: {} }],
   announced_metadata: null,
   sort_order: 0,
   created_at: '2026-01-01T00:00:00Z',
@@ -98,7 +98,7 @@ describe('AliasEditDialog', () => {
 
   it('expands the chat rule body for chat aliases; the row toggle is disabled for non-chat aliases', async () => {
     const chat = mount(AliasEditDialog, {
-      props: { open: true, record: baseAlias({ name: 'a', targets: [{ target_model_id: 'gpt-5', rules: { reasoning: { effort: 'low' } } as ChatAliasRules }] }) },
+      props: { open: true, record: baseAlias({ name: 'a', targets: [{ target_model_id: 'gpt-5', rules: { reasoning: { effort: 'low' } } }] }) },
       attachTo: document.body,
     });
     await nextTick();
@@ -127,7 +127,7 @@ describe('AliasEditDialog', () => {
     const w = mount(AliasEditDialog, {
       props: {
         open: true,
-        record: baseAlias({ name: '', targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }] }),
+        record: baseAlias({ name: '', targets: [{ target_model_id: 'gpt-5', rules: {} }] }),
       },
       attachTo: document.body,
     });
@@ -195,7 +195,7 @@ describe('AliasEditDialog', () => {
 
   it('announced metadata: override off → editor renders in auto (read-only) mode', async () => {
     const w = mount(AliasEditDialog, {
-      props: { open: true, record: baseAlias({ name: 'a', targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }] }) },
+      props: { open: true, record: baseAlias({ name: 'a', targets: [{ target_model_id: 'gpt-5', rules: {} }] }) },
       attachTo: document.body,
     });
     await nextTick();
@@ -220,7 +220,7 @@ describe('AliasEditDialog', () => {
       }),
     ];
     const w = mount(AliasEditDialog, {
-      props: { open: true, record: baseAlias({ name: 'a', targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }] }) },
+      props: { open: true, record: baseAlias({ name: 'a', targets: [{ target_model_id: 'gpt-5', rules: {} }] }) },
       attachTo: document.body,
     });
     await nextTick();
@@ -241,7 +241,7 @@ describe('AliasEditDialog', () => {
 
   it('announced metadata: toggling override off restores auto (read-only) mode', async () => {
     const w = mount(AliasEditDialog, {
-      props: { open: true, record: baseAlias({ name: 'a', targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }] }) },
+      props: { open: true, record: baseAlias({ name: 'a', targets: [{ target_model_id: 'gpt-5', rules: {} }] }) },
       attachTo: document.body,
     });
     await nextTick();
diff --git a/apps/web/src/components/alias-edit/AliasTargetRow.vue b/apps/web/src/components/alias-edit/AliasTargetRow.vue
index 9cb008bdc..3cb9f39d3 100644
--- a/apps/web/src/components/alias-edit/AliasTargetRow.vue
+++ b/apps/web/src/components/alias-edit/AliasTargetRow.vue
@@ -36,24 +36,21 @@ const targetId = computed({
   set: v => { target.value = { ...target.value, target_model_id: v }; },
 });
 
-// Read-only view of the rules as `ChatAliasRules`. The template gates this
-// to the chat branch; setters always clone the rules so the v-model emit
-// fires and the parent's targets array stays referentially up to date.
-const chatRules = computed<ChatAliasRules>(() => target.value.rules as ChatAliasRules);
-
+// Setters always clone the rules object so the v-model emit fires and the
+// parent's targets array stays referentially up to date.
 const setRules = (next: ChatAliasRules) => { target.value = { ...target.value, rules: next }; };
 
 const patchReasoning = (patch: Partial<NonNullable<ChatAliasRules['reasoning']>>) => {
-  const current = chatRules.value.reasoning ?? {};
+  const current = target.value.rules.reasoning ?? {};
   const next = { ...current, ...patch };
   for (const k of Object.keys(patch) as (keyof typeof patch)[]) {
     if (patch[k] === undefined) delete (next as Record<string, unknown>)[k];
   }
   if (Object.keys(next).length === 0) {
-    const { reasoning: _, ...rest } = chatRules.value;
+    const { reasoning: _, ...rest } = target.value.rules;
     setRules(rest);
   } else {
-    setRules({ ...chatRules.value, reasoning: next });
+    setRules({ ...target.value.rules, reasoning: next });
   }
 };
 
@@ -61,13 +58,13 @@ const setEffort = (raw: string) => patchReasoning({ effort: raw === '' ? undefin
 const setSummary = (raw: string) => patchReasoning({ summary: raw === '' ? undefined : raw });
 const setAdaptive = (on: boolean | undefined) => patchReasoning({ adaptive: on === true ? true : undefined });
 const setVerbosity = (raw: string) => {
-  const next = { ...chatRules.value };
+  const next = { ...target.value.rules };
   if (raw === '') delete next.verbosity;
   else next.verbosity = raw;
   setRules(next);
 };
 const setServiceTier = (raw: string) => {
-  const next = { ...chatRules.value };
+  const next = { ...target.value.rules };
   if (raw === '') delete next.serviceTier;
   else next.serviceTier = raw;
   setRules(next);
@@ -79,8 +76,8 @@ const setServiceTier = (raw: string) => {
 // when the parsed number is a finite integer. The watch syncs the input
 // back to the parent's value when the parent resets the rule object (e.g.
 // the dialog switches `kind` and re-initialises every target row's rules).
-const budgetText = ref(chatRules.value.reasoning?.budget_tokens === undefined ? '' : String(chatRules.value.reasoning.budget_tokens));
-watch(() => chatRules.value.reasoning?.budget_tokens, parsed => {
+const budgetText = ref(target.value.rules.reasoning?.budget_tokens === undefined ? '' : String(target.value.rules.reasoning.budget_tokens));
+watch(() => target.value.rules.reasoning?.budget_tokens, parsed => {
   const next = parsed === undefined ? '' : String(parsed);
   if (next !== budgetText.value.trim()) budgetText.value = next;
 });
@@ -103,7 +100,7 @@ const SERVICE_TIER_ITEMS = ['default', 'flex', 'priority', 'scale', 'fast'];
 
 const catalog = computed(() => findCatalogModel(props.models, target.value.target_model_id));
 const modelWarnings = computed(() => computeModelWarnings(target.value.target_model_id, catalog.value, props.kind));
-const ruleWarnings = computed(() => computeRuleWarnings(chatRules.value, catalog.value));
+const ruleWarnings = computed(() => computeRuleWarnings(target.value.rules, catalog.value));
 const warningFor = (field: string) => ruleWarnings.value.find(w => w.field === field)?.message;
 const modelWarningTooltip = computed(() => modelWarnings.value.join('\n'));
 </script>
@@ -183,7 +180,7 @@ const modelWarningTooltip = computed(() => modelWarnings.value.join('\n'));
         <div>
           <label class="mb-1.5 block text-xs font-medium text-gray-500">Reasoning effort</label>
           <Combobox
-            :model-value="chatRules.reasoning?.effort ?? ''"
+            :model-value="target.rules.reasoning?.effort ?? ''"
             :items="EFFORT_ITEMS"
             placeholder="e.g. low"
             @update:model-value="setEffort"
@@ -208,7 +205,7 @@ const modelWarningTooltip = computed(() => modelWarnings.value.join('\n'));
           <label class="mb-1.5 block text-xs font-medium text-gray-500">Adaptive reasoning</label>
           <div class="flex h-9 items-center gap-2">
             <Switch
-              :model-value="chatRules.reasoning?.adaptive === true"
+              :model-value="target.rules.reasoning?.adaptive === true"
               @update:model-value="setAdaptive"
             />
             <span class="text-sm text-gray-300">Enable</span>
@@ -219,7 +216,7 @@ const modelWarningTooltip = computed(() => modelWarnings.value.join('\n'));
         <div>
           <label class="mb-1.5 block text-xs font-medium text-gray-500">Reasoning summary</label>
           <Combobox
-            :model-value="chatRules.reasoning?.summary ?? ''"
+            :model-value="target.rules.reasoning?.summary ?? ''"
             :items="SUMMARY_ITEMS"
             placeholder="e.g. auto"
             @update:model-value="setSummary"
@@ -230,7 +227,7 @@ const modelWarningTooltip = computed(() => modelWarnings.value.join('\n'));
         <div>
           <label class="mb-1.5 block text-xs font-medium text-gray-500">Verbosity</label>
           <Combobox
-            :model-value="chatRules.verbosity ?? ''"
+            :model-value="target.rules.verbosity ?? ''"
             :items="VERBOSITY_ITEMS"
             placeholder="e.g. medium"
             @update:model-value="setVerbosity"
@@ -241,7 +238,7 @@ const modelWarningTooltip = computed(() => modelWarnings.value.join('\n'));
         <div>
           <label class="mb-1.5 block text-xs font-medium text-gray-500">Service tier</label>
           <Combobox
-            :model-value="chatRules.serviceTier ?? ''"
+            :model-value="target.rules.serviceTier ?? ''"
             :items="SERVICE_TIER_ITEMS"
             placeholder="e.g. default"
             @update:model-value="setServiceTier"
diff --git a/apps/web/src/components/alias-edit/AliasTargetRow_test.ts b/apps/web/src/components/alias-edit/AliasTargetRow_test.ts
index 744667e64..c310c7127 100644
--- a/apps/web/src/components/alias-edit/AliasTargetRow_test.ts
+++ b/apps/web/src/components/alias-edit/AliasTargetRow_test.ts
@@ -4,11 +4,11 @@ import { nextTick } from 'vue';
 
 import AliasTargetRow from './AliasTargetRow.vue';
 import { buildRealModel } from '../../api/test-fixtures.ts';
-import type { AliasTarget, ChatAliasRules, ControlPlaneModel } from '../../api/types.ts';
+import type { AliasTarget, ControlPlaneModel } from '../../api/types.ts';
 
 const target = (over: Partial<AliasTarget> = {}): AliasTarget => ({
   target_model_id: 'gpt-5',
-  rules: {} as ChatAliasRules,
+  rules: {},
   ...over,
 });
 
@@ -77,7 +77,7 @@ describe('AliasTargetRow', () => {
 
   it('renders a rule-level warning under reasoning.effort when the target does not advertise it', async () => {
     const w = mountRow({
-      modelValue: { target_model_id: 'gpt-5', rules: { reasoning: { effort: 'xhigh' } } as ChatAliasRules },
+      modelValue: { target_model_id: 'gpt-5', rules: { reasoning: { effort: 'xhigh' } } },
       models: [realModel('gpt-5', { reasoning: { effort: { supported: ['low', 'medium'], default: 'medium' } } })],
     });
     await w.find('button[aria-label="Toggle target row"]').trigger('click');
diff --git a/apps/web/src/components/settings/AliasRow_test.ts b/apps/web/src/components/settings/AliasRow_test.ts
index fa01cb5cc..0814ca80c 100644
--- a/apps/web/src/components/settings/AliasRow_test.ts
+++ b/apps/web/src/components/settings/AliasRow_test.ts
@@ -3,14 +3,14 @@ import { describe, expect, it } from 'vitest';
 
 import AliasRow from './AliasRow.vue';
 import { buildAliasModel, buildRealModel } from '../../api/test-fixtures.ts';
-import type { ChatAliasRules, ControlPlaneModel, ModelAlias } from '../../api/types.ts';
+import type { ControlPlaneModel, ModelAlias } from '../../api/types.ts';
 
 const alias = (over: Partial<ModelAlias> & { name: string }): ModelAlias => ({
   kind: 'chat',
   selection: 'first-available',
   display_name: null,
   visible_in_models_list: true,
-  targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }],
+  targets: [{ target_model_id: 'gpt-5', rules: {} }],
   announced_metadata: null,
   sort_order: 0,
   created_at: '2026-01-01T00:00:00Z',
@@ -32,7 +32,7 @@ describe('AliasRow', () => {
     // as the mono pill next to it — the chat-playground idiom).
     const single = mount(AliasRow, {
       props: {
-        alias: alias({ name: 'a', display_name: null, targets: [{ target_model_id: 'gpt-5', rules: { reasoning: { effort: 'low' } } as ChatAliasRules }] }),
+        alias: alias({ name: 'a', display_name: null, targets: [{ target_model_id: 'gpt-5', rules: { reasoning: { effort: 'low' } } }] }),
         models: [],
       },
     });
@@ -44,8 +44,8 @@ describe('AliasRow', () => {
           name: 'gizmo',
           display_name: null,
           targets: [
-            { target_model_id: 'gpt-5', rules: {} as ChatAliasRules },
-            { target_model_id: 'claude', rules: {} as ChatAliasRules },
+            { target_model_id: 'gpt-5', rules: {} },
+            { target_model_id: 'claude', rules: {} },
           ],
         }),
         models: [],
@@ -62,8 +62,8 @@ describe('AliasRow', () => {
           selection: 'random',
           visible_in_models_list: false,
           targets: [
-            { target_model_id: 'a', rules: {} as ChatAliasRules },
-            { target_model_id: 'b', rules: {} as ChatAliasRules },
+            { target_model_id: 'a', rules: {} },
+            { target_model_id: 'b', rules: {} },
           ],
         }),
         models: [],
@@ -92,16 +92,16 @@ describe('AliasRow', () => {
   it('renders the alias-level warning icon only when the shadow warning fires', () => {
     const catalog = [realModel('gpt-5'), realModel('plain')];
 
-    const noShadow = mount(AliasRow, { props: { alias: alias({ name: 'unique', targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }] }), models: catalog } });
+    const noShadow = mount(AliasRow, { props: { alias: alias({ name: 'unique', targets: [{ target_model_id: 'gpt-5', rules: {} }] }), models: catalog } });
     expect(noShadow.find('span[aria-label="Alias warning"]').exists()).toBe(false);
 
-    const shadow = mount(AliasRow, { props: { alias: alias({ name: 'gpt-5', targets: [{ target_model_id: 'plain', rules: {} as ChatAliasRules }] }), models: catalog } });
+    const shadow = mount(AliasRow, { props: { alias: alias({ name: 'gpt-5', targets: [{ target_model_id: 'plain', rules: {} }] }), models: catalog } });
     expect(shadow.find('span[aria-label="Alias warning"]').exists()).toBe(true);
 
     // Seed pattern (target references shadowed id) suppresses the warning.
     const seeded = mount(AliasRow, {
       props: {
-        alias: alias({ name: 'gpt-5', targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }, { target_model_id: 'plain', rules: {} as ChatAliasRules }] }),
+        alias: alias({ name: 'gpt-5', targets: [{ target_model_id: 'gpt-5', rules: {} }, { target_model_id: 'plain', rules: {} }] }),
         models: catalog,
       },
     });
@@ -110,7 +110,7 @@ describe('AliasRow', () => {
     // An alias-name collision against another alias doesn't shadow (only real-model collisions do).
     const aliasCollision = mount(AliasRow, {
       props: {
-        alias: alias({ name: 'auto-review', targets: [{ target_model_id: 'plain', rules: {} as ChatAliasRules }] }),
+        alias: alias({ name: 'auto-review', targets: [{ target_model_id: 'plain', rules: {} }] }),
         models: [aliasModel('auto-review'), realModel('plain')],
       },
     });
diff --git a/apps/web/src/components/settings/AliasesSettingsCard_test.ts b/apps/web/src/components/settings/AliasesSettingsCard_test.ts
index 5fbf81a75..0a5cf7683 100644
--- a/apps/web/src/components/settings/AliasesSettingsCard_test.ts
+++ b/apps/web/src/components/settings/AliasesSettingsCard_test.ts
@@ -3,7 +3,7 @@ import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest';
 import { nextTick, ref } from 'vue';
 
 import { buildRealModel } from '../../api/test-fixtures.ts';
-import type { ChatAliasRules, ControlPlaneModel, ModelAlias } from '../../api/types.ts';
+import type { ControlPlaneModel, ModelAlias } from '../../api/types.ts';
 
 const aliasesRef = ref<ModelAlias[]>([]);
 const modelsRef = ref<ControlPlaneModel[]>([]);
@@ -39,7 +39,7 @@ const baseAlias = (over: Partial<ModelAlias> & { name: string }): ModelAlias =>
   selection: 'first-available',
   display_name: null,
   visible_in_models_list: true,
-  targets: [{ target_model_id: 'gpt-5', rules: {} as ChatAliasRules }],
+  targets: [{ target_model_id: 'gpt-5', rules: {} }],
   announced_metadata: null,
   sort_order: 0,
   created_at: '2026-01-01T00:00:00Z',
diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts
index 6f89d822d..084af50cb 100644
--- a/packages/gateway/src/data-plane/model-aliases/apply.ts
+++ b/packages/gateway/src/data-plane/model-aliases/apply.ts
@@ -1,11 +1,8 @@
 // Per-protocol rule overlay. Alias rules overwrite IR fields they name;
-// fields the target IR cannot express are silently dropped. The functions
-// accept the resolver's wide `AliasRules` union and narrow internally —
-// non-chat aliases carry an empty rules object so the chat-only fields are
-// all undefined and the overlay is a no-op.
+// fields the target IR cannot express are silently dropped.
 
 import type { ChatCompletionsPayload } from '@floway-dev/protocols/chat-completions';
-import type { AliasRules, ChatAliasRules } from '@floway-dev/protocols/common';
+import type { AliasRules } from '@floway-dev/protocols/common';
 import type { GeminiPayload } from '@floway-dev/protocols/gemini';
 import type { MessagesPayload } from '@floway-dev/protocols/messages';
 import type { ResponsesPayload } from '@floway-dev/protocols/responses';
@@ -15,28 +12,24 @@ import type { ResponsesPayload } from '@floway-dev/protocols/responses';
 // "upstream saw Y" via this header.
 export const ALIAS_RESPONSE_HEADER = 'x-floway-alias';
 
-const asChat = (rules: AliasRules): ChatAliasRules => rules as ChatAliasRules;
-
-const hasReasoning = (rules: ChatAliasRules): rules is ChatAliasRules & { reasoning: NonNullable<ChatAliasRules['reasoning']> } =>
+const hasReasoning = (rules: AliasRules): rules is AliasRules & { reasoning: NonNullable<AliasRules['reasoning']> } =>
   rules.reasoning !== undefined;
 
 export const applyChatRulesToChatCompletions = (body: ChatCompletionsPayload, rules: AliasRules): void => {
-  const chat = asChat(rules);
-  if (hasReasoning(chat)) {
-    const { effort, budget_tokens, adaptive, summary } = chat.reasoning;
+  if (hasReasoning(rules)) {
+    const { effort, budget_tokens, adaptive, summary } = rules.reasoning;
     if (effort !== undefined) body.reasoning_effort = effort;
     if (budget_tokens !== undefined) body.thinking_budget = budget_tokens;
     if (adaptive !== undefined) body.adaptive_thinking = adaptive;
     if (summary !== undefined) body.reasoning_summary = summary;
   }
-  if (chat.verbosity !== undefined) body.verbosity = chat.verbosity;
-  if (chat.serviceTier !== undefined) body.service_tier = chat.serviceTier;
+  if (rules.verbosity !== undefined) body.verbosity = rules.verbosity;
+  if (rules.serviceTier !== undefined) body.service_tier = rules.serviceTier;
 };
 
 export const applyChatRulesToResponses = (body: ResponsesPayload, rules: AliasRules): void => {
-  const chat = asChat(rules);
-  if (hasReasoning(chat)) {
-    const { effort, budget_tokens, adaptive, summary } = chat.reasoning;
+  if (hasReasoning(rules)) {
+    const { effort, budget_tokens, adaptive, summary } = rules.reasoning;
     if (effort !== undefined || summary !== undefined) {
       const existing = body.reasoning ?? {};
       body.reasoning = {
@@ -48,16 +41,15 @@ export const applyChatRulesToResponses = (body: ResponsesPayload, rules: AliasRu
     if (budget_tokens !== undefined) body.thinking_budget = budget_tokens;
     if (adaptive !== undefined) body.adaptive_thinking = adaptive;
   }
-  if (chat.verbosity !== undefined) {
-    body.text = { ...body.text, verbosity: chat.verbosity };
+  if (rules.verbosity !== undefined) {
+    body.text = { ...body.text, verbosity: rules.verbosity };
   }
-  if (chat.serviceTier !== undefined) body.service_tier = chat.serviceTier;
+  if (rules.serviceTier !== undefined) body.service_tier = rules.serviceTier;
 };
 
 export const applyChatRulesToMessages = (body: MessagesPayload, rules: AliasRules): void => {
-  const chat = asChat(rules);
-  if (hasReasoning(chat)) {
-    const { effort, budget_tokens, adaptive } = chat.reasoning;
+  if (hasReasoning(rules)) {
+    const { effort, budget_tokens, adaptive } = rules.reasoning;
     // Anthropic stores explicit effort in `output_config.effort`; budget /
     // adaptive ride on `thinking.*`. Splitting them so both can be set in
     // the same overlay (effort fixed + budget pinned, e.g.) without one
@@ -71,18 +63,21 @@ export const applyChatRulesToMessages = (body: MessagesPayload, rules: AliasRule
       body.thinking = { ...body.thinking, type: 'enabled', budget_tokens };
     }
   }
-  if (chat.verbosity !== undefined) body.verbosity = chat.verbosity;
-  if (chat.serviceTier !== undefined) {
+  if (rules.verbosity !== undefined) body.verbosity = rules.verbosity;
+  if (rules.serviceTier !== undefined) {
     // The cross-protocol bridge in translate maps `speed: 'fast'` ↔
     // `service_tier: 'fast'`; on a native Messages target the alias rule
     // `serviceTier: 'fast'` lands on `speed` so the upstream sees Fast Mode
     // through its native field. Other tier values pass through on
     // `service_tier` since Messages's native enum (`auto`/`standard_only`)
-    // doesn't model them.
-    if (chat.serviceTier === 'fast') {
+    // doesn't model them. Whichever branch we take, clear the sibling field
+    // so the upstream never sees two tiers in conflict.
+    if (rules.serviceTier === 'fast') {
       body.speed = 'fast';
+      delete body.service_tier;
     } else {
-      body.service_tier = chat.serviceTier;
+      body.service_tier = rules.serviceTier;
+      delete body.speed;
     }
   }
 };
@@ -98,9 +93,8 @@ const GEMINI_THINKING_LEVEL_BY_EFFORT: Record<string, 'minimal' | 'low' | 'mediu
 };
 
 export const applyChatRulesToGemini = (body: GeminiPayload, rules: AliasRules): void => {
-  const chat = asChat(rules);
-  if (hasReasoning(chat)) {
-    const { effort, budget_tokens, adaptive } = chat.reasoning;
+  if (hasReasoning(rules)) {
+    const { effort, budget_tokens, adaptive } = rules.reasoning;
     // Gemini collapses the three reasoning controls onto one `thinkingConfig`
     // sub-object. Adaptive wins by encoding budget=-1 (Gemini's adaptive
     // sentinel); an explicit budget pins the count; effort sets the level.
@@ -118,10 +112,10 @@ export const applyChatRulesToGemini = (body: GeminiPayload, rules: AliasRules):
       body.generationConfig = { ...body.generationConfig, thinkingConfig };
     }
   }
-  if (chat.verbosity !== undefined) {
-    body.generationConfig = { ...body.generationConfig, verbosity: chat.verbosity };
+  if (rules.verbosity !== undefined) {
+    body.generationConfig = { ...body.generationConfig, verbosity: rules.verbosity };
   }
-  if (chat.serviceTier !== undefined) {
-    body.generationConfig = { ...body.generationConfig, serviceTier: chat.serviceTier };
+  if (rules.serviceTier !== undefined) {
+    body.generationConfig = { ...body.generationConfig, serviceTier: rules.serviceTier };
   }
 };
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index 6bd4f1891..4f88a488f 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -30,7 +30,7 @@ import type { ModelAliasRecord } from '../../repo/types.ts';
 import type { AddressableIdEntry } from '../providers/addressable.ts';
 import { unionEndpoints } from '../providers/endpoint-union.ts';
 import { composeAliasDisplayName } from '@floway-dev/protocols/common';
-import type { AliasTarget, AnnouncedMetadata, ChatAliasRules, ChatModelInfo, PublicModel, PublicModelAliasedFrom, PublicModelLimits } from '@floway-dev/protocols/common';
+import type { AliasTarget, AnnouncedMetadata, ChatModelInfo, PublicModel, PublicModelAliasedFrom, PublicModelLimits } from '@floway-dev/protocols/common';
 import type { ResolvedModel } from '@floway-dev/provider';
 
 export interface ListedAliasInputs {
@@ -43,11 +43,6 @@ export interface ListedAliasInputs {
   readonly addressableModelIds: readonly AddressableIdEntry[];
 }
 
-// The repo guarantees rule shape matches the row's `kind` (chat rows carry
-// `ChatAliasRules`; embedding / image rows carry the empty record), so a
-// chat-row target can be read as ChatAliasRules without a runtime check.
-const chatRules = (target: AliasTarget): ChatAliasRules => target.rules as ChatAliasRules;
-
 // Result preserves the order of `arrays[0]`. Matters for callers like the
 // reasoning-effort intersection below: when no agreed-default exists, the
 // fallback default is `supported[0]`, so the first input's relative order
@@ -64,8 +59,7 @@ const intersectArrays = <T>(arrays: readonly (readonly T[])[]): T[] => {
 // through unchanged.
 const effectiveChatForIntersection = (chat: ChatModelInfo | undefined, target: AliasTarget): ChatModelInfo | undefined => {
   if (chat === undefined) return undefined;
-  const rules = chatRules(target);
-  const ruleReasoning = rules.reasoning;
+  const ruleReasoning = target.rules.reasoning;
   if (ruleReasoning === undefined) return chat;
   if (chat.reasoning === undefined) return chat;
 
diff --git a/packages/protocols/src/common/aliases.ts b/packages/protocols/src/common/aliases.ts
index a1171e1d2..7a0f37936 100644
--- a/packages/protocols/src/common/aliases.ts
+++ b/packages/protocols/src/common/aliases.ts
@@ -58,10 +58,12 @@ export interface ChatAliasRules {
   serviceTier?: ServiceTier;
 }
 
-// Rule overlay union keyed by `AliasKind`. Embedding and image targets carry
-// an empty record today; the schema reserves the slot so per-kind rules can
-// grow later without a fresh migration.
-export type AliasRules = ChatAliasRules | Record<string, never>;
+// Rule overlay payload. Today only chat-kind aliases carry rules — embedding
+// and image targets pass `{}`, which already satisfies `ChatAliasRules`
+// because every field is optional. The type is a single alias rather than a
+// union so consumers can read `rules.reasoning?.effort` directly without an
+// unchecked cast.
+export type AliasRules = ChatAliasRules;
 
 // One target row inside an alias's `targets` list. Order is meaningful for
 // `first-available` selection and preserved (but ignored) for `random`.
@@ -133,15 +135,14 @@ export interface AliasRuleBadge {
 // operator who configures `effort + verbosity` sees them in the same order
 // whether the dashboard renders badges or a comma-joined caption.
 const aliasRuleParts = (rules: AliasRules): AliasRuleBadge[] => {
-  const chat = rules as ChatAliasRules;
   const parts: AliasRuleBadge[] = [];
-  if (chat.reasoning?.effort !== undefined) parts.push({ field: 'reasoning.effort', label: `${chat.reasoning.effort} effort` });
-  if (chat.reasoning?.budget_tokens !== undefined) parts.push({ field: 'reasoning.budget_tokens', label: `${chat.reasoning.budget_tokens}tok budget` });
-  if (chat.reasoning?.adaptive === true) parts.push({ field: 'reasoning.adaptive', label: 'adaptive' });
-  else if (chat.reasoning?.adaptive === false) parts.push({ field: 'reasoning.adaptive', label: 'non-adaptive' });
-  if (chat.reasoning?.summary !== undefined) parts.push({ field: 'reasoning.summary', label: `summary: ${chat.reasoning.summary}` });
-  if (chat.verbosity !== undefined) parts.push({ field: 'verbosity', label: `${chat.verbosity} verbosity` });
-  if (chat.serviceTier !== undefined) parts.push({ field: 'serviceTier', label: `${chat.serviceTier} tier` });
+  if (rules.reasoning?.effort !== undefined) parts.push({ field: 'reasoning.effort', label: `${rules.reasoning.effort} effort` });
+  if (rules.reasoning?.budget_tokens !== undefined) parts.push({ field: 'reasoning.budget_tokens', label: `${rules.reasoning.budget_tokens}tok budget` });
+  if (rules.reasoning?.adaptive === true) parts.push({ field: 'reasoning.adaptive', label: 'adaptive' });
+  else if (rules.reasoning?.adaptive === false) parts.push({ field: 'reasoning.adaptive', label: 'non-adaptive' });
+  if (rules.reasoning?.summary !== undefined) parts.push({ field: 'reasoning.summary', label: `summary: ${rules.reasoning.summary}` });
+  if (rules.verbosity !== undefined) parts.push({ field: 'verbosity', label: `${rules.verbosity} verbosity` });
+  if (rules.serviceTier !== undefined) parts.push({ field: 'serviceTier', label: `${rules.serviceTier} tier` });
   return parts;
 };
 

From 056d6ebf210507d22799fa90986b38842bc57d3d Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 03:12:57 +0800
Subject: [PATCH 148/170] refactor(aliases): move ALIAS_RESPONSE_HEADER to its
 own header.ts
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The constant is transport-level — consumed by the chat alias-prelude
and the passthrough seam — not rule-overlay-level. Splitting it off
means transport call sites no longer drag in the per-protocol overlay
helpers from `apply.ts` to read one string.
---
 .../gateway/src/data-plane/chat/shared/alias-prelude.ts   | 2 +-
 packages/gateway/src/data-plane/model-aliases/apply.ts    | 5 -----
 packages/gateway/src/data-plane/model-aliases/header.ts   | 8 ++++++++
 .../gateway/src/data-plane/shared/passthrough-serve.ts    | 2 +-
 4 files changed, 10 insertions(+), 7 deletions(-)
 create mode 100644 packages/gateway/src/data-plane/model-aliases/header.ts

diff --git a/packages/gateway/src/data-plane/chat/shared/alias-prelude.ts b/packages/gateway/src/data-plane/chat/shared/alias-prelude.ts
index 7c83dc92f..ef4413bb6 100644
--- a/packages/gateway/src/data-plane/chat/shared/alias-prelude.ts
+++ b/packages/gateway/src/data-plane/chat/shared/alias-prelude.ts
@@ -1,6 +1,6 @@
 import { type ChatServeFailure, aliasFailureFromError } from './errors.ts';
 import type { GatewayCtx } from './gateway-ctx.ts';
-import { ALIAS_RESPONSE_HEADER } from '../../model-aliases/apply.ts';
+import { ALIAS_RESPONSE_HEADER } from '../../model-aliases/header.ts';
 import { AliasNoTargetAvailableError, type AliasResolution } from '../../model-aliases/resolve.ts';
 import { resolveModelCandidates } from '../../providers/registry.ts';
 import type { ModelEndpoints } from '@floway-dev/protocols/common';
diff --git a/packages/gateway/src/data-plane/model-aliases/apply.ts b/packages/gateway/src/data-plane/model-aliases/apply.ts
index 084af50cb..919cbaf1c 100644
--- a/packages/gateway/src/data-plane/model-aliases/apply.ts
+++ b/packages/gateway/src/data-plane/model-aliases/apply.ts
@@ -7,11 +7,6 @@ import type { GeminiPayload } from '@floway-dev/protocols/gemini';
 import type { MessagesPayload } from '@floway-dev/protocols/messages';
 import type { ResponsesPayload } from '@floway-dev/protocols/responses';
 
-// Per-request response header that names the alias the inbound id resolved
-// through. Downstream observability ties together "client asked for X" /
-// "upstream saw Y" via this header.
-export const ALIAS_RESPONSE_HEADER = 'x-floway-alias';
-
 const hasReasoning = (rules: AliasRules): rules is AliasRules & { reasoning: NonNullable<AliasRules['reasoning']> } =>
   rules.reasoning !== undefined;
 
diff --git a/packages/gateway/src/data-plane/model-aliases/header.ts b/packages/gateway/src/data-plane/model-aliases/header.ts
new file mode 100644
index 000000000..4161de03c
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/header.ts
@@ -0,0 +1,8 @@
+// Per-request response header that names the alias the inbound id resolved
+// through. Downstream observability ties together "client asked for X" /
+// "upstream saw Y" via this header.
+//
+// Standalone so transport-level consumers (the alias-prelude in chat serves,
+// the passthrough seam) can import it without dragging in the rule-overlay
+// helpers from `apply.ts`.
+export const ALIAS_RESPONSE_HEADER = 'x-floway-alias';
diff --git a/packages/gateway/src/data-plane/shared/passthrough-serve.ts b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
index 2b7c662d5..95fd69955 100644
--- a/packages/gateway/src/data-plane/shared/passthrough-serve.ts
+++ b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
@@ -23,7 +23,7 @@ import type { AuthedContext } from '../../middleware/auth.ts';
 import type { TokenUsage } from '../../repo/types.ts';
 import type { GatewayCtx } from '../chat/shared/gateway-ctx.ts';
 import { type StreamCompletion, writeSSEFrames } from '../chat/shared/stream/sse.ts';
-import { ALIAS_RESPONSE_HEADER } from '../model-aliases/apply.ts';
+import { ALIAS_RESPONSE_HEADER } from '../model-aliases/header.ts';
 import { AliasNoTargetAvailableError } from '../model-aliases/resolve.ts';
 import { resolveModelCandidates } from '../providers/registry.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';

From 4c1825f25c6fae9b5ac380f0e1b65e4c3fad17ab Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 03:15:54 +0800
Subject: [PATCH 149/170] fix(aliases/web): Adaptive control becomes a
 tri-state Select

The Switch could only emit true/undefined, so an existing record with
`adaptive: false` (gateway forwards it to upstream verbatim, the badge
formatter renders "non-adaptive") would silently round-trip to
`undefined` on first edit. Replace with a Select offering auto /
on / off so every state the schema admits stays expressible.
---
 .../components/alias-edit/AliasTargetRow.vue  | 35 ++++++++++++++-----
 1 file changed, 26 insertions(+), 9 deletions(-)

diff --git a/apps/web/src/components/alias-edit/AliasTargetRow.vue b/apps/web/src/components/alias-edit/AliasTargetRow.vue
index 3cb9f39d3..3095ed0c5 100644
--- a/apps/web/src/components/alias-edit/AliasTargetRow.vue
+++ b/apps/web/src/components/alias-edit/AliasTargetRow.vue
@@ -3,7 +3,7 @@ import { computed, ref, watch } from 'vue';
 
 import { computeModelWarnings, computeRuleWarnings, findCatalogModel } from './warnings.ts';
 import type { AliasKind, AliasTarget, ChatAliasRules, ControlPlaneModel } from '../../api/types.ts';
-import { Combobox, Switch, Tooltip } from '@floway-dev/ui';
+import { Combobox, Select, Tooltip } from '@floway-dev/ui';
 
 const target = defineModel<AliasTarget>({ required: true });
 
@@ -56,7 +56,26 @@ const patchReasoning = (patch: Partial<NonNullable<ChatAliasRules['reasoning']>>
 
 const setEffort = (raw: string) => patchReasoning({ effort: raw === '' ? undefined : raw });
 const setSummary = (raw: string) => patchReasoning({ summary: raw === '' ? undefined : raw });
-const setAdaptive = (on: boolean | undefined) => patchReasoning({ adaptive: on === true ? true : undefined });
+
+// Three-state adaptive control: `undefined` means "defer to the model";
+// `true` forces reasoning on; `false` forces it off. A Switch can't
+// represent the third state, so editing an existing record that had
+// adaptive=false would silently round-trip to undefined.
+type AdaptiveSelect = 'auto' | 'on' | 'off';
+const ADAPTIVE_OPTIONS: { value: AdaptiveSelect; label: string }[] = [
+  { value: 'auto', label: 'Auto (defer to model)' },
+  { value: 'on', label: 'On (force adaptive)' },
+  { value: 'off', label: 'Off (force non-adaptive)' },
+];
+const adaptiveSelect = computed<AdaptiveSelect>(() => {
+  const v = target.value.rules.reasoning?.adaptive;
+  if (v === true) return 'on';
+  if (v === false) return 'off';
+  return 'auto';
+});
+const setAdaptive = (raw: AdaptiveSelect | undefined) => {
+  patchReasoning({ adaptive: raw === 'on' ? true : raw === 'off' ? false : undefined });
+};
 const setVerbosity = (raw: string) => {
   const next = { ...target.value.rules };
   if (raw === '') delete next.verbosity;
@@ -203,13 +222,11 @@ const modelWarningTooltip = computed(() => modelWarnings.value.join('\n'));
 
         <div>
           <label class="mb-1.5 block text-xs font-medium text-gray-500">Adaptive reasoning</label>
-          <div class="flex h-9 items-center gap-2">
-            <Switch
-              :model-value="target.rules.reasoning?.adaptive === true"
-              @update:model-value="setAdaptive"
-            />
-            <span class="text-sm text-gray-300">Enable</span>
-          </div>
+          <Select
+            :model-value="adaptiveSelect"
+            :options="ADAPTIVE_OPTIONS"
+            @update:model-value="setAdaptive"
+          />
           <p v-if="warningFor('reasoning.adaptive')" class="mt-1 text-xs text-amber-300">{{ warningFor('reasoning.adaptive') }}</p>
         </div>
 

From 8731c69a5847898e98ff42e636ab685b91780bd3 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 03:18:10 +0800
Subject: [PATCH 150/170] fix(codex): kind-filter alias targets against the
 addressable surface

Two divergences in the catalog's alias-window computation:

1. The codex code re-derived `routableIds` via a plain `addressableSet.has(...)`
   check, which is laxer than `synthesizeListedAliases`'s
   `kind === alias.kind` predicate. A multi-kind alias misconfigured
   with off-kind targets would have those targets contribute to the
   min-window even though the resolver could never pick them.

2. Plain (non-alias) slug lookups went through `slugContextWindow`, a
   parallel map of the same data already in `addressableById`. Drop
   the duplicate and read off the same map the alias branch uses.

Also realigns the top-of-file doc with the round-1 fix that moved the
fallback from `firstRoutable` to `min over routable targets`.
---
 .../gateway/src/data-plane/codex/models.ts    | 58 ++++++++-----------
 1 file changed, 25 insertions(+), 33 deletions(-)

diff --git a/packages/gateway/src/data-plane/codex/models.ts b/packages/gateway/src/data-plane/codex/models.ts
index 3eb019601..499072b47 100644
--- a/packages/gateway/src/data-plane/codex/models.ts
+++ b/packages/gateway/src/data-plane/codex/models.ts
@@ -15,11 +15,12 @@
 //
 // Operator-defined aliases participate in the same filter: a bundled
 // catalog slug that matches a visible alias survives whenever the alias
-// has at least one currently-routable target, and the context window the
-// alias advertises follows the alias's announced metadata (when the
-// operator overrode it) or the first available target (single-target
-// aliases collapse to "the target's window"; multi-target aliases pick
-// first-available for determinism — `random` doesn't fit a catalog).
+// has at least one currently-routable target. The context window the alias
+// advertises follows its announced metadata (when the operator overrode
+// it) or the min across every routable target's window — the safe lower
+// bound `/v1/models`'s rule-aware intersection already applies, so
+// whichever target the resolver picks at request time the catalog's
+// published window is one the gateway can actually serve.
 //
 // Latency: codex aborts the catalog fetch after 5 s
 // (`MODELS_REFRESH_TIMEOUT` in codex-rs/model-provider/src/models_endpoint.rs)
@@ -71,31 +72,22 @@ const computeCatalog = async (
     getRepo().modelAliases.list(),
   ]);
   const realModels = listedRealModels(addressable);
-  // Keyed on every addressable id (not just the listed surface): an alias can
-  // legitimately target an unlisted addressable form — `gpt-5.4` when the
-  // listed canonical id is `cust/gpt-5.4`, or a Copilot variant `claude-opus-4.7`
-  // when the listed canonical is `claude-opus-4-7`. Each entry's `.model`
-  // points to the same canonical `ResolvedModel`, so storing the limit under
-  // both the listed id and any unlisted alias of it stays consistent.
-  const slugContextWindow = new Map<string, number>();
-  for (const entry of addressable) {
-    const limit = entry.model.limits.max_context_window_tokens;
-    if (typeof limit === 'number') slugContextWindow.set(entry.id, limit);
-  }
   // `registrySlugs` mirrors the listed catalog surface — the slugs codex
-  // would have seen in a regular /v1/models call. `addressableSet` is the
-  // broader set the resolver actually accepts (prefix alternates, Copilot
-  // variants), used only for alias-target availability.
+  // would have seen in a regular /v1/models call. `addressableById` is the
+  // broader map the resolver actually accepts (prefix alternates, Copilot
+  // variants) — same surface the listing-side synthesizer narrowed against,
+  // so the kind-filtered lookup below stays consistent with the entries
+  // it emitted.
   const registrySlugs = new Set(realModels.map(m => m.id));
-  const addressableSet = new Set(addressable.map(entry => entry.id));
+  const addressableById = new Map(addressable.map(entry => [entry.id, entry] as const));
 
   // Each alias entry survives in the codex catalog when at least one of
-  // its configured targets is currently addressable. The fallback window
-  // — used when the operator did not override `announcedMetadata` —
-  // is the min across every routable target's window, matching the
-  // safe-lower-bound rule `/v1/models` already applies via the rule-aware
-  // intersection. Selection mode is irrelevant here because the catalog
-  // must publish a single stable window.
+  // its configured targets is currently addressable AND kind-matches the
+  // alias — the same predicate `synthesizeListedAliases` already applied to
+  // emit the entry, re-derived here per-target so we can pull the min
+  // window across every routable target. Selection mode is irrelevant
+  // because the catalog must publish a single stable window; the operator
+  // override still wins when announced metadata supplies one.
   interface AliasCatalogInfo {
     readonly routableWindowsMin: number | null;
     readonly announcedContextWindow: number | undefined;
@@ -104,12 +96,12 @@ const computeCatalog = async (
   for (const entry of synthesizeListedAliases({ aliases, addressableModelIds: addressable })) {
     const aliasedFrom = entry.aliasedFrom;
     if (aliasedFrom === undefined) continue;
-    const routableIds = aliasedFrom.targets
-      .map(t => t.target_model_id)
-      .filter(id => addressableSet.has(id));
-    if (routableIds.length === 0) continue;
-    const windows = routableIds
-      .map(id => slugContextWindow.get(id))
+    const routable = aliasedFrom.targets
+      .map(t => addressableById.get(t.target_model_id))
+      .filter((a): a is NonNullable<typeof a> => a !== undefined && a.model.kind === entry.kind);
+    if (routable.length === 0) continue;
+    const windows = routable
+      .map(a => a.model.limits.max_context_window_tokens)
       .filter((w): w is number => w !== undefined);
     aliasCatalogInfo.set(entry.id, {
       routableWindowsMin: windows.length > 0 ? Math.min(...windows) : null,
@@ -130,7 +122,7 @@ const computeCatalog = async (
   const contextWindowOf: ContextWindowResolver = slug => {
     const info = aliasCatalogInfo.get(slug);
     if (info !== undefined) return info.announcedContextWindow ?? info.routableWindowsMin;
-    return slugContextWindow.get(slug) ?? null;
+    return addressableById.get(slug)?.model.limits.max_context_window_tokens ?? null;
   };
   return applyContextWindowFromRegistry(filtered, contextWindowOf);
 };

From 465285d06eaeaf98b8fc345c7bb2b8bdd4ac50c3 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 03:25:47 +0800
Subject: [PATCH 151/170] fix(aliases): stage x-floway-alias on the no-target
 404 + share the prelude with passthrough
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`resolveCandidatesAndApplyAlias` used to return the rendered failure
before staging the response header, so the alias-no-target 404 went
out without the `x-floway-alias` header that observability tooling
uses to tie the client request to the alias. Stage the header inside
the catch before rendering the failure — `finalizeGatewayResponse`
copies `ctx.responseHeaders` onto every outbound response, including
rendered failures.

The same refactor lifts the prelude out of `chat/shared/` into
`model-aliases/prelude.ts` and lets `passthrough-serve.ts` import it.
The prelude is now generic over the per-protocol target descriptor
that `pickTarget` returns (`ChatTargetApi` for chat,
`ModelEndpointKey` for passthrough) and accepts an optional
`applyAlias` callback (passthrough leaves it undefined because its
body rewrite happens at the provider boundary, not on the inbound
payload). The header-staging + 404-conversion dance now lives in one
place for both protocol families.

`aliasFailureFromError` falls away — the prelude constructs the
failure inline because every consumer that ever called it has moved
to the new helper.
---
 .../alias-edit/announced-metadata.ts          |  2 +-
 .../data-plane/chat/chat-completions/serve.ts |  2 +-
 .../src/data-plane/chat/gemini/serve.ts       |  2 +-
 .../src/data-plane/chat/messages/serve.ts     |  2 +-
 .../data-plane/chat/responses/serve-prep.ts   |  2 +-
 .../data-plane/chat/shared/alias-prelude.ts   | 66 ----------------
 .../src/data-plane/chat/shared/errors.ts      |  7 --
 .../src/data-plane/model-aliases/prelude.ts   | 77 +++++++++++++++++++
 .../data-plane/shared/passthrough-serve.ts    | 37 ++++-----
 9 files changed, 97 insertions(+), 100 deletions(-)
 delete mode 100644 packages/gateway/src/data-plane/chat/shared/alias-prelude.ts
 create mode 100644 packages/gateway/src/data-plane/model-aliases/prelude.ts

diff --git a/apps/web/src/components/alias-edit/announced-metadata.ts b/apps/web/src/components/alias-edit/announced-metadata.ts
index b024c92ec..be59c0791 100644
--- a/apps/web/src/components/alias-edit/announced-metadata.ts
+++ b/apps/web/src/components/alias-edit/announced-metadata.ts
@@ -13,7 +13,7 @@
 
 import type { AliasTarget, AnnouncedMetadata, ChatAliasRules, ChatModelInfo, ControlPlaneModel, PublicModelLimits } from '../../api/types.ts';
 
-const chatRules = (target: AliasTarget): ChatAliasRules => target.rules as ChatAliasRules;
+const chatRules = (target: AliasTarget): ChatAliasRules => target.rules;
 
 const intersectArrays = <T>(arrays: readonly (readonly T[])[]): T[] => {
   if (arrays.length === 0) return [];
diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
index 69f73493f..b6337516b 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve.ts
@@ -2,8 +2,8 @@ import { chatCompletionsAttempt } from './attempt.ts';
 import { renderChatCompletionsFailure } from './errors.ts';
 import { planChatCompletionsRouting } from './routing.ts';
 import { applyChatRulesToChatCompletions } from '../../model-aliases/apply.ts';
+import { resolveCandidatesAndApplyAlias } from '../../model-aliases/prelude.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import { resolveCandidatesAndApplyAlias } from '../shared/alias-prelude.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ChatCompletionsPayload, ChatCompletionsStreamEvent } from '@floway-dev/protocols/chat-completions';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts
index 25b64ff2d..134e8ac9c 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts
@@ -2,8 +2,8 @@ import { geminiAttempt } from './attempt.ts';
 import { renderGeminiFailure } from './errors.ts';
 import { planGeminiRouting } from './routing.ts';
 import { applyChatRulesToGemini } from '../../model-aliases/apply.ts';
+import { resolveCandidatesAndApplyAlias } from '../../model-aliases/prelude.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import { resolveCandidatesAndApplyAlias } from '../shared/alias-prelude.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { GeminiPayload, GeminiStreamEvent } from '@floway-dev/protocols/gemini';
diff --git a/packages/gateway/src/data-plane/chat/messages/serve.ts b/packages/gateway/src/data-plane/chat/messages/serve.ts
index 095ce7cb2..48ded7408 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve.ts
@@ -2,8 +2,8 @@ import { messagesAttempt } from './attempt.ts';
 import { renderMessagesFailure } from './errors.ts';
 import { planMessagesRouting } from './routing.ts';
 import { applyChatRulesToMessages } from '../../model-aliases/apply.ts';
+import { resolveCandidatesAndApplyAlias } from '../../model-aliases/prelude.ts';
 import type { StatefulResponsesStore } from '../responses/items/store.ts';
-import { resolveCandidatesAndApplyAlias } from '../shared/alias-prelude.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ProtocolFrame } from '@floway-dev/protocols/common';
 import type { MessagesPayload, MessagesStreamEvent } from '@floway-dev/protocols/messages';
diff --git a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
index e3d6f48a4..7a07de65e 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve-prep.ts
@@ -2,7 +2,7 @@ import { renderResponsesFailure } from './errors.ts';
 import type { StatefulResponsesStore } from './items/store.ts';
 import { planResponsesRouting } from './routing.ts';
 import { applyChatRulesToResponses } from '../../model-aliases/apply.ts';
-import { resolveCandidatesAndApplyAlias } from '../shared/alias-prelude.ts';
+import { resolveCandidatesAndApplyAlias } from '../../model-aliases/prelude.ts';
 import type { GatewayCtx } from '../shared/gateway-ctx.ts';
 import type { ModelEndpoints, ProtocolFrame } from '@floway-dev/protocols/common';
 import type { ResponsesInputItem, ResponsesPayload, ResponsesStreamEvent } from '@floway-dev/protocols/responses';
diff --git a/packages/gateway/src/data-plane/chat/shared/alias-prelude.ts b/packages/gateway/src/data-plane/chat/shared/alias-prelude.ts
deleted file mode 100644
index ef4413bb6..000000000
--- a/packages/gateway/src/data-plane/chat/shared/alias-prelude.ts
+++ /dev/null
@@ -1,66 +0,0 @@
-import { type ChatServeFailure, aliasFailureFromError } from './errors.ts';
-import type { GatewayCtx } from './gateway-ctx.ts';
-import { ALIAS_RESPONSE_HEADER } from '../../model-aliases/header.ts';
-import { AliasNoTargetAvailableError, type AliasResolution } from '../../model-aliases/resolve.ts';
-import { resolveModelCandidates } from '../../providers/registry.ts';
-import type { ModelEndpoints } from '@floway-dev/protocols/common';
-import type { ChatTargetApi, ProviderCandidate } from '@floway-dev/provider';
-
-// Shared serve-side prelude every chat protocol runs before routing: resolve
-// candidates against the live registry, redirect through the alias resolver
-// when the inbound model id is an alias, stage the response header, and
-// hand the protocol's own callback the resolution so it can overlay rules
-// and (where the protocol mutates it) rewrite `payload.model`. The
-// `AliasNoTargetAvailableError` 404 is converted to whatever rendered
-// failure the protocol returns from its serve seam, so callers stay free
-// of the alias machinery.
-export interface ResolveCandidatesArgs<F> {
-  readonly ctx: GatewayCtx;
-  readonly modelName: string;
-  readonly pickTarget: (endpoints: ModelEndpoints) => ChatTargetApi | null;
-  // Invoked exactly when an alias matched; the callback overlays rules
-  // and (for protocols that mutate it) updates the inbound payload's
-  // model field. The response header is staged by this helper.
-  readonly applyAlias: (resolution: AliasResolution) => void;
-  // Renders this protocol's failure envelope from a ChatServeFailure.
-  // Used only on the alias-no-target path — every other failure mode
-  // is handled by the caller after this helper returns ok.
-  readonly renderAliasFailure: (failure: Extract<ChatServeFailure, { kind: 'alias-no-target-available' }>) => F;
-}
-
-export type ResolveCandidatesOk = {
-  readonly kind: 'ok';
-  readonly candidates: readonly ProviderCandidate[];
-  readonly sawModel: boolean;
-  readonly failedUpstreams: readonly string[];
-  readonly aliasResolution: AliasResolution | null;
-};
-
-export type ResolveCandidatesOutcome<F> =
-  | ResolveCandidatesOk
-  | { readonly kind: 'failure'; readonly result: F };
-
-export const resolveCandidatesAndApplyAlias = async <F>(args: ResolveCandidatesArgs<F>): Promise<ResolveCandidatesOutcome<F>> => {
-  const { ctx, modelName, pickTarget, applyAlias, renderAliasFailure } = args;
-  let enumerated;
-  try {
-    enumerated = await resolveModelCandidates({
-      upstreamIds: ctx.upstreamIds,
-      modelName,
-      pickTarget,
-      scheduler: ctx.backgroundScheduler,
-      currentColo: ctx.currentColo,
-    });
-  } catch (error) {
-    if (error instanceof AliasNoTargetAvailableError) {
-      return { kind: 'failure', result: renderAliasFailure(aliasFailureFromError(error)) };
-    }
-    throw error;
-  }
-  const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
-  if (aliasResolution !== null) {
-    applyAlias(aliasResolution);
-    ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
-  }
-  return { kind: 'ok', candidates, sawModel, failedUpstreams, aliasResolution };
-};
diff --git a/packages/gateway/src/data-plane/chat/shared/errors.ts b/packages/gateway/src/data-plane/chat/shared/errors.ts
index 0c106f695..b043eddef 100644
--- a/packages/gateway/src/data-plane/chat/shared/errors.ts
+++ b/packages/gateway/src/data-plane/chat/shared/errors.ts
@@ -1,5 +1,3 @@
-import type { AliasNoTargetAvailableError } from '../../model-aliases/resolve.ts';
-
 // Failures a protocol can render before reaching an upstream; unexpected
 // throws bubble as-is. `failedUpstreams` on model-{missing,unsupported}
 // carries the upstream names whose catalog fetch threw during this
@@ -18,11 +16,6 @@ export type ChatServeFailure =
   // it.
   | { readonly kind: 'alias-no-target-available'; readonly message: string };
 
-export const aliasFailureFromError = (error: AliasNoTargetAvailableError): Extract<ChatServeFailure, { kind: 'alias-no-target-available' }> => ({
-  kind: 'alias-no-target-available',
-  message: error.message,
-});
-
 class ChatServeFailureError extends Error {
   readonly failure: ChatServeFailure;
 
diff --git a/packages/gateway/src/data-plane/model-aliases/prelude.ts b/packages/gateway/src/data-plane/model-aliases/prelude.ts
new file mode 100644
index 000000000..c7a29f517
--- /dev/null
+++ b/packages/gateway/src/data-plane/model-aliases/prelude.ts
@@ -0,0 +1,77 @@
+import { ALIAS_RESPONSE_HEADER } from './header.ts';
+import { AliasNoTargetAvailableError, type AliasResolution } from './resolve.ts';
+import type { GatewayCtx } from '../chat/shared/gateway-ctx.ts';
+import { resolveModelCandidates } from '../providers/registry.ts';
+import type { ModelEndpoints } from '@floway-dev/protocols/common';
+import type { ProviderCandidate } from '@floway-dev/provider';
+
+// Structural shape every protocol's no-target-available renderer accepts.
+// Mirrors `Extract<ChatServeFailure, { kind: 'alias-no-target-available' }>`
+// without binding the alias plumbing to the chat-specific failure union.
+export interface AliasNoTargetFailure {
+  readonly kind: 'alias-no-target-available';
+  readonly message: string;
+}
+
+// Shared serve-side prelude every chat protocol — and the passthrough seam —
+// runs before routing. Resolves candidates against the live registry, runs
+// the alias resolver when the inbound id is an alias, stages the
+// `x-floway-alias` response header on every alias-touched code path
+// (including the no-target 404), and converts
+// `AliasNoTargetAvailableError` to whatever rendered failure the caller's
+// `renderAliasFailure` produces. Chat protocols also overlay rules onto the
+// payload via `applyAlias`; passthrough leaves it undefined because the
+// per-call `binding.upstreamModel.id` rewrite happens at the provider
+// boundary, not on the inbound body.
+//
+// Generic over the resolver's per-protocol target descriptor (chat returns
+// `ChatTargetApi`, passthrough returns `ModelEndpointKey`).
+export interface ResolveCandidatesArgs<TTarget, F> {
+  readonly ctx: GatewayCtx;
+  readonly modelName: string;
+  readonly pickTarget: (endpoints: ModelEndpoints) => TTarget | null;
+  readonly applyAlias?: (resolution: AliasResolution) => void;
+  readonly renderAliasFailure: (failure: AliasNoTargetFailure) => F;
+}
+
+export type ResolveCandidatesOk<TTarget> = {
+  readonly kind: 'ok';
+  readonly candidates: ReadonlyArray<ProviderCandidate & { readonly targetApi: TTarget }>;
+  readonly sawModel: boolean;
+  readonly failedUpstreams: readonly string[];
+  readonly aliasResolution: AliasResolution | null;
+};
+
+export type ResolveCandidatesOutcome<TTarget, F> =
+  | ResolveCandidatesOk<TTarget>
+  | { readonly kind: 'failure'; readonly result: F };
+
+export const resolveCandidatesAndApplyAlias = async <TTarget, F>(args: ResolveCandidatesArgs<TTarget, F>): Promise<ResolveCandidatesOutcome<TTarget, F>> => {
+  const { ctx, modelName, pickTarget, applyAlias, renderAliasFailure } = args;
+  let enumerated;
+  try {
+    enumerated = await resolveModelCandidates({
+      upstreamIds: ctx.upstreamIds,
+      modelName,
+      pickTarget,
+      scheduler: ctx.backgroundScheduler,
+      currentColo: ctx.currentColo,
+    });
+  } catch (error) {
+    if (error instanceof AliasNoTargetAvailableError) {
+      // Header staged on the 404 too — observability ties together "client
+      // asked for X" / "alias X had no routable target" without parsing
+      // the body. finalizeGatewayResponse copies ctx.responseHeaders onto
+      // every outbound response, including rendered failures.
+      ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, error.aliasName);
+      return { kind: 'failure', result: renderAliasFailure({ kind: 'alias-no-target-available', message: error.message }) };
+    }
+    throw error;
+  }
+  const { candidates, sawModel, failedUpstreams, aliasResolution } = enumerated;
+  if (aliasResolution !== null) {
+    applyAlias?.(aliasResolution);
+    ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
+  }
+  return { kind: 'ok', candidates: candidates as ResolveCandidatesOk<TTarget>['candidates'], sawModel, failedUpstreams, aliasResolution };
+};
diff --git a/packages/gateway/src/data-plane/shared/passthrough-serve.ts b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
index 95fd69955..a4dca72a0 100644
--- a/packages/gateway/src/data-plane/shared/passthrough-serve.ts
+++ b/packages/gateway/src/data-plane/shared/passthrough-serve.ts
@@ -23,9 +23,7 @@ import type { AuthedContext } from '../../middleware/auth.ts';
 import type { TokenUsage } from '../../repo/types.ts';
 import type { GatewayCtx } from '../chat/shared/gateway-ctx.ts';
 import { type StreamCompletion, writeSSEFrames } from '../chat/shared/stream/sse.ts';
-import { ALIAS_RESPONSE_HEADER } from '../model-aliases/header.ts';
-import { AliasNoTargetAvailableError } from '../model-aliases/resolve.ts';
-import { resolveModelCandidates } from '../providers/registry.ts';
+import { resolveCandidatesAndApplyAlias } from '../model-aliases/prelude.ts';
 import type { BackgroundScheduler } from '@floway-dev/platform';
 import { doneFrame, eventFrame, type ModelEndpointKey, parseSSEStream, parseTargetStreamFrames, type ProtocolFrame, sseCommentFrame, sseFrame } from '@floway-dev/protocols/common';
 import { httpResponseToResponse, ProviderModelsUnavailableError, toInternalDebugError } from '@floway-dev/provider';
@@ -136,26 +134,21 @@ export const passthroughServe = async (input: PassthroughServeContext): Promise<
     // upstreams, with the unprefixed interpretation pushed before the
     // prefixed one within a single upstream. The first candidate wins.
     // See resolve.ts for the alias-resolves-once-above-prefix-routing
-    // contract; `resolveModelCandidates` runs it and surfaces the result
-    // on `aliasResolution`.
-    let resolution;
-    try {
-      resolution = await resolveModelCandidates({
-        modelName: model,
-        upstreamIds: ctx.upstreamIds,
-        scheduler: ctx.backgroundScheduler,
-        currentColo: ctx.currentColo,
-        pickTarget: endpoints => endpoints[endpointKey] !== undefined ? endpointKey : null,
-      });
-    } catch (e) {
-      if (e instanceof AliasNoTargetAvailableError) {
+    // contract; `resolveCandidatesAndApplyAlias` runs it, stages the
+    // `x-floway-alias` response header on every alias-touched path
+    // (including the 404), and surfaces a rendered failure when the
+    // alias resolver has no routable target.
+    const resolved = await resolveCandidatesAndApplyAlias({
+      ctx,
+      modelName: model,
+      pickTarget: endpoints => endpoints[endpointKey] !== undefined ? endpointKey : null,
+      renderAliasFailure: failure => {
         ctx.dump?.error('gateway');
-        return passthroughApiError(c, e.message, 404);
-      }
-      throw e;
-    }
-    const { candidates, sawModel, failedUpstreams, aliasResolution } = resolution;
-    if (aliasResolution !== null) ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
+        return passthroughApiError(c, failure.message, 404);
+      },
+    });
+    if (resolved.kind === 'failure') return resolved.result;
+    const { candidates, sawModel, failedUpstreams } = resolved;
     if (!sawModel) {
       ctx.dump?.error('gateway');
       return passthroughApiError(c, appendFailedUpstreams(`Model ${model} is not available on any configured upstream.`, failedUpstreams), 404);

From de970aa462cefb366367b858f236cea8a46c5e33 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 03:28:14 +0800
Subject: [PATCH 152/170] fix(aliases): AliasNoTargetAvailableError
 distinguishes endpoint-mismatch
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The resolver dropped targets for two distinct reasons — no enabled
upstream binding at all, OR a binding exists but none satisfies the
inbound endpoint predicate (kind/endpoint mismatch). The error
message collapsed both into the same "no enabled upstream binding"
wording, so a chat client hitting an embedding-only alias saw a hint
pointing at "no binding" when the real cause was the endpoint.

`candidateRoutability` now reports the rejection reason and
`AliasNoTargetAvailableError` flips to "none currently serves the
inbound endpoint" when every dropped target lost on endpoint-match.
---
 .../src/data-plane/model-aliases/resolve.ts   | 76 +++++++++++++------
 .../data-plane/model-aliases/resolve_test.ts  | 27 ++++++-
 2 files changed, 80 insertions(+), 23 deletions(-)

diff --git a/packages/gateway/src/data-plane/model-aliases/resolve.ts b/packages/gateway/src/data-plane/model-aliases/resolve.ts
index d3ab1ada0..33a71e7ad 100644
--- a/packages/gateway/src/data-plane/model-aliases/resolve.ts
+++ b/packages/gateway/src/data-plane/model-aliases/resolve.ts
@@ -29,9 +29,30 @@ export interface AliasResolution {
   readonly aliasName: string;
 }
 
-// Canonical wording for the alias-no-target-available 404.
-const aliasNoTargetMessage = (params: { aliasName: string; targetCount: number }): string =>
-  `alias '${params.aliasName}' has ${params.targetCount} target(s); none currently map to an enabled upstream binding`;
+// Why the alias's target pool is empty. The resolver keeps targets whose
+// resolved binding exists AND serves the inbound endpoint (per the caller's
+// `endpointAccepts`); a target lost to either check counts toward its
+// respective bucket here.
+type CandidateRoutability =
+  | { readonly routable: true }
+  | { readonly routable: false; readonly reason: 'no-binding' | 'endpoint-mismatch' };
+
+// Canonical wording for the alias-no-target-available 404. The "every
+// target was endpoint-mismatched" branch is its own message so an
+// embeddings client hitting a chat-only alias (or vice versa) sees a hint
+// pointing at the kind/endpoint instead of the generic "no enabled
+// upstream binding" wording.
+const aliasNoTargetMessage = (params: {
+  readonly aliasName: string;
+  readonly targetCount: number;
+  readonly allEndpointMismatch: boolean;
+}): string => {
+  const stem = `alias '${params.aliasName}' has ${params.targetCount} target(s)`;
+  if (params.allEndpointMismatch) {
+    return `${stem}; none currently serves the inbound endpoint`;
+  }
+  return `${stem}; none currently map to an enabled upstream binding`;
+};
 
 // Thrown when the alias name was found but no target currently resolves to
 // an enabled upstream binding that serves the inbound endpoint. Caught at
@@ -40,10 +61,10 @@ const aliasNoTargetMessage = (params: { aliasName: string; targetCount: number }
 export class AliasNoTargetAvailableError extends Error {
   readonly aliasName: string;
 
-  constructor(aliasName: string, targetCount: number) {
-    super(aliasNoTargetMessage({ aliasName, targetCount }));
+  constructor(params: { aliasName: string; targetCount: number; allEndpointMismatch: boolean }) {
+    super(aliasNoTargetMessage(params));
     this.name = 'AliasNoTargetAvailableError';
-    this.aliasName = aliasName;
+    this.aliasName = params.aliasName;
   }
 }
 
@@ -66,38 +87,42 @@ interface ResolveAliasArgs {
   readonly repo: ModelAliasesRepo;
 }
 
-// Reports true when the target id resolves to at least one enabled upstream
-// binding whose endpoint map satisfies the inbound endpoint predicate.
-// `random` selection in particular depends on this — without endpoint
-// awareness, a randomly-picked target may not serve the inbound endpoint
-// and the request would 404 at prefix routing even though another target
-// would have worked.
-const candidateIsRoutable = async (
+// Reports whether the target id resolves to at least one enabled upstream
+// binding whose endpoint map satisfies the inbound endpoint predicate, and
+// distinguishes the two empty-pool causes so `AliasNoTargetAvailableError`
+// can show the right hint.
+const candidateRoutability = async (
   targetModelId: string,
   providers: readonly ModelProviderInstance[],
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
   endpointAccepts: (endpoints: ModelEndpoints) => boolean,
-): Promise<boolean> => {
-  if (providers.length === 0) return false;
+): Promise<CandidateRoutability> => {
+  if (providers.length === 0) return { routable: false, reason: 'no-binding' };
   const interpretations = enumerateModelInterpretations(targetModelId, providers);
   const { resolutions } = await collectInterpretationOutcomes(interpretations, fetcherForUpstream, scheduler);
-  return resolutions.some(r => endpointAccepts(r.resolved.binding.upstreamModel.endpoints));
+  if (resolutions.length === 0) return { routable: false, reason: 'no-binding' };
+  if (resolutions.some(r => endpointAccepts(r.resolved.binding.upstreamModel.endpoints))) return { routable: true };
+  return { routable: false, reason: 'endpoint-mismatch' };
 };
 
 // Pre-pick the available pool ONCE. Order is preserved so
 // selection=first-available picks deterministically; selection=random picks
 // uniformly within whatever subset survived availability filtering.
+// `rejections` collects the reason every dropped target was dropped, so the
+// caller can pin the failure message when the pool is empty.
 const buildAvailablePool = async (
   record: ModelAliasRecord,
   providers: readonly ModelProviderInstance[],
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
   endpointAccepts: (endpoints: ModelEndpoints) => boolean,
-): Promise<ModelAliasRecord['targets']> => {
-  const availability = await Promise.all(record.targets.map(target =>
-    candidateIsRoutable(target.target_model_id, providers, fetcherForUpstream, scheduler, endpointAccepts)));
-  return record.targets.filter((_, index) => availability[index]);
+): Promise<{ readonly pool: ModelAliasRecord['targets']; readonly rejections: readonly ('no-binding' | 'endpoint-mismatch')[] }> => {
+  const outcomes = await Promise.all(record.targets.map(target =>
+    candidateRoutability(target.target_model_id, providers, fetcherForUpstream, scheduler, endpointAccepts)));
+  const pool = record.targets.filter((_, index) => outcomes[index].routable);
+  const rejections = outcomes.flatMap(o => o.routable ? [] : [o.reason]);
+  return { pool, rejections };
 };
 
 export const resolveAlias = async (args: ResolveAliasArgs): Promise<AliasResolution | null> => {
@@ -105,8 +130,15 @@ export const resolveAlias = async (args: ResolveAliasArgs): Promise<AliasResolut
   const record = await repo.getByName(modelName);
   if (!record) return null;
 
-  const pool = await buildAvailablePool(record, providers, fetcherForUpstream, scheduler, endpointAccepts);
-  if (pool.length === 0) throw new AliasNoTargetAvailableError(record.name, record.targets.length);
+  const { pool, rejections } = await buildAvailablePool(record, providers, fetcherForUpstream, scheduler, endpointAccepts);
+  if (pool.length === 0) {
+    const allEndpointMismatch = rejections.length > 0 && rejections.every(r => r === 'endpoint-mismatch');
+    throw new AliasNoTargetAvailableError({
+      aliasName: record.name,
+      targetCount: record.targets.length,
+      allEndpointMismatch,
+    });
+  }
 
   const picked = record.selection === 'first-available'
     ? pool[0]
diff --git a/packages/gateway/src/data-plane/model-aliases/resolve_test.ts b/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
index 516a2a749..7640a2232 100644
--- a/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
+++ b/packages/gateway/src/data-plane/model-aliases/resolve_test.ts
@@ -272,7 +272,7 @@ test('endpoint-aware pool: first-available skips targets whose binding does not
   assertEquals(result.targetModelId, 'serves-cc');
 });
 
-test('endpoint-aware pool: alias with NO target serving the inbound endpoint throws AliasNoTargetAvailableError', async () => {
+test('endpoint-aware pool: alias with NO target serving the inbound endpoint throws AliasNoTargetAvailableError and pins the endpoint-mismatch wording', async () => {
   setRoutableWith({
     'a': { messages: {} },
     'b': { messages: {} },
@@ -291,5 +291,30 @@ test('endpoint-aware pool: alias with NO target serving the inbound endpoint thr
       repo,
     }),
     AliasNoTargetAvailableError,
+    'none currently serves the inbound endpoint',
+  );
+});
+
+test('alias with every target unresolvable to any upstream throws AliasNoTargetAvailableError and pins the no-binding wording', async () => {
+  // Empty routable map → no target resolves to any binding at all. The
+  // error message stays on the canonical "no enabled upstream binding"
+  // wording so an operator who removed every binding sees that hint
+  // rather than the endpoint-mismatch one.
+  setRoutableWith({});
+  const repo = stubRepoFor(aliasRecord({
+    targets: [
+      { target_model_id: 'gone-1', rules: {} },
+      { target_model_id: 'gone-2', rules: {} },
+    ],
+  }));
+  await assertRejects(
+    () => resolveAlias({
+      modelName: 'gpt-fast',
+      ...RESOLVE_DEFAULTS,
+      endpointAccepts: () => true,
+      repo,
+    }),
+    AliasNoTargetAvailableError,
+    'none currently map to an enabled upstream binding',
   );
 });

From bacb9f72794b8a36cb1a35b7a8165d7f67a9b75c Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 03:29:24 +0800
Subject: [PATCH 153/170] test(aliases/web): pin announced-metadata.ts as a
 mirror of the gateway
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The frontend's hand-written `computeAnnouncedMetadata` mirrors the
gateway's `intersectChat` / `intersectLimits`. Round 4 already caught
one drift (the `||→&&` modalities fix landed gateway-side first); a
test covering the same matrix gives the dashboard's read-only preview
+ edit-dialog seed buffer a CI gate against the next silent drift.
---
 .../alias-edit/announced-metadata_test.ts     | 103 ++++++++++++++++++
 1 file changed, 103 insertions(+)
 create mode 100644 apps/web/src/components/alias-edit/announced-metadata_test.ts

diff --git a/apps/web/src/components/alias-edit/announced-metadata_test.ts b/apps/web/src/components/alias-edit/announced-metadata_test.ts
new file mode 100644
index 000000000..1fa3c7cf3
--- /dev/null
+++ b/apps/web/src/components/alias-edit/announced-metadata_test.ts
@@ -0,0 +1,103 @@
+import { describe, expect, it } from 'vitest';
+
+import { computeAnnouncedMetadata } from './announced-metadata.ts';
+import { buildRealModel } from '../../api/test-fixtures.ts';
+import type { AliasTarget, ChatAliasRules, ControlPlaneModel } from '../../api/types.ts';
+
+// Mirror of `packages/gateway/src/data-plane/models/alias-listing_test.ts`'s
+// matrix on the frontend's hand-written `intersectChat` / `intersectLimits`.
+// The two have already drifted once (the ||→&& fix landed gateway-side first),
+// so every invariant the gateway test pins lands here too.
+
+const target = (id: string, rules: ChatAliasRules = {}): AliasTarget => ({ target_model_id: id, rules });
+
+const real = (id: string, over: Partial<ControlPlaneModel> = {}): ControlPlaneModel =>
+  buildRealModel({ id, ...over });
+
+describe('computeAnnouncedMetadata', () => {
+  it('returns {} when no target resolves against the live catalog', () => {
+    const result = computeAnnouncedMetadata([target('gone')], 'chat', [real('other')]);
+    expect(result).toEqual({});
+  });
+
+  it('intersects modalities across all available targets', () => {
+    const result = computeAnnouncedMetadata(
+      [target('a'), target('b')],
+      'chat',
+      [
+        real('a', { chat: { modalities: { input: ['text', 'image'], output: ['text'] } } }),
+        real('b', { chat: { modalities: { input: ['text'], output: ['text'] } } }),
+      ],
+    );
+    expect(result.chat?.modalities).toEqual({ input: ['text'], output: ['text'] });
+  });
+
+  it('omits the modalities block when either half of the intersection collapses', () => {
+    const result = computeAnnouncedMetadata(
+      [target('a'), target('b')],
+      'chat',
+      [
+        real('a', { chat: { modalities: { input: ['text'], output: ['text'] } } }),
+        real('b', { chat: { modalities: { input: ['text'], output: ['image'] } } }),
+      ],
+    );
+    expect(result.chat?.modalities).toBeUndefined();
+  });
+
+  it('intersects effort supported across targets', () => {
+    const result = computeAnnouncedMetadata(
+      [target('a'), target('b')],
+      'chat',
+      [
+        real('a', { chat: { reasoning: { effort: { supported: ['low', 'medium', 'high'], default: 'medium' } } } }),
+        real('b', { chat: { reasoning: { effort: { supported: ['low', 'medium'], default: 'low' } } } }),
+      ],
+    );
+    expect(result.chat?.reasoning?.effort?.supported).toEqual(['low', 'medium']);
+  });
+
+  it('drops the budget_tokens block when one target declares only min and another only max', () => {
+    const result = computeAnnouncedMetadata(
+      [target('a'), target('b')],
+      'chat',
+      [
+        real('a', { chat: { reasoning: { budget_tokens: { min: 1024 } } } }),
+        real('b', { chat: { reasoning: { budget_tokens: { max: 65536 } } } }),
+      ],
+    );
+    expect(result.chat?.reasoning?.budget_tokens).toBeUndefined();
+  });
+
+  it('takes min across max_context_window_tokens / max_output_tokens / max_prompt_tokens', () => {
+    const result = computeAnnouncedMetadata(
+      [target('a'), target('b')],
+      'chat',
+      [
+        real('a', { limits: { max_context_window_tokens: 200000, max_output_tokens: 8192 } }),
+        real('b', { limits: { max_context_window_tokens: 128000, max_output_tokens: 4096 } }),
+      ],
+    );
+    expect(result.limits).toEqual({ max_context_window_tokens: 128000, max_output_tokens: 4096 });
+  });
+
+  it('omits a limits leaf when any target leaves it undeclared', () => {
+    const result = computeAnnouncedMetadata(
+      [target('a'), target('b')],
+      'chat',
+      [
+        real('a', { limits: { max_context_window_tokens: 200000 } }),
+        real('b', { limits: {} }),
+      ],
+    );
+    expect(result.limits?.max_context_window_tokens).toBeUndefined();
+  });
+
+  it('drops a sub-field downgraded by a pinned rule (effort: alias fixes the value, so it is not advertised)', () => {
+    const result = computeAnnouncedMetadata(
+      [target('a', { reasoning: { effort: 'low' } })],
+      'chat',
+      [real('a', { chat: { reasoning: { effort: { supported: ['low', 'medium'], default: 'medium' } } } })],
+    );
+    expect(result.chat?.reasoning).toBeUndefined();
+  });
+});

From 9c0d9e6b5fae119823fed8ca00597d4a9f297106 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 03:34:23 +0800
Subject: [PATCH 154/170] test(aliases): pin the alias-no-target 404 path on
 every chat protocol
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The four chat protocols (chat-completions, messages, gemini,
responses) each catch `AliasNoTargetAvailableError` and render the
protocol-specific 404 envelope; the `x-floway-alias` response header
is staged on that path so observability can tie "client asked for X"
to the rendered 404. None of the four serve_test files exercised the
path end to end — the `aliasResolutionQueue` type carried `| Error`
but no test ever pushed one. Add one test per protocol so the
404-envelope wiring and the header staging stay locked in.
---
 .../chat/chat-completions/serve_test.ts       | 26 +++++++++++++++++++
 .../src/data-plane/chat/gemini/serve_test.ts  | 24 +++++++++++++++++
 .../data-plane/chat/messages/serve_test.ts    | 24 +++++++++++++++++
 .../data-plane/chat/responses/serve_test.ts   | 23 ++++++++++++++++
 4 files changed, 97 insertions(+)

diff --git a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
index 4a27e47d5..e7af46a8a 100644
--- a/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/chat-completions/serve_test.ts
@@ -345,3 +345,29 @@ test('alias resolution swaps the inbound model id for the target and overlays ru
   assertEquals(observed.reasoning_effort, 'low');
   assertEquals(observed.verbosity, 'low');
 });
+
+test('alias resolves to no routable target — renders the protocol 404 envelope + stages x-floway-alias on the failure', async () => {
+  installRepo();
+  const { AliasNoTargetAvailableError } = await import('../../model-aliases/resolve.ts');
+  aliasResolutionQueue.push(new AliasNoTargetAvailableError({
+    aliasName: 'gpt-fast', targetCount: 2, allEndpointMismatch: false,
+  }));
+  // No candidates are consumed on this path; the alias error short-circuits
+  // the routing pipeline before the candidate queue is even read.
+
+  const ctx = makeGatewayCtx();
+  const result = await chatCompletionsServe.generate({
+    payload: makePayload({ model: 'gpt-fast' }),
+    ctx,
+    store: createNonResponsesSourceStore(API_KEY_ID),
+    headers: new Headers(),
+  });
+
+  assertEquals(result.type, 'api-error');
+  if (result.type !== 'api-error') throw new Error('unreachable');
+  assertEquals(result.status, 404);
+  const body = JSON.parse(new TextDecoder().decode(result.body));
+  assertEquals(body.error.type, 'invalid_request_error');
+  assert(body.error.message.includes("alias 'gpt-fast'"));
+  assertEquals(ctx.responseHeaders.get('x-floway-alias'), 'gpt-fast');
+});
diff --git a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
index 97c159e2f..128d27065 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve_test.ts
@@ -378,3 +378,27 @@ test('alias resolution swaps the inbound model id for the target and overlays ru
   assertEquals(payload.generationConfig?.thinkingConfig?.thinkingBudget, 1024);
   assertEquals(payload.generationConfig?.verbosity, 'low');
 });
+
+test('alias resolves to no routable target — renders the Gemini 404 envelope + stages x-floway-alias on the failure', async () => {
+  installRepo();
+  const { AliasNoTargetAvailableError } = await import('../../model-aliases/resolve.ts');
+  aliasResolutionQueue.push(new AliasNoTargetAvailableError({
+    aliasName: 'gemini-fast', targetCount: 2, allEndpointMismatch: false,
+  }));
+
+  const ctx = makeGatewayCtx();
+  const result = await geminiServe.generate({
+    payload: makePayload(),
+    ctx,
+    store: createNonResponsesSourceStore(API_KEY_ID),
+    model: 'gemini-fast',
+    headers: new Headers(),
+  });
+
+  assertEquals(result.type, 'api-error');
+  if (result.type !== 'api-error') throw new Error('unreachable');
+  assertEquals(result.status, 404);
+  const body = JSON.parse(new TextDecoder().decode(result.body));
+  assert(body.error.message.includes("alias 'gemini-fast'"));
+  assertEquals(ctx.responseHeaders.get('x-floway-alias'), 'gemini-fast');
+});
diff --git a/packages/gateway/src/data-plane/chat/messages/serve_test.ts b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
index 075e48879..cbfff3fb8 100644
--- a/packages/gateway/src/data-plane/chat/messages/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/messages/serve_test.ts
@@ -495,3 +495,27 @@ test('alias resolution swaps the inbound model id for the target and overlays ru
   // Anthropic's native Fast Mode field.
   assertEquals(observed.speed, 'fast');
 });
+
+test('alias resolves to no routable target — renders the Messages 404 envelope + stages x-floway-alias on the failure', async () => {
+  installRepo();
+  const { AliasNoTargetAvailableError } = await import('../../model-aliases/resolve.ts');
+  aliasResolutionQueue.push(new AliasNoTargetAvailableError({
+    aliasName: 'claude-fast', targetCount: 2, allEndpointMismatch: false,
+  }));
+
+  const ctx = makeGatewayCtx();
+  const result = await messagesServe.generate({
+    payload: makePayload({ model: 'claude-fast' }),
+    ctx,
+    store: createNonResponsesSourceStore(API_KEY_ID),
+    headers: new Headers(),
+  });
+
+  assertEquals(result.type, 'api-error');
+  if (result.type !== 'api-error') throw new Error('unreachable');
+  assertEquals(result.status, 404);
+  const body = JSON.parse(new TextDecoder().decode(result.body));
+  assertEquals(body.error.type, 'not_found_error');
+  assert(body.error.message.includes("alias 'claude-fast'"));
+  assertEquals(ctx.responseHeaders.get('x-floway-alias'), 'claude-fast');
+});
diff --git a/packages/gateway/src/data-plane/chat/responses/serve_test.ts b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
index b84ea2c62..526f825f1 100644
--- a/packages/gateway/src/data-plane/chat/responses/serve_test.ts
+++ b/packages/gateway/src/data-plane/chat/responses/serve_test.ts
@@ -705,3 +705,26 @@ test('alias resolution swaps the inbound model id for the target and overlays ru
   assertEquals(observed.text?.verbosity, 'medium');
   assertEquals(observed.service_tier, 'priority');
 });
+
+test('alias resolves to no routable target — renders the Responses 404 envelope + stages x-floway-alias on the failure', async () => {
+  installRepo();
+  const { AliasNoTargetAvailableError } = await import('../../model-aliases/resolve.ts');
+  aliasResolutionQueue.push(new AliasNoTargetAvailableError({
+    aliasName: 'gpt-fast', targetCount: 2, allEndpointMismatch: false,
+  }));
+
+  const ctx = makeGatewayCtx();
+  const result = await responsesServe.generate({
+    payload: makePayload({ model: 'gpt-fast' }),
+    ctx,
+    store: createResponsesHttpStore(API_KEY_ID, true),
+    headers: new Headers(),
+  });
+
+  assertEquals(result.type, 'api-error');
+  if (result.type !== 'api-error') throw new Error('unreachable');
+  assertEquals(result.status, 404);
+  const body = JSON.parse(new TextDecoder().decode(result.body));
+  assert(body.error.message.includes("alias 'gpt-fast'"));
+  assertEquals(ctx.responseHeaders.get('x-floway-alias'), 'gpt-fast');
+});

From a8a2dacae8f4152bb4af2f2dd8f6f95283b99dcb Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 03:35:54 +0800
Subject: [PATCH 155/170] docs(ui/combobox): drop alias-specific examples from
 a generic primitive

The component lives in `@floway-dev/ui` and serves any consumer that
wants free-form input + a suggestion list. The JSDoc preamble called
out alias rule fields specifically and the `borderless` doc pinned
"the alias-target row" as the use case; trim both to generic
language so the primitive reads as one a future caller can adopt.
---
 packages/ui/src/Combobox.vue | 13 ++++++-------
 1 file changed, 6 insertions(+), 7 deletions(-)

diff --git a/packages/ui/src/Combobox.vue b/packages/ui/src/Combobox.vue
index 52d3fe17b..9b06058bc 100644
--- a/packages/ui/src/Combobox.vue
+++ b/packages/ui/src/Combobox.vue
@@ -1,9 +1,8 @@
 <script setup lang="ts">
-// Single-select combobox with free-form input. Operator can type a value
+// Single-select combobox with free-form input. The operator can type a value
 // the suggestion list does not contain and the typed string becomes the
-// model value verbatim — alias rule fields (effort, summary, service
-// tier, ...) pass through to the upstream and the gateway intentionally
-// does not enum-gate them, so unknown values must round-trip.
+// model value verbatim — useful for fields the gateway forwards verbatim and
+// does not enum-gate.
 //
 // Visual contract matches Select.vue / TagCombobox.vue (dark popover,
 // surface-700 trigger). HTML5 `<input list>` + `<datalist>` would have
@@ -43,14 +42,14 @@ const props = withDefaults(defineProps<{
   emptyText?: string;
   /**
    * Drop the bordered surface-700 shell so the input blends into its parent
-   * row — used by the alias-target row, where the combobox is the row title
-   * inside an already-bordered Card and a second border would double up.
+   * row. Used when the combobox is embedded in an already-bordered container
+   * (e.g. a Card row title) where a second border would double up.
    */
   borderless?: boolean;
   /**
    * Hide the right-edge chevron that toggles the dropdown. The popover still
    * opens on focus / click because the input itself owns `open-on-focus`;
-   * removing the chevron just keeps the title visually clean when the row
+   * removing the chevron keeps the title visually clean when the parent row
    * already carries its own action cluster on the right.
    */
   hideDropdownTrigger?: boolean;

From 3b3706b4c017010372a4454d926e3a087c95a263 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 04:24:34 +0800
Subject: [PATCH 156/170] test(aliases): pin Messages serviceTier sibling-clear
 regression

apply.ts's `fast`/non-fast branches each delete the sibling field
(`body.speed` vs `body.service_tier`) so the upstream never sees both
with conflicting values. The existing tests start from a payload that
had neither field set, so a future regression deleting the `delete`
call would not be caught. Seed each branch's prior-state sibling and
assert it's cleared after the overlay.
---
 .../src/data-plane/model-aliases/apply_test.ts | 18 ++++++++++++++++++
 1 file changed, 18 insertions(+)

diff --git a/packages/gateway/src/data-plane/model-aliases/apply_test.ts b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
index 9dc0732d5..1f6a1c71b 100644
--- a/packages/gateway/src/data-plane/model-aliases/apply_test.ts
+++ b/packages/gateway/src/data-plane/model-aliases/apply_test.ts
@@ -165,6 +165,24 @@ test('messages: non-fast serviceTier lands on service_tier directly', () => {
   assertEquals(body.speed, undefined);
 });
 
+test('messages: serviceTier=fast clears a pre-existing body.service_tier on the same payload', () => {
+  // Upstream must never see both `speed` and `service_tier` set on the
+  // same request — Anthropic treats them as alternates and the wire
+  // semantics for a conflict are undefined. The overlay clears the
+  // sibling field whichever branch it takes.
+  const body = msgPayload({ service_tier: 'priority' });
+  applyChatRulesToMessages(body, { serviceTier: 'fast' });
+  assertEquals(body.speed, 'fast');
+  assertEquals(body.service_tier, undefined);
+});
+
+test('messages: non-fast serviceTier clears a pre-existing body.speed on the same payload', () => {
+  const body = msgPayload({ speed: 'fast' });
+  applyChatRulesToMessages(body, { serviceTier: 'priority' });
+  assertEquals(body.service_tier, 'priority');
+  assertEquals(body.speed, undefined);
+});
+
 test('messages: alias rules overwrite existing thinking + output_config fields', () => {
   const body = msgPayload({ output_config: { effort: 'low' }, thinking: { type: 'enabled', budget_tokens: 100 } });
   applyChatRulesToMessages(body, { reasoning: { effort: 'xhigh', budget_tokens: 9999 } });

From 04913632b585d95ae322830b68b23d061d17e49d Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 04:25:57 +0800
Subject: [PATCH 157/170] test(passthrough): pin alias-no-target 404 envelope +
 x-floway-alias header

The shared prelude lifts the chat protocols' alias-no-target handling
into a generic helper that passthrough now reuses. Every chat protocol's
serve_test covers the 404 + header path end-to-end; passthrough went
through the same code path with no test of its own. Add one that
seeds an alias whose target id is not in any upstream catalog,
exercises /v1/embeddings, asserts the 404 envelope + `x-floway-alias`
header staging, and verifies the upstream is never called on the
failure path.
---
 .../shared/passthrough-serve_test.ts          | 50 +++++++++++++++++++
 1 file changed, 50 insertions(+)

diff --git a/packages/gateway/src/data-plane/shared/passthrough-serve_test.ts b/packages/gateway/src/data-plane/shared/passthrough-serve_test.ts
index 6fbf01d90..a14a77795 100644
--- a/packages/gateway/src/data-plane/shared/passthrough-serve_test.ts
+++ b/packages/gateway/src/data-plane/shared/passthrough-serve_test.ts
@@ -180,3 +180,53 @@ test('passthrough-serve: response header allow-list forwards expected headers an
     },
   );
 });
+
+test('passthrough-serve: alias resolves to no routable target — 404 envelope + x-floway-alias header', async () => {
+  // Mirrors the four chat-protocol serve_test cases: when the alias
+  // resolver throws `AliasNoTargetAvailableError`, the shared prelude
+  // converts it to a passthrough-shaped 404 and stages the
+  // `x-floway-alias` correlation header. No upstream call should fire.
+  const { apiKey, repo } = await setupAppTest();
+  await registerEmbeddingsUpstream(repo);
+  // Seed an alias whose only target id does not exist in any upstream
+  // catalog, so the resolver builds an empty available pool and throws.
+  await repo.modelAliases.insert({
+    name: 'embed-fast',
+    kind: 'embedding',
+    selection: 'first-available',
+    displayName: null,
+    visibleInModelsList: true,
+    targets: [{ target_model_id: 'unknown-embed', rules: {} }],
+    announcedMetadata: null,
+    sortOrder: 0,
+    createdAt: '2026-01-01T00:00:00.000Z',
+    updatedAt: '2026-01-01T00:00:00.000Z',
+  });
+
+  await withMockedFetch(
+    request => {
+      const url = new URL(request.url);
+      if (url.hostname === 'passthrough.example.com' && url.pathname === '/v1/models') {
+        return jsonResponse({ object: 'list', data: [{ id: 'custom-embed-model' }] });
+      }
+      if (url.hostname === 'passthrough.example.com' && url.pathname === '/v1/embeddings') {
+        throw new Error('passthrough-serve: upstream must not be called when alias has no routable target');
+      }
+      throw new Error(`Unhandled fetch ${request.url}`);
+    },
+    async () => {
+      const response = await requestApp('/v1/embeddings', {
+        method: 'POST',
+        headers: { 'content-type': 'application/json', 'x-api-key': apiKey.key },
+        body: JSON.stringify({ model: 'embed-fast', input: 'hi' }),
+      });
+
+      assertEquals(response.status, 404);
+      assertEquals(response.headers.get('x-floway-alias'), 'embed-fast');
+      const body = await response.json() as { error: { message: string; type: string } };
+      assertEquals(body.error.type, 'api_error');
+      // The canonical wording carries the alias name + configured count.
+      assertEquals(body.error.message.includes("alias 'embed-fast'"), true);
+    },
+  );
+});

From f62de4a56f7ababb252e6a589df8d05a88b7af52 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 04:27:45 +0800
Subject: [PATCH 158/170] refactor(aliases/prelude): expose effectiveModelId;
 Gemini stops re-deriving
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The shared prelude already knows both the inbound `modelName` and the
optional `aliasResolution`, so the "id to use downstream" is a closed
computation it can perform once. Body-based protocols
(chat-completions/messages/responses) read it implicitly through their
own `payload.model = aliasResolution.targetModelId` mutation inside
`applyAlias`; path-based Gemini had to re-derive
`aliasResolution?.targetModelId ?? args.model` outside the prelude.

Lift the derivation into the prelude as `effectiveModelId`. Gemini
reads it directly — the `?.` chain at the call site disappears, the
asymmetry the auditor flagged between protocols closes, and the next
path-based caller gets the field for free.
---
 .../gateway/src/data-plane/chat/gemini/serve.ts  |  6 ++----
 .../src/data-plane/model-aliases/prelude.ts      | 16 +++++++++++++++-
 2 files changed, 17 insertions(+), 5 deletions(-)

diff --git a/packages/gateway/src/data-plane/chat/gemini/serve.ts b/packages/gateway/src/data-plane/chat/gemini/serve.ts
index 134e8ac9c..b98f117fa 100644
--- a/packages/gateway/src/data-plane/chat/gemini/serve.ts
+++ b/packages/gateway/src/data-plane/chat/gemini/serve.ts
@@ -41,8 +41,7 @@ export const geminiServe = {
       renderAliasFailure: failure => renderGeminiFailure(failure, 'generate'),
     });
     if (resolved.kind === 'failure') return resolved.result;
-    const { candidates, sawModel, failedUpstreams, aliasResolution } = resolved;
-    const model = aliasResolution?.targetModelId ?? args.model;
+    const { candidates, sawModel, failedUpstreams, effectiveModelId: model } = resolved;
     const decision = await planGeminiRouting({ payload, candidates, store });
     if (decision.kind === 'failure') return renderGeminiFailure(decision.failure, 'generate');
 
@@ -75,8 +74,7 @@ export const geminiServe = {
       renderAliasFailure: failure => renderGeminiFailure(failure, 'countTokens'),
     });
     if (resolved.kind === 'failure') return resolved.result;
-    const { candidates, sawModel, failedUpstreams, aliasResolution } = resolved;
-    const model = aliasResolution?.targetModelId ?? args.model;
+    const { candidates, sawModel, failedUpstreams, effectiveModelId: model } = resolved;
     const decision = await planGeminiRouting({ payload, candidates, store });
     if (decision.kind === 'failure') return renderGeminiFailure(decision.failure, 'countTokens');
 
diff --git a/packages/gateway/src/data-plane/model-aliases/prelude.ts b/packages/gateway/src/data-plane/model-aliases/prelude.ts
index c7a29f517..ca2e48a6b 100644
--- a/packages/gateway/src/data-plane/model-aliases/prelude.ts
+++ b/packages/gateway/src/data-plane/model-aliases/prelude.ts
@@ -40,6 +40,13 @@ export type ResolveCandidatesOk<TTarget> = {
   readonly sawModel: boolean;
   readonly failedUpstreams: readonly string[];
   readonly aliasResolution: AliasResolution | null;
+  // The model id every downstream surface should use: the alias's
+  // `target_model_id` when an alias matched, the original inbound id
+  // otherwise. Body-based protocols (chat-completions/messages/responses)
+  // already mutate `payload.model` in their `applyAlias` callback and read
+  // that; path-based protocols (Gemini routes the id in the URL) have
+  // nowhere to mutate and read this field instead.
+  readonly effectiveModelId: string;
 };
 
 export type ResolveCandidatesOutcome<TTarget, F> =
@@ -73,5 +80,12 @@ export const resolveCandidatesAndApplyAlias = async <TTarget, F>(args: ResolveCa
     applyAlias?.(aliasResolution);
     ctx.responseHeaders.set(ALIAS_RESPONSE_HEADER, aliasResolution.aliasName);
   }
-  return { kind: 'ok', candidates: candidates as ResolveCandidatesOk<TTarget>['candidates'], sawModel, failedUpstreams, aliasResolution };
+  return {
+    kind: 'ok',
+    candidates: candidates as ResolveCandidatesOk<TTarget>['candidates'],
+    sawModel,
+    failedUpstreams,
+    aliasResolution,
+    effectiveModelId: aliasResolution?.targetModelId ?? modelName,
+  };
 };

From a972915d05a1f6d196a95461634b795b42c39a3b Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 04:33:29 +0800
Subject: [PATCH 159/170] feat(control-plane/models): gateway_wide=true
 bypasses caller cap (admin-only)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Admin's editor surfaces (alias edit, upstream edit) configure
gateway-wide state, not the admin's per-account data-plane view. The
default scoped behavior — mirroring the data plane's effective
upstream cap — is correct for the Models page and Playground (admin
can self-restrict and watch the playground respect it), but wrong for
the editor dialogs, which need to see "what exists on the entire
gateway" so admin can wire an alias to a target on an upstream the
admin's own account is currently restricted out of.

Add `gateway_wide=true` to /api/models. Server requires admin and
passes `null` to `enumerateAddressableModelIds` (= all upstreams).
Non-admin sessions get 403 — the bypass would leak models from
upstreams they have no data-plane access to.
---
 .../src/control-plane/models/routes.ts        | 20 ++++--
 .../src/control-plane/models/routes_test.ts   | 61 +++++++++++++++++++
 packages/gateway/src/control-plane/schemas.ts |  7 ++-
 3 files changed, 81 insertions(+), 7 deletions(-)

diff --git a/packages/gateway/src/control-plane/models/routes.ts b/packages/gateway/src/control-plane/models/routes.ts
index 90ed307b5..8b6896e9d 100644
--- a/packages/gateway/src/control-plane/models/routes.ts
+++ b/packages/gateway/src/control-plane/models/routes.ts
@@ -5,7 +5,7 @@ import { toPublicModel } from '../../data-plane/models/load.ts';
 import { MODEL_LISTING_FAILURE_MESSAGE } from '../../data-plane/models/shared.ts';
 import { enumerateAddressableModelIds, listedRealModels } from '../../data-plane/providers/addressable.ts';
 import { createPerRequestFetcher } from '../../dial/per-request.ts';
-import { effectiveUpstreamIdsFromContext } from '../../middleware/auth.ts';
+import { effectiveUpstreamIdsFromContext, userFromContext } from '../../middleware/auth.ts';
 import { getRepo } from '../../repo/index.ts';
 import { backgroundSchedulerFromContext } from '../../runtime/background.ts';
 import { getCurrentColo } from '../../runtime/runtime-info.ts';
@@ -51,14 +51,22 @@ export const controlPlaneModels = async (c: Context) => {
   try {
     const includeAliases = c.req.query('aliases') !== 'false';
     const includeUnlisted = c.req.query('include_unlisted') === 'true';
-    // Scope the dashboard catalog to the caller's effective upstreams, exactly
-    // like the data-plane /models endpoint. On a session request there is no
-    // API key, so this resolves to the user's per-user upstream cap: a user who
-    // has had an upstream removed must not see its models in the Models tab.
+    const gatewayWide = c.req.query('gateway_wide') === 'true';
+    // `gateway_wide=true` lets the alias / upstream edit surfaces see every
+    // model on the gateway regardless of the caller's effective upstream
+    // cap — admin's editor surfaces configure gateway state, not their
+    // per-account data-plane view. Non-admin sessions get a 403 because the
+    // bypass exposes models from upstreams they have no data-plane access
+    // to. Default behavior stays scoped (Models page + Playground respect
+    // self-restriction the same way the data plane does).
+    if (gatewayWide && !userFromContext(c).isAdmin) {
+      return c.json({ error: 'Admin privileges required for gateway_wide=true' }, 403);
+    }
+    const upstreamScope = gatewayWide ? null : effectiveUpstreamIdsFromContext(c);
     const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
     const [addressable, aliases] = await Promise.all([
       enumerateAddressableModelIds(
-        effectiveUpstreamIdsFromContext(c),
+        upstreamScope,
         fetcherForUpstream,
         backgroundSchedulerFromContext(c),
       ),
diff --git a/packages/gateway/src/control-plane/models/routes_test.ts b/packages/gateway/src/control-plane/models/routes_test.ts
index 3990964be..0c94b659b 100644
--- a/packages/gateway/src/control-plane/models/routes_test.ts
+++ b/packages/gateway/src/control-plane/models/routes_test.ts
@@ -221,3 +221,64 @@ test('/api/models?include_unlisted=true: alias whose name collides with an unlis
     },
   );
 });
+
+test('/api/models?gateway_wide=true bypasses the caller\'s upstream cap for admin sessions', async () => {
+  const { adminSession, repo } = await setupAppTest();
+  await repo.upstreams.save(buildCustomUpstreamRecord({ id: 'up_custom_models', sortOrder: 100 }));
+  await repo.upstreams.save(azureUpstream());
+
+  // Admin self-restricts to a subset of upstreams. The Models page and
+  // playground must respect that cap (default scoped behavior); the alias
+  // edit / upstream edit surfaces opt into gateway-wide with
+  // ?gateway_wide=true so they see "what exists" not "what this account
+  // can reach".
+  await repo.users.save({
+    id: 1,
+    username: 'admin',
+    passwordHash: null,
+    isAdmin: true,
+    upstreamIds: ['up_copilot', 'up_custom_models'],
+    canViewGlobalTelemetry: true,
+    createdAt: '2026-03-15T00:00:00.000Z',
+    deletedAt: null,
+  });
+
+  await withMockedFetch(modelsFetchHandler, async () => {
+    // Default: scoped to admin's self-restriction.
+    const scoped = await requestApp('/api/models', { headers: { 'x-floway-session': adminSession } });
+    assertEquals(scoped.status, 200);
+    const scopedIds = ((await scoped.json()) as { data: Array<{ id: string }> }).data.map(m => m.id).sort();
+    assertEquals(scopedIds.includes('azure-public'), false);
+
+    // gateway_wide=true bypasses the cap.
+    const wide = await requestApp('/api/models?gateway_wide=true', { headers: { 'x-floway-session': adminSession } });
+    assertEquals(wide.status, 200);
+    const wideIds = ((await wide.json()) as { data: Array<{ id: string }> }).data.map(m => m.id).sort();
+    assertEquals(wideIds.includes('azure-public'), true);
+    assertEquals(wideIds.includes('custom-model'), true);
+  });
+});
+
+test('/api/models?gateway_wide=true rejects non-admin sessions with 403', async () => {
+  // Non-admin sessions can read the scoped /api/models for the Models page,
+  // but the gateway_wide bypass would leak models from upstreams they have
+  // no data-plane access to.
+  const { repo } = await setupAppTest();
+  await repo.upstreams.save(buildCustomUpstreamRecord({ id: 'up_custom_models', sortOrder: 100 }));
+  await repo.users.save({
+    id: 2,
+    username: 'tester',
+    passwordHash: null,
+    isAdmin: false,
+    upstreamIds: null,
+    canViewGlobalTelemetry: false,
+    createdAt: '2026-03-15T00:00:00.000Z',
+    deletedAt: null,
+  });
+  const session = (await repo.sessions.create(2)).id;
+
+  await withMockedFetch(modelsFetchHandler, async () => {
+    const response = await requestApp('/api/models?gateway_wide=true', { headers: { 'x-floway-session': session } });
+    assertEquals(response.status, 403);
+  });
+});
diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
index ae8c43a93..1ac5e3d3c 100644
--- a/packages/gateway/src/control-plane/schemas.ts
+++ b/packages/gateway/src/control-plane/schemas.ts
@@ -730,10 +730,15 @@ export const tokenUsageQuery = z.object(usageBaseQuery);
 // raw real-model set. `include_unlisted=true` extends the payload with the
 // addressable-but-not-listed surface (prefix-form alternates, Copilot
 // variant ids, provider-side redirects), so the alias dialog combobox sees
-// every id the data-plane resolver would accept.
+// every id the data-plane resolver would accept. `gateway_wide=true`
+// bypasses the caller's effective upstream cap and returns models for
+// every upstream on the gateway — admin-only; used by alias / upstream
+// edit surfaces that need to see "what exists" rather than "what this
+// account can reach".
 export const modelsQuery = z.object({
   aliases: z.enum(['true', 'false']).optional(),
   include_unlisted: z.enum(['true', 'false']).optional(),
+  gateway_wide: z.enum(['true', 'false']).optional(),
 });
 
 export const searchUsageQuery = z.object({

From 2c0746d3a4184be0cbeaf5330990ece7405c01b4 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 04:34:42 +0800
Subject: [PATCH 160/170] fix(aliases/web): useRawModelsStore reads the
 gateway-wide catalog
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The alias editor surfaces (target combobox, shadow detection,
kind-mismatch warning, no-target-available warning) configure gateway
state, not the admin's per-account data-plane view. A self-restricted
admin opening AliasEditDialog used to see a combobox missing every
model on upstreams the admin had restricted out — they could not wire
an alias to a target the gateway can actually serve. Pass
`gateway_wide=true` so the editor sees "what exists" rather than
"what this account can reach". The default `useModelsStore` (Models
page + Keys page) stays scoped because those surfaces are meant to
mirror data-plane visibility.
---
 apps/web/src/composables/useModels.ts | 30 ++++++++++++++-------------
 1 file changed, 16 insertions(+), 14 deletions(-)

diff --git a/apps/web/src/composables/useModels.ts b/apps/web/src/composables/useModels.ts
index c3a1c02b3..3518e0f0e 100644
--- a/apps/web/src/composables/useModels.ts
+++ b/apps/web/src/composables/useModels.ts
@@ -8,19 +8,20 @@ interface ModelsResponse {
   data: ControlPlaneModel[];
 }
 
-// Two stores share this core: the default `/api/models` view (real models
-// + synthesised alias entries merged into one list) backs the dashboard
-// models tab and surfaces that want the externally-visible catalog; the
-// raw view backs the alias settings surfaces (edit dialog target
+// Two stores share this core. The DEFAULT view (`useModelsStore`) backs the
+// Models page + Keys page; it stays scoped to the caller's effective
+// upstream cap, mirroring data-plane visibility — admins who self-restrict
+// see only what their own account would receive at the data plane. The
+// RAW view (`useRawModelsStore`) backs the alias editor surfaces (target
 // combobox, shadow detection, kind-mismatch warning, no-target-available
-// warning) that need to see the underlying catalog without the
-// alias-overwrites-real-id collapse the wire shape applies. The raw view
-// requests `include_unlisted=true` so addressable-but-not-listed ids
-// (Copilot variant ids, prefix-form alternates, provider-side redirects)
-// surface alongside the listed catalog — the alias dialog combobox
-// suggests every id the data-plane resolver would accept, and the
-// shadow/no-target checks see the same surface the resolver does.
-const makeStore = (params: { includeAliases: boolean; includeUnlisted?: boolean }) => {
+// warning); it requests `include_unlisted=true` to surface every id the
+// resolver would accept AND `gateway_wide=true` because the editor's job
+// is to configure gateway state, not browse the admin's per-account
+// data-plane view. The server gates `gateway_wide=true` on admin —
+// non-admin sessions never reach these editor surfaces in the first
+// place (`AliasesSettingsCard` and `AliasEditDialog` mount only on
+// `requiresAdmin` pages).
+const makeStore = (params: { includeAliases: boolean; includeUnlisted?: boolean; gatewayWide?: boolean }) => {
   const models = ref<ControlPlaneModel[] | null>(null);
   const loading = ref(false);
   const error = ref<string | null>(null);
@@ -31,9 +32,10 @@ const makeStore = (params: { includeAliases: boolean; includeUnlisted?: boolean
     const load = async () => {
       loading.value = true;
       error.value = null;
-      const query: { aliases?: 'false'; include_unlisted?: 'true' } = {};
+      const query: { aliases?: 'false'; include_unlisted?: 'true'; gateway_wide?: 'true' } = {};
       if (!params.includeAliases) query.aliases = 'false';
       if (params.includeUnlisted) query.include_unlisted = 'true';
+      if (params.gatewayWide) query.gateway_wide = 'true';
       const { data, error: err } = await callApi<ModelsResponse>(() => api.api.models.$get({ query }));
       loading.value = false;
       if (err) {
@@ -48,4 +50,4 @@ const makeStore = (params: { includeAliases: boolean; includeUnlisted?: boolean
 };
 
 export const useModelsStore = makeStore({ includeAliases: true });
-export const useRawModelsStore = makeStore({ includeAliases: false, includeUnlisted: true });
+export const useRawModelsStore = makeStore({ includeAliases: false, includeUnlisted: true, gatewayWide: true });

From 51092e1cc1ac80c635f5461493b1d311c5714a6b Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 04:36:18 +0800
Subject: [PATCH 161/170] docs(aliases/web): rewrite no-target warning to match
 gateway-wide semantics
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The store feeding this check now reads `gateway_wide=true`, so the
addressable surface it sees is the entire gateway. "No target
currently resolves under your upstream access" was misleading — the
check is no longer scoped to the admin's account. Rewrite to "No
target resolves to any model on this gateway." which describes the
actual cause: a configured target id that no upstream serves.
---
 apps/web/src/components/alias-edit/warnings.ts      | 11 ++++++-----
 apps/web/src/components/alias-edit/warnings_test.ts |  2 +-
 2 files changed, 7 insertions(+), 6 deletions(-)

diff --git a/apps/web/src/components/alias-edit/warnings.ts b/apps/web/src/components/alias-edit/warnings.ts
index d32dce5a4..5086e4ff7 100644
--- a/apps/web/src/components/alias-edit/warnings.ts
+++ b/apps/web/src/components/alias-edit/warnings.ts
@@ -153,17 +153,18 @@ const computeNoTargetWarning = (
   alias: AliasView,
   models: readonly ControlPlaneModel[] | null | undefined,
 ): AliasNoTargetWarning | null => {
-  // The addressable surface (the input store fetches with
-  // `include_unlisted=true`) is the source of truth for routability.
-  // Loading state — models is null — should not fire the warning, or the
-  // dashboard flashes a yellow icon on every alias during startup.
+  // The store reads `/api/models?gateway_wide=true&include_unlisted=true`,
+  // so this `models` array represents every id the data-plane resolver
+  // would accept on the entire gateway — not the admin's per-account
+  // view. Loading state — models is null — should not fire the warning,
+  // or the dashboard flashes a yellow icon on every alias during startup.
   if (models === null || models === undefined) return null;
   const addressableIds = new Set(models.filter(m => m.aliasedFrom === undefined).map(m => m.id));
   const reachable = alias.targets.some(t => addressableIds.has(t.target_model_id));
   if (reachable) return null;
   return {
     type: 'no-target',
-    message: 'No target currently resolves under your upstream access.',
+    message: 'No target resolves to any model on this gateway.',
   };
 };
 
diff --git a/apps/web/src/components/alias-edit/warnings_test.ts b/apps/web/src/components/alias-edit/warnings_test.ts
index cc5501669..9cfd90072 100644
--- a/apps/web/src/components/alias-edit/warnings_test.ts
+++ b/apps/web/src/components/alias-edit/warnings_test.ts
@@ -138,7 +138,7 @@ describe('computeAliasLevelWarnings', () => {
     const warnings = computeAliasLevelWarnings(view('lonely', ['missing-a', 'missing-b']), catalog);
     expect(warnings).toEqual([{
       type: 'no-target',
-      message: 'No target currently resolves under your upstream access.',
+      message: 'No target resolves to any model on this gateway.',
     }]);
   });
 

From f8399df630c5f6d44f5b95439de4c4542e9c12f3 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 04:51:27 +0800
Subject: [PATCH 162/170] feat(aliases/web): playground filters by effective
 cap; admin sees gateway-wide implicitly
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Drop the `gateway_wide=true` query param patch. The server now decides
gateway-wide vs scoped by the caller's role: admin sessions always
receive the full catalog (Models playground + alias edit + settings
all share one fetch), non-admin sessions keep their effective-upstream
cap. The dashboard then filters client-side per surface:

- Alias edit / Settings card / AliasesSettingsCard: no filter, the
  gateway-wide catalog IS the editor's view of the world.
- Models playground: filter by the effective cap of (selectedKey's
  upstream_ids, admin's own user.upstreamIds). Switching the key in
  the playground re-narrows the visible models live. Aliases collapse
  out of the list when no configured target is reachable under the
  cap.

`ModelInfoBar` now takes optional `catalog` + `cap` props and renders
"X / N targets reachable" on alias rows in the playground — showing
exactly how the resolver would narrow this alias's pool under the
chosen key.

A new `apps/web/src/utils/reachability.ts` carries the pure helpers
(`isReachableUnderCap`, `reachableTargets`, `effectiveUpstreamCap`)
with a dedicated unit test pinning the cap semantics, alias
reachability through targets, and the addressable-but-not-listed case.
---
 .../src/components/models/ModelInfoBar.vue    |  27 +++++
 apps/web/src/composables/useModels.ts         |  28 ++---
 apps/web/src/pages/dashboard/models.vue       |  41 ++++---
 apps/web/src/utils/reachability.ts            |  67 +++++++++++
 apps/web/src/utils/reachability_test.ts       | 104 ++++++++++++++++++
 .../src/control-plane/models/routes.ts        |  21 ++--
 .../src/control-plane/models/routes_test.ts   |  54 ++-------
 packages/gateway/src/control-plane/schemas.ts |   7 +-
 8 files changed, 259 insertions(+), 90 deletions(-)
 create mode 100644 apps/web/src/utils/reachability.ts
 create mode 100644 apps/web/src/utils/reachability_test.ts

diff --git a/apps/web/src/components/models/ModelInfoBar.vue b/apps/web/src/components/models/ModelInfoBar.vue
index d2018e2b5..6bf8b4c1e 100644
--- a/apps/web/src/components/models/ModelInfoBar.vue
+++ b/apps/web/src/components/models/ModelInfoBar.vue
@@ -2,11 +2,22 @@
 import { computed } from 'vue';
 
 import type { ControlPlaneModel } from '../../api/types.ts';
+import { reachableTargets } from '../../utils/reachability.ts';
 import { providerBadgeClass, providerMeta } from '../upstreams/provider-meta.ts';
 import { type AliasRuleBadgeField, formatAliasRuleBadges } from '@floway-dev/protocols/common';
 
 const props = defineProps<{
   model: ControlPlaneModel;
+  // Full catalog the row came from; needed so alias rows can show how
+  // many of their configured targets are actually reachable under the
+  // caller's current cap. Optional because callers outside the
+  // playground may not have a meaningful catalog (e.g. the Models
+  // page's tile renders the row in isolation).
+  catalog?: readonly ControlPlaneModel[];
+  // Effective upstream cap of the playground's current api key choice;
+  // `null` means unrestricted. Drives the alias-reachable-count badge.
+  // Omitted by callers that have no cap to apply.
+  cap?: readonly string[] | null;
 }>();
 
 defineEmits<{ clear: [] }>();
@@ -27,6 +38,21 @@ const aliasOfLabel = computed<string | null>(() => {
   return `alias of: ${ids.slice(0, 3).join(', ')} +${ids.length - 3} more`;
 });
 
+// For an alias row in the playground: how many configured targets
+// resolve to a real model the caller can route to under the current
+// effective cap. Only renders when the consumer supplied `catalog`
+// and `cap` props — the Models page tile (no cap context) hides this
+// badge entirely. `cap === null` means "no restriction" and the badge
+// renders as N/N. Targets pointing at ids the catalog cannot resolve
+// (typo, removed model) count as unreachable, mirroring the data-plane
+// resolver.
+const reachableTargetSummary = computed<string | null>(() => {
+  const a = props.model.aliasedFrom;
+  if (!a || props.catalog === undefined) return null;
+  const reachable = reachableTargets(props.model, props.catalog, props.cap ?? null);
+  return `${reachable.length} / ${a.targets.length} target${a.targets.length === 1 ? '' : 's'} reachable`;
+});
+
 // Single-target aliases render one badge per rule; multi-target aliases
 // collapse to "<field>: varies" for any field whose values disagree across
 // targets. Each badge carries an explicit `field` key so the bucket walk
@@ -78,6 +104,7 @@ const ruleBadges = computed<{ label: string }[]>(() => {
             output: {{ formatTokenLimit(model.limits.max_output_tokens) }}
           </span>
           <span v-if="aliasOfLabel" class="text-[10px] font-mono px-2 py-0.5 rounded-full border border-white/15 text-gray-400">{{ aliasOfLabel }}</span>
+          <span v-if="reachableTargetSummary" class="text-[10px] font-mono px-2 py-0.5 rounded-full border border-white/15 text-gray-400">{{ reachableTargetSummary }}</span>
           <span v-if="model.aliasedFrom" class="text-[10px] font-mono px-2 py-0.5 rounded-full border border-white/15 text-gray-400">selection: {{ model.aliasedFrom.selection }}</span>
           <span
             v-for="badge in ruleBadges"
diff --git a/apps/web/src/composables/useModels.ts b/apps/web/src/composables/useModels.ts
index 3518e0f0e..d665994c3 100644
--- a/apps/web/src/composables/useModels.ts
+++ b/apps/web/src/composables/useModels.ts
@@ -8,20 +8,15 @@ interface ModelsResponse {
   data: ControlPlaneModel[];
 }
 
-// Two stores share this core. The DEFAULT view (`useModelsStore`) backs the
-// Models page + Keys page; it stays scoped to the caller's effective
-// upstream cap, mirroring data-plane visibility — admins who self-restrict
-// see only what their own account would receive at the data plane. The
-// RAW view (`useRawModelsStore`) backs the alias editor surfaces (target
-// combobox, shadow detection, kind-mismatch warning, no-target-available
-// warning); it requests `include_unlisted=true` to surface every id the
-// resolver would accept AND `gateway_wide=true` because the editor's job
-// is to configure gateway state, not browse the admin's per-account
-// data-plane view. The server gates `gateway_wide=true` on admin —
-// non-admin sessions never reach these editor surfaces in the first
-// place (`AliasesSettingsCard` and `AliasEditDialog` mount only on
-// `requiresAdmin` pages).
-const makeStore = (params: { includeAliases: boolean; includeUnlisted?: boolean; gatewayWide?: boolean }) => {
+// Two stores share this core. The server returns gateway-wide rows for
+// admin sessions and scoped rows for non-admin sessions, so every surface
+// that mounts under `requiresAdmin` (alias edit dialog, settings card,
+// Models playground) gets the full catalog and filters client-side as
+// needed. `useModelsStore` includes synthesised alias entries (the
+// default `/api/models` view); `useRawModelsStore` drops alias merging
+// and adds `include_unlisted=true` so the alias editor's combobox sees
+// every id the data-plane resolver would accept.
+const makeStore = (params: { includeAliases: boolean; includeUnlisted?: boolean }) => {
   const models = ref<ControlPlaneModel[] | null>(null);
   const loading = ref(false);
   const error = ref<string | null>(null);
@@ -32,10 +27,9 @@ const makeStore = (params: { includeAliases: boolean; includeUnlisted?: boolean;
     const load = async () => {
       loading.value = true;
       error.value = null;
-      const query: { aliases?: 'false'; include_unlisted?: 'true'; gateway_wide?: 'true' } = {};
+      const query: { aliases?: 'false'; include_unlisted?: 'true' } = {};
       if (!params.includeAliases) query.aliases = 'false';
       if (params.includeUnlisted) query.include_unlisted = 'true';
-      if (params.gatewayWide) query.gateway_wide = 'true';
       const { data, error: err } = await callApi<ModelsResponse>(() => api.api.models.$get({ query }));
       loading.value = false;
       if (err) {
@@ -50,4 +44,4 @@ const makeStore = (params: { includeAliases: boolean; includeUnlisted?: boolean;
 };
 
 export const useModelsStore = makeStore({ includeAliases: true });
-export const useRawModelsStore = makeStore({ includeAliases: false, includeUnlisted: true, gatewayWide: true });
+export const useRawModelsStore = makeStore({ includeAliases: false, includeUnlisted: true });
diff --git a/apps/web/src/pages/dashboard/models.vue b/apps/web/src/pages/dashboard/models.vue
index ff1954efa..fbaa848a3 100644
--- a/apps/web/src/pages/dashboard/models.vue
+++ b/apps/web/src/pages/dashboard/models.vue
@@ -7,6 +7,8 @@ import type { ApiKey, ControlPlaneModel } from '../../api/types.ts';
 import ChatPanel from '../../components/models/ChatPanel.vue';
 import ModelInfoBar from '../../components/models/ModelInfoBar.vue';
 import { useModelsStore } from '../../composables/useModels.ts';
+import { useAuthStore } from '../../stores/auth.ts';
+import { effectiveUpstreamCap, isReachableUnderCap } from '../../utils/reachability.ts';
 import { Input, OverlayScrollbars } from '@floway-dev/ui';
 
 export const useModelsPageData = defineBasicLoader(async () => {
@@ -22,6 +24,7 @@ export const useModelsPageData = defineBasicLoader(async () => {
 <script setup lang="ts">
 const initialData = useModelsPageData();
 const { models, error: modelsError } = useModelsStore();
+const auth = useAuthStore();
 
 // Reactivity is intentionally dropped: the loader never refetches keys here.
 const keys = initialData.data.value.keys;
@@ -30,11 +33,32 @@ const modelsSearch = ref('');
 const chatModelId = ref<string>('');
 const chatPanelRef = useTemplateRef<InstanceType<typeof ChatPanel>>('chatPanel');
 
+// Playground requires a real per-user API key, not the admin key.
+const selectedKeyId = ref<string | null>(keys[0]?.id ?? null);
+
+const selectedKey = computed<ApiKey | null>(() => {
+  const id = selectedKeyId.value;
+  if (!id) return null;
+  return keys.find(k => k.id === id) ?? null;
+});
+
+const selectedApiKey = computed(() => selectedKey.value?.key ?? null);
+
+// Server returns gateway-wide for admin sessions, so we filter client-side
+// here by the effective cap of (selected api key, owner user). Mirrors the
+// gateway's `effectiveUpstreamIdsFromContext`: the key's whitelist wins
+// when set; otherwise the user's cap applies. Without a selected key (no
+// keys created yet) the cap collapses to the admin's own user.upstreamIds.
+const effectiveCap = computed<readonly string[] | null>(
+  () => effectiveUpstreamCap(selectedKey.value?.upstream_ids ?? null, auth.currentUser?.upstreamIds ?? null),
+);
+
 const filteredChatModels = computed(() => {
-  const list = (models.value ?? []).filter(m => m.kind === 'chat');
+  const catalog = models.value ?? [];
+  const reachable = catalog.filter(m => m.kind === 'chat' && isReachableUnderCap(m, catalog, effectiveCap.value));
   const needle = modelsSearch.value.trim().toLowerCase();
-  if (!needle) return list;
-  return list.filter(m => m.id.toLowerCase().includes(needle) || (m.display_name?.toLowerCase().includes(needle) ?? false));
+  if (!needle) return reachable;
+  return reachable.filter(m => m.id.toLowerCase().includes(needle) || (m.display_name?.toLowerCase().includes(needle) ?? false));
 });
 
 const chatModelInfo = computed<ControlPlaneModel | undefined>(
@@ -43,15 +67,6 @@ const chatModelInfo = computed<ControlPlaneModel | undefined>(
 
 if (!chatModelId.value && filteredChatModels.value[0]) chatModelId.value = filteredChatModels.value[0].id;
 
-// Playground requires a real per-user API key, not the admin key.
-const selectedKeyId = ref<string | null>(keys[0]?.id ?? null);
-
-const selectedApiKey = computed(() => {
-  const id = selectedKeyId.value;
-  if (!id) return null;
-  return keys.find(k => k.id === id)!.key;
-});
-
 const banner = computed(() => modelsError.value ?? initialData.data.value.keysError);
 </script>
 
@@ -113,7 +128,7 @@ const banner = computed(() => modelsError.value ?? initialData.data.value.keysEr
 
       <div class="flex-1 flex flex-col min-w-0 min-h-0">
         <template v-if="chatModelInfo">
-          <ModelInfoBar :model="chatModelInfo" @clear="chatPanelRef?.clear()" />
+          <ModelInfoBar :model="chatModelInfo" :catalog="models ?? []" :cap="effectiveCap" @clear="chatPanelRef?.clear()" />
           <ChatPanel v-if="selectedApiKey" ref="chatPanel" :model-id="chatModelInfo.id" :api-key="selectedApiKey" />
           <div v-else class="flex-1 flex items-center justify-center px-6 text-center text-gray-600 text-sm">
             Create an API key in the Keys tab to chat with models.
diff --git a/apps/web/src/utils/reachability.ts b/apps/web/src/utils/reachability.ts
new file mode 100644
index 000000000..17924418d
--- /dev/null
+++ b/apps/web/src/utils/reachability.ts
@@ -0,0 +1,67 @@
+// Frontend reachability check: given a `ControlPlaneModel` row and the
+// catalog it lives in, decide whether the caller can route to it under an
+// upstream-id cap. The server always returns the gateway-wide catalog for
+// admin sessions (and the caller-scoped catalog for non-admin); the
+// dashboard's Models page + playground then filter client-side by the
+// effective cap of the selected api key (which itself inherits from the
+// owner user's `upstreamIds` when the key has no whitelist of its own).
+//
+// `cap === null` means "no restriction" (every upstream is reachable).
+// `cap` is a freshly-resolved array of upstream ids the caller is allowed
+// to route to right now.
+
+import type { ControlPlaneModel } from '../api/types.ts';
+
+// Resolve the effective per-request cap. Mirrors the gateway's
+// `effectiveUpstreamIdsFromContext`: the api key's `upstream_ids` win when
+// set; otherwise the cap falls back to the owner user's `upstreamIds`.
+// Both null means unrestricted.
+export const effectiveUpstreamCap = (
+  keyUpstreamIds: readonly string[] | null,
+  userUpstreamIds: readonly string[] | null,
+): readonly string[] | null => keyUpstreamIds ?? userUpstreamIds;
+
+// True when any of the model's upstream bindings is in the cap (or the
+// cap is unrestricted). Not meaningful for an alias row: its bindings
+// list is empty, so any restricted cap yields false — alias
+// reachability runs through its targets instead.
+const realModelReachable = (
+  model: ControlPlaneModel,
+  cap: readonly string[] | null,
+): boolean => {
+  if (cap === null) return true;
+  return model.upstreams.some(binding => cap.includes(binding.id));
+};
+
+// Returns the catalog rows of the alias's configured targets whose
+// resolved real model is reachable under the cap. A target whose
+// `target_model_id` does not appear in the catalog at all (e.g. operator
+// typo, model removed) is treated as unreachable. Addressable-but-not-
+// listed entries (Copilot variant ids, prefix alternates) carry their
+// canonical real model's `upstreams`, so they count as reachable through
+// the same predicate.
+export const reachableTargets = (
+  alias: ControlPlaneModel,
+  catalog: readonly ControlPlaneModel[],
+  cap: readonly string[] | null,
+): readonly ControlPlaneModel[] => {
+  if (alias.aliasedFrom === undefined) return [];
+  const out: ControlPlaneModel[] = [];
+  for (const t of alias.aliasedFrom.targets) {
+    const target = catalog.find(row => row.id === t.target_model_id && row.aliasedFrom === undefined);
+    if (target && realModelReachable(target, cap)) out.push(target);
+  }
+  return out;
+};
+
+// True for a real model with at least one in-cap binding; true for an
+// alias with at least one reachable target. Hides a row from a listing
+// when the caller would 404 on it.
+export const isReachableUnderCap = (
+  model: ControlPlaneModel,
+  catalog: readonly ControlPlaneModel[],
+  cap: readonly string[] | null,
+): boolean => {
+  if (model.aliasedFrom === undefined) return realModelReachable(model, cap);
+  return reachableTargets(model, catalog, cap).length > 0;
+};
diff --git a/apps/web/src/utils/reachability_test.ts b/apps/web/src/utils/reachability_test.ts
new file mode 100644
index 000000000..bf1bac2a4
--- /dev/null
+++ b/apps/web/src/utils/reachability_test.ts
@@ -0,0 +1,104 @@
+import { describe, expect, it } from 'vitest';
+
+import { effectiveUpstreamCap, isReachableUnderCap, reachableTargets } from './reachability.ts';
+import { buildAliasModel, buildRealModel, buildUnlistedModel } from '../api/test-fixtures.ts';
+import type { ControlPlaneModel } from '../api/types.ts';
+
+const realWithUpstreams = (id: string, upstreams: { id: string }[]): ControlPlaneModel => buildRealModel({
+  id,
+  upstreams: upstreams.map(u => ({ id: u.id, name: u.id.toUpperCase(), kind: 'custom' })),
+});
+
+const aliasWithTargets = (id: string, targetIds: string[]): ControlPlaneModel => buildAliasModel({
+  id,
+  aliasedFrom: {
+    name: id, kind: 'chat', selection: 'first-available',
+    targets: targetIds.map(tid => ({ target_model_id: tid, rules: {} })),
+  },
+});
+
+describe('effectiveUpstreamCap', () => {
+  it('prefers the api key cap when set', () => {
+    expect(effectiveUpstreamCap(['up_a'], ['up_a', 'up_b'])).toEqual(['up_a']);
+  });
+
+  it('falls back to the user cap when the key has no whitelist', () => {
+    expect(effectiveUpstreamCap(null, ['up_a', 'up_b'])).toEqual(['up_a', 'up_b']);
+  });
+
+  it('returns null (unrestricted) when both are null', () => {
+    expect(effectiveUpstreamCap(null, null)).toBeNull();
+  });
+});
+
+describe('isReachableUnderCap — real models', () => {
+  const a = realWithUpstreams('a', [{ id: 'up_1' }]);
+
+  it('returns true when the cap is null', () => {
+    expect(isReachableUnderCap(a, [a], null)).toBe(true);
+  });
+
+  it('returns true when any binding is in the cap', () => {
+    expect(isReachableUnderCap(a, [a], ['up_1', 'up_2'])).toBe(true);
+  });
+
+  it('returns false when no binding is in the cap', () => {
+    expect(isReachableUnderCap(a, [a], ['up_2'])).toBe(false);
+  });
+});
+
+describe('isReachableUnderCap — aliases', () => {
+  it('returns true when at least one target is reachable', () => {
+    const target = realWithUpstreams('gpt-5', [{ id: 'up_1' }]);
+    const otherTarget = realWithUpstreams('claude', [{ id: 'up_2' }]);
+    const alias = aliasWithTargets('smart', ['gpt-5', 'claude']);
+    const catalog = [target, otherTarget, alias];
+    expect(isReachableUnderCap(alias, catalog, ['up_1'])).toBe(true);
+  });
+
+  it('returns false when every target is out of cap', () => {
+    const target = realWithUpstreams('gpt-5', [{ id: 'up_1' }]);
+    const alias = aliasWithTargets('smart', ['gpt-5']);
+    expect(isReachableUnderCap(alias, [target, alias], ['up_2'])).toBe(false);
+  });
+
+  it('returns false when the alias has no targets at all', () => {
+    const alias = aliasWithTargets('orphan', []);
+    expect(isReachableUnderCap(alias, [alias], ['up_1'])).toBe(false);
+  });
+
+  it('drops a target whose id resolves to no real-model row in the catalog', () => {
+    // Operator typo or removed model — the resolver would 404 at request
+    // time, and the frontend filter treats the target the same way.
+    const alias = aliasWithTargets('smart', ['missing']);
+    expect(isReachableUnderCap(alias, [alias], null)).toBe(false);
+  });
+});
+
+describe('reachableTargets', () => {
+  it('returns every target whose real-model row is in cap', () => {
+    const gpt = realWithUpstreams('gpt-5', [{ id: 'up_1' }]);
+    const claude = realWithUpstreams('claude', [{ id: 'up_2' }]);
+    const alias = aliasWithTargets('smart', ['gpt-5', 'claude']);
+    const reachable = reachableTargets(alias, [gpt, claude, alias], ['up_1']);
+    expect(reachable.map(m => m.id)).toEqual(['gpt-5']);
+  });
+
+  it('matches addressable-but-not-listed entries against the real-model surface', () => {
+    // `buildUnlistedModel` carries `aliasedFrom === undefined` (it represents
+    // a real model's variant id), so the alias's target_model_id pointing
+    // at the variant id still matches the real-model lookup.
+    const opus = buildUnlistedModel({
+      id: 'claude-opus-4.7',
+      upstreams: [{ id: 'up_1', name: 'UP1', kind: 'copilot' }],
+    });
+    const alias = aliasWithTargets('opus-fast', ['claude-opus-4.7']);
+    const reachable = reachableTargets(alias, [opus, alias], ['up_1']);
+    expect(reachable.map(m => m.id)).toEqual(['claude-opus-4.7']);
+  });
+
+  it('returns empty for a non-alias row', () => {
+    const a = realWithUpstreams('a', [{ id: 'up_1' }]);
+    expect(reachableTargets(a, [a], null)).toEqual([]);
+  });
+});
diff --git a/packages/gateway/src/control-plane/models/routes.ts b/packages/gateway/src/control-plane/models/routes.ts
index 8b6896e9d..0eff21f1c 100644
--- a/packages/gateway/src/control-plane/models/routes.ts
+++ b/packages/gateway/src/control-plane/models/routes.ts
@@ -51,18 +51,15 @@ export const controlPlaneModels = async (c: Context) => {
   try {
     const includeAliases = c.req.query('aliases') !== 'false';
     const includeUnlisted = c.req.query('include_unlisted') === 'true';
-    const gatewayWide = c.req.query('gateway_wide') === 'true';
-    // `gateway_wide=true` lets the alias / upstream edit surfaces see every
-    // model on the gateway regardless of the caller's effective upstream
-    // cap — admin's editor surfaces configure gateway state, not their
-    // per-account data-plane view. Non-admin sessions get a 403 because the
-    // bypass exposes models from upstreams they have no data-plane access
-    // to. Default behavior stays scoped (Models page + Playground respect
-    // self-restriction the same way the data plane does).
-    if (gatewayWide && !userFromContext(c).isAdmin) {
-      return c.json({ error: 'Admin privileges required for gateway_wide=true' }, 403);
-    }
-    const upstreamScope = gatewayWide ? null : effectiveUpstreamIdsFromContext(c);
+    // Admin sessions see the entire gateway: editor surfaces (alias edit,
+    // upstream edit) need to configure models on upstreams the admin may
+    // have self-restricted out of their own data-plane access, and the
+    // dashboard filters the result client-side for surfaces that should
+    // respect the restriction (Models page, playground). Non-admin
+    // sessions stay scoped to their effective upstream cap so the
+    // dashboard cannot leak models from upstreams their account has no
+    // data-plane access to.
+    const upstreamScope = userFromContext(c).isAdmin ? null : effectiveUpstreamIdsFromContext(c);
     const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
     const [addressable, aliases] = await Promise.all([
       enumerateAddressableModelIds(
diff --git a/packages/gateway/src/control-plane/models/routes_test.ts b/packages/gateway/src/control-plane/models/routes_test.ts
index 0c94b659b..3e6c87bb0 100644
--- a/packages/gateway/src/control-plane/models/routes_test.ts
+++ b/packages/gateway/src/control-plane/models/routes_test.ts
@@ -222,16 +222,17 @@ test('/api/models?include_unlisted=true: alias whose name collides with an unlis
   );
 });
 
-test('/api/models?gateway_wide=true bypasses the caller\'s upstream cap for admin sessions', async () => {
+test('/api/models for an admin session returns the gateway-wide catalog, bypassing the admin\'s own user.upstreamIds cap', async () => {
   const { adminSession, repo } = await setupAppTest();
   await repo.upstreams.save(buildCustomUpstreamRecord({ id: 'up_custom_models', sortOrder: 100 }));
   await repo.upstreams.save(azureUpstream());
 
-  // Admin self-restricts to a subset of upstreams. The Models page and
-  // playground must respect that cap (default scoped behavior); the alias
-  // edit / upstream edit surfaces opt into gateway-wide with
-  // ?gateway_wide=true so they see "what exists" not "what this account
-  // can reach".
+  // Admin self-restricts. The dashboard's editor surfaces (alias edit,
+  // upstream edit) need to see "what exists on the entire gateway", and
+  // the Models page + playground filter the gateway-wide payload
+  // client-side for surfaces that should respect the restriction.
+  // Server-side gateway-wide for admin is the foundation that lets the
+  // dashboard do that filtering.
   await repo.users.save({
     id: 1,
     username: 'admin',
@@ -244,41 +245,10 @@ test('/api/models?gateway_wide=true bypasses the caller\'s upstream cap for admi
   });
 
   await withMockedFetch(modelsFetchHandler, async () => {
-    // Default: scoped to admin's self-restriction.
-    const scoped = await requestApp('/api/models', { headers: { 'x-floway-session': adminSession } });
-    assertEquals(scoped.status, 200);
-    const scopedIds = ((await scoped.json()) as { data: Array<{ id: string }> }).data.map(m => m.id).sort();
-    assertEquals(scopedIds.includes('azure-public'), false);
-
-    // gateway_wide=true bypasses the cap.
-    const wide = await requestApp('/api/models?gateway_wide=true', { headers: { 'x-floway-session': adminSession } });
-    assertEquals(wide.status, 200);
-    const wideIds = ((await wide.json()) as { data: Array<{ id: string }> }).data.map(m => m.id).sort();
-    assertEquals(wideIds.includes('azure-public'), true);
-    assertEquals(wideIds.includes('custom-model'), true);
-  });
-});
-
-test('/api/models?gateway_wide=true rejects non-admin sessions with 403', async () => {
-  // Non-admin sessions can read the scoped /api/models for the Models page,
-  // but the gateway_wide bypass would leak models from upstreams they have
-  // no data-plane access to.
-  const { repo } = await setupAppTest();
-  await repo.upstreams.save(buildCustomUpstreamRecord({ id: 'up_custom_models', sortOrder: 100 }));
-  await repo.users.save({
-    id: 2,
-    username: 'tester',
-    passwordHash: null,
-    isAdmin: false,
-    upstreamIds: null,
-    canViewGlobalTelemetry: false,
-    createdAt: '2026-03-15T00:00:00.000Z',
-    deletedAt: null,
-  });
-  const session = (await repo.sessions.create(2)).id;
-
-  await withMockedFetch(modelsFetchHandler, async () => {
-    const response = await requestApp('/api/models?gateway_wide=true', { headers: { 'x-floway-session': session } });
-    assertEquals(response.status, 403);
+    const response = await requestApp('/api/models', { headers: { 'x-floway-session': adminSession } });
+    assertEquals(response.status, 200);
+    const ids = ((await response.json()) as { data: Array<{ id: string }> }).data.map(m => m.id).sort();
+    assertEquals(ids.includes('azure-public'), true);
+    assertEquals(ids.includes('custom-model'), true);
   });
 });
diff --git a/packages/gateway/src/control-plane/schemas.ts b/packages/gateway/src/control-plane/schemas.ts
index 1ac5e3d3c..ae8c43a93 100644
--- a/packages/gateway/src/control-plane/schemas.ts
+++ b/packages/gateway/src/control-plane/schemas.ts
@@ -730,15 +730,10 @@ export const tokenUsageQuery = z.object(usageBaseQuery);
 // raw real-model set. `include_unlisted=true` extends the payload with the
 // addressable-but-not-listed surface (prefix-form alternates, Copilot
 // variant ids, provider-side redirects), so the alias dialog combobox sees
-// every id the data-plane resolver would accept. `gateway_wide=true`
-// bypasses the caller's effective upstream cap and returns models for
-// every upstream on the gateway — admin-only; used by alias / upstream
-// edit surfaces that need to see "what exists" rather than "what this
-// account can reach".
+// every id the data-plane resolver would accept.
 export const modelsQuery = z.object({
   aliases: z.enum(['true', 'false']).optional(),
   include_unlisted: z.enum(['true', 'false']).optional(),
-  gateway_wide: z.enum(['true', 'false']).optional(),
 });
 
 export const searchUsageQuery = z.object({

From a11baa2eceeb0c4bf27828a623924f53e1f7e15b Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 05:14:41 +0800
Subject: [PATCH 163/170] docs(aliases/web): correct stale gateway_wide=true
 reference in no-target check
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The store query was simplified — `gateway_wide=true` was removed in
favor of admin-implicit gateway-wide behavior — but this comment
still cited the old URL shape. Update the comment to describe the
actual server contract.
---
 apps/web/src/components/alias-edit/warnings.ts | 11 ++++++-----
 1 file changed, 6 insertions(+), 5 deletions(-)

diff --git a/apps/web/src/components/alias-edit/warnings.ts b/apps/web/src/components/alias-edit/warnings.ts
index 5086e4ff7..e2de491ae 100644
--- a/apps/web/src/components/alias-edit/warnings.ts
+++ b/apps/web/src/components/alias-edit/warnings.ts
@@ -153,11 +153,12 @@ const computeNoTargetWarning = (
   alias: AliasView,
   models: readonly ControlPlaneModel[] | null | undefined,
 ): AliasNoTargetWarning | null => {
-  // The store reads `/api/models?gateway_wide=true&include_unlisted=true`,
-  // so this `models` array represents every id the data-plane resolver
-  // would accept on the entire gateway — not the admin's per-account
-  // view. Loading state — models is null — should not fire the warning,
-  // or the dashboard flashes a yellow icon on every alias during startup.
+  // `useRawModelsStore` fetches with `aliases=false&include_unlisted=true`;
+  // the server returns the gateway-wide surface to admin sessions, so this
+  // `models` array represents every id the data-plane resolver would accept
+  // on the entire gateway — not the admin's per-account view. Loading
+  // state — models is null — should not fire the warning, or the dashboard
+  // flashes a yellow icon on every alias during startup.
   if (models === null || models === undefined) return null;
   const addressableIds = new Set(models.filter(m => m.aliasedFrom === undefined).map(m => m.id));
   const reachable = alias.targets.some(t => addressableIds.has(t.target_model_id));

From 0725c31de5e0d9c656a7b5248872831dd4acb49d Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 05:37:05 +0800
Subject: [PATCH 164/170] test(chat/shared): drop unused alias field from
 TraceLine
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The optional alias field on TraceLine was never set by any call site —
SanitizeTraceCtx.emit only ever produces { field, targetProtocol }, so
the slot was unreachable.
---
 packages/gateway/src/data-plane/chat/shared/sanitize_test.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
index 1f55ef3e0..6a0bee4f9 100644
--- a/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
+++ b/packages/gateway/src/data-plane/chat/shared/sanitize_test.ts
@@ -9,7 +9,7 @@ import {
 } from './sanitize.ts';
 import { assertEquals } from '@floway-dev/test-utils';
 
-type TraceLine = { alias?: string; field: string; targetProtocol: string };
+type TraceLine = { field: string; targetProtocol: string };
 
 const makeTrace = (): { ctx: SanitizeTraceCtx; lines: TraceLine[] } => {
   const lines: TraceLine[] = [];

From e1fe43eab358be3cfd36e82bcd79c97b87302511 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 05:37:10 +0800
Subject: [PATCH 165/170] test(models): drop stale comment about endpoints
 field

The remaining assertEquals lines speak for themselves; the inline note
about `endpoints` being surfaced elsewhere was a leftover from an
earlier patching pass.
---
 packages/gateway/src/data-plane/models/serve_test.ts | 3 ---
 1 file changed, 3 deletions(-)

diff --git a/packages/gateway/src/data-plane/models/serve_test.ts b/packages/gateway/src/data-plane/models/serve_test.ts
index 1b51d5cc5..ac05ca29f 100644
--- a/packages/gateway/src/data-plane/models/serve_test.ts
+++ b/packages/gateway/src/data-plane/models/serve_test.ts
@@ -119,9 +119,6 @@ test('/v1/models returns merged model list from Copilot and custom upstreams', a
         assertEquals(model.providerKind, undefined);
         assertEquals(model.providers, undefined);
         assertEquals(model.providerData, undefined);
-        // `endpoints` IS surfaced — see assertion further below for the
-        // exact shape (the chat-three-API atom + optional /completions
-        // + embeddings / images keys).
         assertEquals(model.upstream, undefined);
         assertEquals(model.upstreamModel, undefined);
         // Copilot-only raw fields never reach the public DTO.

From 5c549f20e7f9b11f899a8b572264ee29efa7afc4 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 05:48:53 +0800
Subject: [PATCH 166/170] cleanup(aliases/web): drop fake-robust limits
 fallback in announced metadata

PublicModel.limits is a required field; the gateway's mirror at
data-plane/models/alias-listing.ts passes real.limits directly. The
?? {} on the frontend masked what would otherwise be a type-impossible
state, papering over a backend protocol violation instead of surfacing it.
---
 apps/web/src/components/alias-edit/announced-metadata.ts | 2 +-
 1 file changed, 1 insertion(+), 1 deletion(-)

diff --git a/apps/web/src/components/alias-edit/announced-metadata.ts b/apps/web/src/components/alias-edit/announced-metadata.ts
index be59c0791..953314ae1 100644
--- a/apps/web/src/components/alias-edit/announced-metadata.ts
+++ b/apps/web/src/components/alias-edit/announced-metadata.ts
@@ -127,7 +127,7 @@ export const computeAnnouncedMetadata = (
 
   if (available.length === 0) return {};
 
-  const limitsList = available.map(({ real }) => real.limits ?? {});
+  const limitsList = available.map(({ real }) => real.limits);
   const limits = intersectLimits(limitsList);
 
   const effectiveChats = available

From 1a327eae709f68f2b3dcc0a41cd7a121ecaec708 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 12:55:37 +0800
Subject: [PATCH 167/170] fix(aliases): split metadata (gateway-wide) from
 aliasedFrom visibility (caller-scoped)
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The synthesizer used to project both the alias's metadata (limits,
chat, endpoints, cost) AND its `aliasedFrom.targets` against the
caller's addressable surface, so a non-admin / data-plane caller saw:
- (a) a different limit/endpoint set than the admin saw, computed from
  whichever subset of targets sat inside their cap — the same alias
  looked different depending on who asked, and a tighter cap could
  paradoxically widen the published window
- (b) the operator's full configured target list — including target
  ids on upstreams they had no data-plane access to AND typo'd /
  removed model ids — which is operator state, not their business

Split the two axes:

- Metadata (limits, chat, endpoints, cost) is now computed against
  the GATEWAY-WIDE addressable surface. Every caller — admin session,
  non-admin session, data-plane api key — reads the same numbers for
  the same alias. Safe-lower-bound holds across the entire gateway,
  not the caller's subset.
- `aliasedFrom.targets` is per-caller. When `narrowTargets=true`
  (every data-plane call + non-admin control-plane call) only targets
  the caller can actually reach appear. When `narrowTargets=false`
  (admin control-plane only) the raw configured list survives so the
  alias-edit dialog can render typos and out-of-cap targets for
  fixing.
- Alias visibility (whether the row appears in the response at all)
  stays caller-scoped: at least one target must be reachable under
  the caller's cap.

Threaded through `/v1/models`, `/v1beta/models`, the codex 1p catalog,
and `/api/models`. Callers whose scope may be narrower than the
gateway now fetch BOTH the caller-scoped and the gateway-wide
addressable surface; admin sessions on `/api/models` and unrestricted
(`upstreamIds === null`) codex callers skip the second call because
their caller scope is already gateway-wide. The SWR cache shares the
per-upstream catalog fetches.

Tests cover the split (synthesizer-level + control-plane integration:
admin sees raw config with typos, non-admin sees narrowed projection,
both read identical limits for an alias whose targets disagree on
limits across upstreams).
---
 .../src/control-plane/models/routes.ts        |  31 ++--
 .../src/control-plane/models/routes_test.ts   | 136 +++++++++++++-
 .../gateway/src/data-plane/codex/models.ts    |  56 +++---
 .../src/data-plane/models/alias-listing.ts    | 167 ++++++++++++------
 .../data-plane/models/alias-listing_test.ts   | 141 ++++++++++++---
 .../gateway/src/data-plane/models/gemini.ts   |  12 +-
 .../gateway/src/data-plane/models/load.ts     |  20 ++-
 7 files changed, 442 insertions(+), 121 deletions(-)

diff --git a/packages/gateway/src/control-plane/models/routes.ts b/packages/gateway/src/control-plane/models/routes.ts
index 0eff21f1c..bdce45a48 100644
--- a/packages/gateway/src/control-plane/models/routes.ts
+++ b/packages/gateway/src/control-plane/models/routes.ts
@@ -59,22 +59,33 @@ export const controlPlaneModels = async (c: Context) => {
     // sessions stay scoped to their effective upstream cap so the
     // dashboard cannot leak models from upstreams their account has no
     // data-plane access to.
-    const upstreamScope = userFromContext(c).isAdmin ? null : effectiveUpstreamIdsFromContext(c);
+    const isAdmin = userFromContext(c).isAdmin;
+    const upstreamScope = isAdmin ? null : effectiveUpstreamIdsFromContext(c);
     const fetcherForUpstream = await createPerRequestFetcher(getCurrentColo(c.req.raw));
-    const [addressable, aliases] = await Promise.all([
-      enumerateAddressableModelIds(
-        upstreamScope,
-        fetcherForUpstream,
-        backgroundSchedulerFromContext(c),
-      ),
+    // Two addressable surfaces: caller-scoped (drives visibility +
+    // `aliasedFrom.targets` narrowing for non-admin) and gateway-wide
+    // (drives the alias's metadata + endpoints + cost — every caller
+    // sees the same numbers for the same alias). For admin the two are
+    // the same, so skip the second fetch.
+    const [callerAddressable, gatewayAddressable, aliases] = await Promise.all([
+      enumerateAddressableModelIds(upstreamScope, fetcherForUpstream, backgroundSchedulerFromContext(c)),
+      isAdmin
+        ? Promise.resolve(null)
+        : enumerateAddressableModelIds(null, fetcherForUpstream, backgroundSchedulerFromContext(c)),
       includeAliases ? getRepo().modelAliases.list() : Promise.resolve([]),
     ]);
-    const realModels = listedRealModels(addressable);
+    const gatewayAddressableModelIds = gatewayAddressable ?? callerAddressable;
+    const realModels = listedRealModels(callerAddressable);
     const listedRows = includeAliases
       ? mergeAliasesIntoModels({
           realModels,
-          addressableModelIds: addressable,
+          gatewayAddressableModelIds,
+          callerAddressableModelIds: callerAddressable,
           aliases,
+          // Admin sees raw configured targets (including typos / out-of-
+          // cap models) so the alias-edit dialog can render the full
+          // configuration; non-admin sessions get the narrowed projection.
+          narrowTargets: !isAdmin,
           mapReal: toControlPlaneModel,
           wrapAlias: entry => ({ ...entry, upstreams: [] }),
         })
@@ -86,7 +97,7 @@ export const controlPlaneModels = async (c: Context) => {
     // of collision; the dashboard must agree.
     const listedIds = new Set(listedRows.map(row => row.id));
     const unlistedRows = includeUnlisted
-      ? addressable
+      ? callerAddressable
           .filter(entry => entry.unlisted === true && !listedIds.has(entry.id))
           .map(entry => toUnlistedControlPlaneModel(entry.id, entry.model))
       : [];
diff --git a/packages/gateway/src/control-plane/models/routes_test.ts b/packages/gateway/src/control-plane/models/routes_test.ts
index 3e6c87bb0..10349e327 100644
--- a/packages/gateway/src/control-plane/models/routes_test.ts
+++ b/packages/gateway/src/control-plane/models/routes_test.ts
@@ -2,7 +2,7 @@ import { test } from 'vitest';
 
 import { buildCustomUpstreamRecord, copilotModels, requestApp, setupAppTest } from '../../test-helpers.ts';
 import type { UpstreamRecord } from '@floway-dev/provider';
-import { assertEquals, jsonResponse, withMockedFetch } from '@floway-dev/test-utils';
+import { assert, assertEquals, jsonResponse, withMockedFetch } from '@floway-dev/test-utils';
 
 const azureUpstream = (): UpstreamRecord => ({
   id: 'up_azure_models',
@@ -252,3 +252,137 @@ test('/api/models for an admin session returns the gateway-wide catalog, bypassi
     assertEquals(ids.includes('custom-model'), true);
   });
 });
+
+test('/api/models — admin sees raw alias.targets; non-admin sees the caller-narrowed projection', async () => {
+  // Wire an alias with a typo target + one real target. Admin must see
+  // the typo (so the alias-edit dialog can render it for fixing); a
+  // non-admin who can reach the real target must see only that target
+  // — never the typo, never out-of-cap target ids.
+  const { adminSession, repo } = await setupAppTest();
+  await repo.upstreams.save(buildCustomUpstreamRecord({ id: 'up_custom_models', sortOrder: 100 }));
+  await repo.modelAliases.insert({
+    name: 'mix',
+    kind: 'chat',
+    selection: 'first-available',
+    displayName: null,
+    visibleInModelsList: true,
+    targets: [
+      { target_model_id: 'custom-model', rules: {} },
+      { target_model_id: 'typo-no-such-model', rules: {} },
+    ],
+    announcedMetadata: null,
+    sortOrder: 0,
+    createdAt: '2026-01-01T00:00:00.000Z',
+    updatedAt: '2026-01-01T00:00:00.000Z',
+  });
+
+  // Non-admin user with access to the same upstream.
+  await repo.users.save({
+    id: 2,
+    username: 'tester',
+    passwordHash: null,
+    isAdmin: false,
+    upstreamIds: null,
+    canViewGlobalTelemetry: false,
+    createdAt: '2026-03-15T00:00:00.000Z',
+    deletedAt: null,
+  });
+  const nonAdminSession = (await repo.sessions.create(2)).id;
+
+  await withMockedFetch(modelsFetchHandler, async () => {
+    const adminResponse = await requestApp('/api/models', { headers: { 'x-floway-session': adminSession } });
+    assertEquals(adminResponse.status, 200);
+    const adminBody = (await adminResponse.json()) as { data: Array<{ id: string; aliasedFrom?: { targets: Array<{ target_model_id: string }> } }> };
+    const adminMix = adminBody.data.find(m => m.id === 'mix');
+    assert(adminMix !== undefined);
+    assertEquals(
+      adminMix!.aliasedFrom?.targets.map(t => t.target_model_id),
+      ['custom-model', 'typo-no-such-model'],
+    );
+
+    const nonAdminResponse = await requestApp('/api/models', { headers: { 'x-floway-session': nonAdminSession } });
+    assertEquals(nonAdminResponse.status, 200);
+    const nonAdminBody = (await nonAdminResponse.json()) as { data: Array<{ id: string; aliasedFrom?: { targets: Array<{ target_model_id: string }> } }> };
+    const nonAdminMix = nonAdminBody.data.find(m => m.id === 'mix');
+    assert(nonAdminMix !== undefined);
+    // Typo `typo-no-such-model` is hidden; only the reachable target is exposed.
+    assertEquals(
+      nonAdminMix!.aliasedFrom?.targets.map(t => t.target_model_id),
+      ['custom-model'],
+    );
+  });
+});
+
+test('/api/models — admin self-restriction does NOT leak per-alias metadata variation; non-admin and admin see identical limits/endpoints for the same alias', async () => {
+  // Two upstreams advertising the same alias's targets, but with
+  // different windows. Admin sees gateway-wide (limit = min over all).
+  // Non-admin restricted to the larger-window upstream should still see
+  // the same lower limit — metadata is a stable property of the alias,
+  // not a per-caller derivation.
+  const { adminSession, repo } = await setupAppTest();
+  await repo.upstreams.deleteAll();
+  await repo.upstreams.save(buildCustomUpstreamRecord({
+    id: 'up_small',
+    name: 'Small',
+    sortOrder: 100,
+    config: {
+      baseUrl: 'https://small.example.com',
+      authStyle: 'bearer',
+      apiKey: 'sk-small',
+      endpoints: { chatCompletions: {} },
+      models: [{ upstreamModelId: 'shared', publicModelId: 'shared', kind: 'chat', endpoints: { chatCompletions: {} }, limits: { max_context_window_tokens: 100_000 } }],
+      modelsFetch: { enabled: false },
+    },
+  }));
+  await repo.upstreams.save(buildCustomUpstreamRecord({
+    id: 'up_big',
+    name: 'Big',
+    sortOrder: 200,
+    config: {
+      baseUrl: 'https://big.example.com',
+      authStyle: 'bearer',
+      apiKey: 'sk-big',
+      endpoints: { chatCompletions: {} },
+      models: [{ upstreamModelId: 'shared', publicModelId: 'shared', kind: 'chat', endpoints: { chatCompletions: {} }, limits: { max_context_window_tokens: 200_000 } }],
+      modelsFetch: { enabled: false },
+    },
+  }));
+  await repo.modelAliases.insert({
+    name: 'shared-alias',
+    kind: 'chat',
+    selection: 'first-available',
+    displayName: null,
+    visibleInModelsList: true,
+    targets: [{ target_model_id: 'shared', rules: {} }],
+    announcedMetadata: null,
+    sortOrder: 0,
+    createdAt: '2026-01-01T00:00:00.000Z',
+    updatedAt: '2026-01-01T00:00:00.000Z',
+  });
+
+  // Non-admin user scoped to ONLY the big-window upstream.
+  await repo.users.save({
+    id: 2, username: 'tester', passwordHash: null, isAdmin: false,
+    upstreamIds: ['up_big'], canViewGlobalTelemetry: false,
+    createdAt: '2026-03-15T00:00:00.000Z', deletedAt: null,
+  });
+  const nonAdminSession = (await repo.sessions.create(2)).id;
+
+  await withMockedFetch(() => { throw new Error('unexpected outbound fetch'); }, async () => {
+    const [adminRes, nonAdminRes] = await Promise.all([
+      requestApp('/api/models', { headers: { 'x-floway-session': adminSession } }),
+      requestApp('/api/models', { headers: { 'x-floway-session': nonAdminSession } }),
+    ]);
+    const adminBody = (await adminRes.json()) as { data: Array<{ id: string; limits?: { max_context_window_tokens?: number } }> };
+    const nonAdminBody = (await nonAdminRes.json()) as { data: Array<{ id: string; limits?: { max_context_window_tokens?: number } }> };
+    const adminAlias = adminBody.data.find(m => m.id === 'shared-alias');
+    const nonAdminAlias = nonAdminBody.data.find(m => m.id === 'shared-alias');
+    assert(adminAlias !== undefined);
+    assert(nonAdminAlias !== undefined);
+    // Both callers see the safe-lower-bound window — even though the
+    // non-admin's resolver would only ever pick the big-window
+    // upstream's binding.
+    assertEquals(adminAlias!.limits?.max_context_window_tokens, 100_000);
+    assertEquals(nonAdminAlias!.limits?.max_context_window_tokens, 100_000);
+  });
+});
diff --git a/packages/gateway/src/data-plane/codex/models.ts b/packages/gateway/src/data-plane/codex/models.ts
index 499072b47..d9bb42d46 100644
--- a/packages/gateway/src/data-plane/codex/models.ts
+++ b/packages/gateway/src/data-plane/codex/models.ts
@@ -66,41 +66,57 @@ const computeCatalog = async (
   fetcherForUpstream: (upstreamId: string) => Fetcher,
   scheduler: BackgroundScheduler,
 ): Promise<CodexCatalog> => {
-  const [catalog, addressable, aliases] = await Promise.all([
+  const [catalog, callerAddressable, gatewayAddressable, aliases] = await Promise.all([
     resolveCodexCatalog(userAgent),
     enumerateAddressableModelIds(upstreamIds, fetcherForUpstream, scheduler),
+    upstreamIds === null
+      ? Promise.resolve(null)
+      : enumerateAddressableModelIds(null, fetcherForUpstream, scheduler),
     getRepo().modelAliases.list(),
   ]);
-  const realModels = listedRealModels(addressable);
-  // `registrySlugs` mirrors the listed catalog surface — the slugs codex
-  // would have seen in a regular /v1/models call. `addressableById` is the
-  // broader map the resolver actually accepts (prefix alternates, Copilot
-  // variants) — same surface the listing-side synthesizer narrowed against,
-  // so the kind-filtered lookup below stays consistent with the entries
-  // it emitted.
+  const gatewayAddressableModelIds = gatewayAddressable ?? callerAddressable;
+  const realModels = listedRealModels(callerAddressable);
+  // `registrySlugs` mirrors the caller's listed catalog — the slugs the
+  // codex client would have seen in a regular /v1/models call. The two
+  // addressable maps below feed the synthesizer's metadata-vs-visibility
+  // split: gateway-wide for the alias's context-window intersection (so
+  // every API key sees the same number), caller-scope for "does this
+  // alias appear at all".
   const registrySlugs = new Set(realModels.map(m => m.id));
-  const addressableById = new Map(addressable.map(entry => [entry.id, entry] as const));
+  const gatewayById = new Map(gatewayAddressableModelIds.map(entry => [entry.id, entry] as const));
+  const callerById = new Map(callerAddressable.map(entry => [entry.id, entry] as const));
 
   // Each alias entry survives in the codex catalog when at least one of
   // its configured targets is currently addressable AND kind-matches the
-  // alias — the same predicate `synthesizeListedAliases` already applied to
-  // emit the entry, re-derived here per-target so we can pull the min
-  // window across every routable target. Selection mode is irrelevant
-  // because the catalog must publish a single stable window; the operator
-  // override still wins when announced metadata supplies one.
+  // alias under the CALLER's cap (this matches `synthesizeListedAliases`'s
+  // visibility rule). The fallback window — used when the operator did
+  // not override `announcedMetadata` — is the min across every GATEWAY-
+  // wide routable target's window, mirroring the safe-lower-bound rule
+  // /v1/models already applies. Selection mode is irrelevant here because
+  // the catalog must publish a single stable window.
   interface AliasCatalogInfo {
     readonly routableWindowsMin: number | null;
     readonly announcedContextWindow: number | undefined;
   }
   const aliasCatalogInfo = new Map<string, AliasCatalogInfo>();
-  for (const entry of synthesizeListedAliases({ aliases, addressableModelIds: addressable })) {
+  for (const entry of synthesizeListedAliases({
+    aliases,
+    gatewayAddressableModelIds,
+    callerAddressableModelIds: callerAddressable,
+    narrowTargets: true,
+  })) {
     const aliasedFrom = entry.aliasedFrom;
     if (aliasedFrom === undefined) continue;
-    const routable = aliasedFrom.targets
-      .map(t => addressableById.get(t.target_model_id))
+    // Use the raw alias record's targets for the window scan — the
+    // entry's `aliasedFrom.targets` is the caller-narrowed projection, so
+    // its min could overstate the gateway-wide min when the caller is scoped.
+    const alias = aliases.find(a => a.name === entry.id);
+    if (alias === undefined) continue;
+    const gatewayRoutable = alias.targets
+      .map(t => gatewayById.get(t.target_model_id))
       .filter((a): a is NonNullable<typeof a> => a !== undefined && a.model.kind === entry.kind);
-    if (routable.length === 0) continue;
-    const windows = routable
+    if (gatewayRoutable.length === 0) continue;
+    const windows = gatewayRoutable
       .map(a => a.model.limits.max_context_window_tokens)
       .filter((w): w is number => w !== undefined);
     aliasCatalogInfo.set(entry.id, {
@@ -122,7 +138,7 @@ const computeCatalog = async (
   const contextWindowOf: ContextWindowResolver = slug => {
     const info = aliasCatalogInfo.get(slug);
     if (info !== undefined) return info.announcedContextWindow ?? info.routableWindowsMin;
-    return addressableById.get(slug)?.model.limits.max_context_window_tokens ?? null;
+    return callerById.get(slug)?.model.limits.max_context_window_tokens ?? null;
   };
   return applyContextWindowFromRegistry(filtered, contextWindowOf);
 };
diff --git a/packages/gateway/src/data-plane/models/alias-listing.ts b/packages/gateway/src/data-plane/models/alias-listing.ts
index 4f88a488f..b058ea29b 100644
--- a/packages/gateway/src/data-plane/models/alias-listing.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing.ts
@@ -3,14 +3,14 @@
 // carries an `aliasedFrom` block so an alias-aware UI can render the
 // alias-of relationship without a second round trip.
 //
-// `limits` and `chat` come from the alias's announced metadata payload:
-// the operator's stored override when set (with top-level sub-block
-// granularity — a present `limits` / `chat` replaces the computed
-// counterpart wholesale, not per-leaf), otherwise the rule-aware
-// intersection across the alias's available targets. The
-// intersection is the safe lower bound for the inbound request — every
-// reported capability survives no matter which target the resolver
-// picks at request time.
+// `limits`, `chat`, `endpoints`, and `cost` are computed against the
+// GATEWAY-WIDE addressable surface — every caller sees the same numbers
+// for the same alias, independent of their data-plane cap. The operator's
+// stored `announced_metadata` override still wins at sub-block
+// granularity (a present `limits` / `chat` replaces the computed
+// counterpart wholesale, not per-leaf). The intersection is the safe
+// lower bound for the inbound request — every reported capability
+// survives no matter which target the resolver picks.
 //
 // The rule-aware part: when an alias's rule pins a value at a target,
 // that target is treated as "unsupported" for the corresponding
@@ -19,6 +19,11 @@
 // dropping the sub-field from the announced metadata keeps the wire
 // surface honest about what the operator left for the caller to set.
 //
+// Caller-scope (the addressable surface this specific request can
+// reach) controls only two things: whether the alias appears in this
+// caller's response (at least one target reachable under the cap), and
+// the `aliasedFrom.targets` projection when `narrowTargets` is true.
+//
 // Collision: when an alias's `name` exactly equals a real model id, the
 // alias entry replaces the real entry in the final catalog. Two entries
 // with the same `id` would break OpenAI client deduplication; collapsing
@@ -35,12 +40,27 @@ import type { ResolvedModel } from '@floway-dev/provider';
 
 export interface ListedAliasInputs {
   readonly aliases: readonly ModelAliasRecord[];
-  // Full addressable surface — both the listed catalog rows and the
-  // addressable-but-not-listed prefix/redirect forms each provider
-  // contributes. The synthesizer maps every alias target through this
-  // surface so a target that's only reachable via a prefix alternate or
-  // Copilot variant id still counts as available.
-  readonly addressableModelIds: readonly AddressableIdEntry[];
+  // Gateway-wide addressable surface — used for the metadata + endpoints
+  // + cost computations that must be stable across callers. A target
+  // resolvable only via an upstream the current caller cannot reach
+  // STILL contributes to the safe-lower-bound intersection the catalog
+  // publishes, because the same alias must look the same to every
+  // user (admin, non-admin, api key).
+  readonly gatewayAddressableModelIds: readonly AddressableIdEntry[];
+  // Caller-scoped addressable surface. Decides (a) whether this alias
+  // is visible to the caller at all (must have at least one target
+  // reachable under the caller's cap) and (b) the `aliasedFrom.targets`
+  // projection when `narrowTargets` is true. For unrestricted callers
+  // (admin gateway-wide) pass the same array as `gatewayAddressableModelIds`.
+  readonly callerAddressableModelIds: readonly AddressableIdEntry[];
+  // True for callers whose `aliasedFrom.targets` projection must omit
+  // any configured target the addressable surface cannot serve — every
+  // data-plane response, and non-admin control-plane responses. False
+  // for admin sessions on the control plane: the alias-edit dialog
+  // needs to see every target the operator wired, including typos and
+  // targets on upstreams the admin self-restricted out of, so the
+  // configuration is editable end to end.
+  readonly narrowTargets: boolean;
 }
 
 // Result preserves the order of `arrays[0]`. Matters for callers like the
@@ -154,14 +174,24 @@ const intersectLimits = (limitsList: readonly PublicModelLimits[]): PublicModelL
   return result;
 };
 
-const buildAliasedFrom = (alias: ModelAliasRecord): PublicModelAliasedFrom => ({
-  name: alias.name,
-  kind: alias.kind,
-  selection: alias.selection,
-  // Every configured target — including ones the live catalog can not
-  // serve — so the dashboard can show the full configuration.
-  targets: alias.targets,
-});
+// `narrowTargets=true` keeps only targets whose id is in the given addressable
+// surface (id match only; kind is not re-checked) — protects non-admin /
+// data-plane callers from seeing operator state (target IDs from upstreams they
+// have no access to, plus typo'd / removed model IDs). `narrowTargets=false` is
+// the admin-debug view: every configured target survives so the dashboard's alias
+// editor can render the full configuration even when the admin self-restricted.
+const buildAliasedFrom = (
+  alias: ModelAliasRecord,
+  addressableModelIds: readonly AddressableIdEntry[],
+  narrowTargets: boolean,
+): PublicModelAliasedFrom => {
+  if (!narrowTargets) {
+    return { name: alias.name, kind: alias.kind, selection: alias.selection, targets: alias.targets };
+  }
+  const addressableSet = new Set(addressableModelIds.map(entry => entry.id));
+  const targets = alias.targets.filter(t => addressableSet.has(t.target_model_id));
+  return { name: alias.name, kind: alias.kind, selection: alias.selection, targets };
+};
 
 // Compute the rule-aware intersection (`limits` + `chat`) over the
 // alias's currently-available targets. Caller decides whether to use
@@ -197,43 +227,57 @@ const mergeWithOverride = (
   chat: override.chat ?? computed.chat,
 });
 
-// Returns null when every configured target falls outside the caller's
-// addressable surface — an alias with no reachable target has no listing
-// row, because the catalog should never advertise an id the resolver
-// would 404 on. The alias itself stays addressable through
-// `resolveAlias`, which surfaces `AliasNoTargetAvailableError` at request
-// time. Callers (`synthesizeListedAliases`) filter the nulls out.
-const synthesizeOne = (alias: ModelAliasRecord, addressableModelIds: readonly AddressableIdEntry[]): PublicModel | null => {
-  // Map every alias target through the full addressable surface, not just
-  // the listed catalog: a target reachable only via a prefix-addressable
-  // alternate or a provider-side redirect (Copilot variant id) is still
-  // available to the resolver, and the listing must agree.
-  const addressableById = new Map(addressableModelIds.map(entry => [entry.id, entry.model] as const));
-  const availableTargets = alias.targets
-    .map(target => ({ target, real: addressableById.get(target.target_model_id) }))
+// Returns null when no target serves this alias on the gateway, OR when the
+// caller cannot reach any of the configured targets — the catalog should
+// never advertise an id the caller would 404 on. The alias itself stays
+// addressable through `resolveAlias`, which surfaces
+// `AliasNoTargetAvailableError` at request time. Callers
+// (`synthesizeListedAliases`) filter the nulls out.
+const synthesizeOne = (
+  alias: ModelAliasRecord,
+  gatewayAddressableModelIds: readonly AddressableIdEntry[],
+  callerAddressableModelIds: readonly AddressableIdEntry[],
+  narrowTargets: boolean,
+): PublicModel | null => {
+  // Gateway-wide kind-matched targets — the basis for stable metadata.
+  // A target reachable only via a prefix-addressable alternate or a
+  // provider-side redirect (Copilot variant id) still counts.
+  const gatewayById = new Map(gatewayAddressableModelIds.map(entry => [entry.id, entry.model] as const));
+  const gatewayAvailable = alias.targets
+    .map(target => ({ target, real: gatewayById.get(target.target_model_id) }))
     .filter((entry): entry is { target: AliasTarget; real: ResolvedModel } => entry.real !== undefined && entry.real.kind === alias.kind);
+  if (gatewayAvailable.length === 0) return null;
 
-  if (availableTargets.length === 0) return null;
+  // Caller-scope visibility: the alias appears only if at least one
+  // gateway-available target sits inside the caller's addressable cap.
+  const callerSet = new Set(callerAddressableModelIds.map(entry => entry.id));
+  const callerHasAny = gatewayAvailable.some(e => callerSet.has(e.target.target_model_id));
+  if (!callerHasAny) return null;
 
   // Display name precedence: operator-set wins; otherwise derive from the
   // sole target's id + rules when single-target; multi-target falls back to
   // the alias's own name because no single target represents the alias.
+  // Uses the configured `alias.targets.length` (stable across callers)
+  // rather than the per-caller reachable count.
   const displayName = alias.displayName ?? (alias.targets.length === 1
     ? composeAliasDisplayName(alias.targets[0].target_model_id, alias.targets[0].rules)
     : alias.name);
 
-  const computed = computeAutomaticMetadata(availableTargets);
+  // Metadata, endpoints, and cost are computed against the gateway-wide
+  // surface — every caller sees the same numbers for the same alias, so a
+  // non-admin restricted to a subset of upstreams never sees a more
+  // permissive limit than the admin who knows the alias's true safe lower bound.
+  const computed = computeAutomaticMetadata(gatewayAvailable);
   const { limits, chat } = alias.announcedMetadata !== null
     ? mergeWithOverride(computed, alias.announcedMetadata)
     : computed;
 
-  // Endpoints follow the available-targets UNION, not an intersection —
-  // every endpoint reachable through ANY target is advertised, because
-  // the resolver's request-time pool narrows to targets that serve the
-  // inbound endpoint and the first-available / random pick happens
-  // within that narrowed pool. Operator can't override endpoints (they
-  // follow the target set, not a stored override).
-  const endpoints = unionEndpoints(availableTargets.map(({ real }) => real.endpoints));
+  // Endpoints follow the gateway-wide union — every endpoint reachable
+  // through ANY gateway target is advertised. The resolver's
+  // request-time pool narrows to targets that serve the inbound endpoint;
+  // a caller hitting an endpoint that's only available through an out-of-
+  // cap target gets the natural `AliasNoTargetAvailableError` 404.
+  const endpoints = unionEndpoints(gatewayAvailable.map(({ real }) => real.endpoints));
 
   const entry: PublicModel = {
     id: alias.name,
@@ -243,14 +287,15 @@ const synthesizeOne = (alias: ModelAliasRecord, addressableModelIds: readonly Ad
     limits,
     kind: alias.kind,
     endpoints,
-    aliasedFrom: buildAliasedFrom(alias),
+    aliasedFrom: buildAliasedFrom(alias, callerAddressableModelIds, narrowTargets),
   };
   if (chat !== undefined) entry.chat = chat;
 
-  // Single-target chat pricing rides along when available — the resolver
-  // will hit that target, so the catalog can publish its rate verbatim.
-  if (availableTargets.length === 1) {
-    const [{ real }] = availableTargets;
+  // Gateway-wide single-target chat pricing rides along when available.
+  // Stable across callers — same alias publishes the same cost
+  // everywhere.
+  if (gatewayAvailable.length === 1) {
+    const [{ real }] = gatewayAvailable;
     if (real.cost !== undefined) entry.cost = real.cost;
   }
 
@@ -263,7 +308,7 @@ const sortAliases = (aliases: readonly ModelAliasRecord[]): ModelAliasRecord[] =
 export const synthesizeListedAliases = (input: ListedAliasInputs): PublicModel[] =>
   sortAliases(input.aliases)
     .filter(alias => alias.visibleInModelsList)
-    .map(alias => synthesizeOne(alias, input.addressableModelIds))
+    .map(alias => synthesizeOne(alias, input.gatewayAddressableModelIds, input.callerAddressableModelIds, input.narrowTargets))
     .filter((entry): entry is PublicModel => entry !== null);
 
 // Compose real-model entries with visible alias entries into a single typed
@@ -280,18 +325,26 @@ export const synthesizeListedAliases = (input: ListedAliasInputs): PublicModel[]
 // before projecting to Gemini's wire form).
 //
 // `realModels` is the listed projection — what `/v1/models` and the
-// dashboard's default `/api/models` row stream emit. `addressableModelIds`
-// feeds the alias synthesizer's availability check; the merge step never
-// promotes addressable-but-not-listed ids to real-model rows.
+// dashboard's default `/api/models` row stream emit (caller-scoped).
+// The two addressable surfaces feed the alias synthesizer's metadata-vs-
+// visibility split; the merge step never promotes addressable-but-not-
+// listed ids to real-model rows.
 export const mergeAliasesIntoModels = <T>(input: {
   readonly realModels: readonly ResolvedModel[];
-  readonly addressableModelIds: readonly AddressableIdEntry[];
+  readonly gatewayAddressableModelIds: readonly AddressableIdEntry[];
+  readonly callerAddressableModelIds: readonly AddressableIdEntry[];
   readonly aliases: readonly ModelAliasRecord[];
+  readonly narrowTargets: boolean;
   readonly mapReal: (model: ResolvedModel) => T;
   readonly wrapAlias: (entry: PublicModel) => T;
 }): T[] => {
-  const { realModels, addressableModelIds, aliases, mapReal, wrapAlias } = input;
-  const aliasEntries = synthesizeListedAliases({ aliases, addressableModelIds });
+  const { realModels, gatewayAddressableModelIds, callerAddressableModelIds, aliases, narrowTargets, mapReal, wrapAlias } = input;
+  const aliasEntries = synthesizeListedAliases({
+    aliases,
+    gatewayAddressableModelIds,
+    callerAddressableModelIds,
+    narrowTargets,
+  });
   const aliasIds = new Set(aliasEntries.map(entry => entry.id));
   return [
     ...realModels.filter(model => !aliasIds.has(model.id)).map(mapReal),
diff --git a/packages/gateway/src/data-plane/models/alias-listing_test.ts b/packages/gateway/src/data-plane/models/alias-listing_test.ts
index effe792fd..663abdac1 100644
--- a/packages/gateway/src/data-plane/models/alias-listing_test.ts
+++ b/packages/gateway/src/data-plane/models/alias-listing_test.ts
@@ -53,7 +53,7 @@ describe('synthesizeListedAliases', () => {
       },
     })];
 
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    const [entry] = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false });
     expect(entry.id).toBe('gpt-fast');
     expect(entry.display_name).toBe('gpt-5.4 (low effort)');
     // The rule pins effort, so the announced metadata drops it — the
@@ -76,7 +76,7 @@ describe('synthesizeListedAliases', () => {
       id: 'gpt-5.4',
       chat: { reasoning: { budget_tokens: { min: 1024, max: 65536 } } },
     })];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    const [entry] = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false });
     expect(entry.chat?.reasoning).toBeUndefined();
   });
 
@@ -95,7 +95,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'a', chat: { reasoning: { budget_tokens: { min: 1024 } } } }),
       realModel({ id: 'b', chat: { reasoning: { budget_tokens: { max: 65536 } } } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    const [entry] = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false });
     expect(entry.chat?.reasoning).toBeUndefined();
   });
 
@@ -111,7 +111,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'a', chat: { modalities: { input: ['text', 'image'], output: ['text'] } } }),
       realModel({ id: 'b', chat: { modalities: { input: ['text'], output: ['text'] } } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    const [entry] = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false });
     expect(entry.id).toBe('smart-router');
     expect(entry.display_name).toBe('smart-router');
     expect(entry.chat?.modalities).toEqual({ input: ['text'], output: ['text'] });
@@ -128,7 +128,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'a', chat: { reasoning: { effort: { supported: ['low'], default: 'low' } } } }),
       realModel({ id: 'b', chat: {} }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    const [entry] = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false });
     expect(entry.chat?.reasoning).toBeUndefined();
   });
 
@@ -148,7 +148,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'a', chat: { modalities: { input: ['text'], output: ['text'] } } }),
       realModel({ id: 'b', chat: { modalities: { input: ['text'], output: ['image'] } } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    const [entry] = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false });
     expect(entry.chat?.modalities).toBeUndefined();
   });
 
@@ -164,7 +164,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'a', chat: { modalities: { input: ['text', 'image'], output: ['text'] } } }),
       realModel({ id: 'b', chat: { modalities: { input: ['text'], output: ['text', 'image'] } } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    const [entry] = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false });
     expect(entry.chat?.modalities).toEqual({ input: ['text'], output: ['text'] });
     // Every configured target — including the unavailable one — survives in aliasedFrom.
     expect(entry.aliasedFrom?.targets.map(t => t.target_model_id)).toEqual(['a', 'gone', 'b']);
@@ -173,7 +173,7 @@ describe('synthesizeListedAliases', () => {
   test('hidden alias is not emitted', () => {
     const aliases = [aliasFixture({ visibleInModelsList: false })];
     const realModels = [realModel({ id: 'gpt-5.4' })];
-    expect(synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) })).toEqual([]);
+    expect(synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false })).toEqual([]);
   });
 
   test('alias whose name collides with a real id is emitted (loadModels drops the duplicate real)', () => {
@@ -182,7 +182,7 @@ describe('synthesizeListedAliases', () => {
       targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } }],
     })];
     const realModels = [realModel({ id: 'gpt-5.4', display_name: 'GPT 5.4' })];
-    const entries = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    const entries = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false });
     expect(entries).toHaveLength(1);
     expect(entries[0].id).toBe('gpt-5.4');
     expect(entries[0].aliasedFrom?.name).toBe('gpt-5.4');
@@ -193,7 +193,7 @@ describe('synthesizeListedAliases', () => {
       name: 'orphan',
       targets: [{ target_model_id: 'missing', rules: {} }],
     })];
-    expect(synthesizeListedAliases({ aliases, addressableModelIds: [] })).toEqual([]);
+    expect(synthesizeListedAliases({ aliases, gatewayAddressableModelIds: [], callerAddressableModelIds: [], narrowTargets: false })).toEqual([]);
   });
 
   test('sorts entries by (sort_order, name) so listing order stays stable', () => {
@@ -203,7 +203,7 @@ describe('synthesizeListedAliases', () => {
       aliasFixture({ name: 'mid-b', sortOrder: 0 }),
     ];
     const realModels = [realModel({ id: 'gpt-5.4' })];
-    const ids = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) }).map(entry => entry.id);
+    const ids = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false }).map(entry => entry.id);
     expect(ids).toEqual(['mid-a', 'mid-b', 'late']);
   });
 
@@ -219,7 +219,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'emb', kind: 'embedding' }),
       realModel({ id: 'chat', chat: { modalities: { input: ['text'], output: ['text'] } } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    const [entry] = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false });
     // Only the chat target backs the metadata — the embedding row never
     // enters the intersection / narrowing path.
     expect(entry.chat?.modalities).toEqual({ input: ['text'], output: ['text'] });
@@ -231,7 +231,7 @@ describe('synthesizeListedAliases', () => {
       targets: [{ target_model_id: 'gpt-5.4', rules: { reasoning: { effort: 'low' } } }],
     })];
     const realModels = [realModel({ id: 'gpt-5.4' })];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    const [entry] = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false });
     expect(entry.display_name).toBe('My Fast GPT');
   });
 
@@ -249,7 +249,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'a', chat: { reasoning: { effort: { supported: ['low', 'medium', 'high'], default: 'medium' } } } }),
       realModel({ id: 'b', chat: { reasoning: { effort: { supported: ['low', 'medium', 'high'], default: 'medium' } } } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    const [entry] = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false });
     expect(entry.chat?.reasoning).toBeUndefined();
   });
 
@@ -265,7 +265,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'a', chat: { reasoning: { effort: { supported: ['low', 'medium', 'high'], default: 'medium' } } } }),
       realModel({ id: 'b', chat: { reasoning: { effort: { supported: ['medium', 'high'], default: 'medium' } } } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    const [entry] = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false });
     expect(entry.chat?.reasoning?.effort).toEqual({ supported: ['medium', 'high'], default: 'medium' });
   });
 
@@ -281,7 +281,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'a', chat: { reasoning: { adaptive: true } } }),
       realModel({ id: 'b', chat: { reasoning: { adaptive: true } } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    const [entry] = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false });
     expect(entry.chat?.reasoning).toBeUndefined();
   });
 
@@ -297,7 +297,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'a', limits: { max_context_window_tokens: 128000, max_output_tokens: 16000 } }),
       realModel({ id: 'b', limits: { max_context_window_tokens: 200000 } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    const [entry] = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false });
     // Both targets advertise max_context_window_tokens — emit the min.
     expect(entry.limits.max_context_window_tokens).toBe(128000);
     // Only `a` declares max_output_tokens, so it drops out.
@@ -319,7 +319,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'a', limits: { max_context_window_tokens: 128000 }, chat: { modalities: { input: ['text', 'image'], output: ['text'] } } }),
       realModel({ id: 'b', limits: { max_context_window_tokens: 200000 }, chat: { modalities: { input: ['text'], output: ['text'] } } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    const [entry] = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false });
     // The override carries the operator's pinned ceiling verbatim …
     expect(entry.limits).toEqual({ max_output_tokens: 8192 });
     // … while chat falls back to the rule-aware intersection.
@@ -337,7 +337,7 @@ describe('synthesizeListedAliases', () => {
     const realModels = [
       realModel({ id: 'a', chat: { modalities: { input: ['text', 'image'], output: ['text'] } } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    const [entry] = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false });
     expect(entry.chat).toEqual({ modalities: { input: ['text'], output: ['text'] } });
   });
 
@@ -355,7 +355,7 @@ describe('synthesizeListedAliases', () => {
       // Target b only serves the three chat endpoints.
       realModel({ id: 'b', endpoints: { chatCompletions: {}, messages: {}, responses: {} } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    const [entry] = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false });
     // Union: every key surfaces. Resolver narrows to the supporting subset
     // at request time, so first-available / random stays sound per-endpoint.
     expect(entry.endpoints).toEqual({
@@ -378,7 +378,7 @@ describe('synthesizeListedAliases', () => {
       realModel({ id: 'gen', kind: 'image', endpoints: { imagesGenerations: {} } }),
       realModel({ id: 'edit', kind: 'image', endpoints: { imagesEdits: {} } }),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds: listed(realModels) });
+    const [entry] = synthesizeListedAliases({ aliases, gatewayAddressableModelIds: listed(realModels), callerAddressableModelIds: listed(realModels), narrowTargets: false });
     expect(entry.endpoints).toEqual({ imagesGenerations: {}, imagesEdits: {} });
   });
 
@@ -387,7 +387,7 @@ describe('synthesizeListedAliases', () => {
       name: 'ghost',
       targets: [{ target_model_id: 'missing', rules: {} }],
     })];
-    expect(synthesizeListedAliases({ aliases, addressableModelIds: [] })).toEqual([]);
+    expect(synthesizeListedAliases({ aliases, gatewayAddressableModelIds: [], callerAddressableModelIds: [], narrowTargets: false })).toEqual([]);
   });
 
   test('an alias target reachable only via the addressable-but-not-listed surface counts as available', () => {
@@ -409,9 +409,104 @@ describe('synthesizeListedAliases', () => {
       ...listed([canonical]),
       unlisted('claude-opus-4.7-high', canonical),
     ];
-    const [entry] = synthesizeListedAliases({ aliases, addressableModelIds });
+    const [entry] = synthesizeListedAliases({
+      aliases,
+      gatewayAddressableModelIds: addressableModelIds,
+      callerAddressableModelIds: addressableModelIds,
+      narrowTargets: false,
+    });
     expect(entry.id).toBe('fast-claude');
     expect(entry.chat?.modalities).toEqual({ input: ['text', 'image'], output: ['text'] });
     expect(entry.endpoints).toEqual({ chatCompletions: {}, messages: {}, responses: {} });
   });
+
+  test('metadata is computed gateway-wide — same numbers regardless of caller cap', () => {
+    // The alias has two targets with different windows. Two callers see
+    // the alias: one with full gateway access, one capped to a subset.
+    // Both must read the same `limits.max_context_window_tokens` because
+    // the announced metadata is a stable property of the alias.
+    const aliases = [aliasFixture({
+      name: 'mix',
+      targets: [
+        { target_model_id: 'a', rules: {} },
+        { target_model_id: 'b', rules: {} },
+      ],
+    })];
+    const a = realModel({ id: 'a', limits: { max_context_window_tokens: 100_000 } });
+    const b = realModel({ id: 'b', limits: { max_context_window_tokens: 200_000 } });
+    const gatewayWide = listed([a, b]);
+    const restricted = listed([b]);
+
+    const [unrestricted] = synthesizeListedAliases({
+      aliases,
+      gatewayAddressableModelIds: gatewayWide,
+      callerAddressableModelIds: gatewayWide,
+      narrowTargets: false,
+    });
+    const [scoped] = synthesizeListedAliases({
+      aliases,
+      gatewayAddressableModelIds: gatewayWide,
+      callerAddressableModelIds: restricted,
+      narrowTargets: true,
+    });
+
+    // Both callers read the safe-lower-bound min(100k, 200k) = 100k —
+    // even though the scoped caller's resolver would never pick `a`.
+    expect(unrestricted.limits.max_context_window_tokens).toBe(100_000);
+    expect(scoped.limits.max_context_window_tokens).toBe(100_000);
+    // Endpoints union also computed gateway-wide.
+    expect(unrestricted.endpoints).toEqual(scoped.endpoints);
+  });
+
+  test('narrowTargets=true filters `aliasedFrom.targets` to caller-reachable; narrowTargets=false keeps raw config (typos included)', () => {
+    const aliases = [aliasFixture({
+      name: 'mix',
+      targets: [
+        { target_model_id: 'a', rules: {} },
+        { target_model_id: 'b', rules: {} },
+        { target_model_id: 'typo-no-such-model', rules: {} },
+      ],
+    })];
+    const a = realModel({ id: 'a' });
+    const b = realModel({ id: 'b' });
+    const gatewayWide = listed([a, b]);
+    const restricted = listed([b]);
+
+    const [adminView] = synthesizeListedAliases({
+      aliases,
+      gatewayAddressableModelIds: gatewayWide,
+      callerAddressableModelIds: gatewayWide,
+      narrowTargets: false,
+    });
+    // Admin (narrowTargets=false) keeps the raw configured list,
+    // including the typo, so the alias-edit dialog can render the full
+    // configuration even when some targets do not currently resolve.
+    expect(adminView.aliasedFrom?.targets.map(t => t.target_model_id)).toEqual(['a', 'b', 'typo-no-such-model']);
+
+    const [scopedView] = synthesizeListedAliases({
+      aliases,
+      gatewayAddressableModelIds: gatewayWide,
+      callerAddressableModelIds: restricted,
+      narrowTargets: true,
+    });
+    // Non-admin / data-plane caller (narrowTargets=true) only sees the
+    // targets sitting inside their addressable cap. Out-of-cap target
+    // `a` AND the typo `typo-no-such-model` both drop out — the caller
+    // never learns the operator's full alias configuration.
+    expect(scopedView.aliasedFrom?.targets.map(t => t.target_model_id)).toEqual(['b']);
+  });
+
+  test('alias is omitted when caller cannot reach any of the configured targets', () => {
+    const aliases = [aliasFixture({
+      name: 'mix',
+      targets: [{ target_model_id: 'a', rules: {} }],
+    })];
+    const a = realModel({ id: 'a' });
+    expect(synthesizeListedAliases({
+      aliases,
+      gatewayAddressableModelIds: listed([a]),
+      callerAddressableModelIds: [],  // caller sees nothing
+      narrowTargets: true,
+    })).toEqual([]);
+  });
 });
diff --git a/packages/gateway/src/data-plane/models/gemini.ts b/packages/gateway/src/data-plane/models/gemini.ts
index 822c0aef8..64d565e76 100644
--- a/packages/gateway/src/data-plane/models/gemini.ts
+++ b/packages/gateway/src/data-plane/models/gemini.ts
@@ -74,18 +74,24 @@ const loadGeminiModels = async (
   scheduler: BackgroundScheduler,
   aliasRepo: ModelAliasesRepo,
 ): Promise<GeminiModel[]> => {
-  const [addressable, aliases] = await Promise.all([
+  const [callerAddressable, gatewayAddressable, aliases] = await Promise.all([
     enumerateAddressableModelIds(upstreamFilter, fetcherForUpstream, scheduler),
+    upstreamFilter === null
+      ? Promise.resolve(null)
+      : enumerateAddressableModelIds(null, fetcherForUpstream, scheduler),
     aliasRepo.list(),
   ]);
-  const realModels = listedRealModels(addressable);
+  const gatewayAddressableModelIds = gatewayAddressable ?? callerAddressable;
+  const realModels = listedRealModels(callerAddressable);
   // Gemini surfaces chat-kind models only; filter both the real catalog and
   // the synthesized alias entries before the merge so the alias collision
   // step only ever weighs chat-on-chat.
   const merged = mergeAliasesIntoModels<InternalModel>({
     realModels: realModels.filter(model => model.kind === 'chat'),
-    addressableModelIds: addressable.filter(entry => entry.model.kind === 'chat'),
+    gatewayAddressableModelIds: gatewayAddressableModelIds.filter(entry => entry.model.kind === 'chat'),
+    callerAddressableModelIds: callerAddressable.filter(entry => entry.model.kind === 'chat'),
     aliases: aliases.filter(alias => alias.kind === 'chat'),
+    narrowTargets: true,
     mapReal: model => model,
     wrapAlias: entry => ({
       id: entry.id,
diff --git a/packages/gateway/src/data-plane/models/load.ts b/packages/gateway/src/data-plane/models/load.ts
index 937966405..9417028da 100644
--- a/packages/gateway/src/data-plane/models/load.ts
+++ b/packages/gateway/src/data-plane/models/load.ts
@@ -31,19 +31,25 @@ export const loadModels = async (
   scheduler: BackgroundScheduler,
   aliasRepo: ModelAliasesRepo,
 ): Promise<PublicModelsResponse> => {
-  // The addressable surface already includes the listed projection — its
-  // entries-where-unlisted-is-absent are exactly the rows /v1/models
-  // historically served. Reusing the surface here avoids a second registry
-  // call for the alias-availability check.
-  const [addressable, aliases] = await Promise.all([
+  // Data-plane responses always narrow `aliasedFrom.targets` to the
+  // caller's reachable set (and never expose typo'd / removed target
+  // ids), but the alias's metadata is still computed gateway-wide so
+  // every caller sees the same numbers.
+  const [callerAddressable, gatewayAddressable, aliases] = await Promise.all([
     enumerateAddressableModelIds(upstreamFilter, fetcherForUpstream, scheduler),
+    upstreamFilter === null
+      ? Promise.resolve(null)
+      : enumerateAddressableModelIds(null, fetcherForUpstream, scheduler),
     aliasRepo.list(),
   ]);
-  const realModels = listedRealModels(addressable);
+  const gatewayAddressableModelIds = gatewayAddressable ?? callerAddressable;
+  const realModels = listedRealModels(callerAddressable);
   const data = mergeAliasesIntoModels({
     realModels,
-    addressableModelIds: addressable,
+    gatewayAddressableModelIds,
+    callerAddressableModelIds: callerAddressable,
     aliases,
+    narrowTargets: true,
     mapReal: toPublicModel,
     wrapAlias: entry => entry,
   });

From e48306bfe121b741ee4dc4a5fe9f188bdc364f05 Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 12:59:19 +0800
Subject: [PATCH 168/170] refactor(aliases/web): ModelInfoBar alias-of badge
 becomes count-based
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

`alias of: 3 / 4 models` when some configured targets are out of the
current cap; `alias of: 5 models` when every target is reachable;
`alias of: <id-or-display-name>` when only one target is reachable
(operator can see exactly what the resolver will pick), and the
`selection: <mode>` badge is hidden in that case because there is no
selection to make.

Replaces the previous "alias of: gpt-5.4, gemini-3-flash-preview,
deepseek-v4-pro +1 more" list, which contradicted the parallel
"3 / 4 reachable" badge when some of those listed ids were actually
out of cap. The list shape also became unwieldy on aliases that fan
out across several targets — the count is the information operators
actually act on.
---
 .../src/components/models/ModelInfoBar.vue    | 56 ++++++++++++-------
 1 file changed, 36 insertions(+), 20 deletions(-)

diff --git a/apps/web/src/components/models/ModelInfoBar.vue b/apps/web/src/components/models/ModelInfoBar.vue
index 6bf8b4c1e..930c6e2dc 100644
--- a/apps/web/src/components/models/ModelInfoBar.vue
+++ b/apps/web/src/components/models/ModelInfoBar.vue
@@ -28,29 +28,46 @@ const formatTokenLimit = (n: number) => {
   return n.toString();
 };
 
-// Truncate to the first three with a "+N more" tail to keep the badge
-// readable for aliases that fan out to a long fallback chain.
-const aliasOfLabel = computed<string | null>(() => {
+// Shape used by the alias-of badge AND the selection badge to read the
+// caller-cap-aware reachable subset. `null` only when the row is not an
+// alias; without a catalog every configured target counts as reachable.
+const aliasReach = computed<{ total: number; reachable: number; sole: ControlPlaneModel | null } | null>(() => {
   const a = props.model.aliasedFrom;
   if (!a) return null;
-  const ids = a.targets.map(t => t.target_model_id);
-  if (ids.length <= 3) return `alias of: ${ids.join(', ')}`;
-  return `alias of: ${ids.slice(0, 3).join(', ')} +${ids.length - 3} more`;
+  if (props.catalog === undefined) {
+    // No catalog → "everything configured is reachable" (the row was
+    // rendered in isolation, no cap context to apply).
+    return { total: a.targets.length, reachable: a.targets.length, sole: null };
+  }
+  const reachable = reachableTargets(props.model, props.catalog, props.cap ?? null);
+  return {
+    total: a.targets.length,
+    reachable: reachable.length,
+    sole: reachable.length === 1 ? reachable[0] : null,
+  };
+});
+
+// `alias of: <id>` when only one reachable target is also a visible
+// catalog row (admin or non-admin under a narrow cap viewing a single-
+// target alias);
+// `alias of: N models` when every configured target is reachable;
+// `alias of: K / N models` when some are out of cap.
+const aliasOfLabel = computed<string | null>(() => {
+  const r = aliasReach.value;
+  if (r === null) return null;
+  if (r.sole !== null) return `alias of: ${r.sole.display_name ?? r.sole.id}`;
+  if (r.reachable === r.total) return `alias of: ${r.total} model${r.total === 1 ? '' : 's'}`;
+  return `alias of: ${r.reachable} / ${r.total} models`;
 });
 
-// For an alias row in the playground: how many configured targets
-// resolve to a real model the caller can route to under the current
-// effective cap. Only renders when the consumer supplied `catalog`
-// and `cap` props — the Models page tile (no cap context) hides this
-// badge entirely. `cap === null` means "no restriction" and the badge
-// renders as N/N. Targets pointing at ids the catalog cannot resolve
-// (typo, removed model) count as unreachable, mirroring the data-plane
-// resolver.
-const reachableTargetSummary = computed<string | null>(() => {
+// Single-target chip is enough — drop the parallel `selection: random`
+// label, which only matters for multi-target aliases where the resolver
+// genuinely picks between candidates.
+const selectionLabel = computed<string | null>(() => {
   const a = props.model.aliasedFrom;
-  if (!a || props.catalog === undefined) return null;
-  const reachable = reachableTargets(props.model, props.catalog, props.cap ?? null);
-  return `${reachable.length} / ${a.targets.length} target${a.targets.length === 1 ? '' : 's'} reachable`;
+  if (!a) return null;
+  if (aliasReach.value?.sole !== null) return null;
+  return `selection: ${a.selection}`;
 });
 
 // Single-target aliases render one badge per rule; multi-target aliases
@@ -104,8 +121,7 @@ const ruleBadges = computed<{ label: string }[]>(() => {
             output: {{ formatTokenLimit(model.limits.max_output_tokens) }}
           </span>
           <span v-if="aliasOfLabel" class="text-[10px] font-mono px-2 py-0.5 rounded-full border border-white/15 text-gray-400">{{ aliasOfLabel }}</span>
-          <span v-if="reachableTargetSummary" class="text-[10px] font-mono px-2 py-0.5 rounded-full border border-white/15 text-gray-400">{{ reachableTargetSummary }}</span>
-          <span v-if="model.aliasedFrom" class="text-[10px] font-mono px-2 py-0.5 rounded-full border border-white/15 text-gray-400">selection: {{ model.aliasedFrom.selection }}</span>
+          <span v-if="selectionLabel" class="text-[10px] font-mono px-2 py-0.5 rounded-full border border-white/15 text-gray-400">{{ selectionLabel }}</span>
           <span
             v-for="badge in ruleBadges"
             :key="badge.label"

From afb89a5d603b4c1e344ae509d86f6878598204fd Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 14:53:03 +0800
Subject: [PATCH 169/170] fix(aliases/web): alias-of single-target badge uses
 raw model id, not display name
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The badge mirrors the value the operator typed into the alias's target
field and the value a client would put on the wire. Display name is
already on the picked target's own row in the sidebar — repeating it
here is noise; the id is the actionable identifier.
---
 apps/web/src/components/models/ModelInfoBar.vue | 6 ++++--
 1 file changed, 4 insertions(+), 2 deletions(-)

diff --git a/apps/web/src/components/models/ModelInfoBar.vue b/apps/web/src/components/models/ModelInfoBar.vue
index 930c6e2dc..e086f73fc 100644
--- a/apps/web/src/components/models/ModelInfoBar.vue
+++ b/apps/web/src/components/models/ModelInfoBar.vue
@@ -49,13 +49,15 @@ const aliasReach = computed<{ total: number; reachable: number; sole: ControlPla
 
 // `alias of: <id>` when only one reachable target is also a visible
 // catalog row (admin or non-admin under a narrow cap viewing a single-
-// target alias);
+// target alias) — use the raw model id, not its display name, so the
+// badge mirrors the value the operator typed into the alias target
+// field and the value a client would put on the wire;
 // `alias of: N models` when every configured target is reachable;
 // `alias of: K / N models` when some are out of cap.
 const aliasOfLabel = computed<string | null>(() => {
   const r = aliasReach.value;
   if (r === null) return null;
-  if (r.sole !== null) return `alias of: ${r.sole.display_name ?? r.sole.id}`;
+  if (r.sole !== null) return `alias of: ${r.sole.id}`;
   if (r.reachable === r.total) return `alias of: ${r.total} model${r.total === 1 ? '' : 's'}`;
   return `alias of: ${r.reachable} / ${r.total} models`;
 });

From 46626cba12b7323e37a9f110ec74813611dbfdda Mon Sep 17 00:00:00 2001
From: Menci <mencici@msn.com>
Date: Sun, 28 Jun 2026 15:19:50 +0800
Subject: [PATCH 170/170] feat(aliases/web): alias rows render the
 upstream-bindings union
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Alias rows used to show no provider badges (the wire's `upstreams: []`
on alias entries leaves nothing to render). Compute the de-duped
union across the caller-reachable targets' bindings so the alias info
bar surfaces the same provider-badge shape every real-model row does.

Each binding is further filtered against the caller's effective cap:
a target may sit on three upstreams of which only one is in cap, and
only the in-cap one is the provider the resolver would actually route
to. This keeps the badges aligned with the parallel "alias of: K / N
models" badge — both tell the operator what the resolver will see
under the current key, not what's configured gateway-wide.
---
 .../src/components/models/ModelInfoBar.vue    | 28 ++++++++++++++++++-
 1 file changed, 27 insertions(+), 1 deletion(-)

diff --git a/apps/web/src/components/models/ModelInfoBar.vue b/apps/web/src/components/models/ModelInfoBar.vue
index e086f73fc..4b10c0aca 100644
--- a/apps/web/src/components/models/ModelInfoBar.vue
+++ b/apps/web/src/components/models/ModelInfoBar.vue
@@ -72,6 +72,32 @@ const selectionLabel = computed<string | null>(() => {
   return `selection: ${a.selection}`;
 });
 
+// Provider badges this row renders. Real models advertise their own
+// `upstreams` bindings directly. Alias rows have an empty `upstreams`
+// list on the wire (the server intentionally lifts upstream info to
+// the targets) — compute the de-duped union of the caller-reachable
+// targets' bindings here so the alias surfaces the same provider-badge
+// shape every real-model row does (none when no catalog is supplied).
+// Each binding is further filtered against the cap: a target may sit
+// on three upstreams of which only one is currently in cap; only the
+// in-cap one is the provider the resolver would actually route to.
+const effectiveUpstreams = computed<readonly { kind: ControlPlaneModel['upstreams'][number]['kind']; id: string; name: string }[]>(() => {
+  if (props.model.aliasedFrom === undefined) return props.model.upstreams;
+  if (props.catalog === undefined) return [];
+  const cap = props.cap ?? null;
+  const seen = new Set<string>();
+  const out: ControlPlaneModel['upstreams'] = [];
+  for (const target of reachableTargets(props.model, props.catalog, cap)) {
+    for (const binding of target.upstreams) {
+      if (cap !== null && !cap.includes(binding.id)) continue;
+      if (seen.has(binding.id)) continue;
+      seen.add(binding.id);
+      out.push(binding);
+    }
+  }
+  return out;
+});
+
 // Single-target aliases render one badge per rule; multi-target aliases
 // collapse to "<field>: varies" for any field whose values disagree across
 // targets. Each badge carries an explicit `field` key so the bucket walk
@@ -107,7 +133,7 @@ const ruleBadges = computed<{ label: string }[]>(() => {
         </div>
         <div class="flex flex-wrap gap-1.5 mt-2">
           <span
-            v-for="binding in model.upstreams"
+            v-for="binding in effectiveUpstreams"
             :key="binding.id"
             class="text-[10px] font-semibold px-2 py-0.5 rounded-full border"
             :class="providerBadgeClass(binding.kind)"