From 27079e58193dc07c1430e9e21e780aba3660139b Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Mon, 29 Jun 2026 20:35:23 +0200 Subject: [PATCH 1/2] =?UTF-8?q?feat:=20per-command=20MCP=20outputSchema=20?= =?UTF-8?q?=E2=80=94=20Phase=204?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Hand-author per-command MCP outputSchemas for the 13 typed commands whose closed result shapes live in the contracts layer (mirroring CommandResultMap): press, fill, longpress, boot, shutdown, viewport, home, back, rotate, app-switcher, clipboard, appstate, keyboard. The new COMMAND_OUTPUT_SCHEMAS registry is injected into tools/list via listCommandTools(). It is additive-only: untyped/dynamic tools (snapshot, gestures, perf, logs, …) carry no outputSchema key and stay byte-identical. Schemas are non-strict (no additionalProperties:false) so the additive cost object rides into structuredContent and still validates. MCP agents can now trust structuredContent against the advertised schema instead of re-parsing text. --- src/mcp/__tests__/command-tools.test.ts | 66 +++++ src/mcp/command-output-schemas.ts | 304 ++++++++++++++++++++++++ src/mcp/command-tools.ts | 18 +- 3 files changed, 383 insertions(+), 5 deletions(-) create mode 100644 src/mcp/command-output-schemas.ts diff --git a/src/mcp/__tests__/command-tools.test.ts b/src/mcp/__tests__/command-tools.test.ts index 1f782a732..b795cccb5 100644 --- a/src/mcp/__tests__/command-tools.test.ts +++ b/src/mcp/__tests__/command-tools.test.ts @@ -156,6 +156,72 @@ test('MCP includeCost rejects non-boolean values at the boundary', async () => { ); }); +test('MCP typed commands advertise an outputSchema with the contract discriminant', () => { + const tools = listCommandTools(); + + // keyboard is a flat closed shape: platform + action discriminants at the top. 
+ const keyboard = tools.find((tool) => tool.name === 'keyboard'); + assert.ok(keyboard); + assert.ok(keyboard.outputSchema); + assert.equal(keyboard.outputSchema.type, 'object'); + assert.deepEqual( + (keyboard.outputSchema.properties?.action as { enum?: unknown[] } | undefined)?.enum, + ['status', 'dismiss', 'enter'], + ); + assert.deepEqual( + (keyboard.outputSchema.properties?.platform as { enum?: unknown[] } | undefined)?.enum, + ['android', 'ios'], + ); + + // clipboard is a discriminated union on `action`, modeled as oneOf branches. + const clipboard = tools.find((tool) => tool.name === 'clipboard'); + assert.ok(clipboard); + assert.ok(clipboard.outputSchema); + const clipboardActions = (clipboard.outputSchema.oneOf ?? []).map( + (branch) => (branch.properties?.action as { const?: unknown } | undefined)?.const, + ); + assert.deepEqual(clipboardActions, ['read', 'write']); +}); + +test('MCP untyped tools stay byte-identical: no outputSchema key', () => { + const tools = listCommandTools(); + + // snapshot is intentionally absent from the typed registry (dynamic shape). + const snapshot = tools.find((tool) => tool.name === 'snapshot'); + assert.ok(snapshot); + assert.equal('outputSchema' in snapshot, false); + + // devices is likewise untyped. + const devices = tools.find((tool) => tool.name === 'devices'); + assert.ok(devices); + assert.equal('outputSchema' in devices, false); +}); + +test('MCP boot structuredContent is consistent with its advertised outputSchema', async () => { + const bootResult = { + platform: 'ios', + target: 'mobile', + device: 'iPhone 16', + id: 'UDID-123', + kind: 'simulator', + booted: true, + }; + const executor = createCommandToolExecutor({ + createClient: () => ({}) as AgentDeviceClient, + runCommand: async () => bootResult, + }); + + const bootTool = listCommandTools().find((tool) => tool.name === 'boot'); + assert.ok(bootTool?.outputSchema); + const required = bootTool.outputSchema.required ?? 
[]; + for (const key of required) { + assert.ok(key in bootResult, `boot result is missing required outputSchema key: ${key}`); + } + + const result = await executor.execute('boot', {}); + assert.deepEqual(result.structuredContent, bootResult); +}); + test('MCP session tool exposes state-dir resolution without a daemon round-trip', async () => { const sessionTool = listCommandTools().find((tool) => tool.name === 'session'); assert.ok(sessionTool); diff --git a/src/mcp/command-output-schemas.ts b/src/mcp/command-output-schemas.ts new file mode 100644 index 000000000..6aa548065 --- /dev/null +++ b/src/mcp/command-output-schemas.ts @@ -0,0 +1,304 @@ +import type { JsonSchema } from '../commands/command-contract.ts'; +import { booleanSchema, looseObjectSchema, stringSchema } from '../commands/command-input.ts'; +import { BACK_MODES } from '../core/back-mode.ts'; +import { DEVICE_ROTATIONS } from '../core/device-rotation.ts'; +import { SESSION_SURFACES } from '../core/session-surface.ts'; +import { DEVICE_TARGETS, PLATFORMS } from '../utils/device.ts'; + +/** + * Hand-authored, PARTIAL-coverage registry of per-command MCP `outputSchema`s, + * keyed by the daemon command NAME. It mirrors the typed-result spine + * `CommandResultMap` (src/core/command-descriptor/command-result.ts) one-for-one: + * a command appears here ONLY when its accurate, closed result shape lives in the + * contracts layer (src/contracts/*). The genuinely-dynamic commands (snapshot + * overlays, gestures, perf, logs, …) are intentionally absent — their tools stay + * byte-identical to today (no `outputSchema` key), exactly as `CommandResultMap` + * omits them rather than inventing a shape. + * + * There is no type→JSON-Schema generator in this repo, so every schema below is + * authored by hand from the matching contract type. 
Two invariants: + * - NEVER strict: no `additionalProperties: false` anywhere, so the additive + * `cost` object (opted in via `--cost` / `includeCost`) and any other additive + * fields ride into `structuredContent` and still validate. + * - Accurate, never invented: required-vs-optional, enums, `const` discriminants + * and discriminated-union branches mirror the source contract types. + */ + +const DEVICE_KINDS = ['simulator', 'emulator', 'device'] as const; + +function numberSchema(description?: string): JsonSchema { + return { type: 'number', ...(description ? { description } : {}) }; +} + +function enumSchema(values: readonly string[], description?: string): JsonSchema { + return { type: 'string', enum: values, ...(description ? { description } : {}) }; +} + +function constSchema(value: string): JsonSchema { + return { type: 'string', const: value }; +} + +function objectSchema( + properties: Record<string, JsonSchema>, + required: readonly string[] = [], + description?: string, +): JsonSchema { + // Intentionally non-strict (no additionalProperties: false) so additive + // fields such as `cost` validate. + return { + type: 'object', + ...(description ? { description } : {}), + properties, + ...(required.length > 0 ? { required } : {}), + }; +} + +const stringArraySchema: JsonSchema = { type: 'array', items: { type: 'string' } }; + +const rectSchema: JsonSchema = objectSchema( + { + x: numberSchema(), + y: numberSchema(), + width: numberSchema(), + height: numberSchema(), + }, + ['x', 'y', 'width', 'height'], +); + +const pointSchema: JsonSchema = objectSchema({ x: numberSchema(), y: numberSchema() }, ['x', 'y']); + +// SnapshotNode = RawSnapshotNode & { ref } (src/kernel/snapshot.ts). `index` and +// `ref` are the only always-present fields; all others are optional. 
+const snapshotNodeSchema: JsonSchema = objectSchema( + { + index: numberSchema(), + ref: stringSchema('Stable snapshot ref such as e12.'), + type: stringSchema(), + role: stringSchema(), + subrole: stringSchema(), + label: stringSchema(), + value: stringSchema(), + identifier: stringSchema(), + rect: rectSchema, + enabled: booleanSchema(), + selected: booleanSchema(), + focused: booleanSchema(), + visibleToUser: booleanSchema(), + hittable: booleanSchema(), + depth: numberSchema(), + parentIndex: numberSchema(), + pid: numberSchema(), + bundleId: stringSchema(), + appName: stringSchema(), + windowTitle: stringSchema(), + surface: stringSchema(), + hiddenContentAbove: booleanSchema(), + hiddenContentBelow: booleanSchema(), + interactionBlocked: enumSchema(['covered']), + presentationHints: stringArraySchema, + }, + ['index', 'ref'], + 'Resolved snapshot node for the matched element.', +); + +const resolvedRefTargetSchema: JsonSchema = objectSchema( + { kind: constSchema('ref'), ref: stringSchema() }, + ['kind', 'ref'], +); + +const resolvedSelectorTargetSchema: JsonSchema = objectSchema( + { kind: constSchema('selector'), selector: stringSchema() }, + ['kind', 'selector'], +); + +type InteractionExtra = { + properties?: Record<string, JsonSchema>; + required?: readonly string[]; +}; + +/** + * `ResolvedInteractionTarget & extra` — a `kind` discriminated union (point / ref + * / selector) shared by press / fill / longpress. The `const` discriminant keeps + * the branches mutually exclusive, so the additive `cost` field never breaks the + * exactly-one-of contract. + */ +function interactionResultSchema(extra: InteractionExtra = {}): JsonSchema { + const extraProperties = extra.properties ?? {}; + const extraRequired = extra.required ?? 
[]; + const pointBranch = objectSchema( + { kind: constSchema('point'), point: pointSchema, ...extraProperties }, + ['kind', 'point', ...extraRequired], + ); + const refBranch = objectSchema( + { + kind: constSchema('ref'), + point: pointSchema, + target: resolvedRefTargetSchema, + node: snapshotNodeSchema, + selectorChain: stringArraySchema, + refLabel: stringSchema(), + ...extraProperties, + }, + ['kind', 'target', ...extraRequired], + ); + const selectorBranch = objectSchema( + { + kind: constSchema('selector'), + point: pointSchema, + target: resolvedSelectorTargetSchema, + node: snapshotNodeSchema, + selectorChain: stringArraySchema, + refLabel: stringSchema(), + ...extraProperties, + }, + ['kind', 'point', 'target', 'node', 'selectorChain', ...extraRequired], + ); + return { type: 'object', oneOf: [pointBranch, refBranch, selectorBranch] }; +} + +const backendResultSchema = looseObjectSchema('Raw backend result passthrough.'); + +// boot / shutdown share the resolved-device header (src/contracts/device.ts). +const deviceHeaderProperties: Record<string, JsonSchema> = { + platform: enumSchema(PLATFORMS), + target: enumSchema(DEVICE_TARGETS), + device: stringSchema('Human-readable device name.'), + id: stringSchema('Stable device id.'), + kind: enumSchema(DEVICE_KINDS), +}; +const deviceHeaderRequired = ['platform', 'target', 'device', 'id', 'kind'] as const; + +// TargetShutdownResult (src/target-shutdown-contract.ts). 
+const targetShutdownResultSchema: JsonSchema = objectSchema( + { + success: booleanSchema(), + exitCode: numberSchema(), + stdout: stringSchema(), + stderr: stringSchema(), + error: looseObjectSchema('Normalized error detail when shutdown failed.'), + }, + ['success', 'exitCode', 'stdout', 'stderr'], +); + +export const COMMAND_OUTPUT_SCHEMAS: Partial<Record<string, JsonSchema>> = { + // src/contracts/interaction.ts + press: interactionResultSchema({ + properties: { backendResult: backendResultSchema, message: stringSchema() }, + }), + fill: interactionResultSchema({ + properties: { + text: stringSchema('Text submitted to the field.'), + warning: stringSchema(), + backendResult: backendResultSchema, + message: stringSchema(), + }, + required: ['text'], + }), + longpress: interactionResultSchema({ + properties: { + durationMs: numberSchema(), + backendResult: backendResultSchema, + message: stringSchema(), + }, + }), + + // src/contracts/device.ts + boot: objectSchema({ ...deviceHeaderProperties, booted: { type: 'boolean', const: true } }, [ + ...deviceHeaderRequired, + 'booted', + ]), + shutdown: objectSchema({ ...deviceHeaderProperties, shutdown: targetShutdownResultSchema }, [ + ...deviceHeaderRequired, + 'shutdown', + ]), + + // src/contracts/viewport.ts + viewport: objectSchema( + { width: numberSchema(), height: numberSchema(), message: stringSchema() }, + ['width', 'height', 'message'], + ), + + // src/contracts/navigation.ts + home: objectSchema({ action: constSchema('home'), message: stringSchema() }, [ + 'action', + 'message', + ]), + back: objectSchema( + { action: constSchema('back'), mode: enumSchema(BACK_MODES), message: stringSchema() }, + ['action', 'mode', 'message'], + ), + rotate: objectSchema( + { + action: constSchema('rotate'), + orientation: enumSchema(DEVICE_ROTATIONS), + message: stringSchema(), + }, + ['action', 'orientation', 'message'], + ), + 'app-switcher': objectSchema({ action: constSchema('app-switcher'), message: stringSchema() }, [ + 'action', + 'message', 
+ ]), + + // src/contracts/clipboard.ts — discriminated union on `action`. + clipboard: { + type: 'object', + oneOf: [ + objectSchema({ action: constSchema('read'), text: stringSchema() }, ['action', 'text']), + objectSchema( + { action: constSchema('write'), textLength: numberSchema(), message: stringSchema() }, + ['action', 'textLength', 'message'], + ), + ], + }, + + // src/contracts/app-state.ts — discriminated union on `platform`. + appstate: { + type: 'object', + oneOf: [ + objectSchema( + { + platform: enumSchema(['ios', 'macos']), + appName: stringSchema(), + appBundleId: stringSchema(), + source: constSchema('session'), + surface: enumSchema(SESSION_SURFACES), + device_udid: stringSchema('iOS only — the session device UDID.'), + ios_simulator_device_set: { + type: ['string', 'null'], + description: 'iOS only — the simulator set path, or null when unknown.', + }, + }, + ['platform', 'appName', 'source', 'surface'], + ), + objectSchema( + { + platform: constSchema('android'), + package: stringSchema(), + activity: stringSchema(), + }, + ['platform', 'package', 'activity'], + ), + ], + }, + + // src/contracts/keyboard.ts — flat closed shape; `platform`/`action` always present. 
+ keyboard: objectSchema( + { + platform: enumSchema(['android', 'ios']), + action: enumSchema(['status', 'dismiss', 'enter']), + visible: booleanSchema(), + wasVisible: booleanSchema(), + dismissed: booleanSchema(), + attempts: numberSchema(), + inputType: stringSchema(), + type: enumSchema(['text', 'number', 'email', 'phone', 'password', 'datetime', 'unknown']), + inputMethodPackage: stringSchema(), + focusedPackage: stringSchema(), + focusedResourceId: stringSchema(), + inputOwner: enumSchema(['app', 'ime', 'unknown']), + message: stringSchema(), + }, + ['platform', 'action'], + ), +}; diff --git a/src/mcp/command-tools.ts b/src/mcp/command-tools.ts index 37a83e959..4d9730f70 100644 --- a/src/mcp/command-tools.ts +++ b/src/mcp/command-tools.ts @@ -6,6 +6,7 @@ import { listMcpCommandMetadata, type CommandName, } from '../commands/command-metadata.ts'; +import { COMMAND_OUTPUT_SCHEMAS } from './command-output-schemas.ts'; export type ToolResult = { isError: boolean; @@ -35,12 +36,19 @@ export function listCommandTools(): Array<{ name: string; description: string; inputSchema: JsonSchema; + outputSchema?: JsonSchema; }> { - return listMcpCommandMetadata().map((definition) => ({ - name: definition.name, - description: definition.description, - inputSchema: withMcpConfigSchema(definition.inputSchema), - })); + return listMcpCommandMetadata().map((definition) => { + const outputSchema = COMMAND_OUTPUT_SCHEMAS[definition.name]; + return { + name: definition.name, + description: definition.description, + inputSchema: withMcpConfigSchema(definition.inputSchema), + // Only typed commands carry an outputSchema; untyped tools stay + // byte-identical to today (no key at all), additive-only. + ...(outputSchema ? 
{ outputSchema } : {}), + }; + }); } export function createCommandToolExecutor(deps: CommandToolExecutorDeps = {}): CommandToolExecutor { From ec3f908b197f81b57feddd738840ffc1a10fc08d Mon Sep 17 00:00:00 2001 From: =?UTF-8?q?Micha=C5=82=20Pierzcha=C5=82a?= Date: Mon, 29 Jun 2026 21:13:09 +0200 Subject: [PATCH 2/2] refactor: type-tie COMMAND_OUTPUT_SCHEMAS to CommandResultMap Replace Partial<Record<string, JsonSchema>> with `satisfies Record<keyof CommandResultMap, JsonSchema>`, so the one-for-one invariant with the typed-result spine is compiler-enforced: a new CommandResultMap entry without an output schema is now a missing-key error, and a misspelled/extra key is an excess-property error (previously both compiled and silently omitted the schema). The lookup in listCommandTools guards with an `in` check since the registry is keyed by the typed commands only. --- src/mcp/command-output-schemas.ts | 23 +++++++++++++---------- src/mcp/command-tools.ts | 7 ++++++- 2 files changed, 19 insertions(+), 11 deletions(-) diff --git a/src/mcp/command-output-schemas.ts b/src/mcp/command-output-schemas.ts index 6aa548065..cc81ad949 100644 --- a/src/mcp/command-output-schemas.ts +++ b/src/mcp/command-output-schemas.ts @@ -1,4 +1,5 @@ import type { JsonSchema } from '../commands/command-contract.ts'; +import type { CommandResultMap } from '../core/command-descriptor/command-result.ts'; import { booleanSchema, looseObjectSchema, stringSchema } from '../commands/command-input.ts'; import { BACK_MODES } from '../core/back-mode.ts'; import { DEVICE_ROTATIONS } from '../core/device-rotation.ts'; @@ -6,14 +7,16 @@ import { SESSION_SURFACES } from '../core/session-surface.ts'; import { DEVICE_TARGETS, PLATFORMS } from '../utils/device.ts'; /** - * Hand-authored, PARTIAL-coverage registry of per-command MCP `outputSchema`s, - * keyed by the daemon command NAME. 
It mirrors the typed-result spine - * `CommandResultMap` (src/core/command-descriptor/command-result.ts) one-for-one: - * a command appears here ONLY when its accurate, closed result shape lives in the - * contracts layer (src/contracts/*). The genuinely-dynamic commands (snapshot - * overlays, gestures, perf, logs, …) are intentionally absent — their tools stay - * byte-identical to today (no `outputSchema` key), exactly as `CommandResultMap` - * omits them rather than inventing a shape. + * Hand-authored registry of per-command MCP `outputSchema`s, keyed by the daemon + * command NAME. It is type-tied to the typed-result spine `CommandResultMap` + * (src/core/command-descriptor/command-result.ts) via + * `satisfies Record<keyof CommandResultMap, JsonSchema>`, so the one-for-one + * invariant is compiler-enforced: a new `CommandResultMap` entry without a schema + * here is a missing-key error, and a typo'd/extra key is an excess-property error. + * The genuinely-dynamic commands (snapshot overlays, gestures, perf, logs, …) are + * absent from BOTH maps — their tools stay byte-identical to today (no + * `outputSchema` key), exactly as `CommandResultMap` omits them rather than + * inventing a shape. * * There is no type→JSON-Schema generator in this repo, so every schema below is * authored by hand from the matching contract type. 
Two invariants: @@ -180,7 +183,7 @@ const targetShutdownResultSchema: JsonSchema = objectSchema( ['success', 'exitCode', 'stdout', 'stderr'], ); -export const COMMAND_OUTPUT_SCHEMAS: Partial<Record<string, JsonSchema>> = { +export const COMMAND_OUTPUT_SCHEMAS = { // src/contracts/interaction.ts press: interactionResultSchema({ properties: { backendResult: backendResultSchema, message: stringSchema() }, @@ -301,4 +304,4 @@ export const COMMAND_OUTPUT_SCHEMAS: Partial<Record<string, JsonSchema>> = { }, ['platform', 'action'], ), -}; +} satisfies Record<keyof CommandResultMap, JsonSchema>; diff --git a/src/mcp/command-tools.ts b/src/mcp/command-tools.ts index 4d9730f70..fd56d8bcf 100644 --- a/src/mcp/command-tools.ts +++ b/src/mcp/command-tools.ts @@ -39,7 +39,12 @@ export function listCommandTools(): Array<{ outputSchema?: JsonSchema; }> { return listMcpCommandMetadata().map((definition) => { - const outputSchema = COMMAND_OUTPUT_SCHEMAS[definition.name]; + // The registry is keyed by the typed-result commands only (CommandResultMap), + // so guard the lookup; untyped tools resolve to no outputSchema. + const outputSchema = + definition.name in COMMAND_OUTPUT_SCHEMAS + ? COMMAND_OUTPUT_SCHEMAS[definition.name as keyof typeof COMMAND_OUTPUT_SCHEMAS] + : undefined; return { name: definition.name, description: definition.description,