From 07dc0106f55b365c5245ca36359e0736f73485ea Mon Sep 17 00:00:00 2001 From: Fadhlan Ridhwanallah Date: Thu, 18 Jun 2026 14:47:22 +0700 Subject: [PATCH 1/8] fix(CS-11647): hide Accept All bar for auto-executed commands MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The manual Accept All / Cancel bar briefly flashed below an assistant tool call before command-service started auto-executing it. The drain loop decides "auto-execute" inside an async 100ms-debounced task, but the room's readyCommands getter only filtered by execution status — so during the debounce window the bar painted, then yanked itself when acceptingAllRoomIds flipped. Pull the auto-execute decision into a synchronous predicate (isAutoExecutableCommand) and call it from both command-service (to decide whether to run) and room.gts readyCommands (to decide whether to render the bar). Single source of truth for the three conditions — checkCorrectness, requiresApproval=false, LLM mode act. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/host/app/components/matrix/room.gts | 13 +++- packages/host/app/lib/command-auto-execute.ts | 23 +++++++ packages/host/app/services/command-service.ts | 20 ++---- .../ai-assistant-panel/commands-test.gts | 68 +++++++++++++++++++ .../unit/lib/command-auto-execute-test.ts | 56 +++++++++++++++ 5 files changed, 164 insertions(+), 16 deletions(-) create mode 100644 packages/host/app/lib/command-auto-execute.ts create mode 100644 packages/host/tests/unit/lib/command-auto-execute-test.ts diff --git a/packages/host/app/components/matrix/room.gts b/packages/host/app/components/matrix/room.gts index 9658112b29..f988b28017 100644 --- a/packages/host/app/components/matrix/room.gts +++ b/packages/host/app/components/matrix/room.gts @@ -55,6 +55,7 @@ import { DEFAULT_FALLBACK_MODELS } from '@cardstack/runtime-common/matrix-consta import UpdateRoomSkillsCommand from '@cardstack/host/commands/update-room-skills'; import ENV from '@cardstack/host/config/environment'; +import { isAutoExecutableCommand } from '@cardstack/host/lib/command-auto-execute'; import type { FileUploadState } from '@cardstack/host/lib/file-upload-state'; import type { Message } from '@cardstack/host/lib/matrix-classes/message'; import type { StackItem } from '@cardstack/host/lib/stack-item'; @@ -1861,13 +1862,23 @@ export default class Room extends Component { if (!lastMessage || !lastMessage.commands) { return []; } + let roomResource = this.matrixService.roomResources.get(this.args.roomId); + let activeMode = roomResource?.getActiveLLMModeForMessage( + lastMessage.eventId, + ); return lastMessage.commands.filter( (command) => (command.status === 'ready' || command.status === undefined) && !this.commandService.currentlyExecutingCommandRequestIds.has( command.id!, ) && - !this.commandService.executedCommandRequestIds.has(command.id!), + !this.commandService.executedCommandRequestIds.has(command.id!) 
&& + // Commands destined for auto-execution must not surface the manual + // Accept All / Cancel bar, even during the ~100ms debounce before + // command-service flips `acceptingAllRoomIds`. Without this filter, + // the bar paints and then yanks itself once auto-execution starts, + // which is the CS-11647 glitch. + !isAutoExecutableCommand(command, activeMode), ); } diff --git a/packages/host/app/lib/command-auto-execute.ts b/packages/host/app/lib/command-auto-execute.ts new file mode 100644 index 0000000000..63577ca35f --- /dev/null +++ b/packages/host/app/lib/command-auto-execute.ts @@ -0,0 +1,23 @@ +import type { LLMMode } from '@cardstack/runtime-common/matrix-constants'; + +import type MessageCommand from './matrix-classes/message-command'; + +export const CHECK_CORRECTNESS_COMMAND_NAME = 'checkCorrectness'; + +// Single source of truth for "this command runs without user approval". +// Used by command-service (to decide whether to auto-run) and by the room +// component (to decide whether to render the Accept All / Cancel bar). +// Keeping both call sites on the same predicate prevents the two from +// drifting and reintroducing the action-bar flash that prompted CS-11647. 
+export function isAutoExecutableCommand( + command: Pick<MessageCommand, 'name' | 'requiresApproval'>, + activeLLMMode: LLMMode | undefined, +): boolean { + if (command.name === CHECK_CORRECTNESS_COMMAND_NAME) { + return true; + } + if (command.requiresApproval === false) { + return true; + } + return activeLLMMode === 'act'; +} diff --git a/packages/host/app/services/command-service.ts b/packages/host/app/services/command-service.ts index af34780c10..c382a67589 100644 --- a/packages/host/app/services/command-service.ts +++ b/packages/host/app/services/command-service.ts @@ -36,6 +36,10 @@ import type Realm from '@cardstack/host/services/realm'; import type { CardDef } from 'https://cardstack.com/base/card-api'; import type { CodePatchStatus } from 'https://cardstack.com/base/matrix-event'; +import { + CHECK_CORRECTNESS_COMMAND_NAME, + isAutoExecutableCommand, +} from '../lib/command-auto-execute'; import LimitedSet from '../lib/limited-set'; import type LoaderService from './loader-service'; @@ -50,7 +54,6 @@ import type MessageCommand from '../lib/matrix-classes/message-command'; import type { IEvent } from 'matrix-js-sdk'; const DELAY_FOR_APPLYING_UI = isTesting() ? 
50 : 500; -const CHECK_CORRECTNESS_COMMAND_NAME = 'checkCorrectness'; type GenericCommand = Command< typeof CardDef | undefined, @@ -379,26 +382,13 @@ export default class CommandService extends Service { continue; } - // Get the LLM mode that was active when this message was created let activeModeAtMessageTime = roomResource.getActiveLLMModeForMessage( message.eventId, ); - // Auto-execute if LLM mode is 'act' AND the command came after the LLM mode was set to 'act', - // or if requiresApproval is false - let shouldAutoExecute = false; - let isCheckCorrectnessCommand = - messageCommand.name === CHECK_CORRECTNESS_COMMAND_NAME; - if ( - isCheckCorrectnessCommand || - messageCommand.requiresApproval === false || - activeModeAtMessageTime === 'act' + isAutoExecutableCommand(messageCommand, activeModeAtMessageTime) ) { - shouldAutoExecute = true; - } - - if (shouldAutoExecute) { readyCommands.push(messageCommand); } } diff --git a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts index d842037b5d..c20315f0a3 100644 --- a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts +++ b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts @@ -1641,4 +1641,72 @@ module('Integration | ai-assistant-panel | commands', function (hooks) { 'commandResult should not reference the original/streaming event_id once a later event in room.events owns the commandRequest', ); }); + + test('CS-11647: Accept All bar does not flash for an auto-executed checkCorrectness command', async function (assert) { + let roomId = await renderAiAssistantPanel(); + + // checkCorrectness is on the always-auto-execute list, so the host runs + // it without asking. 
Before the fix, the manual approval bar painted + // for the ~100ms debounce window before command-service flipped + // `acceptingAllRoomIds`; the user saw Accept All / Cancel briefly + // appear then disappear. The bar must never paint in its manual-approval + // branch for this command. + simulateRemoteMessage(roomId, '@aibot:localhost', { + body: 'checking correctness', + msgtype: APP_BOXEL_MESSAGE_MSGTYPE, + format: 'org.matrix.custom.html', + isStreamingFinished: true, + [APP_BOXEL_COMMAND_REQUESTS_KEY]: [ + { + id: 'cs-11647-check-correctness', + name: 'checkCorrectness', + arguments: '{}', + }, + ], + }); + + await waitFor('[data-test-message-idx="0"]'); + assert + .dom('[data-test-accept-all]') + .doesNotExist( + 'Accept All button must not paint in the debounce window before auto-execute starts', + ); + + await settled(); + assert + .dom('[data-test-accept-all]') + .doesNotExist( + 'Accept All button still hidden after the auto-execute debounce window elapses', + ); + }); + + test('CS-11647: Accept All bar still renders for a command that requires user approval', async function (assert) { + let roomId = await renderAiAssistantPanel(`${testRealmURL}Person/fadhlan`); + + simulateRemoteMessage(roomId, '@aibot:localhost', { + body: 'patching', + msgtype: APP_BOXEL_MESSAGE_MSGTYPE, + format: 'org.matrix.custom.html', + isStreamingFinished: true, + [APP_BOXEL_COMMAND_REQUESTS_KEY]: [ + { + id: 'cs-11647-patch', + name: 'patchCardInstance', + arguments: JSON.stringify({ + attributes: { + cardId: `${testRealmURL}Person/fadhlan`, + patch: { attributes: { firstName: 'Dave' } }, + }, + }), + }, + ], + }); + + await waitFor('[data-test-accept-all]'); + assert + .dom('[data-test-accept-all]') + .exists( + 'manual approval bar still renders for commands that require user approval', + ); + }); }); diff --git a/packages/host/tests/unit/lib/command-auto-execute-test.ts b/packages/host/tests/unit/lib/command-auto-execute-test.ts new file mode 100644 index 
0000000000..1de4c80715 --- /dev/null +++ b/packages/host/tests/unit/lib/command-auto-execute-test.ts @@ -0,0 +1,56 @@ +import { module, test } from 'qunit'; + +import { + CHECK_CORRECTNESS_COMMAND_NAME, + isAutoExecutableCommand, +} from '@cardstack/host/lib/command-auto-execute'; + +type AutoExecCommandInput = Parameters<typeof isAutoExecutableCommand>[0]; + +function cmd( + name: string | undefined, + requiresApproval = true, +): AutoExecCommandInput { + return { name, requiresApproval }; +} + +module('Unit | Lib | command-auto-execute', function () { + test('check-correctness commands auto-execute regardless of mode or approval flag', function (assert) { + assert.true( + isAutoExecutableCommand(cmd(CHECK_CORRECTNESS_COMMAND_NAME, true), 'ask'), + 'checkCorrectness in ask mode with requiresApproval=true still auto-executes', + ); + assert.true( + isAutoExecutableCommand( + cmd(CHECK_CORRECTNESS_COMMAND_NAME, true), + undefined, + ), + 'checkCorrectness with unknown mode still auto-executes', + ); + }); + + test('commands with requiresApproval=false auto-execute', function (assert) { + assert.true( + isAutoExecutableCommand(cmd('searchCard', false), 'ask'), + 'requiresApproval=false bypasses approval even in ask mode', + ); + }); + + test('act mode auto-executes commands that would otherwise require approval', function (assert) { + assert.true( + isAutoExecutableCommand(cmd('patchCardInstance', true), 'act'), + 'patchCardInstance in act mode auto-executes', + ); + }); + + test('ask mode with requiresApproval=true does not auto-execute', function (assert) { + assert.false( + isAutoExecutableCommand(cmd('patchCardInstance', true), 'ask'), + 'manual approval is required in ask mode', + ); + assert.false( + isAutoExecutableCommand(cmd('patchCardInstance', true), undefined), + 'manual approval is required when mode is unknown', + ); + }); +}); From c8b2950b7bec202e2061d7c4f498c108df9fd359 Mon Sep 17 00:00:00 2001 From: Fadhlan Ridhwanallah Date: Thu, 18 Jun 2026 22:28:25 +0700 Subject: [PATCH 2/8] 
test(CS-11647): broaden auto-execute action-bar coverage MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit isAutoExecutableCommand has three branches (checkCorrectness, requiresApproval=false, LLM mode 'act') and the readyCommands filter treats them uniformly. Rename the existing integration test so it reads as "any always-auto-executed command" instead of a checkCorrectness-specific case, and add a second test that drives the requiresApproval=false branch via the boxel-environment skill. The LLM-mode='act' branch is still locked in by the unit test — exercising it end-to-end would need extra LLM-mode-state plumbing for diminishing returns since all three branches converge on the same filter line in readyCommands. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../ai-assistant-panel/commands-test.gts | 69 ++++++++++++++++--- 1 file changed, 61 insertions(+), 8 deletions(-) diff --git a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts index c20315f0a3..a5243cc7ed 100644 --- a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts +++ b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts @@ -1642,15 +1642,16 @@ module('Integration | ai-assistant-panel | commands', function (hooks) { ); }); - test('CS-11647: Accept All bar does not flash for an auto-executed checkCorrectness command', async function (assert) { + test('CS-11647: Accept All bar does not flash for an always-auto-executed command (checkCorrectness)', async function (assert) { let roomId = await renderAiAssistantPanel(); - // checkCorrectness is on the always-auto-execute list, so the host runs - // it without asking. 
Before the fix, the manual approval bar painted - // for the ~100ms debounce window before command-service flipped - // `acceptingAllRoomIds`; the user saw Accept All / Cancel briefly - // appear then disappear. The bar must never paint in its manual-approval - // branch for this command. + // checkCorrectness is on the always-auto-execute list (one of three + // branches in isAutoExecutableCommand). Before the fix, the manual + // approval bar painted for the ~100ms debounce window before + // command-service flipped `acceptingAllRoomIds`; the user saw + // Accept All / Cancel briefly appear then disappear. The bar must + // never paint in its manual-approval branch for any auto-executed + // command, regardless of which condition triggers auto-execute. simulateRemoteMessage(roomId, '@aibot:localhost', { body: 'checking correctness', msgtype: APP_BOXEL_MESSAGE_MSGTYPE, @@ -1680,6 +1681,58 @@ module('Integration | ai-assistant-panel | commands', function (hooks) { ); }); + test('CS-11647: Accept All bar does not flash for a requiresApproval=false command', async function (assert) { + setCardInOperatorModeState(`${testRealmURL}Person/fadhlan`); + await renderComponent( + class TestDriver extends GlimmerComponent { + + }, + ); + await waitFor('[data-test-person="Fadhlan"]'); + createAndJoinRoom({ + sender: '@testuser:localhost', + name: 'auto-exec via skill', + }); + await settled(); + await click('[data-test-open-ai-assistant]'); + await waitFor('[data-test-room-name="auto-exec via skill"]', { + timeout: 10000, + }); + + // The boxel-environment skill declares read-file-for-ai-assistant with + // requiresApproval=false (see the skill JSON earlier in this module), + // so MessageCommand.requiresApproval is false here — the second + // isAutoExecutableCommand branch. The fix must also suppress the + // Accept All bar for this path. + await addSkillToAiAssistant(`${testRealmURL}Skill/boxel-environment`); + + let roomId = document + .querySelector('[data-test-room]')! 
+ .getAttribute('data-test-room')!; + simulateRemoteMessage(roomId, '@aibot:localhost', { + msgtype: APP_BOXEL_MESSAGE_MSGTYPE, + body: 'Reading hello file', + format: 'org.matrix.custom.html', + isStreamingFinished: true, + [APP_BOXEL_COMMAND_REQUESTS_KEY]: [ + { + id: 'cs-11647-no-approval', + name: 'read-file-for-ai-assistant_a831', + arguments: JSON.stringify({ + attributes: { fileIdentifier: `${testRealmURL}hello.txt` }, + }), + }, + ], + }); + + await waitFor('[data-test-message-idx="0"]'); + assert + .dom('[data-test-accept-all]') + .doesNotExist( + 'Accept All button suppressed for requiresApproval=false commands', + ); + }); + test('CS-11647: Accept All bar still renders for a command that requires user approval', async function (assert) { let roomId = await renderAiAssistantPanel(`${testRealmURL}Person/fadhlan`); @@ -1706,7 +1759,7 @@ module('Integration | ai-assistant-panel | commands', function (hooks) { assert .dom('[data-test-accept-all]') .exists( - 'manual approval bar still renders for commands that require user approval', + 'manual approval bar still renders for commands that need user approval', ); }); }); From ee9b7fb1985e53ac20d94d4fe5d469717f21c12e Mon Sep 17 00:00:00 2001 From: Fadhlan Ridhwanallah Date: Thu, 18 Jun 2026 22:46:01 +0700 Subject: [PATCH 3/8] fix(CS-11647): apply same auto-execute mask to per-command Apply button MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit The per-command Apply button (rendered next to each tool-call message) flashes through its 'ready' state — a clickable Run button — for the ~100ms debounce window before command-service starts the auto-execute run. Same root cause as the Accept All bar. Reuse isAutoExecutableCommand: when status is ready/undefined and the helper says this command will auto-execute, render the applying state (spinner) immediately. The button then transitions naturally to applied on completion. 
If validate fails in the drain, command-service emits an invalid commandResult and the button settles into its invalid state — no risk of the spinner sticking. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../matrix/room-message-command.gts | 23 ++++++++++++- .../ai-assistant-panel/commands-test.gts | 33 +++++++++++++++++++ 2 files changed, 55 insertions(+), 1 deletion(-) diff --git a/packages/host/app/components/matrix/room-message-command.gts b/packages/host/app/components/matrix/room-message-command.gts index b409fe470b..c626a9bfb9 100644 --- a/packages/host/app/components/matrix/room-message-command.gts +++ b/packages/host/app/components/matrix/room-message-command.gts @@ -26,6 +26,7 @@ import { import type { CommandRequest } from '@cardstack/runtime-common/commands'; +import { isAutoExecutableCommand } from '@cardstack/host/lib/command-auto-execute'; import type MessageCommand from '@cardstack/host/lib/matrix-classes/message-command'; import type { RoomResource } from '@cardstack/host/resources/room'; @@ -77,7 +78,27 @@ export default class RoomMessageCommand extends Component { if (this.didFailCorrectnessCheck) { return 'applied-with-error'; } - return this.args.messageCommand?.status ?? 'ready'; + let status = this.args.messageCommand?.status; + // Mirror the Accept All bar fix: for any command the host will + // auto-execute (checkCorrectness, requiresApproval=false, LLM mode + // 'act'), present the applying spinner immediately on message-landed + // instead of the clickable Run button. Without this, the per-command + // Apply button flashes through 'ready' for the ~100ms debounce window + // before command-service starts the run. If validation later fails + // in the drain, command-service dispatches an `invalid` commandResult + // event and the button transitions to its invalid state — no risk of + // the spinner sticking. + if ((status === 'ready' || status === undefined) && this.willAutoExecute) { + return 'applying'; + } + return status ?? 
'ready'; + } + + private get willAutoExecute() { + let activeMode = this.args.roomResource.getActiveLLMModeForMessage( + this.args.messageCommand.eventId, + ); + return isAutoExecutableCommand(this.args.messageCommand, activeMode); } @use private commandResultCard = resource(() => { diff --git a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts index a5243cc7ed..21c68aab12 100644 --- a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts +++ b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts @@ -1733,6 +1733,39 @@ module('Integration | ai-assistant-panel | commands', function (hooks) { ); }); + test('CS-11647: per-command Apply button does not flash Run before auto-execute starts', async function (assert) { + let roomId = await renderAiAssistantPanel(); + + // The per-command Apply button (rendered next to each tool-call message) + // has the same race as the Accept All bar: between "message lands" + // and "command-service starts the run", a ready Run button would + // briefly render. The fix presents the applying-spinner immediately + // for any auto-executable command. 
+ simulateRemoteMessage(roomId, '@aibot:localhost', { + body: 'checking correctness', + msgtype: APP_BOXEL_MESSAGE_MSGTYPE, + format: 'org.matrix.custom.html', + isStreamingFinished: true, + [APP_BOXEL_COMMAND_REQUESTS_KEY]: [ + { + id: 'cs-11647-apply-button', + name: 'checkCorrectness', + arguments: '{}', + }, + ], + }); + + await waitFor('[data-test-message-idx="0"] [data-test-command-apply]'); + assert + .dom('[data-test-message-idx="0"] [data-test-command-apply="ready"]') + .doesNotExist( + 'per-command Apply button must not show the ready/Run state for an auto-executed command', + ); + assert + .dom('[data-test-message-idx="0"] [data-test-command-apply="applying"]') + .exists('per-command Apply button shows the applying spinner instead'); + }); + test('CS-11647: Accept All bar still renders for a command that requires user approval', async function (assert) { let roomId = await renderAiAssistantPanel(`${testRealmURL}Person/fadhlan`); From fd115f9dc36691f017b44ff9588f80804fc12813 Mon Sep 17 00:00:00 2001 From: Fadhlan Ridhwanallah Date: Fri, 19 Jun 2026 09:23:19 +0700 Subject: [PATCH 4/8] fix(CS-11647): include agent ownership in isAutoExecutableCommand MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit CI surfaced a regression on the per-command Apply button fix: Acceptance | Commands tests: ShowCard command added from a skill, is not automatically executed when agentId does not match A ShowCard command sent by another agent has requiresApproval=false on the skill, so isAutoExecutableCommand returned true and the button masked into the applying spinner — but command-service refuses to run it because of the agentId gate (drainCommandProcessingQueue line 354-357). The button would have stuck on the spinner forever. Thread isOwnedByCurrentAgent through the predicate and short-circuit to false when it's false. Callers compute it from `message.agentId === matrixService.agentId`. 
command-service passes `true` because its outer loop already short-circuits on the same condition before reaching the predicate. Co-Authored-By: Claude Opus 4.7 (1M context) --- .../matrix/room-message-command.gts | 8 +++- packages/host/app/components/matrix/room.gts | 4 +- packages/host/app/lib/command-auto-execute.ts | 20 ++++++++-- packages/host/app/services/command-service.ts | 10 ++++- .../unit/lib/command-auto-execute-test.ts | 39 ++++++++++++++++--- 5 files changed, 69 insertions(+), 12 deletions(-) diff --git a/packages/host/app/components/matrix/room-message-command.gts b/packages/host/app/components/matrix/room-message-command.gts index c626a9bfb9..5f24a1e128 100644 --- a/packages/host/app/components/matrix/room-message-command.gts +++ b/packages/host/app/components/matrix/room-message-command.gts @@ -98,7 +98,13 @@ export default class RoomMessageCommand extends Component { let activeMode = this.args.roomResource.getActiveLLMModeForMessage( this.args.messageCommand.eventId, ); - return isAutoExecutableCommand(this.args.messageCommand, activeMode); + let isOwnedByCurrentAgent = + this.args.messageCommand.message.agentId === this.matrixService.agentId; + return isAutoExecutableCommand( + this.args.messageCommand, + activeMode, + isOwnedByCurrentAgent, + ); } @use private commandResultCard = resource(() => { diff --git a/packages/host/app/components/matrix/room.gts b/packages/host/app/components/matrix/room.gts index f988b28017..d3845deb5f 100644 --- a/packages/host/app/components/matrix/room.gts +++ b/packages/host/app/components/matrix/room.gts @@ -1866,6 +1866,8 @@ export default class Room extends Component { let activeMode = roomResource?.getActiveLLMModeForMessage( lastMessage.eventId, ); + let isOwnedByCurrentAgent = + lastMessage.agentId === this.matrixService.agentId; return lastMessage.commands.filter( (command) => (command.status === 'ready' || command.status === undefined) && @@ -1878,7 +1880,7 @@ export default class Room extends Component { // 
command-service flips `acceptingAllRoomIds`. Without this filter, // the bar paints and then yanks itself once auto-execution starts, // which is the CS-11647 glitch. - !isAutoExecutableCommand(command, activeMode), + !isAutoExecutableCommand(command, activeMode, isOwnedByCurrentAgent), ); } diff --git a/packages/host/app/lib/command-auto-execute.ts b/packages/host/app/lib/command-auto-execute.ts index 63577ca35f..5488edff1a 100644 --- a/packages/host/app/lib/command-auto-execute.ts +++ b/packages/host/app/lib/command-auto-execute.ts @@ -5,14 +5,26 @@ import type MessageCommand from './matrix-classes/message-command'; export const CHECK_CORRECTNESS_COMMAND_NAME = 'checkCorrectness'; // Single source of truth for "this command runs without user approval". -// Used by command-service (to decide whether to auto-run) and by the room -// component (to decide whether to render the Accept All / Cancel bar). -// Keeping both call sites on the same predicate prevents the two from -// drifting and reintroducing the action-bar flash that prompted CS-11647. +// Used by command-service (to decide whether to auto-run) and by the +// room / room-message-command components (to decide whether to render +// the Accept All bar and the per-command Apply button). Keeping all +// call sites on the same predicate prevents them from drifting and +// reintroducing the action-bar flash that prompted CS-11647. +// +// `isOwnedByCurrentAgent` mirrors the agentId gate in +// command-service.drainCommandProcessingQueue: a command sent by +// another agent is never auto-executed, even if it would otherwise +// satisfy one of the three branches below. Callers that don't track +// agents (e.g. unit tests) can pass `true` to focus on the other +// conditions. 
export function isAutoExecutableCommand( command: Pick<MessageCommand, 'name' | 'requiresApproval'>, activeLLMMode: LLMMode | undefined, + isOwnedByCurrentAgent: boolean, ): boolean { + if (!isOwnedByCurrentAgent) { + return false; + } if (command.name === CHECK_CORRECTNESS_COMMAND_NAME) { return true; } diff --git a/packages/host/app/services/command-service.ts b/packages/host/app/services/command-service.ts index c382a67589..62fdd2c154 100644 --- a/packages/host/app/services/command-service.ts +++ b/packages/host/app/services/command-service.ts @@ -386,8 +386,16 @@ export default class CommandService extends Service { message.eventId, ); + // The outer `message.agentId !== this.matrixService.agentId` + // gate above already short-circuited the not-our-agent case, so + // every command reaching this point is owned by the current + // agent. if ( - isAutoExecutableCommand(messageCommand, activeModeAtMessageTime) + isAutoExecutableCommand( + messageCommand, + activeModeAtMessageTime, + true, + ) ) { readyCommands.push(messageCommand); } diff --git a/packages/host/tests/unit/lib/command-auto-execute-test.ts b/packages/host/tests/unit/lib/command-auto-execute-test.ts index 1de4c80715..5cca7448f8 100644 --- a/packages/host/tests/unit/lib/command-auto-execute-test.ts +++ b/packages/host/tests/unit/lib/command-auto-execute-test.ts @@ -17,13 +17,18 @@ function cmd( module('Unit | Lib | command-auto-execute', function () { test('check-correctness commands auto-execute regardless of mode or approval flag', function (assert) { assert.true( - isAutoExecutableCommand(cmd(CHECK_CORRECTNESS_COMMAND_NAME, true), 'ask'), + isAutoExecutableCommand( + cmd(CHECK_CORRECTNESS_COMMAND_NAME, true), + 'ask', + true, + ), 'checkCorrectness in ask mode with requiresApproval=true still auto-executes', ); assert.true( isAutoExecutableCommand( cmd(CHECK_CORRECTNESS_COMMAND_NAME, true), undefined, + true, ), 'checkCorrectness with unknown mode still auto-executes', ); @@ -31,26 +36,50 @@ module('Unit | Lib | command-auto-execute', 
function () { test('commands with requiresApproval=false auto-execute', function (assert) { assert.true( - isAutoExecutableCommand(cmd('searchCard', false), 'ask'), + isAutoExecutableCommand(cmd('searchCard', false), 'ask', true), 'requiresApproval=false bypasses approval even in ask mode', ); }); test('act mode auto-executes commands that would otherwise require approval', function (assert) { assert.true( - isAutoExecutableCommand(cmd('patchCardInstance', true), 'act'), + isAutoExecutableCommand(cmd('patchCardInstance', true), 'act', true), 'patchCardInstance in act mode auto-executes', ); }); test('ask mode with requiresApproval=true does not auto-execute', function (assert) { assert.false( - isAutoExecutableCommand(cmd('patchCardInstance', true), 'ask'), + isAutoExecutableCommand(cmd('patchCardInstance', true), 'ask', true), 'manual approval is required in ask mode', ); assert.false( - isAutoExecutableCommand(cmd('patchCardInstance', true), undefined), + isAutoExecutableCommand(cmd('patchCardInstance', true), undefined, true), 'manual approval is required when mode is unknown', ); }); + + test('commands owned by another agent never auto-execute', function (assert) { + // Mirrors the agentId gate in command-service.drainCommandProcessingQueue: + // a command whose message came from a different agent must not auto-run + // on this host, even if it would otherwise satisfy one of the auto-exec + // branches. UI callers rely on this so the manual approval bar / per- + // command Apply button stay clickable for non-current-agent commands. 
+ assert.false( + isAutoExecutableCommand( + cmd(CHECK_CORRECTNESS_COMMAND_NAME, true), + 'act', + false, + ), + 'checkCorrectness from another agent does not auto-execute', + ); + assert.false( + isAutoExecutableCommand(cmd('searchCard', false), 'act', false), + 'requiresApproval=false from another agent does not auto-execute', + ); + assert.false( + isAutoExecutableCommand(cmd('patchCardInstance', true), 'act', false), + 'act mode from another agent does not auto-execute', + ); + }); }); From ad2e63d29a9540d0dbd7445000e8f606c1eeec60 Mon Sep 17 00:00:00 2001 From: Fadhlan Ridhwanallah Date: Fri, 19 Jun 2026 11:06:50 +0700 Subject: [PATCH 5/8] test(CS-11647): pass agentId in auto-execute tests so the gate matches The new isAutoExecutableCommand agent-ownership check fails closed when message.agentId doesn't equal matrixService.agentId. The new CS-11647 integration tests went through simulateRemoteMessage without setting `data.context.agentId`, so the helper short-circuited to false in the test environment and the Accept All bar / per-command ready button rendered for an unrelated reason. CI surfaced this on the two tests that depended on the helper actually returning true: not ok 121 ... Accept All bar does not flash for a requiresApproval=false command not ok 122 ... per-command Apply button does not flash Run before auto-execute starts Pass the current agent's id explicitly. The checkCorrectness Accept All test also gets the context (previously passing by accident because validate marks status=invalid for an ad-hoc checkCorrectness call, which the readyCommands filter already excludes). 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../ai-assistant-panel/commands-test.gts | 21 +++++++++++++++++++ 1 file changed, 21 insertions(+) diff --git a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts index 21c68aab12..c84be30633 100644 --- a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts +++ b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts @@ -1652,6 +1652,12 @@ module('Integration | ai-assistant-panel | commands', function (hooks) { // Accept All / Cancel briefly appear then disappear. The bar must // never paint in its manual-approval branch for any auto-executed // command, regardless of which condition triggers auto-execute. + // + // agentId must match the host's matrix service so the + // agent-ownership gate in isAutoExecutableCommand passes — otherwise + // the predicate short-circuits to false (the not-our-agent case + // exercised by acceptance/commands-test.gts) and the bar would show + // for an unrelated reason. 
simulateRemoteMessage(roomId, '@aibot:localhost', { body: 'checking correctness', msgtype: APP_BOXEL_MESSAGE_MSGTYPE, @@ -1664,6 +1670,11 @@ module('Integration | ai-assistant-panel | commands', function (hooks) { arguments: '{}', }, ], + data: { + context: { + agentId: getService('matrix-service').agentId, + }, + }, }); await waitFor('[data-test-message-idx="0"]'); @@ -1723,6 +1734,11 @@ module('Integration | ai-assistant-panel | commands', function (hooks) { }), }, ], + data: { + context: { + agentId: getService('matrix-service').agentId, + }, + }, }); await waitFor('[data-test-message-idx="0"]'); @@ -1753,6 +1769,11 @@ module('Integration | ai-assistant-panel | commands', function (hooks) { arguments: '{}', }, ], + data: { + context: { + agentId: getService('matrix-service').agentId, + }, + }, }); await waitFor('[data-test-message-idx="0"] [data-test-command-apply]'); From 2f1a7e7f98c71d0b823338844520985281d9b990 Mon Sep 17 00:00:00 2001 From: Fadhlan Ridhwanallah Date: Fri, 19 Jun 2026 15:50:16 +0700 Subject: [PATCH 6/8] fix(CS-11647): invalidate auto-executable commands when room processing is stuck drainCommandProcessingQueue used to `continue` past a wedged room without emitting any commandResult, so the synthetic "applying" spinner in room-message-command.gts had no terminal event to fall through and hung forever. Dispatch an `invalid` commandResult for each auto-executable command on that path so the UI surfaces the invalidCommandState alert with a Try Anyway button. Also extract STUCK_PROCESSING_TIMEOUT_MS so tests can target the same threshold the prod path uses. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/host/app/services/command-service.ts | 86 ++++++++++++++++++- 1 file changed, 82 insertions(+), 4 deletions(-) diff --git a/packages/host/app/services/command-service.ts b/packages/host/app/services/command-service.ts index 62fdd2c154..643fd27d75 100644 --- a/packages/host/app/services/command-service.ts +++ b/packages/host/app/services/command-service.ts @@ -51,9 +51,15 @@ import type StoreService from './store'; import type { CodeData } from '../lib/formatted-message/utils'; import type MessageCodePatchResult from '../lib/matrix-classes/message-code-patch-result'; import type MessageCommand from '../lib/matrix-classes/message-command'; +import type { RoomResource } from '../resources/room'; import type { IEvent } from 'matrix-js-sdk'; const DELAY_FOR_APPLYING_UI = isTesting() ? 50 : 500; +// How long drainCommandProcessingQueue waits for a room resource that's +// still processing before giving up on the event. In tests we shorten this +// so the stuck-timeout invalidation path can be exercised in a single test +// without holding a real test open for a minute. +const STUCK_PROCESSING_TIMEOUT_MS = isTesting() ? 
1000 : 60_000; type GenericCommand = Command< typeof CardDef | undefined, @@ -323,7 +329,7 @@ export default class CommandService extends Service { `Room resource not found for room id ${roomId}, this should not happen`, ); } - let timeout = Date.now() + 60_000; // reset the timer to avoid a long wait if the room resource is processing + let timeout = Date.now() + STUCK_PROCESSING_TIMEOUT_MS; // reset the timer to avoid a long wait if the room resource is processing let currentRoomProcessingTimestamp = roomResource.processingLastStartedAt; while ( @@ -340,9 +346,23 @@ export default class CommandService extends Service { currentRoomProcessingTimestamp === roomResource.processingLastStartedAt ) { - // room seems to be stuck processing, so we will log and skip this event + // Room processing is wedged. The synthetic 'applying' state in + // room-message-command.gts shows the spinner the moment an + // auto-executable command lands and only clears when we dispatch + // a terminal commandResult ('applied' or 'invalid'). If we just + // logged and continued, the spinner would hang indefinitely with + // no manual Run fallback. Mark each auto-executable command on + // this message invalid so the UI falls through to the + // invalidCommandState "Try Anyway" branch; manual-approval + // commands are left in 'ready' so the action bar's Run button + // remains the user's fallback. 
console.error( - `Room resource for room ${roomId} seems to be stuck processing, skipping event ${eventId}`, + `Room resource for room ${roomId} seems to be stuck processing, invalidating auto-executable commands on event ${eventId}`, + ); + await this.invalidateAutoExecutableCommandsForStuckProcessing( + roomResource, + roomId!, + eventId!, ); continue; } @@ -420,6 +440,64 @@ export default class CommandService extends Service { } } + private async invalidateAutoExecutableCommandsForStuckProcessing( + roomResource: RoomResource, + roomId: string, + eventId: string, + ) { + let message = roomResource.messages.find((m) => m.eventId === eventId); + if (!message) { + return; + } + if (message.agentId !== this.matrixService.agentId) { + return; + } + let activeModeAtMessageTime = roomResource.getActiveLLMModeForMessage( + message.eventId, + ); + for (let messageCommand of message.commands) { + if (this.currentlyExecutingCommandRequestIds.has(messageCommand.id!)) { + continue; + } + if (this.executedCommandRequestIds.has(messageCommand.id!)) { + continue; + } + if ( + messageCommand.status === 'applied' || + messageCommand.status === 'invalid' + ) { + continue; + } + if (!messageCommand.name) { + continue; + } + // The outer agentId gate already verified ownership, so this command + // is owned by the current agent. + if ( + !isAutoExecutableCommand(messageCommand, activeModeAtMessageTime, true) + ) { + // Manual-approval commands stay 'ready' — the action bar's Run + // button is still the user's fallback for those. + continue; + } + let invokedToolFromEventId = + this.getCurrentEventIdForCommandRequest( + roomId, + messageCommand.commandRequest.id, + ) ?? 
messageCommand.eventId; + await this.matrixService.sendCommandResultEvent({ + roomId, + invokedToolFromEventId, + toolCallId: messageCommand.commandRequest.id!, + status: 'invalid', + failureReason: `Room processing did not finish within ${Math.round( + STUCK_PROCESSING_TIMEOUT_MS / 1000, + )}s; command was not started`, + context: await this.operatorModeStateService.getSummaryForAIBot(), + }); + } + } + private async drainCodePatchProcessingQueue() { let waiterToken = commandProcessingWaiter.beginAsync(); try { @@ -442,7 +520,7 @@ export default class CommandService extends Service { `Room resource not found for room id ${roomId}, this should not happen`, ); } - let timeout = Date.now() + 60_000; // reset the timer to avoid a long wait if the room resource is processing + let timeout = Date.now() + STUCK_PROCESSING_TIMEOUT_MS; // reset the timer to avoid a long wait if the room resource is processing let currentRoomProcessingTimestamp = roomResource.processingLastStartedAt; while ( From 3d4cfc3f757e2c0f98487df11d67473304414c43 Mon Sep 17 00:00:00 2001 From: Fadhlan Ridhwanallah Date: Fri, 19 Jun 2026 15:50:27 +0700 Subject: [PATCH 7/8] fix(CS-11647): align data-test-command-card-idle with synthetic applying state While the synthetic "applying" spinner is up (auto-executable command between landing and drainCommandProcessingQueue starting the run task), the card's idle test attribute used to disagree with the apply button: the apply button reported applying, the card still reported idle. Drive both off applyButtonState so the two attributes always agree about whether the spinner is visible. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- packages/host/app/components/matrix/room-message-command.gts | 4 ++-- 1 file changed, 2 insertions(+), 2 deletions(-) diff --git a/packages/host/app/components/matrix/room-message-command.gts b/packages/host/app/components/matrix/room-message-command.gts index 5f24a1e128..1327bc15d2 100644 --- a/packages/host/app/components/matrix/room-message-command.gts +++ b/packages/host/app/components/matrix/room-message-command.gts @@ -260,7 +260,7 @@ export default class RoomMessageCommand extends Component { @monacoSDK={{@monacoSDK}} @codeData={{hash code=this.previewCommandCode language='json'}} data-test-command-card-idle={{not - (eq @messageCommand.status 'applying') + (eq this.applyButtonState 'applying') }} as |codeBlock| > @@ -280,7 +280,7 @@ export default class RoomMessageCommand extends Component { @monacoSDK={{@monacoSDK}} @codeData={{hash code=this.previewCommandCode language='json'}} data-test-command-card-idle={{not - (eq @messageCommand.status 'applying') + (eq this.applyButtonState 'applying') }} as |codeBlock| > From 1bd7797173cb7ac61987b1c147746268f7ad1a39 Mon Sep 17 00:00:00 2001 From: Fadhlan Ridhwanallah Date: Fri, 19 Jun 2026 15:50:39 +0700 Subject: [PATCH 8/8] test(CS-11647): cover stuck-processing helper, idle-attr coherence, and drop ticket prefix - New unit-flavoured integration test exercising invalidateAutoExecutableCommandsForStuckProcessing directly: spies on matrixService.sendCommandResultEvent and asserts only the auto- executable command gets an invalid dispatch with the expected failureReason. Avoids stubbing the ember-resources proxy, which silently no-ops Object.defineProperty. - Add a coherence assertion that data-test-command-card-idle is omitted while the synthetic applying state is on (Glimmer drops attributes bound to falsy expressions, so the test selector is presence-based). - Drop the per-test CS-11647 prefix from titles now that the cluster is large enough to read on its own. 
Co-Authored-By: Claude Opus 4.7 (1M context) --- .../ai-assistant-panel/commands-test.gts | 114 +++++++++++++++++- 1 file changed, 110 insertions(+), 4 deletions(-) diff --git a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts index c84be30633..eb1f39e0c9 100644 --- a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts +++ b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts @@ -1642,7 +1642,7 @@ module('Integration | ai-assistant-panel | commands', function (hooks) { ); }); - test('CS-11647: Accept All bar does not flash for an always-auto-executed command (checkCorrectness)', async function (assert) { + test('Accept All bar does not flash for an always-auto-executed command (checkCorrectness)', async function (assert) { let roomId = await renderAiAssistantPanel(); // checkCorrectness is on the always-auto-execute list (one of three @@ -1692,7 +1692,7 @@ module('Integration | ai-assistant-panel | commands', function (hooks) { ); }); - test('CS-11647: Accept All bar does not flash for a requiresApproval=false command', async function (assert) { + test('Accept All bar does not flash for a requiresApproval=false command', async function (assert) { setCardInOperatorModeState(`${testRealmURL}Person/fadhlan`); await renderComponent( class TestDriver extends GlimmerComponent { @@ -1749,7 +1749,7 @@ module('Integration | ai-assistant-panel | commands', function (hooks) { ); }); - test('CS-11647: per-command Apply button does not flash Run before auto-execute starts', async function (assert) { + test('per-command Apply button does not flash Run before auto-execute starts', async function (assert) { let roomId = await renderAiAssistantPanel(); // The per-command Apply button (rendered next to each tool-call message) @@ -1785,9 +1785,115 @@ module('Integration | ai-assistant-panel | commands', function 
(hooks) { assert .dom('[data-test-message-idx="0"] [data-test-command-apply="applying"]') .exists('per-command Apply button shows the applying spinner instead'); + // The data-test-command-card-idle attribute is computed from + // applyButtonState (not the raw status); while the synthetic 'applying' + // is on it must NOT mark the card idle. Glimmer omits an attribute + // bound to a falsy expression, so the coherence check is on attribute + // presence — the apply button + the card must agree the spinner is + // up, not just one of them. + assert + .dom('[data-test-message-idx="0"] [data-test-command-card-idle]') + .doesNotExist( + 'data-test-command-card-idle agrees with applyButtonState while the synthetic spinner is on', + ); + }); + + test('stuck-processing helper dispatches an invalid commandResult for each auto-executable command', async function (assert) { + let roomId = await renderAiAssistantPanel(); + + // Verifies the follow-up fix for the synthetic-spinner hang flagged in + // the self-review of this branch: drainCommandProcessingQueue must + // dispatch an `invalid` commandResult when a room is wedged, so the + // synthetic 'applying' state in room-message-command.gts falls through + // to the invalidCommandState ("Try Anyway") branch instead of pinning + // a spinner that no terminal event ever clears. + // + // Driving the real wait-loop end-to-end is unstable: roomResource is + // an ember-resources proxy so own-property defines for isProcessing / + // processingLastStartedAt silently no-op, and there's no public seam + // to keep the processRoomTask "running" without rewriting the + // resource itself. Instead, exercise the helper directly with a + // spied sendCommandResultEvent on matrixService — this proves the + // dispatch shape, the per-command iteration, and the failureReason + // text without depending on the proxy internals. 
+ let matrixService = getService('matrix-service'); + let commandService = getService('command-service'); + + simulateRemoteMessage(roomId, '@aibot:localhost', { + body: 'checking correctness', + msgtype: APP_BOXEL_MESSAGE_MSGTYPE, + format: 'org.matrix.custom.html', + isStreamingFinished: true, + [APP_BOXEL_COMMAND_REQUESTS_KEY]: [ + { + id: 'cs-11647-stuck-auto', + name: 'checkCorrectness', + arguments: '{}', + }, + { + id: 'cs-11647-stuck-manual', + name: 'patchCardInstance', + arguments: JSON.stringify({ + attributes: { + cardId: `${testRealmURL}Person/fadhlan`, + patch: { attributes: { firstName: 'Dave' } }, + }, + }), + }, + ], + data: { + context: { + agentId: matrixService.agentId, + }, + }, + }); + await waitFor('[data-test-message-idx="0"] [data-test-command-apply]'); + + let roomResource = matrixService.roomResources.get(roomId)!; + let message = roomResource.messages.find( + (m: any) => m.commands?.length === 2, + ); + assert.ok(message, 'two-command bot message lands in the room resource'); + + let captured: Array<{ toolCallId: string; failureReason?: string }> = []; + let originalSend = matrixService.sendCommandResultEvent.bind(matrixService); + (matrixService as any).sendCommandResultEvent = async (params: any) => { + captured.push({ + toolCallId: params.toolCallId, + failureReason: params.failureReason, + }); + }; + try { + await ( + commandService as any + ).invalidateAutoExecutableCommandsForStuckProcessing( + roomResource, + roomId, + message!.eventId, + ); + } finally { + (matrixService as any).sendCommandResultEvent = originalSend; + } + + assert.strictEqual( + captured.length, + 1, + 'only the auto-executable command is invalidated; manual-approval command is left in ready', + ); + assert.strictEqual( + captured[0]?.toolCallId, + 'cs-11647-stuck-auto', + 'the dispatched invalid event targets the auto-executable command', + ); + assert.true( + (captured[0]?.failureReason ?? 
'').startsWith( + 'Room processing did not finish within', + ), + 'failureReason surfaces the stuck-processing cause for the invalidCommandState alert', + ); }); - test('CS-11647: Accept All bar still renders for a command that requires user approval', async function (assert) { + test('Accept All bar still renders for a command that requires user approval', async function (assert) { let roomId = await renderAiAssistantPanel(`${testRealmURL}Person/fadhlan`); simulateRemoteMessage(roomId, '@aibot:localhost', {