From 07dc0106f55b365c5245ca36359e0736f73485ea Mon Sep 17 00:00:00 2001
From: Fadhlan Ridhwanallah <fridhwanallah@gmail.com>
Date: Thu, 18 Jun 2026 14:47:22 +0700
Subject: [PATCH 1/8] fix(CS-11647): hide Accept All bar for auto-executed
 commands
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The manual Accept All / Cancel bar briefly flashed below an assistant
tool call before command-service started auto-executing it. The drain
loop decides "auto-execute" inside an async 100ms-debounced task, but
the room's readyCommands getter only filtered by execution status —
so during the debounce window the bar painted, then yanked itself
when acceptingAllRoomIds flipped.

Pull the auto-execute decision into a synchronous predicate
(isAutoExecutableCommand) and call it from both command-service
(to decide whether to run) and room.gts readyCommands (to decide
whether to render the bar). This gives a single source of truth for
the three conditions: checkCorrectness, requiresApproval=false, and
LLM mode 'act'.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 packages/host/app/components/matrix/room.gts  | 13 +++-
 packages/host/app/lib/command-auto-execute.ts | 23 +++++++
 packages/host/app/services/command-service.ts | 20 ++----
 .../ai-assistant-panel/commands-test.gts      | 68 +++++++++++++++++++
 .../unit/lib/command-auto-execute-test.ts     | 56 +++++++++++++++
 5 files changed, 164 insertions(+), 16 deletions(-)
 create mode 100644 packages/host/app/lib/command-auto-execute.ts
 create mode 100644 packages/host/tests/unit/lib/command-auto-execute-test.ts
diff --git a/packages/host/app/components/matrix/room.gts b/packages/host/app/components/matrix/room.gts
index 9658112b29..f988b28017 100644
--- a/packages/host/app/components/matrix/room.gts
+++ b/packages/host/app/components/matrix/room.gts
@@ -55,6 +55,7 @@ import { DEFAULT_FALLBACK_MODELS } from '@cardstack/runtime-common/matrix-consta
 
 import UpdateRoomSkillsCommand from '@cardstack/host/commands/update-room-skills';
 import ENV from '@cardstack/host/config/environment';
+import { isAutoExecutableCommand } from '@cardstack/host/lib/command-auto-execute';
 import type { FileUploadState } from '@cardstack/host/lib/file-upload-state';
 import type { Message } from '@cardstack/host/lib/matrix-classes/message';
 import type { StackItem } from '@cardstack/host/lib/stack-item';
@@ -1861,13 +1862,23 @@ export default class Room extends Component<Signature> {
     if (!lastMessage || !lastMessage.commands) {
       return [];
     }
+    let roomResource = this.matrixService.roomResources.get(this.args.roomId);
+    let activeMode = roomResource?.getActiveLLMModeForMessage(
+      lastMessage.eventId,
+    );
     return lastMessage.commands.filter(
       (command) =>
         (command.status === 'ready' || command.status === undefined) &&
         !this.commandService.currentlyExecutingCommandRequestIds.has(
           command.id!,
         ) &&
-        !this.commandService.executedCommandRequestIds.has(command.id!),
+        !this.commandService.executedCommandRequestIds.has(command.id!) &&
+        // Commands destined for auto-execution must not surface the manual
+        // Accept All / Cancel bar, even during the ~100ms debounce before
+        // command-service flips `acceptingAllRoomIds`. Without this filter,
+        // the bar paints and then yanks itself once auto-execution starts,
+        // which is the CS-11647 glitch.
+        !isAutoExecutableCommand(command, activeMode),
     );
   }
 
diff --git a/packages/host/app/lib/command-auto-execute.ts b/packages/host/app/lib/command-auto-execute.ts
new file mode 100644
index 0000000000..63577ca35f
--- /dev/null
+++ b/packages/host/app/lib/command-auto-execute.ts
@@ -0,0 +1,23 @@
+import type { LLMMode } from '@cardstack/runtime-common/matrix-constants';
+
+import type MessageCommand from './matrix-classes/message-command';
+
+export const CHECK_CORRECTNESS_COMMAND_NAME = 'checkCorrectness';
+
+// Single source of truth for "this command runs without user approval".
+// Used by command-service (to decide whether to auto-run) and by the room
+// component (to decide whether to render the Accept All / Cancel bar).
+// Keeping both call sites on the same predicate prevents the two from
+// drifting and reintroducing the action-bar flash that prompted CS-11647.
+export function isAutoExecutableCommand(
+  command: Pick<MessageCommand, 'name' | 'requiresApproval'>,
+  activeLLMMode: LLMMode | undefined,
+): boolean {
+  if (command.name === CHECK_CORRECTNESS_COMMAND_NAME) {
+    return true;
+  }
+  if (command.requiresApproval === false) {
+    return true;
+  }
+  return activeLLMMode === 'act';
+}
diff --git a/packages/host/app/services/command-service.ts b/packages/host/app/services/command-service.ts
index af34780c10..c382a67589 100644
--- a/packages/host/app/services/command-service.ts
+++ b/packages/host/app/services/command-service.ts
@@ -36,6 +36,10 @@ import type Realm from '@cardstack/host/services/realm';
 import type { CardDef } from 'https://cardstack.com/base/card-api';
 import type { CodePatchStatus } from 'https://cardstack.com/base/matrix-event';
 
+import {
+  CHECK_CORRECTNESS_COMMAND_NAME,
+  isAutoExecutableCommand,
+} from '../lib/command-auto-execute';
 import LimitedSet from '../lib/limited-set';
 
 import type LoaderService from './loader-service';
@@ -50,7 +54,6 @@ import type MessageCommand from '../lib/matrix-classes/message-command';
 import type { IEvent } from 'matrix-js-sdk';
 
 const DELAY_FOR_APPLYING_UI = isTesting() ? 50 : 500;
-const CHECK_CORRECTNESS_COMMAND_NAME = 'checkCorrectness';
 
 type GenericCommand = Command<
   typeof CardDef | undefined,
@@ -379,26 +382,13 @@ export default class CommandService extends Service {
             continue;
           }
 
-          // Get the LLM mode that was active when this message was created
           let activeModeAtMessageTime = roomResource.getActiveLLMModeForMessage(
             message.eventId,
           );
 
-          // Auto-execute if LLM mode is 'act' AND the command came after the LLM mode was set to 'act',
-          // or if requiresApproval is false
-          let shouldAutoExecute = false;
-          let isCheckCorrectnessCommand =
-            messageCommand.name === CHECK_CORRECTNESS_COMMAND_NAME;
-
           if (
-            isCheckCorrectnessCommand ||
-            messageCommand.requiresApproval === false ||
-            activeModeAtMessageTime === 'act'
+            isAutoExecutableCommand(messageCommand, activeModeAtMessageTime)
           ) {
-            shouldAutoExecute = true;
-          }
-
-          if (shouldAutoExecute) {
             readyCommands.push(messageCommand);
           }
         }
diff --git a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts
index d842037b5d..c20315f0a3 100644
--- a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts
+++ b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts
@@ -1641,4 +1641,72 @@ module('Integration | ai-assistant-panel | commands', function (hooks) {
       'commandResult should not reference the original/streaming event_id once a later event in room.events owns the commandRequest',
     );
   });
+
+  test('CS-11647: Accept All bar does not flash for an auto-executed checkCorrectness command', async function (assert) {
+    let roomId = await renderAiAssistantPanel();
+
+    // checkCorrectness is on the always-auto-execute list, so the host runs
+    // it without asking. Before the fix, the manual approval bar painted
+    // for the ~100ms debounce window before command-service flipped
+    // `acceptingAllRoomIds`; the user saw Accept All / Cancel briefly
+    // appear then disappear. The bar must never paint in its manual-approval
+    // branch for this command.
+    simulateRemoteMessage(roomId, '@aibot:localhost', {
+      body: 'checking correctness',
+      msgtype: APP_BOXEL_MESSAGE_MSGTYPE,
+      format: 'org.matrix.custom.html',
+      isStreamingFinished: true,
+      [APP_BOXEL_COMMAND_REQUESTS_KEY]: [
+        {
+          id: 'cs-11647-check-correctness',
+          name: 'checkCorrectness',
+          arguments: '{}',
+        },
+      ],
+    });
+
+    await waitFor('[data-test-message-idx="0"]');
+    assert
+      .dom('[data-test-accept-all]')
+      .doesNotExist(
+        'Accept All button must not paint in the debounce window before auto-execute starts',
+      );
+
+    await settled();
+    assert
+      .dom('[data-test-accept-all]')
+      .doesNotExist(
+        'Accept All button still hidden after the auto-execute debounce window elapses',
+      );
+  });
+
+  test('CS-11647: Accept All bar still renders for a command that requires user approval', async function (assert) {
+    let roomId = await renderAiAssistantPanel(`${testRealmURL}Person/fadhlan`);
+
+    simulateRemoteMessage(roomId, '@aibot:localhost', {
+      body: 'patching',
+      msgtype: APP_BOXEL_MESSAGE_MSGTYPE,
+      format: 'org.matrix.custom.html',
+      isStreamingFinished: true,
+      [APP_BOXEL_COMMAND_REQUESTS_KEY]: [
+        {
+          id: 'cs-11647-patch',
+          name: 'patchCardInstance',
+          arguments: JSON.stringify({
+            attributes: {
+              cardId: `${testRealmURL}Person/fadhlan`,
+              patch: { attributes: { firstName: 'Dave' } },
+            },
+          }),
+        },
+      ],
+    });
+
+    await waitFor('[data-test-accept-all]');
+    assert
+      .dom('[data-test-accept-all]')
+      .exists(
+        'manual approval bar still renders for commands that require user approval',
+      );
+  });
 });
diff --git a/packages/host/tests/unit/lib/command-auto-execute-test.ts b/packages/host/tests/unit/lib/command-auto-execute-test.ts
new file mode 100644
index 0000000000..1de4c80715
--- /dev/null
+++ b/packages/host/tests/unit/lib/command-auto-execute-test.ts
@@ -0,0 +1,56 @@
+import { module, test } from 'qunit';
+
+import {
+  CHECK_CORRECTNESS_COMMAND_NAME,
+  isAutoExecutableCommand,
+} from '@cardstack/host/lib/command-auto-execute';
+
+type AutoExecCommandInput = Parameters<typeof isAutoExecutableCommand>[0];
+
+function cmd(
+  name: string | undefined,
+  requiresApproval = true,
+): AutoExecCommandInput {
+  return { name, requiresApproval };
+}
+
+module('Unit | Lib | command-auto-execute', function () {
+  test('check-correctness commands auto-execute regardless of mode or approval flag', function (assert) {
+    assert.true(
+      isAutoExecutableCommand(cmd(CHECK_CORRECTNESS_COMMAND_NAME, true), 'ask'),
+      'checkCorrectness in ask mode with requiresApproval=true still auto-executes',
+    );
+    assert.true(
+      isAutoExecutableCommand(
+        cmd(CHECK_CORRECTNESS_COMMAND_NAME, true),
+        undefined,
+      ),
+      'checkCorrectness with unknown mode still auto-executes',
+    );
+  });
+
+  test('commands with requiresApproval=false auto-execute', function (assert) {
+    assert.true(
+      isAutoExecutableCommand(cmd('searchCard', false), 'ask'),
+      'requiresApproval=false bypasses approval even in ask mode',
+    );
+  });
+
+  test('act mode auto-executes commands that would otherwise require approval', function (assert) {
+    assert.true(
+      isAutoExecutableCommand(cmd('patchCardInstance', true), 'act'),
+      'patchCardInstance in act mode auto-executes',
+    );
+  });
+
+  test('ask mode with requiresApproval=true does not auto-execute', function (assert) {
+    assert.false(
+      isAutoExecutableCommand(cmd('patchCardInstance', true), 'ask'),
+      'manual approval is required in ask mode',
+    );
+    assert.false(
+      isAutoExecutableCommand(cmd('patchCardInstance', true), undefined),
+      'manual approval is required when mode is unknown',
+    );
+  });
+});

From c8b2950b7bec202e2061d7c4f498c108df9fd359 Mon Sep 17 00:00:00 2001
From: Fadhlan Ridhwanallah <fridhwanallah@gmail.com>
Date: Thu, 18 Jun 2026 22:28:25 +0700
Subject: [PATCH 2/8] test(CS-11647): broaden auto-execute action-bar coverage
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

isAutoExecutableCommand has three branches (checkCorrectness,
requiresApproval=false, LLM mode 'act') and the readyCommands filter
treats them uniformly. Rename the existing integration test so it
reads as "any always-auto-executed command" instead of a
checkCorrectness-specific case, and add a second test that drives the
requiresApproval=false branch via the boxel-environment skill.

The LLM-mode='act' branch is still locked in by the unit test —
exercising it end-to-end would need extra LLM-mode-state plumbing for
diminishing returns since all three branches converge on the same
filter line in readyCommands.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../ai-assistant-panel/commands-test.gts      | 69 ++++++++++++++++---
 1 file changed, 61 insertions(+), 8 deletions(-)

diff --git a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts
index c20315f0a3..a5243cc7ed 100644
--- a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts
+++ b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts
@@ -1642,15 +1642,16 @@ module('Integration | ai-assistant-panel | commands', function (hooks) {
     );
   });
 
-  test('CS-11647: Accept All bar does not flash for an auto-executed checkCorrectness command', async function (assert) {
+  test('CS-11647: Accept All bar does not flash for an always-auto-executed command (checkCorrectness)', async function (assert) {
     let roomId = await renderAiAssistantPanel();
 
-    // checkCorrectness is on the always-auto-execute list, so the host runs
-    // it without asking. Before the fix, the manual approval bar painted
-    // for the ~100ms debounce window before command-service flipped
-    // `acceptingAllRoomIds`; the user saw Accept All / Cancel briefly
-    // appear then disappear. The bar must never paint in its manual-approval
-    // branch for this command.
+    // checkCorrectness is on the always-auto-execute list (one of three
+    // branches in isAutoExecutableCommand). Before the fix, the manual
+    // approval bar painted for the ~100ms debounce window before
+    // command-service flipped `acceptingAllRoomIds`; the user saw
+    // Accept All / Cancel briefly appear then disappear. The bar must
+    // never paint in its manual-approval branch for any auto-executed
+    // command, regardless of which condition triggers auto-execute.
     simulateRemoteMessage(roomId, '@aibot:localhost', {
       body: 'checking correctness',
       msgtype: APP_BOXEL_MESSAGE_MSGTYPE,
@@ -1680,6 +1681,58 @@ module('Integration | ai-assistant-panel | commands', function (hooks) {
       );
   });
 
+  test('CS-11647: Accept All bar does not flash for a requiresApproval=false command', async function (assert) {
+    setCardInOperatorModeState(`${testRealmURL}Person/fadhlan`);
+    await renderComponent(
+      class TestDriver extends GlimmerComponent {
+        <template><OperatorMode @onClose={{noop}} /></template>
+      },
+    );
+    await waitFor('[data-test-person="Fadhlan"]');
+    createAndJoinRoom({
+      sender: '@testuser:localhost',
+      name: 'auto-exec via skill',
+    });
+    await settled();
+    await click('[data-test-open-ai-assistant]');
+    await waitFor('[data-test-room-name="auto-exec via skill"]', {
+      timeout: 10000,
+    });
+
+    // The boxel-environment skill declares read-file-for-ai-assistant with
+    // requiresApproval=false (see the skill JSON earlier in this module),
+    // so MessageCommand.requiresApproval is false here — the second
+    // isAutoExecutableCommand branch. The fix must also suppress the
+    // Accept All bar for this path.
+    await addSkillToAiAssistant(`${testRealmURL}Skill/boxel-environment`);
+
+    let roomId = document
+      .querySelector('[data-test-room]')!
+      .getAttribute('data-test-room')!;
+    simulateRemoteMessage(roomId, '@aibot:localhost', {
+      msgtype: APP_BOXEL_MESSAGE_MSGTYPE,
+      body: 'Reading hello file',
+      format: 'org.matrix.custom.html',
+      isStreamingFinished: true,
+      [APP_BOXEL_COMMAND_REQUESTS_KEY]: [
+        {
+          id: 'cs-11647-no-approval',
+          name: 'read-file-for-ai-assistant_a831',
+          arguments: JSON.stringify({
+            attributes: { fileIdentifier: `${testRealmURL}hello.txt` },
+          }),
+        },
+      ],
+    });
+
+    await waitFor('[data-test-message-idx="0"]');
+    assert
+      .dom('[data-test-accept-all]')
+      .doesNotExist(
+        'Accept All button suppressed for requiresApproval=false commands',
+      );
+  });
+
   test('CS-11647: Accept All bar still renders for a command that requires user approval', async function (assert) {
     let roomId = await renderAiAssistantPanel(`${testRealmURL}Person/fadhlan`);
 
@@ -1706,7 +1759,7 @@ module('Integration | ai-assistant-panel | commands', function (hooks) {
     assert
       .dom('[data-test-accept-all]')
       .exists(
-        'manual approval bar still renders for commands that require user approval',
+        'manual approval bar still renders for commands that need user approval',
       );
   });
 });

From ee9b7fb1985e53ac20d94d4fe5d469717f21c12e Mon Sep 17 00:00:00 2001
From: Fadhlan Ridhwanallah <fridhwanallah@gmail.com>
Date: Thu, 18 Jun 2026 22:46:01 +0700
Subject: [PATCH 3/8] fix(CS-11647): apply same auto-execute mask to
 per-command Apply button
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The per-command Apply button (rendered next to each tool-call message)
flashes through its 'ready' state — a clickable Run button — for the
~100ms debounce window before command-service starts the auto-execute
run. Same root cause as the Accept All bar.

Reuse isAutoExecutableCommand: when status is ready/undefined and the
helper says this command will auto-execute, render the applying state
(spinner) immediately. The button then transitions naturally to
applied on completion. If validation fails in the drain,
command-service emits an invalid commandResult and the button settles
into its invalid state — no risk of the spinner sticking.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../matrix/room-message-command.gts           | 23 ++++++++++++-
 .../ai-assistant-panel/commands-test.gts      | 33 +++++++++++++++++++
 2 files changed, 55 insertions(+), 1 deletion(-)

diff --git a/packages/host/app/components/matrix/room-message-command.gts b/packages/host/app/components/matrix/room-message-command.gts
index b409fe470b..c626a9bfb9 100644
--- a/packages/host/app/components/matrix/room-message-command.gts
+++ b/packages/host/app/components/matrix/room-message-command.gts
@@ -26,6 +26,7 @@ import {
 
 import type { CommandRequest } from '@cardstack/runtime-common/commands';
 
+import { isAutoExecutableCommand } from '@cardstack/host/lib/command-auto-execute';
 import type MessageCommand from '@cardstack/host/lib/matrix-classes/message-command';
 
 import type { RoomResource } from '@cardstack/host/resources/room';
@@ -77,7 +78,27 @@ export default class RoomMessageCommand extends Component<Signature> {
     if (this.didFailCorrectnessCheck) {
       return 'applied-with-error';
     }
-    return this.args.messageCommand?.status ?? 'ready';
+    let status = this.args.messageCommand?.status;
+    // Mirror the Accept All bar fix: for any command the host will
+    // auto-execute (checkCorrectness, requiresApproval=false, LLM mode
+    // 'act'), present the applying spinner immediately on message-landed
+    // instead of the clickable Run button. Without this, the per-command
+    // Apply button flashes through 'ready' for the ~100ms debounce window
+    // before command-service starts the run. If validation later fails
+    // in the drain, command-service dispatches an `invalid` commandResult
+    // event and the button transitions to its invalid state — no risk of
+    // the spinner sticking.
+    if ((status === 'ready' || status === undefined) && this.willAutoExecute) {
+      return 'applying';
+    }
+    return status ?? 'ready';
+  }
+
+  private get willAutoExecute() {
+    let activeMode = this.args.roomResource.getActiveLLMModeForMessage(
+      this.args.messageCommand.eventId,
+    );
+    return isAutoExecutableCommand(this.args.messageCommand, activeMode);
   }
 
   @use private commandResultCard = resource(() => {
diff --git a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts
index a5243cc7ed..21c68aab12 100644
--- a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts
+++ b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts
@@ -1733,6 +1733,39 @@ module('Integration | ai-assistant-panel | commands', function (hooks) {
       );
   });
 
+  test('CS-11647: per-command Apply button does not flash Run before auto-execute starts', async function (assert) {
+    let roomId = await renderAiAssistantPanel();
+
+    // The per-command Apply button (rendered next to each tool-call message)
+    // has the same race as the Accept All bar: between "message lands"
+    // and "command-service starts the run", a ready Run button would
+    // briefly render. The fix presents the applying-spinner immediately
+    // for any auto-executable command.
+    simulateRemoteMessage(roomId, '@aibot:localhost', {
+      body: 'checking correctness',
+      msgtype: APP_BOXEL_MESSAGE_MSGTYPE,
+      format: 'org.matrix.custom.html',
+      isStreamingFinished: true,
+      [APP_BOXEL_COMMAND_REQUESTS_KEY]: [
+        {
+          id: 'cs-11647-apply-button',
+          name: 'checkCorrectness',
+          arguments: '{}',
+        },
+      ],
+    });
+
+    await waitFor('[data-test-message-idx="0"] [data-test-command-apply]');
+    assert
+      .dom('[data-test-message-idx="0"] [data-test-command-apply="ready"]')
+      .doesNotExist(
+        'per-command Apply button must not show the ready/Run state for an auto-executed command',
+      );
+    assert
+      .dom('[data-test-message-idx="0"] [data-test-command-apply="applying"]')
+      .exists('per-command Apply button shows the applying spinner instead');
+  });
+
   test('CS-11647: Accept All bar still renders for a command that requires user approval', async function (assert) {
     let roomId = await renderAiAssistantPanel(`${testRealmURL}Person/fadhlan`);
 

From fd115f9dc36691f017b44ff9588f80804fc12813 Mon Sep 17 00:00:00 2001
From: Fadhlan Ridhwanallah <fridhwanallah@gmail.com>
Date: Fri, 19 Jun 2026 09:23:19 +0700
Subject: [PATCH 4/8] fix(CS-11647): include agent ownership in
 isAutoExecutableCommand
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CI surfaced a regression on the per-command Apply button fix:
  Acceptance | Commands tests: ShowCard command added from a skill,
  is not automatically executed when agentId does not match

A ShowCard command sent by another agent has requiresApproval=false
on the skill, so isAutoExecutableCommand returned true and the button
masked into the applying spinner — but command-service refuses to run
it because of the agentId gate (drainCommandProcessingQueue, lines
354-357). The button would have stuck on the spinner forever.

Thread isOwnedByCurrentAgent through the predicate and return false
early when the command is not owned by the current agent. UI callers
compute it from `message.agentId === matrixService.agentId`.
command-service passes `true` because its outer loop already
short-circuits on the same agentId condition before reaching the
predicate.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../matrix/room-message-command.gts           |  8 +++-
 packages/host/app/components/matrix/room.gts  |  4 +-
 packages/host/app/lib/command-auto-execute.ts | 20 ++++++++--
 packages/host/app/services/command-service.ts | 10 ++++-
 .../unit/lib/command-auto-execute-test.ts     | 39 ++++++++++++++++---
 5 files changed, 69 insertions(+), 12 deletions(-)

diff --git a/packages/host/app/components/matrix/room-message-command.gts b/packages/host/app/components/matrix/room-message-command.gts
index c626a9bfb9..5f24a1e128 100644
--- a/packages/host/app/components/matrix/room-message-command.gts
+++ b/packages/host/app/components/matrix/room-message-command.gts
@@ -98,7 +98,13 @@ export default class RoomMessageCommand extends Component<Signature> {
     let activeMode = this.args.roomResource.getActiveLLMModeForMessage(
       this.args.messageCommand.eventId,
     );
-    return isAutoExecutableCommand(this.args.messageCommand, activeMode);
+    let isOwnedByCurrentAgent =
+      this.args.messageCommand.message.agentId === this.matrixService.agentId;
+    return isAutoExecutableCommand(
+      this.args.messageCommand,
+      activeMode,
+      isOwnedByCurrentAgent,
+    );
   }
 
   @use private commandResultCard = resource(() => {
diff --git a/packages/host/app/components/matrix/room.gts b/packages/host/app/components/matrix/room.gts
index f988b28017..d3845deb5f 100644
--- a/packages/host/app/components/matrix/room.gts
+++ b/packages/host/app/components/matrix/room.gts
@@ -1866,6 +1866,8 @@ export default class Room extends Component<Signature> {
     let activeMode = roomResource?.getActiveLLMModeForMessage(
       lastMessage.eventId,
     );
+    let isOwnedByCurrentAgent =
+      lastMessage.agentId === this.matrixService.agentId;
     return lastMessage.commands.filter(
       (command) =>
         (command.status === 'ready' || command.status === undefined) &&
@@ -1878,7 +1880,7 @@ export default class Room extends Component<Signature> {
         // command-service flips `acceptingAllRoomIds`. Without this filter,
         // the bar paints and then yanks itself once auto-execution starts,
         // which is the CS-11647 glitch.
-        !isAutoExecutableCommand(command, activeMode),
+        !isAutoExecutableCommand(command, activeMode, isOwnedByCurrentAgent),
     );
   }
 
diff --git a/packages/host/app/lib/command-auto-execute.ts b/packages/host/app/lib/command-auto-execute.ts
index 63577ca35f..5488edff1a 100644
--- a/packages/host/app/lib/command-auto-execute.ts
+++ b/packages/host/app/lib/command-auto-execute.ts
@@ -5,14 +5,26 @@ import type MessageCommand from './matrix-classes/message-command';
 export const CHECK_CORRECTNESS_COMMAND_NAME = 'checkCorrectness';
 
 // Single source of truth for "this command runs without user approval".
-// Used by command-service (to decide whether to auto-run) and by the room
-// component (to decide whether to render the Accept All / Cancel bar).
-// Keeping both call sites on the same predicate prevents the two from
-// drifting and reintroducing the action-bar flash that prompted CS-11647.
+// Used by command-service (to decide whether to auto-run) and by the
+// room / room-message-command components (to decide whether to render
+// the Accept All bar and the per-command Apply button). Keeping all
+// call sites on the same predicate prevents them from drifting and
+// reintroducing the action-bar flash that prompted CS-11647.
+//
+// `isOwnedByCurrentAgent` mirrors the agentId gate in
+// command-service.drainCommandProcessingQueue: a command sent by
+// another agent is never auto-executed, even if it would otherwise
+// satisfy one of the three branches below. Callers that don't track
+// agents (e.g. unit tests) can pass `true` to focus on the other
+// conditions.
 export function isAutoExecutableCommand(
   command: Pick<MessageCommand, 'name' | 'requiresApproval'>,
   activeLLMMode: LLMMode | undefined,
+  isOwnedByCurrentAgent: boolean,
 ): boolean {
+  if (!isOwnedByCurrentAgent) {
+    return false;
+  }
   if (command.name === CHECK_CORRECTNESS_COMMAND_NAME) {
     return true;
   }
diff --git a/packages/host/app/services/command-service.ts b/packages/host/app/services/command-service.ts
index c382a67589..62fdd2c154 100644
--- a/packages/host/app/services/command-service.ts
+++ b/packages/host/app/services/command-service.ts
@@ -386,8 +386,16 @@ export default class CommandService extends Service {
             message.eventId,
           );
 
+          // The outer `message.agentId !== this.matrixService.agentId`
+          // gate above already short-circuited the not-our-agent case, so
+          // every command reaching this point is owned by the current
+          // agent.
           if (
-            isAutoExecutableCommand(messageCommand, activeModeAtMessageTime)
+            isAutoExecutableCommand(
+              messageCommand,
+              activeModeAtMessageTime,
+              true,
+            )
           ) {
             readyCommands.push(messageCommand);
           }
diff --git a/packages/host/tests/unit/lib/command-auto-execute-test.ts b/packages/host/tests/unit/lib/command-auto-execute-test.ts
index 1de4c80715..5cca7448f8 100644
--- a/packages/host/tests/unit/lib/command-auto-execute-test.ts
+++ b/packages/host/tests/unit/lib/command-auto-execute-test.ts
@@ -17,13 +17,18 @@ function cmd(
 module('Unit | Lib | command-auto-execute', function () {
   test('check-correctness commands auto-execute regardless of mode or approval flag', function (assert) {
     assert.true(
-      isAutoExecutableCommand(cmd(CHECK_CORRECTNESS_COMMAND_NAME, true), 'ask'),
+      isAutoExecutableCommand(
+        cmd(CHECK_CORRECTNESS_COMMAND_NAME, true),
+        'ask',
+        true,
+      ),
       'checkCorrectness in ask mode with requiresApproval=true still auto-executes',
     );
     assert.true(
       isAutoExecutableCommand(
         cmd(CHECK_CORRECTNESS_COMMAND_NAME, true),
         undefined,
+        true,
       ),
       'checkCorrectness with unknown mode still auto-executes',
     );
@@ -31,26 +36,50 @@ module('Unit | Lib | command-auto-execute', function () {
 
   test('commands with requiresApproval=false auto-execute', function (assert) {
     assert.true(
-      isAutoExecutableCommand(cmd('searchCard', false), 'ask'),
+      isAutoExecutableCommand(cmd('searchCard', false), 'ask', true),
       'requiresApproval=false bypasses approval even in ask mode',
     );
   });
 
   test('act mode auto-executes commands that would otherwise require approval', function (assert) {
     assert.true(
-      isAutoExecutableCommand(cmd('patchCardInstance', true), 'act'),
+      isAutoExecutableCommand(cmd('patchCardInstance', true), 'act', true),
       'patchCardInstance in act mode auto-executes',
     );
   });
 
   test('ask mode with requiresApproval=true does not auto-execute', function (assert) {
     assert.false(
-      isAutoExecutableCommand(cmd('patchCardInstance', true), 'ask'),
+      isAutoExecutableCommand(cmd('patchCardInstance', true), 'ask', true),
       'manual approval is required in ask mode',
     );
     assert.false(
-      isAutoExecutableCommand(cmd('patchCardInstance', true), undefined),
+      isAutoExecutableCommand(cmd('patchCardInstance', true), undefined, true),
       'manual approval is required when mode is unknown',
     );
   });
+
+  test('commands owned by another agent never auto-execute', function (assert) {
+    // Mirrors the agentId gate in command-service.drainCommandProcessingQueue:
+    // a command whose message came from a different agent must not auto-run
+    // on this host, even if it would otherwise satisfy one of the auto-exec
+    // branches. UI callers rely on this so the manual approval bar / per-
+    // command Apply button stay clickable for non-current-agent commands.
+    assert.false(
+      isAutoExecutableCommand(
+        cmd(CHECK_CORRECTNESS_COMMAND_NAME, true),
+        'act',
+        false,
+      ),
+      'checkCorrectness from another agent does not auto-execute',
+    );
+    assert.false(
+      isAutoExecutableCommand(cmd('searchCard', false), 'act', false),
+      'requiresApproval=false from another agent does not auto-execute',
+    );
+    assert.false(
+      isAutoExecutableCommand(cmd('patchCardInstance', true), 'act', false),
+      'act mode from another agent does not auto-execute',
+    );
+  });
 });

From ad2e63d29a9540d0dbd7445000e8f606c1eeec60 Mon Sep 17 00:00:00 2001
From: Fadhlan Ridhwanallah <fridhwanallah@gmail.com>
Date: Fri, 19 Jun 2026 11:06:50 +0700
Subject: [PATCH 5/8] test(CS-11647): pass agentId in auto-execute tests so the
 gate matches

The new isAutoExecutableCommand agent-ownership check fails closed
when message.agentId doesn't equal matrixService.agentId. The new
CS-11647 integration tests went through simulateRemoteMessage without
setting `data.context.agentId`, so the helper short-circuited to
false in the test environment and the Accept All bar / per-command
ready button rendered for an unrelated reason. CI surfaced this on
the two tests that depended on the helper actually returning true:

  not ok 121 ... Accept All bar does not flash for a requiresApproval=false command
  not ok 122 ... per-command Apply button does not flash Run before auto-execute starts

Pass the current agent's id explicitly. The checkCorrectness Accept
All test also gets the context (previously passing by accident
because validate marks status=invalid for an ad-hoc checkCorrectness
call, which the readyCommands filter already excludes).

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../ai-assistant-panel/commands-test.gts      | 21 +++++++++++++++++++
 1 file changed, 21 insertions(+)

diff --git a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts
index 21c68aab12..c84be30633 100644
--- a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts
+++ b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts
@@ -1652,6 +1652,12 @@ module('Integration | ai-assistant-panel | commands', function (hooks) {
     // Accept All / Cancel briefly appear then disappear. The bar must
     // never paint in its manual-approval branch for any auto-executed
     // command, regardless of which condition triggers auto-execute.
+    //
+    // agentId must match the host's matrix service so the
+    // agent-ownership gate in isAutoExecutableCommand passes — otherwise
+    // the predicate short-circuits to false (the not-our-agent case
+    // exercised by acceptance/commands-test.gts) and the bar would show
+    // for an unrelated reason.
     simulateRemoteMessage(roomId, '@aibot:localhost', {
       body: 'checking correctness',
       msgtype: APP_BOXEL_MESSAGE_MSGTYPE,
@@ -1664,6 +1670,11 @@ module('Integration | ai-assistant-panel | commands', function (hooks) {
           arguments: '{}',
         },
       ],
+      data: {
+        context: {
+          agentId: getService('matrix-service').agentId,
+        },
+      },
     });
 
     await waitFor('[data-test-message-idx="0"]');
@@ -1723,6 +1734,11 @@ module('Integration | ai-assistant-panel | commands', function (hooks) {
           }),
         },
       ],
+      data: {
+        context: {
+          agentId: getService('matrix-service').agentId,
+        },
+      },
     });
 
     await waitFor('[data-test-message-idx="0"]');
@@ -1753,6 +1769,11 @@ module('Integration | ai-assistant-panel | commands', function (hooks) {
           arguments: '{}',
         },
       ],
+      data: {
+        context: {
+          agentId: getService('matrix-service').agentId,
+        },
+      },
     });
 
     await waitFor('[data-test-message-idx="0"] [data-test-command-apply]');

From 2f1a7e7f98c71d0b823338844520985281d9b990 Mon Sep 17 00:00:00 2001
From: Fadhlan Ridhwanallah <fridhwanallah@gmail.com>
Date: Fri, 19 Jun 2026 15:50:16 +0700
Subject: [PATCH 6/8] fix(CS-11647): invalidate auto-executable commands when
 room processing is stuck

drainCommandProcessingQueue used to `continue` past a wedged room without
emitting any commandResult, so the synthetic "applying" spinner in
room-message-command.gts never received a terminal event to clear it and
hung forever. Dispatch an `invalid` commandResult for each auto-executable
command on that path so the UI surfaces the invalidCommandState alert
with a Try Anyway button.

Also extract STUCK_PROCESSING_TIMEOUT_MS so tests can target the same
threshold the prod path uses.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 packages/host/app/services/command-service.ts | 86 ++++++++++++++++++-
 1 file changed, 82 insertions(+), 4 deletions(-)

diff --git a/packages/host/app/services/command-service.ts b/packages/host/app/services/command-service.ts
index 62fdd2c154..643fd27d75 100644
--- a/packages/host/app/services/command-service.ts
+++ b/packages/host/app/services/command-service.ts
@@ -51,9 +51,15 @@ import type StoreService from './store';
 import type { CodeData } from '../lib/formatted-message/utils';
 import type MessageCodePatchResult from '../lib/matrix-classes/message-code-patch-result';
 import type MessageCommand from '../lib/matrix-classes/message-command';
+import type { RoomResource } from '../resources/room';
 import type { IEvent } from 'matrix-js-sdk';
 
 const DELAY_FOR_APPLYING_UI = isTesting() ? 50 : 500;
+// Timeout both drain loops (drainCommandProcessingQueue and
+// drainCodePatchProcessingQueue) apply while waiting on a room resource
+// that's still processing. Shortened in tests so the stuck-timeout
+// invalidation path can be exercised without holding a test open for a minute.
+const STUCK_PROCESSING_TIMEOUT_MS = isTesting() ? 1000 : 60_000;
 
 type GenericCommand = Command<
   typeof CardDef | undefined,
@@ -323,7 +329,7 @@ export default class CommandService extends Service {
             `Room resource not found for room id ${roomId}, this should not happen`,
           );
         }
-        let timeout = Date.now() + 60_000; // reset the timer to avoid a long wait if the room resource is processing
+        let timeout = Date.now() + STUCK_PROCESSING_TIMEOUT_MS; // reset the timer to avoid a long wait if the room resource is processing
         let currentRoomProcessingTimestamp =
           roomResource.processingLastStartedAt;
         while (
@@ -340,9 +346,23 @@ export default class CommandService extends Service {
           currentRoomProcessingTimestamp ===
             roomResource.processingLastStartedAt
         ) {
-          // room seems to be stuck processing, so we will log and skip this event
+          // Room processing is wedged. The synthetic 'applying' state in
+          // room-message-command.gts shows the spinner the moment an
+          // auto-executable command lands and only clears when we dispatch
+          // a terminal commandResult ('applied' or 'invalid'). If we just
+          // logged and continued, the spinner would hang indefinitely with
+          // no manual Run fallback. Mark each auto-executable command on
+          // this message invalid so the UI falls through to the
+          // invalidCommandState "Try Anyway" branch; manual-approval
+          // commands are left in 'ready' so the action bar's Run button
+          // remains the user's fallback.
           console.error(
-            `Room resource for room ${roomId} seems to be stuck processing, skipping event ${eventId}`,
+            `Room resource for room ${roomId} seems to be stuck processing, invalidating auto-executable commands on event ${eventId}`,
+          );
+          await this.invalidateAutoExecutableCommandsForStuckProcessing(
+            roomResource,
+            roomId!,
+            eventId!,
           );
           continue;
         }
@@ -420,6 +440,64 @@ export default class CommandService extends Service {
     }
   }
 
+  private async invalidateAutoExecutableCommandsForStuckProcessing(
+    roomResource: RoomResource,
+    roomId: string,
+    eventId: string,
+  ) {
+    let message = roomResource.messages.find((m) => m.eventId === eventId);
+    if (!message) {
+      return;
+    }
+    if (message.agentId !== this.matrixService.agentId) {
+      return;
+    }
+    let activeModeAtMessageTime = roomResource.getActiveLLMModeForMessage(
+      message.eventId,
+    );
+    for (let messageCommand of message.commands) {
+      if (this.currentlyExecutingCommandRequestIds.has(messageCommand.id!)) {
+        continue;
+      }
+      if (this.executedCommandRequestIds.has(messageCommand.id!)) {
+        continue;
+      }
+      if (
+        messageCommand.status === 'applied' ||
+        messageCommand.status === 'invalid'
+      ) {
+        continue;
+      }
+      if (!messageCommand.name) {
+        continue;
+      }
+      // The agentId early-return at the top of this method already verified
+      // ownership, so this command is owned by the current agent.
+      if (
+        !isAutoExecutableCommand(messageCommand, activeModeAtMessageTime, true)
+      ) {
+        // Manual-approval commands stay 'ready' — the action bar's Run
+        // button is still the user's fallback for those.
+        continue;
+      }
+      let invokedToolFromEventId =
+        this.getCurrentEventIdForCommandRequest(
+          roomId,
+          messageCommand.commandRequest.id,
+        ) ?? messageCommand.eventId;
+      await this.matrixService.sendCommandResultEvent({
+        roomId,
+        invokedToolFromEventId,
+        toolCallId: messageCommand.commandRequest.id!,
+        status: 'invalid',
+        failureReason: `Room processing did not finish within ${Math.round(
+          STUCK_PROCESSING_TIMEOUT_MS / 1000,
+        )}s; command was not started`,
+        context: await this.operatorModeStateService.getSummaryForAIBot(),
+      });
+    }
+  }
+
   private async drainCodePatchProcessingQueue() {
     let waiterToken = commandProcessingWaiter.beginAsync();
     try {
@@ -442,7 +520,7 @@ export default class CommandService extends Service {
             `Room resource not found for room id ${roomId}, this should not happen`,
           );
         }
-        let timeout = Date.now() + 60_000; // reset the timer to avoid a long wait if the room resource is processing
+        let timeout = Date.now() + STUCK_PROCESSING_TIMEOUT_MS; // reset the timer to avoid a long wait if the room resource is processing
         let currentRoomProcessingTimestamp =
           roomResource.processingLastStartedAt;
         while (

From 3d4cfc3f757e2c0f98487df11d67473304414c43 Mon Sep 17 00:00:00 2001
From: Fadhlan Ridhwanallah <fridhwanallah@gmail.com>
Date: Fri, 19 Jun 2026 15:50:27 +0700
Subject: [PATCH 7/8] fix(CS-11647): align data-test-command-card-idle with
 synthetic applying state

While the synthetic "applying" spinner is up (auto-executable command
between landing and drainCommandProcessingQueue starting the run task),
the card's idle test attribute used to disagree with the apply button:
the apply button reported applying, the card still reported idle. Drive
both off applyButtonState so the two attributes always agree about
whether the spinner is visible.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 packages/host/app/components/matrix/room-message-command.gts | 4 ++--
 1 file changed, 2 insertions(+), 2 deletions(-)

diff --git a/packages/host/app/components/matrix/room-message-command.gts b/packages/host/app/components/matrix/room-message-command.gts
index 5f24a1e128..1327bc15d2 100644
--- a/packages/host/app/components/matrix/room-message-command.gts
+++ b/packages/host/app/components/matrix/room-message-command.gts
@@ -260,7 +260,7 @@ export default class RoomMessageCommand extends Component<Signature> {
           @monacoSDK={{@monacoSDK}}
           @codeData={{hash code=this.previewCommandCode language='json'}}
           data-test-command-card-idle={{not
-            (eq @messageCommand.status 'applying')
+            (eq this.applyButtonState 'applying')
           }}
           as |codeBlock|
         >
@@ -280,7 +280,7 @@ export default class RoomMessageCommand extends Component<Signature> {
           @monacoSDK={{@monacoSDK}}
           @codeData={{hash code=this.previewCommandCode language='json'}}
           data-test-command-card-idle={{not
-            (eq @messageCommand.status 'applying')
+            (eq this.applyButtonState 'applying')
           }}
           as |codeBlock|
         >

From 1bd7797173cb7ac61987b1c147746268f7ad1a39 Mon Sep 17 00:00:00 2001
From: Fadhlan Ridhwanallah <fridhwanallah@gmail.com>
Date: Fri, 19 Jun 2026 15:50:39 +0700
Subject: [PATCH 8/8] test(CS-11647): cover stuck-processing helper, idle-attr
 coherence, and drop ticket prefix

- New unit-flavoured integration test exercising
  invalidateAutoExecutableCommandsForStuckProcessing directly: spies on
  matrixService.sendCommandResultEvent and asserts only the auto-
  executable command gets an invalid dispatch with the expected
  failureReason. Avoids stubbing the ember-resources proxy, which
  silently no-ops Object.defineProperty.
- Add a coherence assertion that data-test-command-card-idle is omitted
  while the synthetic applying state is on (Glimmer drops attributes
  bound to falsy expressions, so the test selector is presence-based).
- Drop the per-test CS-11647 prefix from titles now that the cluster is
  large enough to read on its own.

Co-Authored-By: Claude Opus 4.7 (1M context) <noreply@anthropic.com>
---
 .../ai-assistant-panel/commands-test.gts      | 114 +++++++++++++++++-
 1 file changed, 110 insertions(+), 4 deletions(-)

diff --git a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts
index c84be30633..eb1f39e0c9 100644
--- a/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts
+++ b/packages/host/tests/integration/components/ai-assistant-panel/commands-test.gts
@@ -1642,7 +1642,7 @@ module('Integration | ai-assistant-panel | commands', function (hooks) {
     );
   });
 
-  test('CS-11647: Accept All bar does not flash for an always-auto-executed command (checkCorrectness)', async function (assert) {
+  test('Accept All bar does not flash for an always-auto-executed command (checkCorrectness)', async function (assert) {
     let roomId = await renderAiAssistantPanel();
 
     // checkCorrectness is on the always-auto-execute list (one of three
@@ -1692,7 +1692,7 @@ module('Integration | ai-assistant-panel | commands', function (hooks) {
       );
   });
 
-  test('CS-11647: Accept All bar does not flash for a requiresApproval=false command', async function (assert) {
+  test('Accept All bar does not flash for a requiresApproval=false command', async function (assert) {
     setCardInOperatorModeState(`${testRealmURL}Person/fadhlan`);
     await renderComponent(
       class TestDriver extends GlimmerComponent {
@@ -1749,7 +1749,7 @@ module('Integration | ai-assistant-panel | commands', function (hooks) {
       );
   });
 
-  test('CS-11647: per-command Apply button does not flash Run before auto-execute starts', async function (assert) {
+  test('per-command Apply button does not flash Run before auto-execute starts', async function (assert) {
     let roomId = await renderAiAssistantPanel();
 
     // The per-command Apply button (rendered next to each tool-call message)
@@ -1785,9 +1785,115 @@ module('Integration | ai-assistant-panel | commands', function (hooks) {
     assert
       .dom('[data-test-message-idx="0"] [data-test-command-apply="applying"]')
       .exists('per-command Apply button shows the applying spinner instead');
+    // The data-test-command-card-idle attribute is computed from
+    // applyButtonState (not the raw status); while the synthetic 'applying'
+    // is on it must NOT mark the card idle. Glimmer omits an attribute
+    // bound to a falsy expression, so the coherence check is on attribute
+    // presence — the apply button + the card must agree the spinner is
+    // up, not just one of them.
+    assert
+      .dom('[data-test-message-idx="0"] [data-test-command-card-idle]')
+      .doesNotExist(
+        'data-test-command-card-idle agrees with applyButtonState while the synthetic spinner is on',
+      );
+  });
+
+  test('stuck-processing helper dispatches an invalid commandResult for each auto-executable command', async function (assert) {
+    let roomId = await renderAiAssistantPanel();
+
+    // Verifies the followup-fix for the synthetic-spinner hang flagged in
+    // the self-review of this branch: drainCommandProcessingQueue must
+    // dispatch an `invalid` commandResult when a room is wedged, so the
+    // synthetic 'applying' state in room-message-command.gts falls through
+    // to the invalidCommandState ("Try Anyway") branch instead of pinning
+    // a spinner that no terminal event ever clears.
+    //
+    // Driving the real wait-loop end-to-end is unstable: roomResource is
+    // an ember-resources proxy so own-property defines for isProcessing /
+    // processingLastStartedAt silently no-op, and there's no public seam
+    // to keep the processRoomTask "running" without rewriting the
+    // resource itself. Instead, exercise the helper directly with a
+    // spied sendCommandResultEvent on matrixService — this proves the
+    // dispatch shape, the per-command iteration, and the failureReason
+    // text without depending on the proxy internals.
+    let matrixService = getService('matrix-service');
+    let commandService = getService('command-service');
+
+    simulateRemoteMessage(roomId, '@aibot:localhost', {
+      body: 'checking correctness',
+      msgtype: APP_BOXEL_MESSAGE_MSGTYPE,
+      format: 'org.matrix.custom.html',
+      isStreamingFinished: true,
+      [APP_BOXEL_COMMAND_REQUESTS_KEY]: [
+        {
+          id: 'cs-11647-stuck-auto',
+          name: 'checkCorrectness',
+          arguments: '{}',
+        },
+        {
+          id: 'cs-11647-stuck-manual',
+          name: 'patchCardInstance',
+          arguments: JSON.stringify({
+            attributes: {
+              cardId: `${testRealmURL}Person/fadhlan`,
+              patch: { attributes: { firstName: 'Dave' } },
+            },
+          }),
+        },
+      ],
+      data: {
+        context: {
+          agentId: matrixService.agentId,
+        },
+      },
+    });
+    await waitFor('[data-test-message-idx="0"] [data-test-command-apply]');
+
+    let roomResource = matrixService.roomResources.get(roomId)!;
+    let message = roomResource.messages.find(
+      (m: any) => m.commands?.length === 2,
+    );
+    assert.ok(message, 'two-command bot message lands in the room resource');
+
+    let captured: Array<{ toolCallId: string; failureReason?: string }> = [];
+    let originalSend = matrixService.sendCommandResultEvent.bind(matrixService);
+    (matrixService as any).sendCommandResultEvent = async (params: any) => {
+      captured.push({
+        toolCallId: params.toolCallId,
+        failureReason: params.failureReason,
+      });
+    };
+    try {
+      await (
+        commandService as any
+      ).invalidateAutoExecutableCommandsForStuckProcessing(
+        roomResource,
+        roomId,
+        message!.eventId,
+      );
+    } finally {
+      (matrixService as any).sendCommandResultEvent = originalSend;
+    }
+
+    assert.strictEqual(
+      captured.length,
+      1,
+      'only the auto-executable command is invalidated; manual-approval command is left in ready',
+    );
+    assert.strictEqual(
+      captured[0]?.toolCallId,
+      'cs-11647-stuck-auto',
+      'the dispatched invalid event targets the auto-executable command',
+    );
+    assert.true(
+      (captured[0]?.failureReason ?? '').startsWith(
+        'Room processing did not finish within',
+      ),
+      'failureReason surfaces the stuck-processing cause for the invalidCommandState alert',
+    );
   });
 
-  test('CS-11647: Accept All bar still renders for a command that requires user approval', async function (assert) {
+  test('Accept All bar still renders for a command that requires user approval', async function (assert) {
     let roomId = await renderAiAssistantPanel(`${testRealmURL}Person/fadhlan`);
 
     simulateRemoteMessage(roomId, '@aibot:localhost', {