From 4e743f3e17df3dc4494169d5a61d36efdbfb9e4d Mon Sep 17 00:00:00 2001 From: Peter Kirkham Date: Tue, 2 Jun 2026 12:33:04 +0100 Subject: [PATCH] fix(mobile): stop false "task failed" notifications for stale/cancelled runs Mobile fired " failed" notifications for tasks the user had simply finished with. Two causes: 1. mapTerminalStatus collapsed the backend "cancelled" status into "failed". A cancelled run (sandbox stopped / user finished) is a normal lifecycle end, not a failure, but it was surfaced as "Run failed" and pushed a " failed" notification. Cancelled is now a distinct terminal state ("Run stopped") and never pings as a failure. 2. The terminal-status notification fired on stale reconnects. awaitingPing is deliberately preserved across snapshots, so reconnecting to a run that ended (failed/cancelled) while the device was away re-fired a terminal notification for a run the user had already moved on from. We now thread the run's updated_at through status/snapshot updates and suppress the ping when the terminal transition is older than 2 minutes (missing timestamp is treated as fresh, so genuine just-happened alerts are never suppressed). Adds taskSessionStore.test.ts covering: no ping for cancelled, ping for a recent failure, no ping for a stale failure, no ping when not awaiting, and a completion ping. Generated-By: PostHog Code Task-Id: 8bca0c4c-ff31-41f6-9f20-0fd2a4e91860 --- .../tasks/components/TaskSessionView.tsx | 105 ++++++----- .../src/features/tasks/lib/cloudTaskStream.ts | 2 + .../tasks/stores/taskSessionStore.test.ts | 164 ++++++++++++++++++ .../features/tasks/stores/taskSessionStore.ts | 52 +++++- apps/mobile/src/features/tasks/types.ts | 6 + .../features/tasks/utils/sessionActivity.ts | 2 +- 6 files changed, 282 insertions(+), 49 deletions(-) create mode 100644 apps/mobile/src/features/tasks/stores/taskSessionStore.test.ts diff --git a/apps/mobile/src/features/tasks/components/TaskSessionView.tsx b/apps/mobile/src/features/tasks/components/TaskSessionView.tsx index fb4c23ed13..ec9c0f9ad8 100644 --- a/apps/mobile/src/features/tasks/components/TaskSessionView.tsx +++ b/apps/mobile/src/features/tasks/components/TaskSessionView.tsx @@ -55,7 +55,7 @@ interface TaskSessionViewProps { pendingPermissions?: Record; isConnecting?: boolean; isThinking?: boolean; - terminalStatus?: "failed" | "completed"; + terminalStatus?: "failed" | "completed" | "cancelled"; lastError?: string | null; onRetry?: () => void; onOpenTask?: (taskId: string) => void; @@ -795,6 +795,64 @@ function ConnectingIndicator() { ); } +// Terminal-state banner shown at the top of a finished run's transcript. +// "failed" reads as an error and offers a retry; "completed" and "cancelled" +// are non-error ends (the latter meaning the run was stopped / the user +// finished with it) and offer to continue the conversation. +function TerminalStatusBanner({ + terminalStatus, + lastError, + onRetry, +}: { + terminalStatus: "failed" | "completed" | "cancelled"; + lastError?: string | null; + onRetry?: () => void; +}) { + const isFailed = terminalStatus === "failed"; + const label = + terminalStatus === "failed" + ? "Run failed" + : terminalStatus === "cancelled" + ? "Run stopped" + : "Run completed"; + const actionLabel = isFailed ? "Retry" : "Continue"; + + return ( + + + {label} + + {lastError && ( + {lastError} + )} + {onRetry && ( + + + {actionLabel} + + + )} + + ); +} + export function TaskSessionView({ events, pendingPermissions, @@ -1017,46 +1075,11 @@ export function TaskSessionView({ initialNumToRender={30} ListHeaderComponent={ terminalStatus ? ( - - - {terminalStatus === "failed" ? "Run failed" : "Run completed"} - - {lastError && ( - {lastError} - )} - {onRetry && ( - - - {terminalStatus === "failed" ? "Retry" : "Continue"} - - - )} - + ) : null } /> diff --git a/apps/mobile/src/features/tasks/lib/cloudTaskStream.ts b/apps/mobile/src/features/tasks/lib/cloudTaskStream.ts index 2541eed7e7..ad0088dbba 100644 --- a/apps/mobile/src/features/tasks/lib/cloudTaskStream.ts +++ b/apps/mobile/src/features/tasks/lib/cloudTaskStream.ts @@ -566,6 +566,7 @@ function emitSnapshot(watcher: WatcherState, entries: StoredLogEntry[]): void { output: watcher.lastOutput, errorMessage: watcher.lastErrorMessage, branch: watcher.lastBranch, + statusUpdatedAt: watcher.lastStatusUpdatedAt, }); } @@ -579,6 +580,7 @@ function emitStatus(watcher: WatcherState): void { output: watcher.lastOutput, errorMessage: watcher.lastErrorMessage, branch: watcher.lastBranch, + statusUpdatedAt: watcher.lastStatusUpdatedAt, }); } diff --git a/apps/mobile/src/features/tasks/stores/taskSessionStore.test.ts b/apps/mobile/src/features/tasks/stores/taskSessionStore.test.ts new file mode 100644 index 0000000000..291ab1396b --- /dev/null +++ b/apps/mobile/src/features/tasks/stores/taskSessionStore.test.ts @@ -0,0 +1,164 @@ +import { beforeEach, describe, expect, it, vi } from "vitest"; + +// --- Mocks for side-effecting / native deps pulled in by the store --------- + +const presentLocalNotification = vi.fn().mockResolvedValue(undefined); +vi.mock("@/features/notifications/lib/notifications", () => ({ + presentLocalNotification: (...args: unknown[]) => + presentLocalNotification(...args), +})); + +const playMeepSound = vi.fn().mockResolvedValue(undefined); +vi.mock("../utils/sounds", () => ({ + playMeepSound: () => playMeepSound(), +})); + +vi.mock("expo-haptics", () => ({ + notificationAsync: vi.fn(), + NotificationFeedbackType: { Success: "success" }, +})); + +vi.mock("@/features/preferences/stores/preferencesStore", () => ({ + usePreferencesStore: { + getState: () => ({ pingsEnabled: true, pushNotificationsEnabled: true }), + }, +})); + +// Network/cloud APIs — never hit in these tests but imported by the module. +vi.mock("../api", () => ({ + CloudCommandError: class CloudCommandError extends Error {}, + getTask: vi.fn(), + runTaskInCloud: vi.fn(), + sendCloudCommand: vi.fn(), +})); + +vi.mock("../lib/cloudTaskStream", () => ({ + watchCloudTask: vi.fn(), +})); + +// Pulls in expo-file-system → expo-modules-core, which needs the RN-injected +// __DEV__ global; stub the module so the store loads under the node env. +vi.mock("../composer/attachments/buildCloudPrompt", () => ({ + buildCloudPromptBlocks: vi.fn(), +})); + +import type { TaskSession } from "./taskSessionStore"; +import { useTaskSessionStore } from "./taskSessionStore"; + +// Unique ids per test — maybePresentLocalNotification keeps a module-level +// per-task dedup window that would otherwise suppress later tests' pings. +let idCounter = 0; +let TASK_RUN_ID = "run-0"; +let TASK_ID = "task-0"; + +function seedSession(overrides: Partial = {}): void { + const session: TaskSession = { + taskRunId: TASK_RUN_ID, + taskId: TASK_ID, + taskTitle: "My task", + events: [], + status: "connected", + isPromptPending: true, + awaitingPing: true, + ...overrides, + }; + useTaskSessionStore.setState({ + sessions: { [TASK_RUN_ID]: session }, + // Not focused on this task, so the OS-banner suppression doesn't kick in. + focusedTaskId: null, + }); +} + +const recentTimestamp = () => new Date(Date.now() - 1000).toISOString(); +const staleTimestamp = () => + new Date(Date.now() - 10 * 60 * 1000).toISOString(); + +describe("taskSessionStore terminal notifications", () => { + beforeEach(() => { + presentLocalNotification.mockClear(); + playMeepSound.mockClear(); + idCounter += 1; + TASK_RUN_ID = `run-${idCounter}`; + TASK_ID = `task-${idCounter}`; + useTaskSessionStore.setState({ sessions: {}, focusedTaskId: null }); + }); + + it("does NOT fire a failure notification for a cancelled run", () => { + seedSession(); + useTaskSessionStore.getState()._handleCloudUpdate(TASK_RUN_ID, { + taskId: TASK_ID, + runId: TASK_RUN_ID, + kind: "status", + status: "cancelled", + statusUpdatedAt: recentTimestamp(), + }); + + expect(presentLocalNotification).not.toHaveBeenCalled(); + // The run is still recorded as a distinct, non-failed terminal state. + expect( + useTaskSessionStore.getState().sessions[TASK_RUN_ID]?.terminalStatus, + ).toBe("cancelled"); + }); + + it("fires a failure notification for a recent failed run", () => { + seedSession(); + useTaskSessionStore.getState()._handleCloudUpdate(TASK_RUN_ID, { + taskId: TASK_ID, + runId: TASK_RUN_ID, + kind: "status", + status: "failed", + errorMessage: "boom", + statusUpdatedAt: recentTimestamp(), + }); + + expect(presentLocalNotification).toHaveBeenCalledTimes(1); + expect(presentLocalNotification.mock.calls[0][0].body).toContain("failed"); + }); + + it("does NOT fire for a failed run that terminated long ago (stale reconnect)", () => { + seedSession(); + useTaskSessionStore.getState()._handleCloudUpdate(TASK_RUN_ID, { + taskId: TASK_ID, + runId: TASK_RUN_ID, + kind: "snapshot", + newEntries: [], + totalEntryCount: 0, + status: "failed", + statusUpdatedAt: staleTimestamp(), + }); + + expect(presentLocalNotification).not.toHaveBeenCalled(); + expect( + useTaskSessionStore.getState().sessions[TASK_RUN_ID]?.terminalStatus, + ).toBe("failed"); + }); + + it("does NOT fire when the user was not awaiting a ping", () => { + seedSession({ awaitingPing: false }); + useTaskSessionStore.getState()._handleCloudUpdate(TASK_RUN_ID, { + taskId: TASK_ID, + runId: TASK_RUN_ID, + kind: "status", + status: "failed", + statusUpdatedAt: recentTimestamp(), + }); + + expect(presentLocalNotification).not.toHaveBeenCalled(); + }); + + it("fires a completion notification for a recent completed run", () => { + seedSession(); + useTaskSessionStore.getState()._handleCloudUpdate(TASK_RUN_ID, { + taskId: TASK_ID, + runId: TASK_RUN_ID, + kind: "status", + status: "completed", + statusUpdatedAt: recentTimestamp(), + }); + + expect(presentLocalNotification).toHaveBeenCalledTimes(1); + expect(presentLocalNotification.mock.calls[0][0].body).toContain( + "finished", + ); + }); +}); diff --git a/apps/mobile/src/features/tasks/stores/taskSessionStore.ts b/apps/mobile/src/features/tasks/stores/taskSessionStore.ts index 7e43ee26db..bb7a9a342a 100644 --- a/apps/mobile/src/features/tasks/stores/taskSessionStore.ts +++ b/apps/mobile/src/features/tasks/stores/taskSessionStore.ts @@ -261,8 +261,8 @@ export interface TaskSession { // the log). Used to dedup the canonical copy against the echo. localUserEchoes?: Set; // Terminal backend status for this run, populated by status updates so the - // UI can surface "Run failed" / "Run completed". - terminalStatus?: "failed" | "completed"; + // UI can surface "Run failed" / "Run completed" / "Run stopped". + terminalStatus?: "failed" | "completed" | "cancelled"; lastError?: string | null; // True when the user initiated work (new task, sendPrompt, resume) and // we should play a sound when control returns. False when reconnecting @@ -331,12 +331,32 @@ const connectAttempts = new Set(); function mapTerminalStatus( status: string | undefined | null, -): "completed" | "failed" | undefined { +): "completed" | "failed" | "cancelled" | undefined { if (status === "completed") return "completed"; - if (status === "failed" || status === "cancelled") return "failed"; + if (status === "failed") return "failed"; + // A "cancelled" run is a normal lifecycle end — the sandbox was stopped or + // the user finished with the task. It is NOT a failure, so it is kept + // distinct here instead of being collapsed into "failed". + if (status === "cancelled") return "cancelled"; return undefined; } +// A terminal transition is "stale" when the backend recorded it well before +// we observed it — i.e. the run ended while this device wasn't watching and +// we're only now catching up via a reconnect snapshot. Firing a (possibly +// hours-late) "failed"/"finished" ping for a run the user has already moved on +// from is the spurious notification we want to avoid. A missing/unparseable +// timestamp is treated as fresh so a genuine, just-happened terminal alert is +// never suppressed. +const TERMINAL_PING_STALENESS_MS = 2 * 60 * 1000; + +function isTerminalTransitionStale(statusUpdatedAt?: string | null): boolean { + if (!statusUpdatedAt) return false; + const ts = Date.parse(statusUpdatedAt); + if (Number.isNaN(ts)) return false; + return Date.now() - ts > TERMINAL_PING_STALENESS_MS; +} + export const useTaskSessionStore = create((set, get) => ({ sessions: {}, focusedTaskId: null, @@ -964,7 +984,7 @@ export const useTaskSessionStore = create((set, get) => ({ if (update.kind === "status" || update.kind === "snapshot") { if (isTerminalStatus(update.status)) { const preState = get().sessions[taskRunId]; - const shouldPing = preState?.awaitingPing ?? false; + const wasAwaitingPing = preState?.awaitingPing ?? false; const terminal = mapTerminalStatus(update.status); set((state) => { const current = state.sessions[taskRunId]; @@ -982,14 +1002,32 @@ export const useTaskSessionStore = create((set, get) => ({ }, }; }); + + // Only alert when (a) the user was actively awaiting a ping on this + // device, (b) the run reached this terminal state recently — not a + // stale reconnect to a run that ended while we were away — and (c) the + // run actually failed or completed. A "cancelled" run is a normal + // lifecycle end (sandbox stopped / user finished), so it never pings; + // surfacing it as " failed" was the source of spurious failure + // notifications for idle/finished tasks. + const notifyKind: "task_failed" | "turn_complete" | null = + terminal === "failed" + ? "task_failed" + : terminal === "completed" + ? "turn_complete" + : null; + const shouldPing = + wasAwaitingPing && + notifyKind !== null && + !isTerminalTransitionStale(update.statusUpdatedAt); if (shouldPing && usePreferencesStore.getState().pingsEnabled) { playMeepSound().catch(() => {}); Haptics.notificationAsync(Haptics.NotificationFeedbackType.Success); } - if (shouldPing) { + if (shouldPing && notifyKind) { maybePresentLocalNotification({ taskRunId, - kind: terminal === "failed" ? "task_failed" : "turn_complete", + kind: notifyKind, }); } } diff --git a/apps/mobile/src/features/tasks/types.ts b/apps/mobile/src/features/tasks/types.ts index 9e7ec5a54a..78c4cc6bb5 100644 --- a/apps/mobile/src/features/tasks/types.ts +++ b/apps/mobile/src/features/tasks/types.ts @@ -183,6 +183,10 @@ export interface CloudTaskStatusUpdate extends CloudTaskUpdateBase { output?: Record | null; errorMessage?: string | null; branch?: string | null; + // ISO timestamp the backend last updated the run. Used to tell a + // just-happened terminal transition apart from a stale reconnect to a run + // that ended while this device wasn't watching. + statusUpdatedAt?: string | null; } export interface CloudTaskSnapshotUpdate extends CloudTaskUpdateBase { @@ -194,6 +198,8 @@ export interface CloudTaskSnapshotUpdate extends CloudTaskUpdateBase { output?: Record | null; errorMessage?: string | null; branch?: string | null; + // See CloudTaskStatusUpdate.statusUpdatedAt. + statusUpdatedAt?: string | null; } export interface CloudTaskErrorUpdate extends CloudTaskUpdateBase { diff --git a/apps/mobile/src/features/tasks/utils/sessionActivity.ts b/apps/mobile/src/features/tasks/utils/sessionActivity.ts index b131e1305a..6ccf95d28a 100644 --- a/apps/mobile/src/features/tasks/utils/sessionActivity.ts +++ b/apps/mobile/src/features/tasks/utils/sessionActivity.ts @@ -5,7 +5,7 @@ export type SessionActivityPhase = "idle" | "connecting" | "working"; interface SessionActivityState { isPromptPending?: boolean; awaitingAgentOutput?: boolean; - terminalStatus?: "failed" | "completed"; + terminalStatus?: "failed" | "completed" | "cancelled"; events?: SessionEvent[]; }