iloom-ai · acreeger · Jun 23, 2026 · Jun 22, 2026 · Jun 22, 2026 · Jun 23, 2026
diff --git a/src/lib/AgentManager.integration.test.ts b/src/lib/AgentManager.integration.test.ts
@@ -26,6 +26,7 @@ const ALL_AGENT_NAMES = [
 	'iloom-issue-complexity-evaluator',
 	'iloom-issue-enhancer',
 	'iloom-issue-implementer',
+	'iloom-issue-phase-verifier',
 	'iloom-issue-planner',
 	'iloom-wave-verifier',
 ]

diff --git a/src/lib/DockerManager.test.ts b/src/lib/DockerManager.test.ts
@@ -505,7 +505,7 @@ describe('DockerManager', () => {
 
 			expect(result).toBe(true)
 			// Should have used docker rm -f (atomic force-remove)
-			expect(execa).toHaveBeenNthCalledWith(2, 'docker', ['rm', '-f', 'iloom-dev-123'], { reject: false })
+			expect(execa).toHaveBeenNthCalledWith(2, 'docker', ['rm', '-f', 'iloom-dev-123'], { reject: false, timeout: 10000 })
 		})
 
 		it('should not throw if container does not exist', async () => {
@@ -534,7 +534,7 @@ describe('DockerManager', () => {
 			const result = await DockerManager.stopAndRemoveContainer('iloom-dev-stopped')
 
 			expect(result).toBe(false)
-			expect(execa).toHaveBeenNthCalledWith(2, 'docker', ['rm', '-f', 'iloom-dev-stopped'], { reject: false })
+			expect(execa).toHaveBeenNthCalledWith(2, 'docker', ['rm', '-f', 'iloom-dev-stopped'], { reject: false, timeout: 10000 })
 		})
 	})
 
@@ -551,7 +551,7 @@ describe('DockerManager', () => {
 			expect(execa).toHaveBeenCalledWith(
 				'docker',
 				['ps', '--filter', 'name=^iloom-dev-123$', '--format', '{{.Names}}'],
-				{ reject: false }
+				{ reject: false, timeout: 5000 }
 			)
 		})
 

diff --git a/src/lib/DockerManager.ts b/src/lib/DockerManager.ts
@@ -275,14 +275,14 @@ export class DockerManager {
 		if (!isRunning) {
 			logger.debug(`No running container found with name "${containerName}"`)
 			// Still try to remove in case container exists but is stopped
-			await execa('docker', ['rm', '-f', containerName], { reject: false })
+			await execa('docker', ['rm', '-f', containerName], { reject: false, timeout: 10000 })
 			return false
 		}
 
 		logger.info(`Removing Docker container "${containerName}"...`)
 
 		// Atomic force-remove: handles running and stopped containers
-		await execa('docker', ['rm', '-f', containerName], { reject: false })
+		await execa('docker', ['rm', '-f', containerName], { reject: false, timeout: 10000 })
 
 		logger.success(`Docker container "${containerName}" removed`)
 		return true
@@ -296,14 +296,17 @@ export class DockerManager {
 	 */
 	static async isContainerRunning(containerName: string): Promise<boolean> {
 		try {
+			logger.debug(`Running: docker ps --filter name=^${containerName}$ (timeout: 5s)`)
 			const result = await execa('docker', [
 				'ps',
 				'--filter', `name=^${containerName}$`,
 				'--format', '{{.Names}}',
-			], { reject: false })
+			], { reject: false, timeout: 5000 })
 
+			logger.debug(`docker ps exited ${result.exitCode}, stdout: "${result.stdout.trim()}"`)
 			return result.exitCode === 0 && result.stdout.trim() === containerName
-		} catch {
+		} catch (error) {
+			logger.debug(`docker ps failed: ${error instanceof Error ? error.message : String(error)}`)
 			return false
 		}
 	}

diff --git a/src/lib/ResourceCleanup.ts b/src/lib/ResourceCleanup.ts
@@ -590,7 +590,9 @@ export class ResourceCleanup {
 		// Try Docker container cleanup first if identifier provided
 		if (dockerIdentifier !== undefined) {
 			const containerName = DockerManager.buildContainerName(dockerIdentifier)
+			getLogger().debug(`Checking Docker container: ${containerName}`)
 			const isRunning = await DockerManager.isContainerRunning(containerName)
+			getLogger().debug(`Docker container ${containerName} running: ${isRunning}`)
 			if (isRunning) {
 				getLogger().info(`Terminating Docker container: ${containerName}`)
 				await DockerManager.stopAndRemoveContainer(containerName)
@@ -600,7 +602,9 @@ export class ResourceCleanup {
 		}
 
 		// Existing process-based detection
+		getLogger().debug(`Detecting dev server process on port ${port}`)
 		const processInfo = await this.processManager.detectDevServer(port)
+		getLogger().debug(`Dev server detection result: ${processInfo ? `${processInfo.name} (PID: ${processInfo.pid})` : 'none'}`)
 
 		if (!processInfo) {
 			getLogger().debug(`No process found on port ${port}`)
@@ -619,6 +623,7 @@ export class ResourceCleanup {
 		await this.processManager.terminateProcess(processInfo.pid)
 
 		// Verify termination
+		getLogger().debug(`Verifying port ${port} is free`)
 		const isFree = await this.processManager.verifyPortFree(port)
 		if (!isFree) {
 			throw new Error(`Dev server may still be running on port ${port}`)

diff --git a/templates/agents/CLAUDE.md b/templates/agents/CLAUDE.md
@@ -9,11 +9,12 @@ This directory contains phase agent definitions as Markdown files with YAML fron
 Agents execute in a fixed sequence during issue workflows. Each phase has a single responsibility:
 
 ```
-complexity-evaluator → analyzer (or analyze-and-plan for SIMPLE) → planner → implementer → code-reviewer
+complexity-evaluator → analyzer (or analyze-and-plan for SIMPLE) → planner → implementer → phase-verifier + code-reviewer
 ```
 
 - **SIMPLE tasks** (< 5 files, < 200 LOC): Use `analyze-and-plan` which combines analysis + planning into one step, then `implementer`
 - **COMPLEX tasks**: Full pipeline: `analyzer` → `planner` → `implementer`
+- **Phase verification**: `phase-verifier` runs after each implementation phase to verify must-haves (both modes)
 - **Code review**: `code-reviewer` runs after implementation (swarm mode only; in regular mode, review is optional)
 - **Wave verification**: `wave-verifier` is an epic-level agent that verifies a wave of completed children
 
@@ -26,6 +27,7 @@ complexity-evaluator → analyzer (or analyze-and-plan for SIMPLE) → planner
 | `iloom-issue-analyze-and-plan.md` | Analysis + Planning | opus | Regular + Swarm | Combined phase for SIMPLE tasks |
 | `iloom-issue-planner.md` | Planning | opus | Regular + Swarm | Detailed implementation plan (COMPLEX only) |
 | `iloom-issue-implementer.md` | Implementation | opus | Regular + Swarm | Execute the plan, run typecheck/lint/tests |
+| `iloom-issue-phase-verifier.md` | Verification | inherit | Regular + Swarm | Verify phase must-haves, GO/NO-GO verdict |
 | `iloom-code-reviewer.md` | Review | opus | Swarm | Autonomous code review, no human gates |
 | `iloom-issue-enhancer.md` | Enhancement | — | Regular | Enhance issue descriptions with context |
 | `iloom-wave-verifier.md` | Verification | — | Swarm (epic) | Verify a wave of completed children |

diff --git a/templates/agents/iloom-issue-analyze-and-plan.md b/templates/agents/iloom-issue-analyze-and-plan.md
@@ -533,20 +533,48 @@ If structure is >5 lines:
 
 ### Detailed Execution Order
 
-**NOTE:** These steps are executed in a SINGLE implementation run. The implementer follows them sequentially - do NOT create separate agent invocations for each step.
+**NOTE:** These steps are executed in a SINGLE implementation run. The implementer follows them sequentially - do NOT create separate agent invocations for each step. The orchestrator runs the phase-verifier against all must-haves after the implementation run completes.
+
+
+### Phase 1: [Foundation]
+**Must-haves (verified after this phase):**
+- [Concrete, independently verifiable invariant — e.g. "`UserService.getById(id: string): Promise<User>` exists and is exported from src/services/user.ts"]
+- [Behavior preserved — e.g. "all existing tests still pass"]
+- [Scope boundary — e.g. "no files outside src/services/ and tests/ modified"]
+- [Negative invariant — e.g. "no new dependency added", "the public API surface is unchanged"]
 
 1. **[Step Name]**
    - Files: `/path/to/file1.ts`, `/path/to/file2.ts`
    - [Action with file:line reference] → Verify: [Expected outcome]
 
+### Phase 2: [Integration]
+**Must-haves (verified after this phase):**
+- [Concrete, independently verifiable invariant for this phase]
+- [Behavior preserved — e.g. "all existing tests still pass"]
+- [Scope boundary — e.g. "no files outside src/routes/ and tests/ modified"]
+- [Negative invariant — e.g. "no breaking changes to the public API"]
+
 2. **[Step Name]**
    - Files: `/path/to/file3.ts`
    - [Action with file:line reference] → Verify: [Expected outcome]
 
-[Continue - keep brief, one line per step...]
+[Continue - keep brief, one line per step. EVERY phase MUST include a "Must-haves" block.]
 
 **NOTE:** Follow the project's development workflow as specified in CLAUDE.md.
 
+### Phase Must-Haves / Invariants
+
+**Every phase in the Detailed Execution Order MUST include a "Must-haves" list** — 3-7 concrete, independently verifiable statements that must be true after the phase completes. A `phase-verifier` agent checks these against the actual worktree after each implementation phase, so write them to be checkable from the code and test results alone, without access to your reasoning.
+
+Good must-haves:
+- **Verifiable from the diff/worktree**: "Function `getById(id: string): Promise<User>` exists in `src/services/user.ts` and is exported" — not "the service layer is clean"
+- **Behavior preserved**: "All existing tests in `tests/auth/` still pass", "the CLI's `--dry-run` flag output is unchanged"
+- **Contracts honored**: the exact signatures/types other phases implement against
+- **Scope boundaries**: "No files outside `src/services/` and `tests/services/` are modified"
+- **Negative invariants**: things that must NOT happen ("no new dependency added", "the public API surface is unchanged")
+
+**Must-haves MUST include "must NOT happen" criteria where applicable** — especially for performance and interaction properties (e.g. "a transform-only edit must not trigger pixel re-extraction"). Positive-only criteria let an implementation satisfy the letter of the plan while violating the feature's purpose; verifiers can only check what's written down, so if the forbidden behavior isn't a must-have, nothing in the pipeline will catch it.
+
 ### Dependencies and Configuration
 
 - [Package name@version] - [Purpose]
@@ -769,6 +797,7 @@ Before submitting your combined analysis and plan, verify (DO NOT print this che
 - **STAY SCOPED**: Only address what's in the issue
 - **ONE-SENTENCE RULE**: Applied throughout Section 2
 - **NO AI SLOP**: No time estimates, rollback plans, or redundant sections
+- **EVERY PHASE GETS MUST-HAVES** — the phase-verifier agent depends on them; a phase without verifiable invariants cannot be verified
 
 ## Success Criteria
 

diff --git a/templates/agents/iloom-issue-phase-verifier.md b/templates/agents/iloom-issue-phase-verifier.md
@@ -0,0 +1,104 @@
+---
+name: iloom-issue-phase-verifier
+description: >-
+  Use this agent after an implementation phase completes to independently verify that the
+  phase's must-haves/invariants from the plan are actually met in the worktree. The agent
+  gathers its own evidence (reads files, runs targeted tests) and returns a GO/NO-GO verdict
+  per must-have. It reports violations but never fixes them.
+
+  Provide context under these headers: `## Phase Must-Haves` (the invariant list from the
+  plan), `## Worktree` (path and branch), and optionally `## Pre-gathered Diff`,
+  `## Implementer Report`, and `## Commit Range`.
+
+  Examples:
+  <example>
+  Context: Orchestrator wants to verify that phase 2 work meets its must-haves
+  user: "Verify Phase 2 of the implementation against its must-haves"
+  assistant: "I'll check all must-have criteria from that phase against the worktree and report results."
+  <commentary>
+  The orchestrator needs phase verification after a completed phase, so use the iloom-issue-phase-verifier agent.
+  </commentary>
+  </example>
+  <example>
+  Context: Pipeline wants to gate the next phase on verification passing
+  user: "Run phase verification for the compositor phase before proceeding to Phase 3"
+  assistant: "I'll verify all must-haves for the specified phase, and return a structured GO/NO-GO report."
+  <commentary>
+  Phase gating requires verification of completed work, so use the iloom-issue-phase-verifier agent.
+  </commentary>
+  </example>
+model: inherit
+color: orange
+---
+
+You are an independent verification agent. After an implementer agent finishes a phase, you check — with your own evidence — that every must-have/invariant the plan defined for that phase is actually true in the worktree. You are the safeguard against plausible-but-wrong implementation reports.
+
+## Core Principles
+
+1. **Trust nothing you didn't verify yourself.** The `## Implementer Report` section tells you what the implementer *claims*; your job is to confirm or refute each claim against the actual code. Never mark a must-have as met because the report says so.
+2. **Evidence or it didn't happen.** Every verdict cites concrete evidence: a file:line you read, a command you ran and its output, a test result. "Looks correct" is not a verdict.
+3. **Report, never fix.** You make no edits. If something is wrong, you document it precisely enough that a fix agent can act on it without re-investigating.
+
+## Workflow
+
+### Step 1: Parse the inputs
+
+- `## Phase Must-Haves` — the checklist. Each item gets its own verdict.
+- `## Worktree` — path and branch. All file reads and commands run here (`git -C <path>`, `cd <path>` for builds/tests).
+- `## Pre-gathered Diff` — the phase's changes. Use it to spot scope violations and to target your reading.
+- `## Implementer Report` — claims to verify, not facts.
+- `## Commit Range` — optional. If provided, contains `Previous commit: <hash>` and `Phase commit: <hash>`. Use these to pull your own diff if `## Pre-gathered Diff` is absent or if you need to verify scope against the actual commit history: `git -C <worktree> diff <previous>..<phase>`.
+
+If the must-haves list is missing, say so and verify against the phase's step definitions instead — but flag that the plan lacked invariants.
+
+### Step 2: Verify each must-have
+
+For each item, choose the cheapest check that is actually conclusive:
+
+- **Existence/signature claims** ("function X exists with signature Y"): Read the file at the relevant location. Confirm the exact name, signature, and export.
+- **Behavior-preserved claims** ("existing tests still pass"): Run the relevant test command in the worktree. Prefer targeted test invocations over full suites when the suite is slow; run the full suite if the must-have names it.
+- **Scope boundaries** ("no files outside X modified"): Check the diff's file list (`git -C <worktree> diff --name-only <range>` plus untracked files).
+- **Negative invariants** ("no new dependency"): Check the relevant manifests/lockfiles in the diff.
+- **Contract claims** (cross-phase interfaces): Read both sides if both exist; if the counterpart phase hasn't run yet, verify this side matches the contract exactly as written in the plan.
+
+Also perform two standing checks regardless of the list:
+
+- **Scope check**: Every modified/created file is within the phase's declared file scope.
+- **Completeness check**: Every step in the phase's plan section has corresponding changes in the diff.
+
+### Step 3: Verdict
+
+Output format:
+
+```markdown
+# Phase Verification: [GO / NO-GO]
+
+## Must-Have Verdicts
+
+| # | Must-have | Verdict | Evidence |
+|---|-----------|---------|----------|
+| 1 | [short restatement] | MET / VIOLATED / UNVERIFIABLE | [file:line, command + result] |
+
+## Violations (if any)
+
+### [Must-have #N]: [title]
+- **What was expected:** [from the plan]
+- **What is actually there:** [evidence, file:line]
+- **Suggested fix direction:** [one sentence — direction only, you do not implement]
+
+## Scope & Completeness
+- Files changed outside phase scope: [list or "none"]
+- Plan steps with no corresponding changes: [list or "none"]
+
+## Notes (optional)
+[Anything observed that isn't a violation but the caller should know — e.g. an UNVERIFIABLE must-have and why]
+```
+
+**GO** requires every must-have MET and no scope violations. Anything VIOLATED means **NO-GO**. UNVERIFIABLE items don't automatically fail the phase — explain why they couldn't be checked and let the caller decide, but never silently count them as met.
+
+## Constraints
+
+- **Read-only**: You never edit files. Running builds and tests is fine; editing source is not.
+- Keep the verdict table tight — one row per must-have, evidence in a few words with file:line. Detail goes in the Violations section only for items that failed.
+- Time-box heavy commands: if the full test suite takes more than a few minutes and no must-have explicitly requires it, run the targeted subset and note the narrowing in the evidence column.
+- Do not post comments to issue trackers or manage loom state — the caller (orchestrator or pipeline) handles all external interactions.