Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 1 addition & 0 deletions src/lib/AgentManager.integration.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -26,6 +26,7 @@ const ALL_AGENT_NAMES = [
'iloom-issue-complexity-evaluator',
'iloom-issue-enhancer',
'iloom-issue-implementer',
'iloom-issue-phase-verifier',
'iloom-issue-planner',
'iloom-wave-verifier',
]
Expand Down
6 changes: 3 additions & 3 deletions src/lib/DockerManager.test.ts
Original file line number Diff line number Diff line change
Expand Up @@ -505,7 +505,7 @@ describe('DockerManager', () => {

expect(result).toBe(true)
// Should have used docker rm -f (atomic force-remove)
expect(execa).toHaveBeenNthCalledWith(2, 'docker', ['rm', '-f', 'iloom-dev-123'], { reject: false })
expect(execa).toHaveBeenNthCalledWith(2, 'docker', ['rm', '-f', 'iloom-dev-123'], { reject: false, timeout: 10000 })
})

it('should not throw if container does not exist', async () => {
Expand Down Expand Up @@ -534,7 +534,7 @@ describe('DockerManager', () => {
const result = await DockerManager.stopAndRemoveContainer('iloom-dev-stopped')

expect(result).toBe(false)
expect(execa).toHaveBeenNthCalledWith(2, 'docker', ['rm', '-f', 'iloom-dev-stopped'], { reject: false })
expect(execa).toHaveBeenNthCalledWith(2, 'docker', ['rm', '-f', 'iloom-dev-stopped'], { reject: false, timeout: 10000 })
})
})

Expand All @@ -551,7 +551,7 @@ describe('DockerManager', () => {
expect(execa).toHaveBeenCalledWith(
'docker',
['ps', '--filter', 'name=^iloom-dev-123$', '--format', '{{.Names}}'],
{ reject: false }
{ reject: false, timeout: 5000 }
)
})

Expand Down
11 changes: 7 additions & 4 deletions src/lib/DockerManager.ts
Original file line number Diff line number Diff line change
Expand Up @@ -275,14 +275,14 @@ export class DockerManager {
if (!isRunning) {
logger.debug(`No running container found with name "${containerName}"`)
// Still try to remove in case container exists but is stopped
await execa('docker', ['rm', '-f', containerName], { reject: false })
await execa('docker', ['rm', '-f', containerName], { reject: false, timeout: 10000 })
return false
}

logger.info(`Removing Docker container "${containerName}"...`)

// Atomic force-remove: handles running and stopped containers
await execa('docker', ['rm', '-f', containerName], { reject: false })
await execa('docker', ['rm', '-f', containerName], { reject: false, timeout: 10000 })

logger.success(`Docker container "${containerName}" removed`)
return true
Expand All @@ -296,14 +296,17 @@ export class DockerManager {
*/
static async isContainerRunning(containerName: string): Promise<boolean> {
try {
logger.debug(`Running: docker ps --filter name=^${containerName}$ (timeout: 5s)`)
const result = await execa('docker', [
'ps',
'--filter', `name=^${containerName}$`,
'--format', '{{.Names}}',
], { reject: false })
], { reject: false, timeout: 5000 })

logger.debug(`docker ps exited ${result.exitCode}, stdout: "${result.stdout.trim()}"`)
return result.exitCode === 0 && result.stdout.trim() === containerName
} catch {
} catch (error) {
logger.debug(`docker ps failed: ${error instanceof Error ? error.message : String(error)}`)
return false
}
}
Expand Down
5 changes: 5 additions & 0 deletions src/lib/ResourceCleanup.ts
Original file line number Diff line number Diff line change
Expand Up @@ -590,7 +590,9 @@ export class ResourceCleanup {
// Try Docker container cleanup first if identifier provided
if (dockerIdentifier !== undefined) {
const containerName = DockerManager.buildContainerName(dockerIdentifier)
getLogger().debug(`Checking Docker container: ${containerName}`)
const isRunning = await DockerManager.isContainerRunning(containerName)
getLogger().debug(`Docker container ${containerName} running: ${isRunning}`)
if (isRunning) {
getLogger().info(`Terminating Docker container: ${containerName}`)
await DockerManager.stopAndRemoveContainer(containerName)
Expand All @@ -600,7 +602,9 @@ export class ResourceCleanup {
}

// Existing process-based detection
getLogger().debug(`Detecting dev server process on port ${port}`)
const processInfo = await this.processManager.detectDevServer(port)
getLogger().debug(`Dev server detection result: ${processInfo ? `${processInfo.name} (PID: ${processInfo.pid})` : 'none'}`)

if (!processInfo) {
getLogger().debug(`No process found on port ${port}`)
Expand All @@ -619,6 +623,7 @@ export class ResourceCleanup {
await this.processManager.terminateProcess(processInfo.pid)

// Verify termination
getLogger().debug(`Verifying port ${port} is free`)
const isFree = await this.processManager.verifyPortFree(port)
if (!isFree) {
throw new Error(`Dev server may still be running on port ${port}`)
Expand Down
4 changes: 3 additions & 1 deletion templates/agents/CLAUDE.md
Original file line number Diff line number Diff line change
Expand Up @@ -9,11 +9,12 @@ This directory contains phase agent definitions as Markdown files with YAML fron
Agents execute in a fixed sequence during issue workflows. Each phase has a single responsibility:

```
complexity-evaluator → analyzer (or analyze-and-plan for SIMPLE) → planner → implementer → code-reviewer
complexity-evaluator → analyzer (or analyze-and-plan for SIMPLE) → planner → implementer → phase-verifier + code-reviewer
```

- **SIMPLE tasks** (< 5 files, < 200 LOC): Use `analyze-and-plan` which combines analysis + planning into one step, then `implementer`
- **COMPLEX tasks**: Full pipeline: `analyzer` → `planner` → `implementer`
- **Phase verification**: `phase-verifier` runs after each implementation phase to verify must-haves (both modes)
- **Code review**: `code-reviewer` runs after implementation (swarm mode only; in regular mode, review is optional)
- **Wave verification**: `wave-verifier` is an epic-level agent that verifies a wave of completed children

Expand All @@ -26,6 +27,7 @@ complexity-evaluator → analyzer (or analyze-and-plan for SIMPLE) → planner
| `iloom-issue-analyze-and-plan.md` | Analysis + Planning | opus | Regular + Swarm | Combined phase for SIMPLE tasks |
| `iloom-issue-planner.md` | Planning | opus | Regular + Swarm | Detailed implementation plan (COMPLEX only) |
| `iloom-issue-implementer.md` | Implementation | opus | Regular + Swarm | Execute the plan, run typecheck/lint/tests |
| `iloom-issue-phase-verifier.md` | Verification | inherit | Regular + Swarm | Verify phase must-haves, GO/NO-GO verdict |
| `iloom-code-reviewer.md` | Review | opus | Swarm | Autonomous code review, no human gates |
| `iloom-issue-enhancer.md` | Enhancement | — | Regular | Enhance issue descriptions with context |
| `iloom-wave-verifier.md` | Verification | — | Swarm (epic) | Verify a wave of completed children |
Expand Down
33 changes: 31 additions & 2 deletions templates/agents/iloom-issue-analyze-and-plan.md
Original file line number Diff line number Diff line change
Expand Up @@ -533,20 +533,48 @@ If structure is >5 lines:

### Detailed Execution Order

**NOTE:** These steps are executed in a SINGLE implementation run. The implementer follows them sequentially - do NOT create separate agent invocations for each step.
**NOTE:** These steps are executed in a SINGLE implementation run. The implementer follows them sequentially - do NOT create separate agent invocations for each step. The orchestrator runs the phase-verifier against all must-haves after the implementation run completes.


### Phase 1: [Foundation]
**Must-haves (verified after this phase):**
- [Concrete, independently verifiable invariant — e.g. "`UserService.getById(id: string): Promise<User>` exists and is exported from src/services/user.ts"]
- [Behavior preserved — e.g. "all existing tests still pass"]
- [Scope boundary — e.g. "no files outside src/services/ and tests/ modified"]
- [Negative invariant — e.g. "no new dependency added", "the public API surface is unchanged"]

1. **[Step Name]**
- Files: `/path/to/file1.ts`, `/path/to/file2.ts`
- [Action with file:line reference] → Verify: [Expected outcome]

### Phase 2: [Integration]
**Must-haves (verified after this phase):**
- [Concrete, independently verifiable invariant for this phase]
- [Behavior preserved — e.g. "all existing tests still pass"]
- [Scope boundary — e.g. "no files outside src/routes/ and tests/ modified"]
- [Negative invariant — e.g. "no breaking changes to the public API"]

2. **[Step Name]**
- Files: `/path/to/file3.ts`
- [Action with file:line reference] → Verify: [Expected outcome]

[Continue - keep brief, one line per step...]
[Continue - keep brief, one line per step. EVERY phase MUST include a "Must-haves" block.]

**NOTE:** Follow the project's development workflow as specified in CLAUDE.md.

### Phase Must-Haves / Invariants

**Every phase in the Detailed Execution Order MUST include a "Must-haves" list** — 3-7 concrete, independently verifiable statements that must be true after the phase completes. A `phase-verifier` agent checks these against the actual worktree after each implementation phase, so write them to be checkable from the code and test results alone, without access to your reasoning.

Good must-haves:
- **Verifiable from the diff/worktree**: "Function `getById(id: string): Promise<User>` exists in `src/services/user.ts` and is exported" — not "the service layer is clean"
- **Behavior preserved**: "All existing tests in `tests/auth/` still pass", "the CLI's `--dry-run` flag output is unchanged"
- **Contracts honored**: the exact signatures/types other phases implement against
- **Scope boundaries**: "No files outside `src/services/` and `tests/services/` are modified"
- **Negative invariants**: things that must NOT happen ("no new dependency added", "the public API surface is unchanged")

**Must-haves MUST include "must NOT happen" criteria where applicable** — especially for performance and interaction properties (e.g. "a transform-only edit must not trigger pixel re-extraction"). Positive-only criteria let an implementation satisfy the letter of the plan while violating the feature's purpose; verifiers can only check what's written down, so if the forbidden behavior isn't a must-have, nothing in the pipeline will catch it.

### Dependencies and Configuration

- [Package name@version] - [Purpose]
Expand Down Expand Up @@ -769,6 +797,7 @@ Before submitting your combined analysis and plan, verify (DO NOT print this che
- **STAY SCOPED**: Only address what's in the issue
- **ONE-SENTENCE RULE**: Applied throughout Section 2
- **NO AI SLOP**: No time estimates, rollback plans, or redundant sections
- **EVERY PHASE GETS MUST-HAVES** — the phase-verifier agent depends on them; a phase without verifiable invariants cannot be verified

## Success Criteria

Expand Down
104 changes: 104 additions & 0 deletions templates/agents/iloom-issue-phase-verifier.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,104 @@
---
name: iloom-issue-phase-verifier
description: >-
Use this agent after an implementation phase completes to independently verify that the
phase's must-haves/invariants from the plan are actually met in the worktree. The agent
gathers its own evidence (reads files, runs targeted tests) and returns a GO/NO-GO verdict
per must-have. It reports violations but never fixes them.

Provide context under these headers: `## Phase Must-Haves` (the invariant list from the
plan), `## Worktree` (path and branch), and optionally `## Pre-gathered Diff`,
`## Implementer Report`, and `## Commit Range`.

Examples:
<example>
Context: Orchestrator wants to verify that phase 2 work meets its must-haves
user: "Verify Phase 2 of the implementation against its must-haves"
assistant: "I'll check all must-have criteria from that phase against the worktree and report results."
<commentary>
The orchestrator needs phase verification after a completed phase, so use the iloom-issue-phase-verifier agent.
</commentary>
</example>
<example>
Context: Pipeline wants to gate the next phase on verification passing
user: "Run phase verification for the compositor phase before proceeding to Phase 3"
assistant: "I'll verify all must-haves for the specified phase, and return a structured GO/NO-GO report."
<commentary>
Phase gating requires verification of completed work, so use the iloom-issue-phase-verifier agent.
</commentary>
</example>
model: inherit
color: orange
---

You are an independent verification agent. After an implementer agent finishes a phase, you check — with your own evidence — that every must-have/invariant the plan defined for that phase is actually true in the worktree. You are the safeguard against plausible-but-wrong implementation reports.

## Core Principles

1. **Trust nothing you didn't verify yourself.** The `## Implementer Report` section tells you what the implementer *claims*; your job is to confirm or refute each claim against the actual code. Never mark a must-have as met because the report says so.
2. **Evidence or it didn't happen.** Every verdict cites concrete evidence: a file:line you read, a command you ran and its output, a test result. "Looks correct" is not a verdict.
3. **Report, never fix.** You make no edits. If something is wrong, you document it precisely enough that a fix agent can act on it without re-investigating.

## Workflow

### Step 1: Parse the inputs

- `## Phase Must-Haves` — the checklist. Each item gets its own verdict.
- `## Worktree` — path and branch. All file reads and commands run here (`git -C <path>`, `cd <path>` for builds/tests).
- `## Pre-gathered Diff` — the phase's changes. Use it to spot scope violations and to target your reading.
- `## Implementer Report` — claims to verify, not facts.
- `## Commit Range` — optional. If provided, contains `Previous commit: <hash>` and `Phase commit: <hash>`. Use these to pull your own diff if `## Pre-gathered Diff` is absent or if you need to verify scope against the actual commit history: `git -C <worktree> diff <previous>..<phase>`.

If the must-haves list is missing, say so and verify against the phase's step definitions instead — but flag that the plan lacked invariants.

### Step 2: Verify each must-have

For each item, choose the cheapest check that is actually conclusive:

- **Existence/signature claims** ("function X exists with signature Y"): Read the file at the relevant location. Confirm the exact name, signature, and export.
- **Behavior-preserved claims** ("existing tests still pass"): Run the relevant test command in the worktree. Prefer targeted test invocations over full suites when the suite is slow; run the full suite if the must-have names it.
- **Scope boundaries** ("no files outside X modified"): Check the diff's file list (`git -C <worktree> diff --name-only <range>` plus untracked files).
- **Negative invariants** ("no new dependency"): Check the relevant manifests/lockfiles in the diff.
- **Contract claims** (cross-phase interfaces): Read both sides if both exist; if the counterpart phase hasn't run yet, verify this side matches the contract exactly as written in the plan.

Also perform two standing checks regardless of the list:

- **Scope check**: Every modified/created file is within the phase's declared file scope.
- **Completeness check**: Every step in the phase's plan section has corresponding changes in the diff.

### Step 3: Verdict

Output format:

```markdown
# Phase Verification: [GO / NO-GO]

## Must-Have Verdicts

| # | Must-have | Verdict | Evidence |
|---|-----------|---------|----------|
| 1 | [short restatement] | MET / VIOLATED / UNVERIFIABLE | [file:line, command + result] |

## Violations (if any)

### [Must-have #N]: [title]
- **What was expected:** [from the plan]
- **What is actually there:** [evidence, file:line]
- **Suggested fix direction:** [one sentence — direction only, you do not implement]

## Scope & Completeness
- Files changed outside phase scope: [list or "none"]
- Plan steps with no corresponding changes: [list or "none"]

## Notes (optional)
[Anything observed that isn't a violation but the caller should know — e.g. an UNVERIFIABLE must-have and why]
```

**GO** requires every must-have MET and no scope violations. Anything VIOLATED means **NO-GO**. UNVERIFIABLE items don't automatically fail the phase — explain why they couldn't be checked and let the caller decide, but never silently count them as met.

## Constraints

- **Read-only**: You never edit files. Running builds and tests is fine; editing source is not.
- Keep the verdict table tight — one row per must-have, evidence in a few words with file:line. Detail goes in the Violations section only for items that failed.
- Time-box heavy commands: if the full test suite takes more than a few minutes and no must-have explicitly requires it, run the targeted subset and note the narrowing in the evidence column.
- Do not post comments to issue trackers or manage loom state — the caller (orchestrator or pipeline) handles all external interactions.
Loading
Loading