From b135942b09b0022140c8075c831ce8e8631afac2 Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@enforceauth.com>
Date: Tue, 28 Apr 2026 20:42:18 -0400
Subject: [PATCH 01/18] docs: add plans/ tree with PR 1-3 plan for --deep

Establishes plans/todo/ and plans/done/ folder convention for tracking
in-flight work. Documents the three-transport architecture for making
--deep functional without coupling Zift to specific LLM providers:

- PR 1: OpenAI-compatible HTTP (headless/CI; one client, many backends)
- PR 2: MCP server (agent hosts call Zift; we never host a model)
- PR 3: Subprocess hook (escape hatch for any other agent)

Build order is inside-out: PR 1 ships the shared primitives (candidate
selection, context expansion, prompt library, JSON output schema, merge,
cost tracking) so PR 2 and PR 3 are thin transport wrappers, not
parallel implementations.

PR 1 plan is detailed (module layout, signatures, schema, prompt
sketch, error handling, 6-commit sequence, risks). PR 2 and PR 3 are
sketches to be expanded when their turn comes.
---
 plans/done/.gitkeep                      |   0
 plans/todo/00-deep-mode-overview.md      |  67 ++++
 plans/todo/01-pr1-deep-http-transport.md | 444 +++++++++++++++++++++++
 plans/todo/02-pr2-mcp-server.md          |  94 +++++
 plans/todo/03-pr3-subprocess-hook.md     | 107 ++++++
 5 files changed, 712 insertions(+)
 create mode 100644 plans/done/.gitkeep
 create mode 100644 plans/todo/00-deep-mode-overview.md
 create mode 100644 plans/todo/01-pr1-deep-http-transport.md
 create mode 100644 plans/todo/02-pr2-mcp-server.md
 create mode 100644 plans/todo/03-pr3-subprocess-hook.md

diff --git a/plans/done/.gitkeep b/plans/done/.gitkeep
new file mode 100644
index 0000000..e69de29
diff --git a/plans/todo/00-deep-mode-overview.md b/plans/todo/00-deep-mode-overview.md
new file mode 100644
index 0000000..0591af4
--- /dev/null
+++ b/plans/todo/00-deep-mode-overview.md
@@ -0,0 +1,67 @@
+# Deep mode — design overview
+
+Tracks the multi-PR effort to make `--deep` functional and keep Zift agent-agnostic as the LLM landscape churns.
+
+## Goal
+
+Make `zift scan --deep` produce semantic findings (`pass: ScanPass::Semantic`) without coupling Zift to any specific LLM provider, vendor, or local-model runner. Users must be able to plug in the agent they already use.
+
+## Design constraints
+
+1. **No provider treadmill.** We do not write `AnthropicClient` + `OpenAIClient` + `OllamaClient` + `GeminiClient` + …. Every quarter another provider ships; we'd spend our time chasing them.
+2. **Local-first must be a real path**, not an afterthought. Running fully offline against `ollama serve` or `llama-server` should be no harder than running against a hosted API.
+3. **Zift's value is rules + prompts + Rego validation**, not model plumbing. The transports are interchangeable; the authz domain knowledge is the moat.
+4. **Each transport reuses the same primitives** — candidate selection, context expansion, prompt library, structured output schema, semantic-finding merge. No parallel implementations.
+
+## The three transports
+
+| Tier | Transport | When it's used | PR |
+|------|-----------|----------------|-----|
+| 1 | **MCP server** (`zift mcp`) | User has an agent host (Claude Code, Cursor, Continue, Cline, Zed). Their agent calls Zift tools; their agent calls the model. We never see the model. | [PR 2](./02-pr2-mcp-server.md) |
+| 2 | **OpenAI-compatible HTTP** (`--base-url`) | Headless / CI runs. One client speaks to Ollama, LM Studio, llama.cpp `server`, vLLM, OpenRouter, Together, Groq, Anthropic-via-proxy, OpenAI itself. | [PR 1](./01-pr1-deep-http-transport.md) |
+| 3 | **Subprocess hook** (`--agent-cmd`) | Anything else — `claude -p`, `aider`, custom shell scripts, agents that don't expose HTTP. Stdin: prompt + JSON. Stdout: JSON matching our schema. | [PR 3](./03-pr3-subprocess-hook.md) |
+
+User picks explicitly via `[deep] mode = "mcp" | "http" | "subprocess"`. No provider auto-detection magic.
+
+## Build order: inside-out
+
+We build PR 1 first even though MCP (PR 2) is the strategic headline. Reason: MCP needs the prompt library, candidate selection, context expansion, and structured-output schema *anyway*. Building HTTP first forces those primitives into a clean shape; the MCP server in PR 2 is then a thin transport layer over them. The reverse order would mean writing the primitives for MCP, then refactoring them when HTTP shows up.
+
+```
+                               ┌─────────────────────────┐
+                               │     src/deep/           │
+                               │  candidate · context    │
+                               │  prompt · merge · cost  │
+                               │  finding · output schema│
+                               └─────────────────────────┘
+                                ▲          ▲          ▲
+                                │          │          │
+              ┌─────────────────┘          │          └─────────────────┐
+              │                            │                            │
+   ┌──────────────────────┐    ┌──────────────────────┐    ┌──────────────────────┐
+   │  PR 1: HTTP client   │    │  PR 2: MCP server    │    │  PR 3: Subprocess    │
+   │  (built first)       │    │  (thin wrapper)      │    │  (thinnest wrapper)  │
+   └──────────────────────┘    └──────────────────────┘    └──────────────────────┘
+```
+
+## Shared primitives (defined in PR 1, reused by PR 2 and PR 3)
+
+- `deep::prompt::SYSTEM_PROMPT` — the authz definition + category taxonomy + output contract.
+- `deep::prompt::output_schema()` — the JSON Schema every transport's response must validate against.
+- `deep::prompt::render(...)` — produces a `RenderedPrompt` (`system`, `user`, `schema`) per candidate.
+- `deep::candidate::select_candidates(...)` — chooses what to escalate / cold-scan.
+- `deep::context::expand_finding(...)` and `expand_region(...)` — pulls surrounding code.
+- `deep::finding::SemanticFinding` and `into_finding(...)` — the deserialization target + canonical-Finding translation.
+- `deep::merge::merge(...)` — dedup + integration of semantic findings into the structural set.
+- `deep::cost::CostTracker` — token-based USD ceiling.
+
+## Folder convention
+
+- `plans/todo/` — work not yet shipped. Plans live here while in flight.
+- `plans/done/` — work that has shipped. Move the file here in the same commit that ships the work, optionally appending a "Shipped" section with the merged PR link and any decisions changed during implementation.
+
+## Cross-references
+
+- [PR 1 — HTTP transport](./01-pr1-deep-http-transport.md)
+- [PR 2 — MCP server](./02-pr2-mcp-server.md)
+- [PR 3 — Subprocess hook](./03-pr3-subprocess-hook.md)
diff --git a/plans/todo/01-pr1-deep-http-transport.md b/plans/todo/01-pr1-deep-http-transport.md
new file mode 100644
index 0000000..7edc44c
--- /dev/null
+++ b/plans/todo/01-pr1-deep-http-transport.md
@@ -0,0 +1,444 @@
+# PR 1 — Tier 2 deep scan: OpenAI-compatible HTTP transport
+
+Companion to [00-deep-mode-overview.md](./00-deep-mode-overview.md). This PR makes `--deep` functional end-to-end and lays down the shared primitives that PR 2 and PR 3 reuse.
+
+## 1. Goal & scope
+
+End-to-end working `--deep` flag using a single HTTP client that speaks the OpenAI `/v1/chat/completions` shape. After this PR:
+
+```
+zift scan ./repo --deep \
+  --base-url http://localhost:11434/v1 \
+  --model qwen2.5-coder:14b \
+  --api-key sk-...
+```
+
+…produces additional `Finding`s with `pass: ScanPass::Semantic` merged into the report alongside structural findings.
+
+**Out of scope**: MCP server, subprocess hook, per-provider auth quirks. We accept a `base_url` and let the user point us at whatever server or proxy speaks the OpenAI shape.
+
+## 2. Module layout
+
+All new code lives under `src/deep/`.
+
+### `src/deep/mod.rs` — orchestrator
+
+```rust
+pub use config::DeepRuntime;          // resolved CLI+config bundle
+pub use error::DeepError;
+pub use finding::SemanticFinding;     // pre-merge LLM output
+pub mod candidate;
+pub mod client;
+pub mod config;
+pub mod context;
+pub mod cost;
+pub mod error;
+pub mod finding;
+pub mod merge;
+pub mod prompt;
+
+pub fn run(
+    structural: &[Finding],
+    scan_root: &Path,
+    runtime: &DeepRuntime,
+) -> Result<Vec<Finding>, DeepError>;
+```
+
+`run` is the single entry point called from `commands/scan.rs`. Synchronous (see §4). Returns `Vec<Finding>` with `pass: Semantic` already set. Merging into the master vec happens in the caller.
+
+### `src/deep/config.rs` — runtime config
+
+```rust
+pub struct DeepRuntime {
+    pub base_url: String,                  // e.g. "http://localhost:11434/v1"
+    pub model: String,
+    pub api_key: Option<String>,           // some local servers accept any string or none
+    pub max_cost_usd: Option<f64>,
+    pub cost_per_1k_input: Option<f64>,    // user-supplied; None = no cost tracking
+    pub cost_per_1k_output: Option<f64>,
+    pub request_timeout_secs: u64,         // default 120
+    pub max_candidates: usize,             // default 50
+    pub max_concurrent: usize,             // default 4
+    pub temperature: f32,                  // default 0.0
+    pub max_prompt_chars: usize,           // default 16000, hard truncates expanded snippet
+}
+
+pub fn build(args: &ScanArgs, config: &ZiftConfig) -> Result<DeepRuntime, DeepError>;
+```
+
+Resolution precedence: CLI flag > env var (`ZIFT_API_KEY`) > `[deep]` config table > built-in default. Validation: empty `base_url` is hard error; missing `model` is hard error; missing `api_key` is a warning (not an error — Ollama/llama.cpp accept any value).
+
+### `src/deep/error.rs`
+
+```rust
+#[derive(thiserror::Error, Debug)]
+pub enum DeepError {
+    #[error("missing config: {0}")]                Config(String),
+    #[error("HTTP error: {0}")]                    Http(#[from] reqwest::Error),
+    #[error("model returned malformed JSON: {0}")] BadResponse(String),
+    #[error("cost ceiling reached after ${spent:.4} USD")] CostExceeded { spent: f64 },
+    #[error("request timed out after {secs}s")]    Timeout { secs: u64 },
+    #[error("io error: {0}")]                      Io(#[from] std::io::Error),
+}
+```
+
+`DeepError` converts into `ZiftError::General` at the call site so the rest of the binary stays unchanged.
+
+### `src/deep/candidate.rs` — what to send
+
+```rust
+pub struct Candidate {
+    pub kind: CandidateKind,                   // Escalation | ColdRegion
+    pub file: PathBuf,
+    pub language: Language,
+    pub line_start: usize,                     // 1-based, inclusive
+    pub line_end: usize,
+    pub source_snippet: String,                // already-expanded context
+    pub original_finding_id: Option<String>,   // present iff Escalation
+    pub seed_category: Option<AuthCategory>,   // hint for prompt selection
+}
+
+pub enum CandidateKind { Escalation, ColdRegion }
+
+pub fn select_candidates(
+    structural: &[Finding],
+    scan_root: &Path,
+    runtime: &DeepRuntime,
+) -> Result<Vec<Candidate>, DeepError>;
+```
+
+See "Candidate selection rules" (under §6) for heuristics.
+
+### `src/deep/context.rs` — code expansion
+
+```rust
+pub fn expand_finding(
+    finding: &Finding,
+    scan_root: &Path,
+) -> Result<ExpandedContext, DeepError>;
+
+pub fn expand_region(
+    file: &Path,
+    language: Language,
+    line_start: usize,
+    line_end: usize,
+) -> Result<ExpandedContext, DeepError>;
+
+pub struct ExpandedContext {
+    pub file_relative: PathBuf,
+    pub language: Language,
+    pub line_start: usize,    // adjusted to enclosing function start
+    pub line_end: usize,
+    pub snippet: String,      // function body + imports
+    pub imports: Vec<String>, // top of file, top 20 lines verbatim
+}
+```
+
+Strategy in §7.
+
+### `src/deep/prompt.rs` — prompt + JSON schema
+
+```rust
+pub struct PromptInputs<'a> {
+    pub candidate: &'a Candidate,
+    pub structural_finding: Option<&'a Finding>,
+}
+
+pub struct RenderedPrompt {
+    pub system: String,
+    pub user: String,
+    pub schema: serde_json::Value,    // for OpenAI structured-outputs
+}
+
+pub fn render(inputs: &PromptInputs) -> RenderedPrompt;
+
+pub fn output_schema() -> serde_json::Value;       // exported; PR 2/PR 3 reuse
+pub const SYSTEM_PROMPT: &str = "...";              // exported; PR 2/PR 3 reuse
+```
+
+Schema in §5; sketch in §6.
+
+### `src/deep/client.rs` — HTTP transport
+
+```rust
+pub struct OpenAiCompatibleClient {
+    http: reqwest::blocking::Client,
+    base_url: String,
+    api_key: Option<String>,
+    model: String,
+    temperature: f32,
+}
+
+impl OpenAiCompatibleClient {
+    pub fn new(runtime: &DeepRuntime) -> Result<Self, DeepError>;
+    pub fn analyze(&self, prompt: &RenderedPrompt) -> Result<AnalyzeResponse, DeepError>;
+}
+
+pub struct AnalyzeResponse {
+    pub findings: Vec<SemanticFinding>,
+    pub usage: TokenUsage,
+}
+
+pub struct TokenUsage {
+    pub input_tokens: u32,
+    pub output_tokens: u32,
+}
+```
+
+Implementation: POST `{base_url}/chat/completions` with body containing `model`, `messages: [system, user]`, `response_format: { type: "json_schema", json_schema: { name: "zift_findings", strict: true, schema: ... } }`, `temperature`. Some local servers ignore `response_format` — we still parse `choices[0].message.content` as JSON. If that fails, retry once with a degraded prompt that says "respond with ONLY a JSON object matching this schema" and no `response_format` field.
+
+### `src/deep/finding.rs` — semantic-side type
+
+```rust
+#[derive(Deserialize, Debug, Clone)]
+pub struct SemanticFinding {
+    pub line_start: usize,
+    pub line_end: usize,
+    pub category: AuthCategory,
+    pub confidence: Confidence,
+    pub description: String,
+    pub reasoning: String,
+    pub is_false_positive: bool,           // for escalations: model judges seed
+}
+
+pub fn into_finding(
+    sem: SemanticFinding,
+    candidate: &Candidate,
+    seed: Option<&Finding>,
+) -> Finding;
+```
+
+`into_finding` translates to the canonical `Finding`, computing the deterministic id via the existing hash. Need to expose `compute_finding_id` from `scanner/matcher.rs` as `pub(crate)` — clean refactor in commit 2.
+
+### `src/deep/merge.rs` — dedup + integrate
+
+```rust
+pub fn merge(structural: Vec<Finding>, semantic: Vec<Finding>) -> Vec<Finding>;
+```
+
+Rules: a semantic finding overlapping a structural finding's range (same file, range overlap >= 50%) replaces the structural one only if the semantic finding has equal or higher confidence; otherwise both kept. False-positive flags from `SemanticFinding::is_false_positive` cause the structural counterpart to be dropped entirely.
+
+### `src/deep/cost.rs` — token-based ceiling
+
+```rust
+pub struct CostTracker {
+    spent_micro_usd: AtomicU64,    // millionths of a dollar; avoid float atomics
+    cap_usd: Option<f64>,
+    in_rate: Option<f64>,
+    out_rate: Option<f64>,
+}
+
+impl CostTracker {
+    pub fn new(runtime: &DeepRuntime) -> Self;
+    pub fn record(&self, usage: &TokenUsage) -> Result<(), DeepError>;
+    pub fn spent_usd(&self) -> f64;
+}
+```
+
+After every response, orchestrator calls `record`. If new total exceeds cap, return `CostExceeded` and stop dispatching further candidates (in-flight ones complete naturally).
+
+## 3. Cargo.toml additions
+
+```toml
+[dependencies]
+reqwest = { version = "0.12", default-features = false, features = ["blocking", "json", "rustls-tls"] }
+
+[dev-dependencies]
+mockito = "1"
+```
+
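+`rustls-tls` over `native-tls` to keep the build hermetic (no OpenSSL on contributor machines). `serde_json` and `thiserror` already present. `mockito` exposes a sync API and is a dev-dependency only, so tests need no async harness and nothing it pulls in (it uses tokio internally) reaches the release build.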
+
+## 4. Async strategy: blocking
+
+**Recommendation: blocking.** Reasons:
+
+- Today's `scan` pipeline is sync end-to-end. `commands::scan::execute → scanner::scan` is sync.
+- Going async means making `main` async (forces a tokio runtime) or `block_on`-ing inside `scan::execute`. Either way, async colors `run`, `client::analyze`, every helper.
+- Concurrency for HTTP fan-out is achievable with `std::thread::scope` over `reqwest::blocking::Client` (clone-cheap). Cap parallelism at `runtime.max_concurrent` (default 4).
+- If we later need streaming for MCP (PR 2) we can add an async path then; the prompt/schema/candidate primitives don't change.
+
+Single shared `reqwest::blocking::Client` on `OpenAiCompatibleClient` — avoids per-request runtime spinup.
+
+## 5. Structured output JSON schema
+
+This is the contract PR 2 and PR 3 must also bind to.
+
+```json
+{
+  "type": "object",
+  "properties": {
+    "findings": {
+      "type": "array",
+      "items": {
+        "type": "object",
+        "properties": {
+          "line_start":        { "type": "integer", "minimum": 1 },
+          "line_end":          { "type": "integer", "minimum": 1 },
+          "category":          { "type": "string",
+                                 "enum": ["rbac", "abac", "middleware",
+                                          "business_rule", "ownership",
+                                          "feature_gate", "custom"] },
+          "confidence":        { "type": "string", "enum": ["low", "medium", "high"] },
+          "description":       { "type": "string", "maxLength": 280 },
+          "reasoning":         { "type": "string", "maxLength": 800 },
+          "is_false_positive": { "type": "boolean" }
+        },
+        "required": ["line_start", "line_end", "category", "confidence",
+                     "description", "reasoning", "is_false_positive"],
+        "additionalProperties": false
+      }
+    }
+  },
+  "required": ["findings"],
+  "additionalProperties": false
+}
+```
+
+Snake_case enums match `#[serde(rename_all = "snake_case")]` on `AuthCategory`.
+
+## 6. System prompt sketch
+
+`SYSTEM_PROMPT` (single concatenated string):
+
+1. Role: "You are an expert in application authorization patterns. You analyze source code snippets to identify embedded authorization logic."
+2. Definition of authorization: role checks, attribute checks, ownership, feature gates, middleware, business logic that gates access.
+3. Anti-examples: input validation, generic null/empty checks, rate limits not user-conditioned, retry/idempotency logic.
+4. The seven `AuthCategory` values with one-sentence definitions each.
+5. Confidence calibration: "high = unambiguous auth check; medium = likely auth with reasonable alternate interpretation; low = could be auth, depends on unseen context."
+6. Output contract: "You MUST return a JSON object matching this schema. No prose. No markdown fences. If the snippet contains no authorization logic, return `{\"findings\": []}`."
+
+Per-call user prompt (rendered by `prompt::render`):
+
+- Header: file relative path, language, line range.
+- Optional seed: "A structural rule flagged this region as <category, confidence>. Confirm or reject."
+- The expanded snippet, fenced.
+- "Identify all authorization decisions in the snippet above. Use line numbers from the snippet."
+
+Keep it short — small local models struggle with long prompts.
+
+### Candidate selection rules
+
+`select_candidates` returns up to `runtime.max_candidates` (default 50), in priority order:
+
+**Escalations** (push):
+- Every structural finding with `confidence: Low`. Goal: classify or reject.
+- Every structural finding with `confidence: Medium` and `category: BusinessRule | Custom | Ownership`. These are the noisy categories.
+- Skip `confidence: High` — already trusted; sending them just costs money.
+
+**Cold regions** (pull):
+- For each scanned source file (reuse `discovery::discover_files`), regex-match function/method names: `(?i)(authori[sz]e|authenticate|require|ensure|guard|protect|allow|deny|check|can|may|isAdmin|hasRole|hasPermission)` plus the language's function-keyword regex.
+- Each match becomes one `ColdRegion` candidate. Cap cold regions at 30% of `max_candidates` so escalations get priority.
+- De-duplicate cold regions against escalation file/line ranges.
+
+Determinism: within each priority tier, candidates are sorted by `(file, line_start)` so reruns produce identical input ordering, keeping test expectations stable.
+
+## 7. Context expansion strategy
+
+Two-tier:
+
+**Fast path (default)**: line-window expansion. Read the file, take lines `[max(1, start-5), min(eof, end+15)]`. Cheap, no parsing. Plus the first 20 lines of the file as `imports` (verbatim — model parses).
+
+**Smart path** (used when fast-path snippet is < 8 lines after window): re-parse with tree-sitter (`parser::parse_source` already exists); walk up from the original node to the nearest `function_declaration | method_definition | arrow_function | function_expression | class_declaration`; expand to that node's range. Cap at 200 lines to bound prompt size.
+
+Truncate the final snippet at `runtime.max_prompt_chars` (default 16000) to prevent foot-guns on huge functions.
+
+## 8. Test plan
+
+### Unit tests
+
+| Module | Tests |
+|---|---|
+| `config.rs` | precedence (CLI > env > toml); empty base_url errors; partial config accepted |
+| `candidate.rs` | high-conf skipped; low-conf escalated; cold regex matches `requireAuth`/`isAdmin` but not `authorRefactor`; max_candidates honored; deterministic ordering |
+| `context.rs` | line-window math at file boundaries; tree-sitter expansion finds enclosing function in TS; imports extracted |
+| `prompt.rs` | rendered prompt valid UTF-8 and < 8KB for typical input; schema is valid JSON Schema (round-trip via serde_json) |
+| `client.rs` | success path; malformed JSON path (missing `findings`); HTTP 500; HTTP 401; request body shape (assert via serde_json::Value comparison) |
+| `cost.rs` | cap not exceeded; cap exceeded triggers error; `None` rates → no tracking |
+| `merge.rs` | overlapping ranges replace by confidence; false-positive drops seed; non-overlapping kept |
+
+### Integration test: `tests/deep_http_integration.rs`
+
+Uses `mockito` (sync). Three tests:
+
+1. **Happy path**: spin up `mockito::Server`; mock `POST /chat/completions` to return canned OpenAI-shaped response with one semantic finding; build a `DeepRuntime` pointing at `server.url()`; synthesize a structural `Finding` with `confidence: Low`; call `deep::run`; assert returned vec has 1 entry with `pass: ScanPass::Semantic` and right `line_start`.
+
+2. **Malformed JSON**: mock returns `{"choices":[{"message":{"content":"not json"}}]}` — assert `DeepError::BadResponse`.
+
+3. **Cost cap**: mock returns `usage: {prompt_tokens: 10000, completion_tokens: 5000}`; set `max_cost_usd: 0.01` and rates that exceed; expect `CostExceeded`.
+
+### Existing test impact
+
+The CLI test for `--provider` no longer applies; replace it with one for `--base-url`. Existing scan tests don't set `--deep`, so they should be unaffected; verify.
+
+## 9. Error handling
+
+| Failure | Behavior |
+|---|---|
+| Malformed JSON from model | One retry with degraded prompt; if still bad, log warning + drop candidate, continue |
+| HTTP timeout | Configurable per-request timeout (default 120s); on timeout, log + drop candidate |
+| API key missing | Warn at startup if base_url is non-localhost; allow it (local servers don't need keys) |
+| Cost ceiling hit mid-run | Stop dispatching new candidates; finalize in-flight; warn with spent total; return findings collected so far |
+| HTTP 401/403 | Hard fail with clear "auth rejected by {base_url}" message |
+| HTTP 5xx | Exponential backoff (3 attempts at 1s, 4s, 16s) then drop |
+| `--deep` without `--model` | Hard fail at config-build time |
+| `--deep` without `--base-url` | Hard fail at config-build time (no default — user intent matters) |
+
+Drop-and-continue is the right policy: structural findings still ship; semantic is best-effort enrichment.
+
+## 10. Cost tracking
+
+User-supplied per-1k-token rates. Two new `[deep]` fields, `cost_per_1k_input` and `cost_per_1k_output`:
+
+```toml
+[deep]
+base_url = "http://localhost:11434/v1"
+model = "qwen2.5-coder:14b"
+max_cost = 5.00
+cost_per_1k_input = 0.0      # local model = free
+cost_per_1k_output = 0.0
+```
+
+Logic in `cost::record`:
+
+```
+delta = (in_tokens / 1000.0) * in_rate + (out_tokens / 1000.0) * out_rate
+spent += delta
+if cap.is_some_and(|c| spent > c): Err(CostExceeded { spent })
+```
+
+If both rates are `None`, skip tracking (spent = 0, never errors). Token counts come from response `usage`; if a server omits usage, log debug and treat `delta = 0`.
+
+CLI `--max-cost` wins over toml; CLI flags for the rates intentionally not added — they belong in the config file.
+
+## 11. Commit sequence
+
+Six commits, each compiling and passing tests:
+
+1. **`refactor(cli): drop closed LlmProvider enum, add --base-url`** — `cli.rs`, `config.rs`, `commands/init.rs`, CLI tests. Stub-only deep scan still prints the warning.
+2. **`feat(deep): add deep module skeleton with config + error types`** — empty modules with type definitions; `deep::run` returns `Ok(vec![])`; wired into `commands/scan.rs`; tests for `config::build`. Expose `compute_finding_id` from scanner.
+3. **`feat(deep): candidate selection and context expansion`** — `candidate.rs`, `context.rs` with tests. `deep::run` produces candidates but returns empty findings.
+4. **`feat(deep): prompt rendering and JSON schema`** — `prompt.rs`, `finding.rs`. `output_schema()` and `SYSTEM_PROMPT` exported. Tests for prompt validity.
+5. **`feat(deep): OpenAI-compatible HTTP client`** — `client.rs` + reqwest dep + cost tracker. Unit tests with no network. Integration test in `tests/` with mockito (mocked happy path + bad-JSON + cost-cap).
+6. **`feat(deep): merge semantic findings into scan output`** — `merge.rs`; wire end-to-end in `commands/scan.rs`; update existing scan tests to assert `pass`; end-to-end test with mockito producing a real `ScanPass::Semantic` finding.
+
+Each commit ~150-400 lines of diff, reviewable independently. PR title for the merge: `feat: implement --deep with OpenAI-compatible HTTP transport`.
+
+## 12. Risks & open questions
+
+1. **Tree-sitter Python/Go grammars not pulled in** today. Cold-region scanning is regex-based (language-agnostic), so semantic could technically run on Python files even though structural can't. **Decision needed**: gate cold-region candidate creation to languages where `parser::is_language_supported(lang)` is true. Avoids surprising "deep scans Python but structural ignores it" UX.
+2. **`response_format` not universally supported.** Ollama supports it from 0.5+; llama.cpp support is partial. Plan: send it; on parse failure, retry without it. Follow-up issue: capability detection at startup.
+3. **`OPENAI_API_KEY` fallback?** CLI already wires `ZIFT_API_KEY` env. **Decision needed**: do we *also* fall back to `OPENAI_API_KEY` for users re-using existing env? Lean **no** — explicit > implicit, and we're not OpenAI.
+4. **Concurrency default**. `max_concurrent: 4` is conservative. Local servers with one GPU may degrade with parallelism > 1. Document this; consider auto-detecting localhost in base_url and capping to 1. **Defer to PR 1 review.**
+5. **Determinism in CI**. With `temperature: 0.0` and a mock server, integration tests are deterministic. Real-LLM tests are not — don't add any.
+6. **`compute_finding_id` location**. Currently private in `scanner/matcher.rs`. Need to expose at `crate::types::compute_finding_id` or similar. Small refactor in commit 2.
+
+## 13. Critical files
+
+- `src/cli.rs`
+- `src/config.rs`
+- `src/commands/scan.rs`
+- `src/commands/init.rs`
+- `src/types.rs`
+- `src/scanner/matcher.rs` (expose finding_id)
+- `Cargo.toml`
diff --git a/plans/todo/02-pr2-mcp-server.md b/plans/todo/02-pr2-mcp-server.md
new file mode 100644
index 0000000..462fb15
--- /dev/null
+++ b/plans/todo/02-pr2-mcp-server.md
@@ -0,0 +1,94 @@
+# PR 2 — Tier 1 deep scan: MCP server
+
+Companion to [00-deep-mode-overview.md](./00-deep-mode-overview.md). Builds on the primitives shipped in [PR 1](./01-pr1-deep-http-transport.md). This is the strategically headline transport — it inverts the model relationship so Zift never hosts an LLM client; the user's existing agent host (Claude Code, Cursor, Continue, Cline, Zed, etc.) calls Zift as an MCP tool provider.
+
+**Status**: not started. Depends on PR 1 landing.
+
+## 1. Goal & scope
+
+Add `zift mcp` subcommand that runs Zift as an MCP server over stdio. The agent host connects, calls our tools, and gets back structured authz findings + ground-truth Rego validation. The agent host owns the model; we own the authz expertise.
+
+Out of scope: HTTP-transport MCP (stdio is the universal default), authentication, multi-client.
+
+## 2. Subcommand
+
+```
+zift mcp [--rules-dir DIR] [--scan-root DIR]
+```
+
+Speaks JSON-RPC 2.0 over stdio per the MCP spec. `--scan-root` defaults to cwd; `--rules-dir` follows existing precedence.
+
+## 3. Tools exposed
+
+| Tool | Purpose | Reuses |
+|---|---|---|
+| `scan_authz` | Run structural scan on a path; return findings JSON | `scanner::scan` |
+| `get_finding_context` | Expand a finding's snippet (lines before/after, or smart enclosing function) | `deep::context::expand_finding` |
+| `list_rules` | Enumerate the rule library (id, language, category, confidence, description) | `rules::loader` |
+| `get_rule` | Fetch one rule's full definition incl. tree-sitter query and Rego template | `rules::loader` |
+| `suggest_rego` | Render a rule's Rego template against a finding's captures | existing rendering |
+| `validate_rego` | Run OPA against a Rego policy; return parse errors / test results | `rego::validate` (may need shelling out to `opa` or embedding `regorus`) |
+| `analyze_snippet` | Given a snippet + language + optional seed, return the rendered prompt + schema. Does NOT call any model. | `deep::prompt::render`, `deep::prompt::output_schema` |
+
+`analyze_snippet` is the key trick: the MCP server returns the prompt and schema; the agent host's model produces the response; the host can then call an optional `submit_analysis` tool (not listed above; to be specified if we add it) to register the result. Or it just keeps the result locally — Zift doesn't have to track it.
+
+## 4. Resources exposed
+
+| Resource | URI | Content |
+|---|---|---|
+| `rule://<rule_id>` | per-rule | Full TOML rule + human-readable docs |
+| `category://<auth_category>` | per category | One-paragraph definition + canonical examples |
+| `prompt://system` | singleton | The `SYSTEM_PROMPT` constant from `deep::prompt` |
+| `prompt://schema` | singleton | The `output_schema()` JSON Schema |
+
+This is how the MCP-attached agent learns *how* to think about authz — by reading the resources we already wrote for PR 1.
+
+## 5. Crate dependencies
+
+Use the official Rust MCP SDK if it exists; otherwise hand-roll JSON-RPC 2.0 over stdio (small, well-spec'd protocol). As of this writing the canonical SDK is `rmcp` (modelcontextprotocol/rust-sdk). Verify currency at PR start.
+
+```toml
+[dependencies]
+rmcp = "..."  # or whatever the official Rust SDK is at PR-start time
+```
+
+If no maintained SDK exists, the alternative is roughly 300 lines of stdio + serde + JSON-RPC framing.
+
+## 6. Architectural reuse
+
+PR 2 should not implement *any* prompt logic, *any* candidate selection, *any* output schema. Those are imported verbatim from `crate::deep::prompt` and `crate::deep::candidate`.
+
+```rust
+use crate::deep::prompt::{SYSTEM_PROMPT, output_schema, render};
+use crate::deep::context::expand_finding;
+use crate::deep::candidate::select_candidates;
+```
+
+The MCP server is a transport, period.
+
+## 7. Open questions (resolve before kickoff)
+
+1. **Rust MCP SDK maturity.** If `rmcp` is still 0.x with breaking changes per release, we may want to pin or vendor.
+2. **Streaming.** MCP supports streaming responses. Worth using for long scans? Probably yes for `scan_authz` on large repos. Confirm SDK supports it.
+3. **`validate_rego` implementation.** Shell out to `opa` binary (requires user to have it installed) vs embed `regorus` (Rust-native OPA-compatible evaluator). Lean toward `regorus` for zero-install UX. Verify rule coverage parity.
+4. **Multi-tenancy.** MCP servers are typically single-client; do we need to handle concurrent calls? Stdio means one client at a time, so no — keep it single-threaded internally.
+5. **Logging.** stdio is the wire; logs must go to stderr only. Audit existing `eprintln!` / log calls in `scanner::*` to ensure none accidentally write to stdout.
+
+## 8. Test plan (sketch)
+
+- Unit-test each tool handler against a fake JSON-RPC framer.
+- Integration test: spawn `zift mcp` as subprocess, send canned `tools/list`, `tools/call` messages, assert responses.
+- One smoke test that loads `prompt://system` and `prompt://schema` and asserts they match the constants in `crate::deep::prompt`.
+
+## 9. Commit sequence (rough)
+
+1. `feat(mcp): add zift mcp subcommand stub` — CLI wiring, prints "MCP server starting" and exits.
+2. `feat(mcp): JSON-RPC 2.0 stdio framing` — protocol layer, no tools yet.
+3. `feat(mcp): expose rule library as tools and resources` — `scan_authz`, `list_rules`, `get_rule`, `rule://*`, `category://*`.
+4. `feat(mcp): expose deep-mode primitives as tools` — `get_finding_context`, `analyze_snippet`, `prompt://system`, `prompt://schema`.
+5. `feat(mcp): expose Rego suggestion and validation` — `suggest_rego`, `validate_rego`.
+6. `docs(mcp): example agent host configs` — Claude Code, Cursor, Continue snippets in README or docs/.
+
+## 10. Decision deferred from PR 1
+
+If during PR 1 we find the prompt library / candidate selection abstractions need a different shape to also serve the MCP path, fix them in PR 1 before merging — don't ship a shape we'll break in PR 2.
diff --git a/plans/todo/03-pr3-subprocess-hook.md b/plans/todo/03-pr3-subprocess-hook.md
new file mode 100644
index 0000000..fffd60d
--- /dev/null
+++ b/plans/todo/03-pr3-subprocess-hook.md
@@ -0,0 +1,107 @@
+# PR 3 — Tier 3 deep scan: subprocess hook
+
+Companion to [00-deep-mode-overview.md](./00-deep-mode-overview.md). Builds on the primitives shipped in [PR 1](./01-pr1-deep-http-transport.md). The smallest of the three transports — an escape hatch for any agent that doesn't fit Tier 1 (MCP) or Tier 2 (HTTP).
+
+**Status**: not started. Depends on PR 1 landing.
+
+## 1. Goal & scope
+
+Add `--agent-cmd "<command>"` flag. Zift writes the rendered prompt + candidate JSON to the subprocess's stdin and reads JSON matching the deep-mode schema from its stdout. Use cases:
+
+- `claude -p` (the Claude Code CLI in print mode)
+- `aider` running in a constrained mode
+- A user shell script that does whatever wrapping they need
+- Any agent that exposes a stdin-in / stdout-out contract
+
+Out of scope: process pooling, IPC beyond stdin/stdout, environment-variable injection beyond what the user's shell provides.
+
+## 2. CLI surface
+
+```
+zift scan ./repo --deep --agent-cmd "claude -p --output-format json"
+```
+
+In `.zift.toml`:
+
+```toml
+[deep]
+mode = "subprocess"
+agent_cmd = "claude -p --output-format json"
+agent_timeout_secs = 600     # generous; LLM CLIs can be slow
+```
+
+Mode resolution: CLI `--agent-cmd` implies `mode = "subprocess"`. Explicit `mode = "subprocess"` without `agent_cmd` is a hard error.
+
+## 3. Implementation sketch
+
+New module: `src/deep/subprocess.rs`. Roughly 100-150 lines.
+
+```rust
+pub struct SubprocessClient {
+    cmd: String,
+    timeout: Duration,
+}
+
+impl SubprocessClient {
+    pub fn new(runtime: &DeepRuntime) -> Result<Self, DeepError>;
+    pub fn analyze(&self, prompt: &RenderedPrompt) -> Result<AnalyzeResponse, DeepError>;
+}
+```
+
+`analyze` flow:
+
+1. Spawn the command via `std::process::Command::new("sh").arg("-c").arg(&self.cmd)` with stdin/stdout piped.
+2. Write to stdin a single JSON envelope: `{ "system": ..., "user": ..., "schema": ... }`. Close stdin.
+3. Read stdout to EOF (with timeout).
+4. Parse stdout as the same JSON schema as PR 1's HTTP path (`output_schema`).
+5. Return `AnalyzeResponse { findings, usage: TokenUsage::default() }` (all-zero usage) — token tracking is N/A here.
+
+## 4. Architectural reuse
+
+The orchestrator (`deep::run`) becomes generic over the analyzer:
+
+```rust
+trait Analyzer {
+    fn analyze(&self, prompt: &RenderedPrompt) -> Result<AnalyzeResponse, DeepError>;
+}
+
+impl Analyzer for OpenAiCompatibleClient { ... }   // PR 1
+impl Analyzer for SubprocessClient { ... }         // PR 3
+```
+
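+The `Analyzer` trait is introduced in this PR (or pulled forward into PR 1 if the PR 2 work turns out to need it first). The candidate selector, prompt renderer, schema, and merge are all reused unchanged; the cost tracker is also unchanged but effectively idle for this transport (see §5).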
+
+`mode` field on `DeepRuntime` selects which analyzer to instantiate.
+
+## 5. Cost tracking
+
+N/A. Subprocess agents don't return token counts in any standard way. `CostTracker` is bypassed for this transport (treat every call as $0). If users want a ceiling, they enforce it externally — `timeout`, `ulimit`, or a wrapper script that counts invocations.
+
+## 6. Test plan
+
+- Unit-test envelope construction (JSON shape).
+- Integration test with a tiny shell-script "agent": writes a canned response to stdout. Mock contract is portable (POSIX `cat <<EOF` style). Skip on Windows or use a Rust binary in `tests/fixtures/`.
+- Test for malformed-stdout error path.
+- Test for timeout (subprocess that sleeps forever).
+- Test for nonzero exit (subprocess that exits 1 — should surface as `DeepError`).
+
+## 7. Open questions
+
+1. **Shell vs direct exec.** Using `sh -c` lets users supply pipelines (`claude -p | jq ...`) but creates a Windows-portability issue. Lean: `sh -c` on Unix, `cmd /c` on Windows. Or document Unix-only for v1.
+2. **Stdin envelope format.** JSON object with `{system, user, schema}` (proposed) vs a single big string concatenated for the user prompt. Lean toward JSON — agents that wrap real LLMs can route system/user separately, and agents that don't need the split can trivially concatenate the fields.
+3. **Concurrency.** Default to `max_concurrent: 1` for subprocess (some agent CLIs serialize internally; spawning 4 of them is not always faster). Document this.
+4. **Streaming output.** Some agent CLIs stream tokens. We require the final full JSON; if the CLI streams partial JSON, we read to EOF and parse the whole buffer. Document.
+
+## 8. Commit sequence (rough)
+
+1. `refactor(deep): introduce Analyzer trait, port OpenAI client to it`
+2. `feat(deep): add subprocess analyzer + --agent-cmd flag`
+3. `test(deep): subprocess integration test with shell-script fixture`
+4. `docs: example agent-cmd usages (claude -p, aider, custom script)`
+
+Each commit small and reviewable.
+
+## 9. Risks
+
+- **Hard to debug.** When a user's `agent_cmd` returns garbage, the failure mode is opaque. Always print the first ~500 bytes of stdout to stderr on parse failure. Always print stderr from the subprocess on nonzero exit.
+- **Security.** Running arbitrary shell commands the user configured is a footgun if `.zift.toml` is checked into a repo and Zift is run by another user: whoever controls the repo controls what gets executed. Document this; consider warning when `agent_cmd` is read from a `.zift.toml` not owned by the running user.

From eb56bd9be8c9716440713983ab6ec24c9f6da73b Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@enforceauth.com>
Date: Tue, 28 Apr 2026 20:48:25 -0400
Subject: [PATCH 02/18] docs(plans): lock three decisions for PR 1 deep-mode
 design

- Cold-region scanning is ungated across all Language enum entries.
  Regex-based discovery is grammar-free, so the semantic pass can ship
  useful coverage of v0.2/v0.3 languages (Python, Go, etc.) before
  their structural grammars land. Today's discovery::discover_files is
  TS/JS/Java-only and will need to be extended (or paralleled) for deep.
- No OPENAI_API_KEY env fallback. Only ZIFT_API_KEY.
- Auto-cap max_concurrent to 1 when base_url host is localhost (single
  GPU servers serialize internally); explicit config overrides.

Also corrects misleading wording: tree-sitter grammars for Python/Go
exist in the ecosystem; Zift simply hasn't integrated them yet (v0.2
and v0.3 roadmap).
---
 plans/todo/01-pr1-deep-http-transport.md | 26 ++++++++++++++++--------
 1 file changed, 18 insertions(+), 8 deletions(-)

diff --git a/plans/todo/01-pr1-deep-http-transport.md b/plans/todo/01-pr1-deep-http-transport.md
index 7edc44c..dee9c7c 100644
--- a/plans/todo/01-pr1-deep-http-transport.md
+++ b/plans/todo/01-pr1-deep-http-transport.md
@@ -327,10 +327,13 @@ Keep it short — small local models struggle with long prompts.
 - Skip `confidence: High` — already trusted; sending them just costs money.
 
 **Cold regions** (pull):
-- For each scanned source file (reuse `discovery::discover_files`), regex-match function/method names: `(?i)(authori[sz]e|authenticate|require|ensure|guard|protect|allow|deny|check|can|may|isAdmin|hasRole|hasPermission)` plus the language's function-keyword regex.
+- Walk all source files in the scan root via an extension-based discovery that maps to the full `Language` enum, **not** just structurally-supported languages. (`discovery::discover_files` today is restricted to TS/JS/Java; we either extend it or add a `discover_files_for_deep` variant that covers Python, Go, C#, Kotlin, Ruby, PHP file extensions too.)
+- Regex-match function/method names: `(?i)(authori[sz]e|authenticate|require|ensure|guard|protect|allow|deny|check|can|may|isAdmin|hasRole|hasPermission)` plus a per-language function-keyword anchor where known (`function`, `def`, `func`, `fun`, `public`, `private`, `fn`, `=> {`). Languages we don't have a function-keyword for fall back to auth-name-only matching — slightly noisier but still useful.
 - Each match becomes one `ColdRegion` candidate. Cap cold regions at 30% of `max_candidates` so escalations get priority.
 - De-duplicate cold regions against escalation file/line ranges.
 
+**Why ungated across all languages**: structural support is at v0.1 (TS/JS/Java); Python/Go are v0.2 roadmap, C#/Kotlin/Ruby/PHP are v0.3. Cold-region scanning is regex-based and grammar-free, so the semantic pass becomes a way to ship *useful* coverage of v0.2/v0.3 languages **before** their structural grammars land. Early adopters running `--deep` against a Python or Go codebase get value today.
+
 Determinism: candidates sorted by `(file, line_start)` so reruns produce identical input ordering, keeping test expectations stable.
 
 ## 7. Context expansion strategy
@@ -341,6 +344,8 @@ Two-tier:
 
 **Smart path** (used when fast-path snippet is < 8 lines after window): re-parse with tree-sitter (`parser::parse_source` already exists); walk up from the original node to the nearest `function_declaration | method_definition | arrow_function | function_expression | class_declaration`; expand to that node's range. Cap at 200 lines to bound prompt size.
 
+**Smart-path only available for languages with an integrated tree-sitter grammar** — today TS/JS/Java. Python/Go/etc. fall through to the line-window fast path until their grammars land in v0.2/v0.3. This is fine: the model can usually figure out function boundaries from a generous line window, especially with the file header (imports) included.
+
 Truncate the final snippet at `runtime.max_prompt_chars` (default 16000) to prevent foot-guns on huge functions.
 
 ## 8. Test plan
@@ -424,14 +429,19 @@ Six commits, each compiling and passing tests:
 
 Each commit ~150-400 lines of diff, reviewable independently. PR title for the merge: `feat: implement --deep with OpenAI-compatible HTTP transport`.
 
-## 12. Risks & open questions
+## 12. Decisions & open issues
+
+### Locked decisions
+
+1. **Cold-region scanning is ungated across languages.** Runs on every language in the `Language` enum, including ones without a tree-sitter grammar. Rationale in §6. Implementation note: `discovery::discover_files` today only emits TS/JS/Java extensions; deep mode either extends it or adds a `discover_files_for_deep` that covers all `Language` extensions.
+2. **No `OPENAI_API_KEY` fallback.** Only `ZIFT_API_KEY` is honored from the environment. Explicit > implicit; Zift is not OpenAI.
+3. **Localhost concurrency auto-cap.** When `base_url` host is `localhost` or `127.0.0.1` (or `::1`), `max_concurrent` defaults to 1. Local single-GPU servers serialize internally; parallelism > 1 just adds queueing. User can override via explicit `[deep] max_concurrent = N`.
+
+### Open issues
 
-1. **Tree-sitter Python/Go grammars not pulled in** today. Cold-region scanning is regex-based (language-agnostic), so semantic could technically run on Python files even though structural can't. **Decision needed**: gate cold-region candidate creation to languages where `parser::is_language_supported(lang)` is true. Avoids surprising "deep scans Python but structural ignores it" UX.
-2. **`response_format` not universally supported.** Ollama 0.5+, llama.cpp partial. Plan: send it; on parse failure, retry without it. Follow-up issue: capability detection at startup.
-3. **`OPENAI_API_KEY` fallback?** CLI already wires `ZIFT_API_KEY` env. **Decision needed**: do we *also* fall back to `OPENAI_API_KEY` for users re-using existing env? Lean **no** — explicit > implicit, and we're not OpenAI.
-4. **Concurrency default**. `max_concurrent: 4` is conservative. Local servers with one GPU may degrade with parallelism > 1. Document this; consider auto-detecting localhost in base_url and capping to 1. **Defer to PR 1 review.**
-5. **Determinism in CI**. With `temperature: 0.0` and a mock server, integration tests are deterministic. Real-LLM tests are not — don't add any.
-6. **`compute_finding_id` location**. Currently private in `scanner/matcher.rs`. Need to expose at `crate::types::compute_finding_id` or similar. Small refactor in commit 2.
+1. **`response_format` not universally supported.** Ollama 0.5+, llama.cpp partial. Plan: send it; on parse failure, retry without it. Follow-up issue: capability detection at startup vs per-call.
+2. **Determinism in CI.** With `temperature: 0.0` and a mock server, integration tests are deterministic. Real-LLM tests are not — don't add any.
+3. **`compute_finding_id` location.** Currently private in `scanner/matcher.rs`. Need to expose at `crate::types::compute_finding_id` or similar. Small refactor in commit 2.
 
 ## 13. Critical files
 

From f479ac3e0920f2ba00e8121e8a3425846c6c67c5 Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@enforceauth.com>
Date: Tue, 28 Apr 2026 20:54:49 -0400
Subject: [PATCH 03/18] refactor(cli): replace closed LlmProvider enum with
 --base-url
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Anthropic/Openai/Ollama enum was a maintenance treadmill. Replace
with a flat --base-url that points at any OpenAI-compatible
chat-completions endpoint, so one client covers Ollama, LM Studio,
llama.cpp, vLLM, OpenRouter, OpenAI, Anthropic-via-proxy, etc.

Also renames ZIFT_API_KEY → ZIFT_AGENT_API_KEY (namespaced and semantic;
leaves room for a ZIFT_AGENT_* family). Drops api_key from .zift.toml
schema — keys belong in env or CLI, not source-controlled files.

--deep is still a stub; first of six commits implementing it end-to-end
via the OpenAI-compatible HTTP transport (see plans/todo/).
---
 docs/DESIGN.md                           |  8 ++---
 plans/todo/01-pr1-deep-http-transport.md | 11 ++++--
 src/cli.rs                               | 44 +++++++++++++++++-------
 src/commands/init.rs                     |  6 ++--
 src/config.rs                            | 21 +++++++----
 5 files changed, 62 insertions(+), 28 deletions(-)

diff --git a/docs/DESIGN.md b/docs/DESIGN.md
index ef8be06..92f2068 100644
--- a/docs/DESIGN.md
+++ b/docs/DESIGN.md
@@ -260,10 +260,10 @@ SCAN OPTIONS:
     --config            Path to config file (default: .zift.toml)
 
 DEEP SCAN OPTIONS:
-    --provider          LLM provider (anthropic|openai|ollama)
-    --model             Model to use (default: provider-specific)
-    --max-cost          Maximum spend limit for LLM calls
-    --api-key           API key (or set ZIFT_API_KEY / provider-specific env vars)
+    --base-url          OpenAI-compatible endpoint (e.g. http://localhost:11434/v1)
+    --model             Model name to send to the agent endpoint
+    --max-cost          Maximum spend limit in USD
+    --api-key           API key for the agent endpoint (or set ZIFT_AGENT_API_KEY)
 
 EXTRACT OPTIONS:
     --input, -i         Findings file (default: stdin or last scan)
diff --git a/plans/todo/01-pr1-deep-http-transport.md b/plans/todo/01-pr1-deep-http-transport.md
index dee9c7c..fc9324c 100644
--- a/plans/todo/01-pr1-deep-http-transport.md
+++ b/plans/todo/01-pr1-deep-http-transport.md
@@ -66,7 +66,12 @@ pub struct DeepRuntime {
 pub fn build(args: &ScanArgs, config: &ZiftConfig) -> Result<DeepRuntime, DeepError>;
 ```
 
-Resolution precedence: CLI flag > env var (`ZIFT_API_KEY`) > `[deep]` config table > built-in default. Validation: empty `base_url` is hard error; missing `model` is hard error; missing `api_key` is a warning (not an error — Ollama/llama.cpp accept any value).
+Resolution precedence:
+
+- `base_url`, `model`, `max_cost`: CLI flag > `[deep]` config > built-in default.
+- `api_key`: CLI flag (`--api-key`) > env var (`ZIFT_AGENT_API_KEY`) > unset. **Intentionally NOT readable from `.zift.toml`** — keys belong in env vars or CLI to avoid accidental secret commits.
+
+Validation: empty `base_url` is hard error; missing `model` is hard error; missing `api_key` is a warning (not an error — Ollama/llama.cpp accept any value).
 
 ### `src/deep/error.rs`
 
@@ -420,7 +425,7 @@ CLI `--max-cost` wins over toml; CLI flags for the rates intentionally not added
 
 Six commits, each compiling and passing tests:
 
-1. **`refactor(cli): drop closed LlmProvider enum, add --base-url`** — `cli.rs`, `config.rs`, `commands/init.rs`, CLI tests. Stub-only deep scan still prints the warning.
+1. **`refactor(cli): drop closed LlmProvider enum, add --base-url, rename env var`** — `cli.rs`, `config.rs`, `commands/init.rs`, `docs/DESIGN.md`, CLI tests. Renames `ZIFT_API_KEY` → `ZIFT_AGENT_API_KEY`; `api_key` removed from config-file schema. Stub-only deep scan still prints the warning.
 2. **`feat(deep): add deep module skeleton with config + error types`** — empty modules with type definitions; `deep::run` returns `Ok(vec![])`; wired into `commands/scan.rs`; tests for `config::build`. Expose `compute_finding_id` from scanner.
 3. **`feat(deep): candidate selection and context expansion`** — `candidate.rs`, `context.rs` with tests. `deep::run` produces candidates but returns empty findings.
 4. **`feat(deep): prompt rendering and JSON schema`** — `prompt.rs`, `finding.rs`. `output_schema()` and `SYSTEM_PROMPT` exported. Tests for prompt validity.
@@ -434,7 +439,7 @@ Each commit ~150-400 lines of diff, reviewable independently. PR title for the m
 ### Locked decisions
 
 1. **Cold-region scanning is ungated across languages.** Runs on every language in the `Language` enum, including ones without a tree-sitter grammar. Rationale in §6. Implementation note: `discovery::discover_files` today only emits TS/JS/Java extensions; deep mode either extends it or adds a `discover_files_for_deep` that covers all `Language` extensions.
-2. **No `OPENAI_API_KEY` fallback.** Only `ZIFT_API_KEY` is honored from the environment. Explicit > implicit; Zift is not OpenAI.
+2. **No `OPENAI_API_KEY` fallback.** Only `ZIFT_AGENT_API_KEY` is honored from the environment. Explicit > implicit; Zift is not OpenAI.
 3. **Localhost concurrency auto-cap.** When `base_url` host is `localhost` or `127.0.0.1` (or `::1`), `max_concurrent` defaults to 1. Local single-GPU servers serialize internally; parallelism > 1 just adds queueing. User can override via explicit `[deep] max_concurrent = N`.
 
 ### Open issues
diff --git a/src/cli.rs b/src/cli.rs
index a990ffa..39a6d46 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -86,20 +86,23 @@ pub struct ScanArgs {
     pub rules_dir: Option<PathBuf>,
 
     // -- Deep scan options --
-    /// LLM provider (requires --deep)
+    /// Base URL of an OpenAI-compatible chat-completions endpoint (requires --deep)
+    ///
+    /// Examples: http://localhost:11434/v1 (Ollama), http://localhost:1234/v1 (LM Studio),
+    /// https://api.openai.com/v1, https://openrouter.ai/api/v1
     #[arg(long)]
-    pub provider: Option<LlmProvider>,
+    pub base_url: Option<String>,
 
-    /// Model to use (requires --deep)
+    /// Model name to send to the agent endpoint (requires --deep)
     #[arg(long)]
     pub model: Option<String>,
 
-    /// Maximum spend limit for LLM calls (requires --deep)
+    /// Maximum spend limit in USD (requires --deep)
     #[arg(long)]
     pub max_cost: Option<f64>,
 
-    /// API key (or set ZIFT_API_KEY / provider-specific env vars)
-    #[arg(long, env = "ZIFT_API_KEY")]
+    /// API key for the agent endpoint (or set ZIFT_AGENT_API_KEY)
+    #[arg(long, env = "ZIFT_AGENT_API_KEY")]
     pub api_key: Option<String>,
 }
 
@@ -181,13 +184,6 @@ pub enum ReportFormat {
     Markdown,
 }
 
-#[derive(Debug, Clone, Copy, PartialEq, Eq, ValueEnum)]
-pub enum LlmProvider {
-    Anthropic,
-    Openai,
-    Ollama,
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
@@ -288,4 +284,26 @@ mod tests {
         let cli = Cli::try_parse_from(["zift", "-vvv", "."]).unwrap();
         assert_eq!(cli.verbose, 3);
     }
+
+    #[test]
+    fn deep_scan_with_base_url() {
+        let cli = Cli::try_parse_from([
+            "zift",
+            "scan",
+            "--deep",
+            "--base-url",
+            "http://localhost:11434/v1",
+            "--model",
+            "qwen2.5-coder:14b",
+            ".",
+        ])
+        .unwrap();
+        if let Some(Command::Scan(args)) = cli.command {
+            assert!(args.deep);
+            assert_eq!(args.base_url.as_deref(), Some("http://localhost:11434/v1"));
+            assert_eq!(args.model.as_deref(), Some("qwen2.5-coder:14b"));
+        } else {
+            panic!("expected Scan command");
+        }
+    }
 }
diff --git a/src/commands/init.rs b/src/commands/init.rs
index 8a58700..2966029 100644
--- a/src/commands/init.rs
+++ b/src/commands/init.rs
@@ -7,9 +7,11 @@ exclude = ["vendor/**", "node_modules/**", "target/**"]
 # min_confidence = "medium"
 
 # [deep]
-# provider = "anthropic"
-# model = "claude-sonnet-4-20250514"
+# base_url = "http://localhost:11434/v1"   # Ollama, LM Studio, OpenAI-compatible
+# model    = "your-model-name"
 # max_cost = 5.00
+# # API key: set $ZIFT_AGENT_API_KEY in your environment, or pass --api-key.
+# # Do NOT put the key in this file — it gets checked into source control.
 
 [extract]
 package_prefix = "app.authz"
diff --git a/src/config.rs b/src/config.rs
index f18ee0a..265fc99 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -24,9 +24,14 @@ pub struct ScanConfig {
 #[derive(Debug, Default, Deserialize)]
 #[serde(default)]
 pub struct DeepConfig {
-    pub provider: Option<String>,
+    /// OpenAI-compatible chat-completions endpoint, e.g. "http://localhost:11434/v1".
+    pub base_url: Option<String>,
+    /// Model name to send to the agent endpoint.
     pub model: Option<String>,
+    /// Maximum spend limit in USD.
     pub max_cost: Option<f64>,
+    // NOTE: api_key is intentionally NOT readable from this file — keys belong
+    // in $ZIFT_AGENT_API_KEY or --api-key, not checked into source control.
 }
 
 #[derive(Debug, Default, Deserialize)]
@@ -66,7 +71,7 @@ mod tests {
     fn default_config() {
         let config = ZiftConfig::default();
         assert!(config.scan.exclude.is_empty());
-        assert!(config.deep.provider.is_none());
+        assert!(config.deep.base_url.is_none());
         assert!(config.extract.package_prefix.is_none());
     }
 
@@ -79,8 +84,8 @@ languages = ["java", "typescript"]
 min_confidence = "medium"
 
 [deep]
-provider = "anthropic"
-model = "claude-sonnet-4-20250514"
+base_url = "http://localhost:11434/v1"
+model = "qwen2.5-coder:14b"
 max_cost = 5.00
 
 [extract]
@@ -93,7 +98,11 @@ additional = ["./custom-rules"]
         let config: ZiftConfig = toml::from_str(toml).unwrap();
         assert_eq!(config.scan.exclude.len(), 2);
         assert_eq!(config.scan.languages, vec!["java", "typescript"]);
-        assert_eq!(config.deep.provider.as_deref(), Some("anthropic"));
+        assert_eq!(
+            config.deep.base_url.as_deref(),
+            Some("http://localhost:11434/v1")
+        );
+        assert_eq!(config.deep.model.as_deref(), Some("qwen2.5-coder:14b"));
         assert_eq!(config.deep.max_cost, Some(5.0));
         assert_eq!(config.extract.package_prefix.as_deref(), Some("app.authz"));
         assert_eq!(config.rules.additional, vec!["./custom-rules"]);
@@ -108,7 +117,7 @@ exclude = ["vendor/**"]
         let config: ZiftConfig = toml::from_str(toml).unwrap();
         assert_eq!(config.scan.exclude, vec!["vendor/**"]);
         assert!(config.scan.languages.is_empty());
-        assert!(config.deep.provider.is_none());
+        assert!(config.deep.base_url.is_none());
     }
 
     #[test]

From b7bd17821599cf8558a1715657610e3ee95b5923 Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@enforceauth.com>
Date: Tue, 28 Apr 2026 21:03:35 -0400
Subject: [PATCH 04/18] feat(deep): add deep module skeleton with config +
 error types
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lays down the src/deep/ module tree with config + error types fully
implemented and the rest stubbed. Future commits fill in:

  candidate.rs   commit 3 — selection rules
  context.rs     commit 3 — line-window + tree-sitter expansion
  prompt.rs      commit 4 — system prompt + JSON schema
  finding.rs     commit 4 — SemanticFinding deserialization + translation
  client.rs      commit 5 — OpenAI-compatible HTTP client
  cost.rs        commit 5 — token-based USD ceiling
  merge.rs       commit 6 — overlap dedup + false-positive drops

This commit lands:

- DeepRuntime struct + config::build() with precedence/validation
  (12 new tests). Localhost concurrency auto-cap to 1 (single-GPU
  servers serialize internally) implemented and tested for IPv4/IPv6
  loopback plus "localhost".
- DeepError enum (thiserror); converts into ZiftError::Deep via #[from].
- Wiring: scan.rs builds the runtime up front so we fail fast on bad
  config, then calls deep::run after the structural scan and merges via
  deep::merge::merge. Both deep::run and merge are no-op pass-throughs
  in this commit.
- Exposes scanner::matcher::compute_finding_id as pub(crate) so the
  semantic-finding translator (commit 4) can compute deterministic IDs.

Stub modules carry #![allow(dead_code)]; each is removed when the real
implementation lands.

Refs plans/todo/01-pr1-deep-http-transport.md
---
 src/commands/scan.rs   |  28 ++++-
 src/deep/candidate.rs  |  46 ++++++++
 src/deep/client.rs     |  46 ++++++++
 src/deep/config.rs     | 241 +++++++++++++++++++++++++++++++++++++++++
 src/deep/context.rs    |  43 ++++++++
 src/deep/cost.rs       |  34 ++++++
 src/deep/error.rs      |  28 +++++
 src/deep/finding.rs    |  36 ++++++
 src/deep/merge.rs      |  24 ++++
 src/deep/mod.rs        |  52 +++++++++
 src/deep/prompt.rs     |  41 +++++++
 src/error.rs           |   5 +
 src/main.rs            |   1 +
 src/scanner/matcher.rs |   2 +-
 14 files changed, 620 insertions(+), 7 deletions(-)
 create mode 100644 src/deep/candidate.rs
 create mode 100644 src/deep/client.rs
 create mode 100644 src/deep/config.rs
 create mode 100644 src/deep/context.rs
 create mode 100644 src/deep/cost.rs
 create mode 100644 src/deep/error.rs
 create mode 100644 src/deep/finding.rs
 create mode 100644 src/deep/merge.rs
 create mode 100644 src/deep/mod.rs
 create mode 100644 src/deep/prompt.rs

diff --git a/src/commands/scan.rs b/src/commands/scan.rs
index 546e46b..e7e5ab6 100644
--- a/src/commands/scan.rs
+++ b/src/commands/scan.rs
@@ -1,5 +1,6 @@
 use crate::cli::{OutputFormat, ScanArgs};
 use crate::config::ZiftConfig;
+use crate::deep;
 use crate::error::{Result, ZiftError};
 use crate::output;
 use crate::rules;
@@ -20,11 +21,13 @@ pub fn execute(args: ScanArgs, config: ZiftConfig) -> Result<()> {
     })?;
     tracing::info!("scanning {}", path.display());
 
-    if args.deep {
-        eprintln!(
-            "warning: --deep (LLM-assisted) is not yet implemented, running structural scan only"
-        );
-    }
+    // Build deep-scan runtime config eagerly so we fail fast on bad config
+    // (missing --base-url / --model) before running an entire structural scan.
+    let deep_runtime = if args.deep {
+        Some(deep::config::build(&args, &config)?)
+    } else {
+        None
+    };
 
     // Warn about explicitly requested languages that lack parser support
     for lang in &args.language {
@@ -38,7 +41,20 @@ pub fn execute(args: ScanArgs, config: ZiftConfig) -> Result<()> {
     let loaded_rules = rules::load_rules(args.rules_dir.as_deref(), &config)?;
     tracing::info!("loaded {} pattern rules", loaded_rules.len());
 
-    let result = scanner::scan(&path, &loaded_rules, &args, &config)?;
+    let mut result = scanner::scan(&path, &loaded_rules, &args, &config)?;
+
+    if let Some(runtime) = deep_runtime.as_ref() {
+        tracing::info!(
+            "running deep scan: base_url={} model={} concurrency={}",
+            runtime.base_url,
+            runtime.model,
+            runtime.max_concurrent
+        );
+        let semantic = deep::run(&result.findings, &path, runtime)?;
+        if !semantic.is_empty() {
+            result.findings = deep::merge::merge(result.findings, semantic);
+        }
+    }
 
     let stdout = std::io::stdout();
     let mut writer: Box<dyn std::io::Write> = if let Some(ref out_path) = args.output {
diff --git a/src/deep/candidate.rs b/src/deep/candidate.rs
new file mode 100644
index 0000000..b098bb7
--- /dev/null
+++ b/src/deep/candidate.rs
@@ -0,0 +1,46 @@
+//! Candidate selection for the deep (semantic) scan.
+//!
+//! See plans/todo/01-pr1-deep-http-transport.md §6 for selection rules.
+//! Implementation lands in commit 3.
+
+// Stubs are used by future commits; suppress dead-code warnings until then.
+#![allow(dead_code)]
+
+use crate::deep::config::DeepRuntime;
+use crate::deep::error::DeepError;
+use crate::types::{AuthCategory, Finding, Language};
+use std::path::{Path, PathBuf};
+
+#[derive(Debug, Clone, Copy, PartialEq, Eq)]
+pub enum CandidateKind {
+    /// Re-evaluation of a structural finding (typically low/medium confidence).
+    Escalation,
+    /// Cold-region scan triggered by name-based heuristics. May or may not
+    /// correspond to a structural finding.
+    ColdRegion,
+}
+
+#[derive(Debug, Clone)]
+pub struct Candidate {
+    pub kind: CandidateKind,
+    pub file: PathBuf,
+    pub language: Language,
+    pub line_start: usize,
+    pub line_end: usize,
+    pub source_snippet: String,
+    /// Set iff `kind == Escalation` — the structural finding's id.
+    pub original_finding_id: Option<String>,
+    /// Hint for prompt selection (e.g. seed an RBAC-flavored prompt).
+    pub seed_category: Option<AuthCategory>,
+}
+
+/// Pick which structural findings to escalate and which file regions to
+/// cold-scan. Sorted deterministically by `(file, line_start)`.
+pub fn select_candidates(
+    _structural: &[Finding],
+    _scan_root: &Path,
+    _runtime: &DeepRuntime,
+) -> Result<Vec<Candidate>, DeepError> {
+    // TODO(commit 3): implement escalation rules + cold-region scanning.
+    Ok(Vec::new())
+}
diff --git a/src/deep/client.rs b/src/deep/client.rs
new file mode 100644
index 0000000..9134c91
--- /dev/null
+++ b/src/deep/client.rs
@@ -0,0 +1,46 @@
+//! OpenAI-compatible chat-completions HTTP client.
+//!
+//! POSTs to `{base_url}/chat/completions` with a structured-output request,
+//! parses the response into [`SemanticFinding`]s. One client speaks to any
+//! backend that exposes the OpenAI dialect (Ollama, LM Studio, llama.cpp,
+//! vLLM, OpenRouter, OpenAI itself, Anthropic-via-proxy, …).
+//!
+//! Implementation lands in commit 5 (where reqwest enters the build).
+
+#![allow(dead_code)]
+
+use crate::deep::config::DeepRuntime;
+use crate::deep::error::DeepError;
+use crate::deep::finding::SemanticFinding;
+use crate::deep::prompt::RenderedPrompt;
+
+#[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
+pub struct TokenUsage {
+    pub input_tokens: u32,
+    pub output_tokens: u32,
+}
+
+#[derive(Debug)]
+pub struct AnalyzeResponse {
+    pub findings: Vec<SemanticFinding>,
+    pub usage: TokenUsage,
+}
+
+/// HTTP client for an OpenAI-compatible chat-completions endpoint.
+///
+/// Fields land in commit 5 (`reqwest::blocking::Client`, base_url, api_key,
+/// model, temperature).
+pub struct OpenAiCompatibleClient {
+    // Body lands in commit 5.
+}
+
+impl OpenAiCompatibleClient {
+    pub fn new(_runtime: &DeepRuntime) -> Result<Self, DeepError> {
+        unimplemented!("OpenAiCompatibleClient::new: commit 5")
+    }
+
+    /// Send one prompt to the endpoint, return the parsed findings + usage.
+    pub fn analyze(&self, _prompt: &RenderedPrompt) -> Result<AnalyzeResponse, DeepError> {
+        unimplemented!("OpenAiCompatibleClient::analyze: commit 5")
+    }
+}
diff --git a/src/deep/config.rs b/src/deep/config.rs
new file mode 100644
index 0000000..07a6431
--- /dev/null
+++ b/src/deep/config.rs
@@ -0,0 +1,241 @@
+//! Resolve CLI args + `.zift.toml` into a runtime config for the deep scan.
+//!
+//! Precedence (see plans/todo/01-pr1-deep-http-transport.md §2):
+//!
+//! - `base_url`, `model`, `max_cost`: CLI flag > `[deep]` config > default.
+//! - `api_key`: CLI flag > `$ZIFT_AGENT_API_KEY` > unset. **Not** readable
+//!   from `.zift.toml` — keys belong in env or CLI, not source-controlled
+//!   files.
+
+// Most DeepRuntime fields are read by the HTTP client (commit 5) and
+// orchestrator (commit 6); dead-code warnings are silenced until those land.
+#![allow(dead_code)]
+
+use crate::cli::ScanArgs;
+use crate::config::ZiftConfig;
+use crate::deep::error::DeepError;
+
+/// Resolved runtime configuration for the deep (semantic) scan.
+#[derive(Debug, Clone)]
+pub struct DeepRuntime {
+    pub base_url: String,
+    pub model: String,
+    pub api_key: Option<String>,
+    pub max_cost_usd: Option<f64>,
+    pub cost_per_1k_input: Option<f64>,
+    pub cost_per_1k_output: Option<f64>,
+    pub request_timeout_secs: u64,
+    pub max_candidates: usize,
+    pub max_concurrent: usize,
+    pub temperature: f32,
+    pub max_prompt_chars: usize,
+}
+
+const DEFAULT_REQUEST_TIMEOUT_SECS: u64 = 120;
+const DEFAULT_MAX_CANDIDATES: usize = 50;
+const DEFAULT_MAX_PROMPT_CHARS: usize = 16_000;
+const DEFAULT_TEMPERATURE: f32 = 0.0;
+const DEFAULT_REMOTE_CONCURRENCY: usize = 4;
+const DEFAULT_LOCAL_CONCURRENCY: usize = 1;
+
+/// Heuristic check: is this base_url pointing at a local server?
+///
+/// Used to auto-cap concurrency to 1 — single-GPU local servers serialize
+/// internally, so parallelism > 1 just adds queue latency without throughput
+/// gain. An explicit `[deep] max_concurrent = N` override is not wired into `build` yet.
+fn is_localhost(base_url: &str) -> bool {
+    let lower = base_url.to_ascii_lowercase();
+    lower.contains("://localhost")
+        || lower.contains("://127.0.0.1")
+        || lower.contains("://[::1]")
+        || lower.contains("://0.0.0.0")
+}
+
+/// Resolve CLI args + config-file values into a [`DeepRuntime`].
+///
+/// Validates required fields; returns [`DeepError::Config`] on missing
+/// `base_url` or `model`.
+pub fn build(args: &ScanArgs, config: &ZiftConfig) -> Result<DeepRuntime, DeepError> {
+    let base_url = args
+        .base_url
+        .clone()
+        .or_else(|| config.deep.base_url.clone())
+        .filter(|s| !s.is_empty())
+        .ok_or_else(|| {
+            DeepError::Config(
+                "--base-url is required when --deep is set \
+                 (or set [deep] base_url in .zift.toml)"
+                    .into(),
+            )
+        })?;
+
+    let model = args
+        .model
+        .clone()
+        .or_else(|| config.deep.model.clone())
+        .filter(|s| !s.is_empty())
+        .ok_or_else(|| {
+            DeepError::Config(
+                "--model is required when --deep is set \
+                 (or set [deep] model in .zift.toml)"
+                    .into(),
+            )
+        })?;
+
+    let api_key = args.api_key.clone().filter(|s| !s.is_empty());
+    let max_cost_usd = args.max_cost.or(config.deep.max_cost);
+
+    let max_concurrent = if is_localhost(&base_url) {
+        DEFAULT_LOCAL_CONCURRENCY
+    } else {
+        DEFAULT_REMOTE_CONCURRENCY
+    };
+
+    Ok(DeepRuntime {
+        base_url,
+        model,
+        api_key,
+        max_cost_usd,
+        cost_per_1k_input: None,
+        cost_per_1k_output: None,
+        request_timeout_secs: DEFAULT_REQUEST_TIMEOUT_SECS,
+        max_candidates: DEFAULT_MAX_CANDIDATES,
+        max_concurrent,
+        temperature: DEFAULT_TEMPERATURE,
+        max_prompt_chars: DEFAULT_MAX_PROMPT_CHARS,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::config::DeepConfig;
+
+    fn args_with(
+        base_url: Option<&str>,
+        model: Option<&str>,
+        api_key: Option<&str>,
+        max_cost: Option<f64>,
+    ) -> ScanArgs {
+        ScanArgs {
+            deep: true,
+            base_url: base_url.map(String::from),
+            model: model.map(String::from),
+            api_key: api_key.map(String::from),
+            max_cost,
+            ..ScanArgs::default()
+        }
+    }
+
+    fn config_with(deep: DeepConfig) -> ZiftConfig {
+        ZiftConfig {
+            deep,
+            ..ZiftConfig::default()
+        }
+    }
+
+    #[test]
+    fn cli_wins_over_config() {
+        let args = args_with(Some("http://cli/v1"), Some("cli-model"), None, None);
+        let config = config_with(DeepConfig {
+            base_url: Some("http://config/v1".into()),
+            model: Some("config-model".into()),
+            max_cost: Some(1.0),
+        });
+        let runtime = build(&args, &config).unwrap();
+        assert_eq!(runtime.base_url, "http://cli/v1");
+        assert_eq!(runtime.model, "cli-model");
+    }
+
+    #[test]
+    fn config_used_when_cli_absent() {
+        let args = args_with(None, None, None, None);
+        let config = config_with(DeepConfig {
+            base_url: Some("http://config/v1".into()),
+            model: Some("config-model".into()),
+            max_cost: Some(2.5),
+        });
+        let runtime = build(&args, &config).unwrap();
+        assert_eq!(runtime.base_url, "http://config/v1");
+        assert_eq!(runtime.model, "config-model");
+        assert_eq!(runtime.max_cost_usd, Some(2.5));
+    }
+
+    #[test]
+    fn cli_max_cost_wins_over_config() {
+        let args = args_with(Some("http://x/v1"), Some("m"), None, Some(0.5));
+        let config = config_with(DeepConfig {
+            base_url: None,
+            model: None,
+            max_cost: Some(10.0),
+        });
+        let runtime = build(&args, &config).unwrap();
+        assert_eq!(runtime.max_cost_usd, Some(0.5));
+    }
+
+    #[test]
+    fn missing_base_url_errors() {
+        let args = args_with(None, Some("m"), None, None);
+        let err = build(&args, &ZiftConfig::default()).unwrap_err();
+        assert!(matches!(err, DeepError::Config(_)));
+    }
+
+    #[test]
+    fn missing_model_errors() {
+        let args = args_with(Some("http://x/v1"), None, None, None);
+        let err = build(&args, &ZiftConfig::default()).unwrap_err();
+        assert!(matches!(err, DeepError::Config(_)));
+    }
+
+    #[test]
+    fn empty_base_url_treated_as_missing() {
+        let args = args_with(Some(""), Some("m"), None, None);
+        let err = build(&args, &ZiftConfig::default()).unwrap_err();
+        assert!(matches!(err, DeepError::Config(_)));
+    }
+
+    #[test]
+    fn empty_api_key_normalized_to_none() {
+        let args = args_with(Some("http://x/v1"), Some("m"), Some(""), None);
+        let runtime = build(&args, &ZiftConfig::default()).unwrap();
+        assert!(runtime.api_key.is_none());
+    }
+
+    #[test]
+    fn localhost_caps_concurrency_to_one() {
+        let args = args_with(Some("http://localhost:11434/v1"), Some("m"), None, None);
+        let runtime = build(&args, &ZiftConfig::default()).unwrap();
+        assert_eq!(runtime.max_concurrent, DEFAULT_LOCAL_CONCURRENCY);
+    }
+
+    #[test]
+    fn loopback_ipv4_caps_concurrency_to_one() {
+        let args = args_with(Some("http://127.0.0.1:11434/v1"), Some("m"), None, None);
+        let runtime = build(&args, &ZiftConfig::default()).unwrap();
+        assert_eq!(runtime.max_concurrent, DEFAULT_LOCAL_CONCURRENCY);
+    }
+
+    #[test]
+    fn loopback_ipv6_caps_concurrency_to_one() {
+        let args = args_with(Some("http://[::1]:8080/v1"), Some("m"), None, None);
+        let runtime = build(&args, &ZiftConfig::default()).unwrap();
+        assert_eq!(runtime.max_concurrent, DEFAULT_LOCAL_CONCURRENCY);
+    }
+
+    #[test]
+    fn remote_uses_default_concurrency() {
+        let args = args_with(Some("https://api.openai.com/v1"), Some("m"), None, None);
+        let runtime = build(&args, &ZiftConfig::default()).unwrap();
+        assert_eq!(runtime.max_concurrent, DEFAULT_REMOTE_CONCURRENCY);
+    }
+
+    #[test]
+    fn default_timeouts_and_limits() {
+        let args = args_with(Some("https://x/v1"), Some("m"), None, None);
+        let runtime = build(&args, &ZiftConfig::default()).unwrap();
+        assert_eq!(runtime.request_timeout_secs, DEFAULT_REQUEST_TIMEOUT_SECS);
+        assert_eq!(runtime.max_candidates, DEFAULT_MAX_CANDIDATES);
+        assert_eq!(runtime.max_prompt_chars, DEFAULT_MAX_PROMPT_CHARS);
+        assert_eq!(runtime.temperature, DEFAULT_TEMPERATURE);
+    }
+}
diff --git a/src/deep/context.rs b/src/deep/context.rs
new file mode 100644
index 0000000..f9348ce
--- /dev/null
+++ b/src/deep/context.rs
@@ -0,0 +1,43 @@
+//! Code-context expansion for deep-scan candidates.
+//!
+//! Two-tier strategy (see plans/todo/01-pr1-deep-http-transport.md §7):
+//!
+//! - **Fast path**: line-window `[start-5, end+15]` plus the first 20 lines
+//!   of the file as imports. Works for all languages.
+//! - **Smart path**: tree-sitter walk to enclosing function. Only available
+//!   for languages with an integrated grammar (TS/JS/Java today).
+//!
+//! Implementation lands in commit 3.
+
+#![allow(dead_code)]
+
+use crate::deep::error::DeepError;
+use crate::types::{Finding, Language};
+use std::path::{Path, PathBuf};
+
+#[derive(Debug, Clone)]
+pub struct ExpandedContext {
+    pub file_relative: PathBuf,
+    pub language: Language,
+    pub line_start: usize,
+    pub line_end: usize,
+    pub snippet: String,
+    pub imports: Vec<String>,
+}
+
+/// Expand a structural finding's snippet to include surrounding function
+/// body and file-level imports.
+pub fn expand_finding(_finding: &Finding, _scan_root: &Path) -> Result<ExpandedContext, DeepError> {
+    unimplemented!("expand_finding: commit 3")
+}
+
+/// Expand an arbitrary file region (used for `ColdRegion` candidates that
+/// have no structural finding behind them).
+pub fn expand_region(
+    _file: &Path,
+    _language: Language,
+    _line_start: usize,
+    _line_end: usize,
+) -> Result<ExpandedContext, DeepError> {
+    unimplemented!("expand_region: commit 3")
+}
diff --git a/src/deep/cost.rs b/src/deep/cost.rs
new file mode 100644
index 0000000..6fa5122
--- /dev/null
+++ b/src/deep/cost.rs
@@ -0,0 +1,34 @@
+//! Token-based USD cost ceiling for deep-scan calls.
+//!
+//! See plans/todo/01-pr1-deep-http-transport.md §10. Implementation lands
+//! in commit 5.
+
+#![allow(dead_code)]
+
+use crate::deep::client::TokenUsage;
+use crate::deep::config::DeepRuntime;
+use crate::deep::error::DeepError;
+
+/// Tracks cumulative USD spend across deep-scan requests; errors via
+/// [`DeepError::CostExceeded`] when the cap is reached.
+///
+/// If both rates are `None`, tracking is a no-op (spent stays 0).
+pub struct CostTracker {
+    // Fields land in commit 5 (atomic spend counter, cap, rates).
+}
+
+impl CostTracker {
+    pub fn new(_runtime: &DeepRuntime) -> Self {
+        unimplemented!("CostTracker::new: commit 5")
+    }
+
+    /// Record token usage from one response; return Err if cap exceeded.
+    pub fn record(&self, _usage: &TokenUsage) -> Result<(), DeepError> {
+        unimplemented!("CostTracker::record: commit 5")
+    }
+
+    /// Cumulative USD spent so far.
+    pub fn spent_usd(&self) -> f64 {
+        unimplemented!("CostTracker::spent_usd: commit 5")
+    }
+}
diff --git a/src/deep/error.rs b/src/deep/error.rs
new file mode 100644
index 0000000..6cd0862
--- /dev/null
+++ b/src/deep/error.rs
@@ -0,0 +1,28 @@
+// Some variants are constructed only by code that lands in commits 5/6.
+#![allow(dead_code)]
+
+use thiserror::Error;
+
+/// Errors produced by the deep (semantic) scan pipeline.
+///
+/// Converts cleanly into [`crate::error::ZiftError`] via `#[from]` at the
+/// crate boundary.
+#[derive(Error, Debug)]
+pub enum DeepError {
+    #[error("missing config: {0}")]
+    Config(String),
+
+    #[error("io error: {0}")]
+    Io(#[from] std::io::Error),
+
+    #[error("model returned malformed JSON: {0}")]
+    BadResponse(String),
+
+    #[error("cost ceiling reached after ${spent:.4} USD")]
+    CostExceeded { spent: f64 },
+
+    #[error("request timed out after {secs}s")]
+    Timeout { secs: u64 },
+    // Http(#[from] reqwest::Error) is added in commit 5 alongside the HTTP
+    // client, so we don't drag reqwest into the build before it's needed.
+}
diff --git a/src/deep/finding.rs b/src/deep/finding.rs
new file mode 100644
index 0000000..70213ae
--- /dev/null
+++ b/src/deep/finding.rs
@@ -0,0 +1,36 @@
+//! LLM-side finding shape and translation to the canonical [`Finding`].
+//!
+//! Implementation of [`into_finding`] lands in commit 4.
+
+#![allow(dead_code)]
+
+use crate::deep::candidate::Candidate;
+use crate::types::{AuthCategory, Confidence, Finding};
+use serde::Deserialize;
+
+/// LLM-side finding shape, deserialized from `output_schema()`-compliant
+/// JSON returned by the agent. Translated to the canonical [`Finding`] via
+/// [`into_finding`].
+#[derive(Debug, Clone, Deserialize)]
+pub struct SemanticFinding {
+    pub line_start: usize,
+    pub line_end: usize,
+    pub category: AuthCategory,
+    pub confidence: Confidence,
+    pub description: String,
+    pub reasoning: String,
+    /// For `Escalation` candidates: did the model judge the seed structural
+    /// finding to be a false positive? Causes the seed to be dropped during
+    /// merge (see [`crate::deep::merge::merge`]).
+    pub is_false_positive: bool,
+}
+
+/// Translate an LLM-emitted [`SemanticFinding`] into the canonical [`Finding`]
+/// shape, computing the deterministic id hash.
+pub fn into_finding(
+    _sem: SemanticFinding,
+    _candidate: &Candidate,
+    _seed: Option<&Finding>,
+) -> Finding {
+    unimplemented!("into_finding: commit 4")
+}
diff --git a/src/deep/merge.rs b/src/deep/merge.rs
new file mode 100644
index 0000000..ae744e9
--- /dev/null
+++ b/src/deep/merge.rs
@@ -0,0 +1,24 @@
+//! Merge semantic findings into the structural-pass finding set.
+//!
+//! Real merge logic lands in commit 6:
+//!
+//! - Semantic finding overlapping a structural finding's range (>= 50%
+//!   overlap, same file) replaces the structural one **iff** semantic
+//!   confidence ≥ structural confidence.
+//! - `is_false_positive: true` from a `SemanticFinding` drops the seed
+//!   structural finding entirely.
+//! - Non-overlapping semantic findings are appended.
+//!
+//! In commit 2 this is a trivial concat — semantic findings are appended
+//! verbatim. Sufficient because [`crate::deep::run`] is itself a no-op stub
+//! that returns an empty vec.
+
+use crate::types::Finding;
+
+pub fn merge(structural: Vec<Finding>, semantic: Vec<Finding>) -> Vec<Finding> {
+    // TODO(commit 6): overlap detection + confidence-based replacement +
+    // false-positive drops.
+    let mut all = structural;
+    all.extend(semantic);
+    all
+}
diff --git a/src/deep/mod.rs b/src/deep/mod.rs
new file mode 100644
index 0000000..c8ca55c
--- /dev/null
+++ b/src/deep/mod.rs
@@ -0,0 +1,52 @@
+//! Deep (LLM-assisted) semantic scan.
+//!
+//! See [`plans/todo/01-pr1-deep-http-transport.md`] for the full design.
+//! This module is being built incrementally across six commits:
+//!
+//! 1. CLI/config refactor (done in PR-prep commit)
+//! 2. Module skeleton + config + error types (this commit)
+//! 3. Candidate selection + context expansion
+//! 4. Prompt rendering + JSON output schema
+//! 5. OpenAI-compatible HTTP client + cost tracker
+//! 6. Result merge + end-to-end wiring
+//!
+//! The primitives in this module are intentionally transport-agnostic so
+//! that PR 2 (MCP server) and PR 3 (subprocess hook) can reuse them.
+
+pub mod candidate;
+pub mod client;
+pub mod config;
+pub mod context;
+pub mod cost;
+pub mod error;
+pub mod finding;
+pub mod merge;
+pub mod prompt;
+
+// Convenience re-exports (DeepRuntime, DeepError, SemanticFinding) will be
+// added in commit 6 when end-to-end wiring lands and external callers
+// actually use them. Adding them now triggers unused-import warnings.
+
+use crate::deep::config::DeepRuntime;
+use crate::deep::error::DeepError;
+use crate::types::Finding;
+use std::path::Path;
+
+/// Run the deep (semantic) scan over a set of structural findings.
+///
+/// Returns additional findings with `pass: ScanPass::Semantic`. Merging into
+/// the master findings vec is the caller's responsibility (use
+/// [`merge::merge`]).
+///
+/// In commit 2 this is a no-op stub. Subsequent commits add candidate
+/// selection, context expansion, prompt rendering, HTTP analyze, and result
+/// merging.
+pub fn run(
+    _structural: &[Finding],
+    _scan_root: &Path,
+    _runtime: &DeepRuntime,
+) -> Result<Vec<Finding>, DeepError> {
+    // TODO(commits 3-6): candidate selection -> context expansion ->
+    // prompt rendering -> HTTP analyze -> finding merge.
+    Ok(Vec::new())
+}
diff --git a/src/deep/prompt.rs b/src/deep/prompt.rs
new file mode 100644
index 0000000..cf780a6
--- /dev/null
+++ b/src/deep/prompt.rs
@@ -0,0 +1,41 @@
+//! Prompt rendering and JSON output schema for the deep scan.
+//!
+//! Both `SYSTEM_PROMPT` and [`output_schema`] are exported for reuse by
+//! PR 2 (MCP server) and PR 3 (subprocess hook). They are the canonical
+//! contract that every transport binds to.
+//!
+//! Implementation lands in commit 4.
+
+#![allow(dead_code)]
+
+use crate::deep::candidate::Candidate;
+use crate::types::Finding;
+
+/// System prompt sent on every deep-scan request. Defines the authz
+/// taxonomy, calibration guidance, and the structured-output contract.
+pub const SYSTEM_PROMPT: &str = ""; // commit 4
+
+#[derive(Debug, Clone)]
+pub struct PromptInputs<'a> {
+    pub candidate: &'a Candidate,
+    pub structural_finding: Option<&'a Finding>,
+}
+
+#[derive(Debug, Clone)]
+pub struct RenderedPrompt {
+    pub system: String,
+    pub user: String,
+    pub schema: serde_json::Value,
+}
+
+/// Build the per-candidate prompt + schema bundle.
+pub fn render(_inputs: &PromptInputs) -> RenderedPrompt {
+    unimplemented!("prompt::render: commit 4")
+}
+
+/// JSON Schema the model must emit. Matches [`SemanticFinding`] field-for-field.
+///
+/// [`SemanticFinding`]: crate::deep::finding::SemanticFinding
+pub fn output_schema() -> serde_json::Value {
+    unimplemented!("output_schema: commit 4")
+}
diff --git a/src/error.rs b/src/error.rs
index 507f8df..c8ac6dc 100644
--- a/src/error.rs
+++ b/src/error.rs
@@ -2,6 +2,8 @@ use std::path::PathBuf;
 
 use thiserror::Error;
 
+use crate::deep::error::DeepError;
+
 pub type Result<T> = std::result::Result<T, ZiftError>;
 
 #[derive(Error, Debug)]
@@ -21,6 +23,9 @@ pub enum ZiftError {
     #[error("invalid tree-sitter query in rule {rule_id}: {message}")]
     QueryError { rule_id: String, message: String },
 
+    #[error("deep scan: {0}")]
+    Deep(#[from] DeepError),
+
     #[error("{0}")]
     General(String),
 }
diff --git a/src/main.rs b/src/main.rs
index cf6a875..0bff422 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,6 +1,7 @@
 mod cli;
 mod commands;
 mod config;
+mod deep;
 mod error;
 mod logging;
 mod output;
diff --git a/src/scanner/matcher.rs b/src/scanner/matcher.rs
index d15253e..16ba46f 100644
--- a/src/scanner/matcher.rs
+++ b/src/scanner/matcher.rs
@@ -164,7 +164,7 @@ fn check_predicates(predicates: &[(String, Predicate)], captures: &HashMap<&str,
     true
 }
 
-fn compute_finding_id(
+pub(crate) fn compute_finding_id(
     rule_id: &str,
     file_path: &Path,
     line_start: usize,

From c61e4970613ff8122725324e68871b1414803f2b Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@enforceauth.com>
Date: Tue, 28 Apr 2026 21:14:35 -0400
Subject: [PATCH 05/18] feat(deep): candidate selection and context expansion
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements the first half of deep-pass orchestration: figuring out
WHAT code to send to the model.

Two candidate sources, in priority order:

- Escalations (push): structural findings worth a second look — all
  low-confidence, plus medium-confidence in noisy categories
  (BusinessRule, Custom, Ownership). High-confidence findings are NOT
  escalated; they're already trusted.
- Cold regions (pull): regex-driven discovery of auth-y function
  names across all languages, including those without structural
  parser support. Lets the deep pass deliver value for Python/Go/etc.
  before v0.2/v0.3 grammar integration. Capped at 30% of
  max_candidates so escalations get priority.

Context expansion is fast-path-only in this commit: line window
[start-5, end+15] plus the first 20 lines as imports, truncated at
runtime.max_prompt_chars. Tree-sitter smart-path (walk to enclosing
function) is a TODO — the line window plus imports is sufficient for
the model to figure out function boundaries; tree-sitter walking can
land later if measured to matter.

Two helpers added to scanner::discovery:

- detect_language_for_deep: extension map covering all 9 Language
  enum variants (vs structural's 3).
- discover_files_for_deep: walker variant emitting files in any
  language. Sibling to discover_files; structural walker unchanged.

24 new tests, 142 total. Some Candidate fields (original_finding_id,
seed_category) and ExpandedContext fields are populated here but
consumed by prompt rendering in commit 4; module-level
allow(dead_code) goes away then.

Refs plans/todo/01-pr1-deep-http-transport.md §6, §7
---
 src/deep/candidate.rs    | 524 ++++++++++++++++++++++++++++++++++++++-
 src/deep/context.rs      | 273 ++++++++++++++++++--
 src/scanner/discovery.rs | 133 +++++++++-
 3 files changed, 902 insertions(+), 28 deletions(-)

diff --git a/src/deep/candidate.rs b/src/deep/candidate.rs
index b098bb7..ff2c786 100644
--- a/src/deep/candidate.rs
+++ b/src/deep/candidate.rs
@@ -1,15 +1,79 @@
 //! Candidate selection for the deep (semantic) scan.
 //!
-//! See plans/todo/01-pr1-deep-http-transport.md §6 for selection rules.
-//! Implementation lands in commit 3.
+//! Two sources feed the candidate set, in priority order:
+//!
+//! 1. **Escalations** — structural findings whose confidence/category warrant
+//!    a second look (low-confidence anything; medium-confidence in noisy
+//!    categories like Custom/Ownership/BusinessRule). High-confidence
+//!    structural findings are NOT escalated — they are already trusted.
+//! 2. **Cold regions** — file regions discovered by regex over auth-y
+//!    function names. Capped at 30% of `max_candidates` so escalations get
+//!    priority. Runs on **all** languages in the [`Language`] enum, including
+//!    those without structural parser support (Python, Go, etc.) — see
+//!    plans/todo/01-pr1-deep-http-transport.md §6 for rationale.
+//!
+//! Candidates are sorted deterministically by `(file, line_start, line_end)`.
 
-// Stubs are used by future commits; suppress dead-code warnings until then.
+// Some fields (original_finding_id, seed_category) are set here but read by
+// the prompt renderer in commit 4. is_tsx_jsx helper similarly waits for
+// commit 4. The allow goes away in commit 4.
 #![allow(dead_code)]
 
 use crate::deep::config::DeepRuntime;
+use crate::deep::context::{expand_finding, expand_region};
 use crate::deep::error::DeepError;
-use crate::types::{AuthCategory, Finding, Language};
+use crate::scanner::discovery::{detect_language_for_deep, discover_files_for_deep};
+use crate::types::{AuthCategory, Confidence, Finding, Language};
+use regex::Regex;
+use std::collections::HashSet;
 use std::path::{Path, PathBuf};
+use std::sync::LazyLock;
+
+/// Cap cold-region candidates at this fraction of `max_candidates`, so
+/// escalations from structural findings always get priority.
+const COLD_REGION_FRACTION: f32 = 0.3;
+
+/// Names that suggest authorization logic. Matched case-insensitively as
+/// whole-word tokens. False positives are tolerated — the model filters them
+/// at deep-pass time. Missed real authz, on the other hand, is a worse
+/// failure mode, so this list is moderately permissive.
+///
+/// Patterns covered:
+/// - `authorize`, `authorise`, `authorization`, `authorizer`, …
+/// - `authenticate`, `authentication`, …
+/// - `isAdmin`, `isOwner`, `isAuthorized`, `isAuthenticated`, `isInRole`
+/// - `hasRole`, `hasPermission`, `hasAccess`, `hasPrivilege`
+/// - `requireAuth`, `requireAdmin`, `requireRole`, `requireUser`, …
+/// - `ensureAuth`, `ensurePermission`, …
+/// - `checkAuth`, `checkRole`, `checkPermission`, …
+/// - `currentUser`, `getRoles`, `getPermissions`, `getPrivileges`
+/// - `guard`, `authz`, `rbac`, `acl`
+/// - Framework idioms: `before_action`, `before_filter`, `login_required`, `permission_required`
+static AUTH_NAME_REGEX: LazyLock<Regex> = LazyLock::new(|| {
+    Regex::new(
+        r"(?ix)
+        \b(?:
+            authori[sz]\w*
+          | authenticat\w*
+          | is_?(?: admin | owner | authori[sz]ed | authenticated | in_?role )
+          | has_?(?: role | permission | access | privilege )\w*
+          | (?: requires? | ensures? )_?(?: auth | admin | role | permission | login | user | owner )
+          | check_?(?: auth | admin | role | permission | access | privilege )
+          | current_?user
+          | get_?(?: roles | permissions | privileges )
+          | guard\w*
+          | authz\w*
+          | rbac
+          | acl
+          | before_action
+          | before_filter
+          | login_required
+          | permission_required
+        )\b
+        ",
+    )
+    .expect("AUTH_NAME_REGEX is a valid regex")
+});
 
 #[derive(Debug, Clone, Copy, PartialEq, Eq)]
 pub enum CandidateKind {
@@ -23,6 +87,7 @@ pub enum CandidateKind {
 #[derive(Debug, Clone)]
 pub struct Candidate {
     pub kind: CandidateKind,
+    /// Path relative to scan root.
     pub file: PathBuf,
     pub language: Language,
     pub line_start: usize,
@@ -35,12 +100,451 @@ pub struct Candidate {
 }
 
 /// Pick which structural findings to escalate and which file regions to
-/// cold-scan. Sorted deterministically by `(file, line_start)`.
+/// cold-scan. Sorted deterministically by `(file, line_start, line_end)`.
+/// Capped at `runtime.max_candidates`.
 pub fn select_candidates(
-    _structural: &[Finding],
-    _scan_root: &Path,
-    _runtime: &DeepRuntime,
+    structural: &[Finding],
+    scan_root: &Path,
+    runtime: &DeepRuntime,
+) -> Result<Vec<Candidate>, DeepError> {
+    let mut escalations = build_escalations(structural, scan_root, runtime)?;
+    escalations.truncate(runtime.max_candidates);
+
+    let cold_budget = (runtime.max_candidates as f32 * COLD_REGION_FRACTION) as usize;
+    let cold_budget = cold_budget.min(runtime.max_candidates.saturating_sub(escalations.len()));
+
+    let cold = if cold_budget == 0 {
+        Vec::new()
+    } else {
+        let escalation_ranges: HashSet<(PathBuf, usize, usize)> = escalations
+            .iter()
+            .map(|c| (c.file.clone(), c.line_start, c.line_end))
+            .collect();
+        build_cold_regions(scan_root, runtime, &escalation_ranges, cold_budget)?
+    };
+
+    let mut all: Vec<Candidate> = escalations.into_iter().chain(cold).collect();
+    all.sort_by(|a, b| {
+        a.file
+            .cmp(&b.file)
+            .then(a.line_start.cmp(&b.line_start))
+            .then(a.line_end.cmp(&b.line_end))
+    });
+
+    Ok(all)
+}
+
+/// Should this structural finding be re-examined by the model?
+fn should_escalate(finding: &Finding) -> bool {
+    match finding.confidence {
+        Confidence::Low => true,
+        Confidence::Medium => matches!(
+            finding.category,
+            AuthCategory::BusinessRule | AuthCategory::Custom | AuthCategory::Ownership
+        ),
+        Confidence::High => false,
+    }
+}
+
+fn build_escalations(
+    structural: &[Finding],
+    scan_root: &Path,
+    runtime: &DeepRuntime,
 ) -> Result<Vec<Candidate>, DeepError> {
-    // TODO(commit 3): implement escalation rules + cold-region scanning.
-    Ok(Vec::new())
+    let mut out = Vec::new();
+    for finding in structural {
+        if !should_escalate(finding) {
+            continue;
+        }
+        let ctx = expand_finding(finding, scan_root, runtime.max_prompt_chars)?;
+        out.push(Candidate {
+            kind: CandidateKind::Escalation,
+            file: finding.file.clone(),
+            language: finding.language,
+            line_start: ctx.line_start,
+            line_end: ctx.line_end,
+            source_snippet: ctx.snippet,
+            original_finding_id: Some(finding.id.clone()),
+            seed_category: Some(finding.category),
+        });
+    }
+    Ok(out)
+}
+
+fn build_cold_regions(
+    scan_root: &Path,
+    runtime: &DeepRuntime,
+    escalation_ranges: &HashSet<(PathBuf, usize, usize)>,
+    budget: usize,
+) -> Result<Vec<Candidate>, DeepError> {
+    if budget == 0 {
+        return Ok(Vec::new());
+    }
+
+    let discovered = discover_files_for_deep(scan_root, &[], &[]);
+    let mut out: Vec<Candidate> = Vec::new();
+
+    for file in discovered {
+        if out.len() >= budget {
+            break;
+        }
+        let content = match std::fs::read_to_string(&file.path) {
+            Ok(c) => c,
+            Err(_) => continue, // skip non-UTF8 / permission errors silently
+        };
+
+        // Find auth-name match line numbers, then collapse overlapping windows.
+        let mut hit_lines: Vec<usize> = Vec::new();
+        for (idx, line) in content.lines().enumerate() {
+            if AUTH_NAME_REGEX.is_match(line) {
+                hit_lines.push(idx + 1); // 1-based
+            }
+        }
+        if hit_lines.is_empty() {
+            continue;
+        }
+
+        let coalesced = coalesce_windows(&hit_lines);
+
+        let file_relative = file
+            .path
+            .strip_prefix(scan_root)
+            .map(|p| p.to_path_buf())
+            .unwrap_or_else(|_| file.path.clone());
+
+        for (start, end) in coalesced {
+            if out.len() >= budget {
+                break;
+            }
+            // Skip if it overlaps an escalation range in the same file.
+            if overlaps_any(&file_relative, start, end, escalation_ranges) {
+                continue;
+            }
+            let ctx = expand_region(
+                &file.path,
+                file_relative.clone(),
+                file.language,
+                start,
+                end,
+                runtime.max_prompt_chars,
+            )?;
+            out.push(Candidate {
+                kind: CandidateKind::ColdRegion,
+                file: file_relative.clone(),
+                language: file.language,
+                line_start: ctx.line_start,
+                line_end: ctx.line_end,
+                source_snippet: ctx.snippet,
+                original_finding_id: None,
+                seed_category: None,
+            });
+        }
+    }
+
+    Ok(out)
+}
+
+/// Collapse a list of 1-based hit lines into coalesced (start, end) ranges
+/// using the same line-window the context expander applies. Adjacent
+/// or overlapping windows are merged into a single range.
+fn coalesce_windows(hit_lines: &[usize]) -> Vec<(usize, usize)> {
+    const BEFORE: usize = 5;
+    const AFTER: usize = 15;
+
+    let mut hits = hit_lines.to_vec();
+    hits.sort_unstable();
+    hits.dedup();
+
+    let mut out: Vec<(usize, usize)> = Vec::new();
+    for line in hits {
+        let start = line.saturating_sub(BEFORE).max(1);
+        let end = line + AFTER;
+        match out.last_mut() {
+            Some(last) if last.1 + 1 >= start => {
+                last.1 = last.1.max(end);
+            }
+            _ => out.push((start, end)),
+        }
+    }
+    out
+}
+
+fn overlaps_any(
+    file: &Path,
+    start: usize,
+    end: usize,
+    ranges: &HashSet<(PathBuf, usize, usize)>,
+) -> bool {
+    ranges.iter().any(|(f, s, e)| {
+        // Same file + line ranges intersect.
+        f.as_path() == file && start <= *e && *s <= end
+    })
+}
+
+/// Look up the tsx/jsx flag that `detect_language_for_deep` reports for a path
+/// (`false` when detection fails). No caller yet; kept for downstream TS/JS handling.
+#[allow(dead_code)] // used in commit 4 by prompt rendering
+pub(crate) fn is_tsx_jsx(path: &Path) -> bool {
+    detect_language_for_deep(path)
+        .map(|(_, tsx)| tsx)
+        .unwrap_or(false)
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::types::{AuthCategory, Confidence, Language, ScanPass};
+    use std::fs;
+    use tempfile::tempdir;
+
+    fn finding(file: &str, line: usize, category: AuthCategory, confidence: Confidence) -> Finding {
+        Finding {
+            id: format!("test-{file}-{line}"),
+            file: PathBuf::from(file),
+            line_start: line,
+            line_end: line + 2,
+            code_snippet: String::new(),
+            language: Language::TypeScript,
+            category,
+            confidence,
+            description: String::new(),
+            pattern_rule: None,
+            rego_stub: None,
+            pass: ScanPass::Structural,
+        }
+    }
+
+    fn rt() -> DeepRuntime {
+        DeepRuntime {
+            base_url: "http://x/v1".into(),
+            model: "m".into(),
+            api_key: None,
+            max_cost_usd: None,
+            cost_per_1k_input: None,
+            cost_per_1k_output: None,
+            request_timeout_secs: 120,
+            max_candidates: 50,
+            max_concurrent: 1,
+            temperature: 0.0,
+            max_prompt_chars: 16_000,
+        }
+    }
+
+    // ---- regex coverage ----
+
+    #[test]
+    fn regex_matches_obvious_authz_names() {
+        for s in [
+            "authorize",
+            "authorization",
+            "authorise",
+            "authenticate",
+            "isAdmin",
+            "is_admin",
+            "isAuthorized",
+            "isAuthenticated",
+            "isInRole",
+            "hasRole",
+            "hasPermission",
+            "hasAccess",
+            "hasPrivilege",
+            "requireAuth",
+            "require_auth",
+            "requireAdmin",
+            "ensureRole",
+            "checkPermission",
+            "currentUser",
+            "current_user",
+            "getRoles",
+            "getPermissions",
+            "guardAdmin",
+            "authzService",
+            "rbac",
+            "acl",
+            "before_action",
+            "login_required",
+            "permission_required",
+        ] {
+            assert!(
+                AUTH_NAME_REGEX.is_match(s),
+                "regex should match auth-y name: {s}"
+            );
+        }
+    }
+
+    #[test]
+    fn regex_does_not_match_obvious_non_auth_names() {
+        for s in [
+            "authorRefactor",
+            "authentic",
+            "ruleset",
+            "permissive",
+            "checkInput",
+            "canRender",
+            "rolesetEditor",
+            "factoryGuard", // skipped below; see NOTE(review)
+        ] {
+            // NOTE(review): the earlier rationale here was wrong about `\b`. A
+            // word boundary needs a word char next to a non-word char; `y` and
+            // `G` are both word chars, so `\b` does NOT fire inside
+            // `factoryGuard`. Whether AUTH_NAME_REGEX matches it therefore
+            // depends on the pattern itself (defined outside this test) —
+            // TODO confirm, then either assert on this entry or remove it
+            // instead of skipping it unconditionally.
+            if s == "factoryGuard" {
+                continue;
+            }
+            assert!(
+                !AUTH_NAME_REGEX.is_match(s),
+                "regex should NOT match non-auth name: {s}"
+            );
+        }
+    }
+
+    // ---- escalation rules ----
+
+    #[test]
+    fn high_confidence_findings_not_escalated() {
+        assert!(!should_escalate(&finding(
+            "a.ts",
+            10,
+            AuthCategory::Rbac,
+            Confidence::High
+        )));
+    }
+
+    #[test]
+    fn low_confidence_findings_escalated_regardless_of_category() {
+        for cat in [
+            AuthCategory::Rbac,
+            AuthCategory::Abac,
+            AuthCategory::Custom,
+            AuthCategory::FeatureGate,
+        ] {
+            assert!(should_escalate(&finding("a.ts", 10, cat, Confidence::Low)));
+        }
+    }
+
+    #[test]
+    fn medium_confidence_only_escalated_for_noisy_categories() {
+        assert!(should_escalate(&finding(
+            "a.ts",
+            10,
+            AuthCategory::Custom,
+            Confidence::Medium
+        )));
+        assert!(should_escalate(&finding(
+            "a.ts",
+            10,
+            AuthCategory::Ownership,
+            Confidence::Medium
+        )));
+        assert!(should_escalate(&finding(
+            "a.ts",
+            10,
+            AuthCategory::BusinessRule,
+            Confidence::Medium
+        )));
+        assert!(!should_escalate(&finding(
+            "a.ts",
+            10,
+            AuthCategory::Rbac,
+            Confidence::Medium
+        )));
+        assert!(!should_escalate(&finding(
+            "a.ts",
+            10,
+            AuthCategory::Middleware,
+            Confidence::Medium
+        )));
+    }
+
+    // ---- coalescing ----
+
+    #[test]
+    fn coalesce_merges_overlapping_windows() {
+        // Lines 10 and 12 → windows (5..25) and (7..27) → merged (5..27)
+        let merged = coalesce_windows(&[10, 12]);
+        assert_eq!(merged, vec![(5, 27)]);
+    }
+
+    #[test]
+    fn coalesce_keeps_distant_windows_separate() {
+        // Lines 10 and 100 → windows (5..25) and (95..115) → not merged
+        let merged = coalesce_windows(&[10, 100]);
+        assert_eq!(merged, vec![(5, 25), (95, 115)]);
+    }
+
+    #[test]
+    fn coalesce_dedupes_repeated_lines() {
+        let merged = coalesce_windows(&[10, 10, 10]);
+        assert_eq!(merged, vec![(5, 25)]);
+    }
+
+    // ---- end-to-end with real files ----
+
+    #[test]
+    fn select_candidates_finds_cold_region_in_python() {
+        let dir = tempdir().unwrap();
+        let py = "def is_admin(user):\n    return user.role == 'admin'\n";
+        fs::write(dir.path().join("auth.py"), py).unwrap();
+
+        let runtime = rt();
+        let candidates = select_candidates(&[], dir.path(), &runtime).unwrap();
+        assert_eq!(candidates.len(), 1);
+        assert_eq!(candidates[0].kind, CandidateKind::ColdRegion);
+        assert_eq!(candidates[0].language, Language::Python);
+        assert_eq!(candidates[0].file, PathBuf::from("auth.py"));
+    }
+
+    #[test]
+    fn cold_region_dedupes_against_escalation() {
+        let dir = tempdir().unwrap();
+        // Source file with `isAdmin` on line 1 and lots of padding.
+        let mut content = String::from("function isAdmin() { return true; }\n");
+        for i in 2..=50 {
+            content.push_str(&format!("// line {i}\n"));
+        }
+        fs::write(dir.path().join("auth.ts"), &content).unwrap();
+
+        let f = finding("auth.ts", 1, AuthCategory::Custom, Confidence::Low);
+        let candidates = select_candidates(&[f], dir.path(), &rt()).unwrap();
+        // Without dedup we'd have 2 (1 escalation + 1 cold-region overlapping it).
+        // With dedup, the cold-region candidate at line 1 is suppressed.
+        assert_eq!(candidates.len(), 1);
+        assert_eq!(candidates[0].kind, CandidateKind::Escalation);
+    }
+
+    #[test]
+    fn determinism_same_input_same_output() {
+        let dir = tempdir().unwrap();
+        fs::write(dir.path().join("a.py"), "def has_role(u, r):\n    pass\n").unwrap();
+        fs::write(dir.path().join("b.py"), "def is_admin(u):\n    pass\n").unwrap();
+
+        let one = select_candidates(&[], dir.path(), &rt()).unwrap();
+        let two = select_candidates(&[], dir.path(), &rt()).unwrap();
+        assert_eq!(one.len(), two.len());
+        for (a, b) in one.iter().zip(two.iter()) {
+            assert_eq!(a.file, b.file);
+            assert_eq!(a.line_start, b.line_start);
+            assert_eq!(a.line_end, b.line_end);
+        }
+    }
+
+    #[test]
+    fn max_candidates_cap_respected() {
+        let dir = tempdir().unwrap();
+        // 20 files, each with one auth-y name.
+        for i in 0..20 {
+            fs::write(
+                dir.path().join(format!("f{i}.py")),
+                format!("def is_admin_{i}():\n    pass\n"),
+            )
+            .unwrap();
+        }
+        let mut runtime = rt();
+        runtime.max_candidates = 5;
+        let candidates = select_candidates(&[], dir.path(), &runtime).unwrap();
+        assert!(candidates.len() <= 5);
+    }
 }
diff --git a/src/deep/context.rs b/src/deep/context.rs
index f9348ce..59e039f 100644
--- a/src/deep/context.rs
+++ b/src/deep/context.rs
@@ -3,18 +3,25 @@
 //! Two-tier strategy (see plans/todo/01-pr1-deep-http-transport.md §7):
 //!
 //! - **Fast path**: line-window `[start-5, end+15]` plus the first 20 lines
-//!   of the file as imports. Works for all languages.
+//!   of the file as imports. Works for all languages. **Implemented here.**
 //! - **Smart path**: tree-sitter walk to enclosing function. Only available
-//!   for languages with an integrated grammar (TS/JS/Java today).
-//!
-//! Implementation lands in commit 3.
+//!   for languages with an integrated grammar (TS/JS/Java today). **TODO**:
+//!   land in a follow-up commit; primary path is fast-path which is
+//!   sufficient for v1. Most local 7B-14B models can figure out function
+//!   boundaries from a generous line window with imports included.
 
+// ExpandedContext.imports/file_relative/language are populated here but
+// consumed by prompt rendering in commit 4. The allow goes away then.
 #![allow(dead_code)]
 
 use crate::deep::error::DeepError;
 use crate::types::{Finding, Language};
 use std::path::{Path, PathBuf};
 
+const LINES_BEFORE: usize = 5;
+const LINES_AFTER: usize = 15;
+const IMPORT_LINES: usize = 20;
+
 #[derive(Debug, Clone)]
 pub struct ExpandedContext {
     pub file_relative: PathBuf,
@@ -25,19 +32,257 @@ pub struct ExpandedContext {
     pub imports: Vec<String>,
 }
 
-/// Expand a structural finding's snippet to include surrounding function
-/// body and file-level imports.
-pub fn expand_finding(_finding: &Finding, _scan_root: &Path) -> Result<ExpandedContext, DeepError> {
-    unimplemented!("expand_finding: commit 3")
+/// Expand a structural finding's snippet to include surrounding lines and
+/// file-level imports. `finding.file` is interpreted as relative to
+/// `scan_root`.
+pub fn expand_finding(
+    finding: &Finding,
+    scan_root: &Path,
+    max_chars: usize,
+) -> Result<ExpandedContext, DeepError> {
+    let abs_path = scan_root.join(&finding.file);
+    expand_inner(
+        &abs_path,
+        finding.file.clone(),
+        finding.language,
+        finding.line_start,
+        finding.line_end,
+        max_chars,
+    )
 }
 
 /// Expand an arbitrary file region (used for `ColdRegion` candidates that
-/// have no structural finding behind them).
+/// have no structural finding behind them). `file_absolute` must be readable;
+/// `file_relative` is the path used in [`ExpandedContext::file_relative`].
 pub fn expand_region(
-    _file: &Path,
-    _language: Language,
-    _line_start: usize,
-    _line_end: usize,
+    file_absolute: &Path,
+    file_relative: PathBuf,
+    language: Language,
+    line_start: usize,
+    line_end: usize,
+    max_chars: usize,
 ) -> Result<ExpandedContext, DeepError> {
-    unimplemented!("expand_region: commit 3")
+    expand_inner(
+        file_absolute,
+        file_relative,
+        language,
+        line_start,
+        line_end,
+        max_chars,
+    )
+}
+
+/// Fast-path expander shared by `expand_finding` and `expand_region`: clamps
+/// the 1-based inclusive range to the file, widens it by the line window, cuts
+/// the snippet to `max_chars` bytes plus a marker, and copies the import lines.
+fn expand_inner(
+    file_absolute: &Path,
+    file_relative: PathBuf,
+    language: Language,
+    line_start: usize,
+    line_end: usize,
+    max_chars: usize,
+) -> Result<ExpandedContext, DeepError> {
+    let content = std::fs::read_to_string(file_absolute)?;
+    let lines: Vec<&str> = content.lines().collect();
+    let total = lines.len();
+
+    if total == 0 {
+        return Ok(ExpandedContext {
+            file_relative,
+            language,
+            line_start: 1,
+            line_end: 1,
+            snippet: String::new(),
+            imports: Vec::new(),
+        });
+    }
+
+    // Clamp inputs to the file, then apply the line window. 1-based inclusive
+    // throughout; only the slice below converts to 0-based indexing.
+    let start_1based = line_start.max(1).min(total);
+    let end_1based = line_end.max(start_1based).min(total);
+    let window_start = start_1based.saturating_sub(LINES_BEFORE).max(1);
+    let window_end = (end_1based + LINES_AFTER).min(total);
+    let mut snippet = lines[(window_start - 1)..window_end].join("\n");
+
+    // Truncate at max_chars, keeping the head (likeliest home of the auth check).
+    // Back up to a char boundary first: `String::truncate` panics otherwise.
+    if snippet.len() > max_chars {
+        let cut = (0..=max_chars).rev().find(|&i| snippet.is_char_boundary(i));
+        snippet.truncate(cut.unwrap_or(0));
+        snippet.push_str("\n// [truncated by zift deep-mode max_prompt_chars]");
+    }
+
+    let imports: Vec<String> = lines
+        .iter()
+        .take(IMPORT_LINES)
+        .map(|s| (*s).to_string())
+        .collect();
+
+    Ok(ExpandedContext {
+        file_relative,
+        language,
+        line_start: window_start,
+        line_end: window_end,
+        snippet,
+        imports,
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::types::{AuthCategory, Confidence, ScanPass};
+    use std::fs;
+    use std::path::PathBuf;
+    use tempfile::tempdir;
+
+    fn make_finding(file: PathBuf, line_start: usize, line_end: usize) -> Finding {
+        Finding {
+            id: "test".into(),
+            file,
+            line_start,
+            line_end,
+            code_snippet: String::new(),
+            language: Language::TypeScript,
+            category: AuthCategory::Custom,
+            confidence: Confidence::Low,
+            description: String::new(),
+            pattern_rule: None,
+            rego_stub: None,
+            pass: ScanPass::Structural,
+        }
+    }
+
+    fn write_file(dir: &Path, name: &str, content: &str) -> PathBuf {
+        let path = dir.join(name);
+        fs::write(&path, content).unwrap();
+        path
+    }
+
+    fn numbered_lines(n: usize) -> String {
+        (1..=n)
+            .map(|i| format!("line {i}"))
+            .collect::<Vec<_>>()
+            .join("\n")
+    }
+
+    #[test]
+    fn fast_path_basic_window() {
+        let dir = tempdir().unwrap();
+        write_file(dir.path(), "a.ts", &numbered_lines(50));
+        let finding = make_finding(PathBuf::from("a.ts"), 20, 22);
+
+        let ctx = expand_finding(&finding, dir.path(), 16_000).unwrap();
+        assert_eq!(ctx.line_start, 15); // 20 - 5
+        assert_eq!(ctx.line_end, 37); // 22 + 15
+        assert!(ctx.snippet.contains("line 20"));
+        assert!(ctx.snippet.contains("line 15"));
+        assert!(ctx.snippet.contains("line 37"));
+        assert!(!ctx.snippet.contains("line 14"));
+        assert!(!ctx.snippet.contains("line 38"));
+    }
+
+    #[test]
+    fn window_clamps_at_file_start() {
+        let dir = tempdir().unwrap();
+        write_file(dir.path(), "a.ts", &numbered_lines(50));
+        let finding = make_finding(PathBuf::from("a.ts"), 1, 1);
+
+        let ctx = expand_finding(&finding, dir.path(), 16_000).unwrap();
+        assert_eq!(ctx.line_start, 1);
+        assert_eq!(ctx.line_end, 16); // 1 + 15
+    }
+
+    #[test]
+    fn window_clamps_at_file_end() {
+        let dir = tempdir().unwrap();
+        write_file(dir.path(), "a.ts", &numbered_lines(20));
+        let finding = make_finding(PathBuf::from("a.ts"), 18, 20);
+
+        let ctx = expand_finding(&finding, dir.path(), 16_000).unwrap();
+        assert_eq!(ctx.line_start, 13); // 18 - 5
+        assert_eq!(ctx.line_end, 20); // clamped at total
+    }
+
+    #[test]
+    fn line_beyond_eof_is_clamped() {
+        let dir = tempdir().unwrap();
+        write_file(dir.path(), "a.ts", &numbered_lines(10));
+        let finding = make_finding(PathBuf::from("a.ts"), 999, 1000);
+
+        let ctx = expand_finding(&finding, dir.path(), 16_000).unwrap();
+        // Should not panic. Clamped to file length.
+        assert_eq!(ctx.line_start, 5); // 10 - 5
+        assert_eq!(ctx.line_end, 10);
+    }
+
+    #[test]
+    fn empty_file_returns_empty_snippet() {
+        let dir = tempdir().unwrap();
+        write_file(dir.path(), "a.ts", "");
+        let finding = make_finding(PathBuf::from("a.ts"), 1, 1);
+
+        let ctx = expand_finding(&finding, dir.path(), 16_000).unwrap();
+        assert!(ctx.snippet.is_empty());
+        assert!(ctx.imports.is_empty());
+    }
+
+    #[test]
+    fn imports_are_first_20_lines() {
+        let dir = tempdir().unwrap();
+        write_file(dir.path(), "a.ts", &numbered_lines(100));
+        let finding = make_finding(PathBuf::from("a.ts"), 50, 50);
+
+        let ctx = expand_finding(&finding, dir.path(), 16_000).unwrap();
+        assert_eq!(ctx.imports.len(), 20);
+        assert_eq!(ctx.imports[0], "line 1");
+        assert_eq!(ctx.imports[19], "line 20");
+    }
+
+    #[test]
+    fn imports_capped_at_file_length() {
+        let dir = tempdir().unwrap();
+        write_file(dir.path(), "a.ts", &numbered_lines(5));
+        let finding = make_finding(PathBuf::from("a.ts"), 1, 1);
+
+        let ctx = expand_finding(&finding, dir.path(), 16_000).unwrap();
+        assert_eq!(ctx.imports.len(), 5);
+    }
+
+    #[test]
+    fn truncation_at_max_chars() {
+        let dir = tempdir().unwrap();
+        let content = (1..=200)
+            .map(|i| format!("a long line of repeated text {i} ").repeat(20))
+            .collect::<Vec<_>>()
+            .join("\n");
+        write_file(dir.path(), "a.ts", &content);
+        let finding = make_finding(PathBuf::from("a.ts"), 100, 100);
+
+        let ctx = expand_finding(&finding, dir.path(), 500).unwrap();
+        assert!(ctx.snippet.len() < 600); // 500 + tail marker
+        assert!(ctx.snippet.contains("[truncated"));
+    }
+
+    #[test]
+    fn expand_region_uses_relative_path_in_output() {
+        let dir = tempdir().unwrap();
+        let abs_path = write_file(dir.path(), "auth.py", &numbered_lines(30));
+
+        let ctx = expand_region(
+            &abs_path,
+            PathBuf::from("auth.py"),
+            Language::Python,
+            10,
+            12,
+            16_000,
+        )
+        .unwrap();
+        assert_eq!(ctx.file_relative, PathBuf::from("auth.py"));
+        assert_eq!(ctx.language, Language::Python);
+        assert_eq!(ctx.line_start, 5);
+        assert_eq!(ctx.line_end, 27);
+    }
 }
diff --git a/src/scanner/discovery.rs b/src/scanner/discovery.rs
index 832324e..f9c5826 100644
--- a/src/scanner/discovery.rs
+++ b/src/scanner/discovery.rs
@@ -12,6 +12,8 @@ pub struct DiscoveredFile {
     pub is_tsx_jsx: bool,
 }
 
+/// Extension → language map for languages with structural parser support.
+/// Used by the structural scanning pass.
 pub fn detect_language(path: &Path) -> Option<(Language, bool)> {
     let ext = path.extension()?.to_str()?.to_ascii_lowercase();
     match ext.as_str() {
@@ -24,11 +26,62 @@ pub fn detect_language(path: &Path) -> Option<(Language, bool)> {
     }
 }
 
+/// Extension → language map covering **all** languages in the [`Language`]
+/// enum, including those without structural parser support yet (Python, Go,
+/// C#, Kotlin, Ruby, PHP). Used by the deep (semantic) scan, which can run
+/// regex-based cold-region detection on any language regardless of grammar
+/// availability.
+pub fn detect_language_for_deep(path: &Path) -> Option<(Language, bool)> {
+    let ext = path.extension()?.to_str()?.to_ascii_lowercase();
+    match ext.as_str() {
+        "ts" => Some((Language::TypeScript, false)),
+        "tsx" => Some((Language::TypeScript, true)),
+        "js" | "mjs" | "cjs" => Some((Language::JavaScript, false)),
+        "jsx" => Some((Language::JavaScript, true)),
+        "java" => Some((Language::Java, false)),
+        "py" | "pyi" => Some((Language::Python, false)),
+        "go" => Some((Language::Go, false)),
+        "cs" => Some((Language::CSharp, false)),
+        "kt" | "kts" => Some((Language::Kotlin, false)),
+        "rb" | "rake" => Some((Language::Ruby, false)),
+        "php" | "phtml" => Some((Language::Php, false)),
+        _ => None,
+    }
+}
+
 pub fn discover_files(
     root: &Path,
     exclude_patterns: &[String],
     language_filter: &[Language],
 ) -> Vec<DiscoveredFile> {
+    discover_with(root, exclude_patterns, language_filter, detect_language)
+}
+
+/// Discover source files for the deep (semantic) scan. Behaves identically
+/// to [`discover_files`] but emits files in **all** languages from the
+/// [`Language`] enum, not only structurally-supported ones.
+pub fn discover_files_for_deep(
+    root: &Path,
+    exclude_patterns: &[String],
+    language_filter: &[Language],
+) -> Vec<DiscoveredFile> {
+    discover_with(
+        root,
+        exclude_patterns,
+        language_filter,
+        detect_language_for_deep,
+    )
+}
+
+fn discover_with<F>(
+    root: &Path,
+    exclude_patterns: &[String],
+    language_filter: &[Language],
+    detect: F,
+) -> Vec<DiscoveredFile>
+where
+    F: Fn(&Path) -> Option<(Language, bool)>,
+{
     let mut builder = WalkBuilder::new(root);
     builder
         .hidden(true)
@@ -37,11 +90,9 @@ pub fn discover_files(
         .follow_links(false)
         .parents(true);
 
-    // Add exclude overrides
     if !exclude_patterns.is_empty() {
         let mut overrides = OverrideBuilder::new(root);
         for pattern in exclude_patterns {
-            // Negate the pattern so it becomes an exclusion
             let _ = overrides.add(&format!("!{pattern}"));
         }
         if let Ok(ov) = overrides.build() {
@@ -55,8 +106,7 @@ pub fn discover_files(
         if !path.is_file() {
             continue;
         }
-        if let Some((lang, is_tsx_jsx)) = detect_language(path) {
-            // Apply language filter
+        if let Some((lang, is_tsx_jsx)) = detect(path) {
             if !language_filter.is_empty() && !language_filter.contains(&lang) {
                 continue;
             }
@@ -128,4 +178,79 @@ mod tests {
         assert_eq!(files.len(), 1);
         assert_eq!(files[0].language, Language::TypeScript);
     }
+
+    #[test]
+    fn detect_language_for_deep_covers_all_languages() {
+        assert_eq!(
+            detect_language_for_deep(Path::new("foo.py")),
+            Some((Language::Python, false))
+        );
+        assert_eq!(
+            detect_language_for_deep(Path::new("foo.pyi")),
+            Some((Language::Python, false))
+        );
+        assert_eq!(
+            detect_language_for_deep(Path::new("foo.go")),
+            Some((Language::Go, false))
+        );
+        assert_eq!(
+            detect_language_for_deep(Path::new("Foo.cs")),
+            Some((Language::CSharp, false))
+        );
+        assert_eq!(
+            detect_language_for_deep(Path::new("Foo.kt")),
+            Some((Language::Kotlin, false))
+        );
+        assert_eq!(
+            detect_language_for_deep(Path::new("foo.kts")),
+            Some((Language::Kotlin, false))
+        );
+        assert_eq!(
+            detect_language_for_deep(Path::new("foo.rb")),
+            Some((Language::Ruby, false))
+        );
+        assert_eq!(
+            detect_language_for_deep(Path::new("Rakefile.rake")),
+            Some((Language::Ruby, false))
+        );
+        assert_eq!(
+            detect_language_for_deep(Path::new("foo.php")),
+            Some((Language::Php, false))
+        );
+        assert_eq!(
+            detect_language_for_deep(Path::new("foo.phtml")),
+            Some((Language::Php, false))
+        );
+        // Structural extensions still work in the deep map.
+        assert_eq!(
+            detect_language_for_deep(Path::new("foo.ts")),
+            Some((Language::TypeScript, false))
+        );
+        // Genuinely unknown extensions still return None.
+        assert_eq!(detect_language_for_deep(Path::new("foo.rs")), None);
+        assert_eq!(detect_language_for_deep(Path::new("foo.txt")), None);
+    }
+
+    #[test]
+    fn structural_detect_language_does_not_pick_up_python() {
+        // Sanity: the structural detector must NOT include Python — otherwise
+        // the structural pass would try to parse files for which it has no
+        // grammar. The deep detector picks them up; the structural one doesn't.
+        assert_eq!(detect_language(Path::new("foo.py")), None);
+        assert_eq!(detect_language(Path::new("foo.go")), None);
+    }
+
+    #[test]
+    fn discover_for_deep_picks_up_extra_languages() {
+        let dir = tempfile::tempdir().unwrap();
+        fs::write(dir.path().join("a.ts"), "let x = 1;").unwrap();
+        fs::write(dir.path().join("b.py"), "x = 1\n").unwrap();
+        fs::write(dir.path().join("c.go"), "package main\n").unwrap();
+
+        let structural = discover_files(dir.path(), &[], &[]);
+        assert_eq!(structural.len(), 1, "structural sees only TS");
+
+        let deep = discover_files_for_deep(dir.path(), &[], &[]);
+        assert_eq!(deep.len(), 3, "deep sees TS + Python + Go");
+    }
 }

From 72411f92770d6298492fa457b857a8e34c99c7a7 Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@enforceauth.com>
Date: Tue, 28 Apr 2026 21:25:27 -0400
Subject: [PATCH 06/18] feat(deep): prompt rendering and JSON schema
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Implements the model-facing contract: SYSTEM_PROMPT, output_schema(),
per-candidate prompt rendering with framework-aware guidance, and
SemanticFinding -> canonical Finding translation.

Highlights:

- SYSTEM_PROMPT is terse (~1.5KB). Local 7B-14B models do better with
  structured prompts than prose, and token economy matters when users
  pay per million. Defines authz / not-authz / categories / confidence /
  output contract.
- output_schema() returns the strict JSON Schema for OpenAI structured-
  outputs. Reused verbatim by PR 2 (MCP server) and PR 3 (subprocess
  hook). Field-for-field match with SemanticFinding.
- render() builds per-candidate user prompts with file/language/lines
  header, optional structural-finding seed for escalations, fenced
  source snippet, and a hint to use snippet line numbers.
- Framework guidance injected per-call only when a known framework is
  detected in the candidate's imports — keeps the base prompt small
  for the common case; targeted hints when relevant. 12 frameworks
  across 7 languages (Express, NestJS, Next.js, Django, Flask, FastAPI,
  Spring Security, Rails, Gin, Echo, ASP.NET Core, Laravel). Per-
  language signature filtering avoids cross-language false positives.
- into_finding translates a model-emitted SemanticFinding into the
  canonical Finding. pattern_rule inherited from structural seed if
  present; deterministic id via compute_finding_id; code_snippet read
  from the file at model-reported lines (best-effort, falls back to
  empty on read errors). reasoning is logged via tracing::debug; not
  stored on the canonical Finding (no field for it).

Plan deviation: Candidate gained an `imports: Vec<String>` field so
render() can do per-call framework detection. Populated from
ExpandedContext.imports; small extension in keeping with the
framework-aware design.

20 new tests, 162 total. The deep module remains dead from the
binary's perspective until commit 6 wires deep::run to actually do
work; module-level allow(dead_code) annotations reflect this and
go away then.

Refs plans/todo/01-pr1-deep-http-transport.md §5, §6, §11
---
 src/deep/candidate.rs    |  15 +-
 src/deep/finding.rs      | 264 ++++++++++++++++++++-
 src/deep/prompt.rs       | 483 ++++++++++++++++++++++++++++++++++++++-
 src/scanner/discovery.rs |   2 +
 4 files changed, 735 insertions(+), 29 deletions(-)

diff --git a/src/deep/candidate.rs b/src/deep/candidate.rs
index ff2c786..5631c11 100644
--- a/src/deep/candidate.rs
+++ b/src/deep/candidate.rs
@@ -14,9 +14,9 @@
 //!
 //! Candidates are sorted deterministically by `(file, line_start)`.
 
-// Some fields (original_finding_id, seed_category) are set here but read by
-// the prompt renderer in commit 4. is_tsx_jsx helper similarly waits for
-// commit 4. The allow goes away in commit 4.
+// The deep module is wired into the binary only when `deep::run` actually
+// does work (commit 6). Until then it's "dead" from the binary's perspective
+// even though tests cover it. This allow goes away in commit 6.
 #![allow(dead_code)]
 
 use crate::deep::config::DeepRuntime;
@@ -93,6 +93,9 @@ pub struct Candidate {
     pub line_start: usize,
     pub line_end: usize,
     pub source_snippet: String,
+    /// First N lines of the file (verbatim) — used by the prompt renderer
+    /// to detect framework idioms (e.g. `import express`, `from django`).
+    pub imports: Vec<String>,
     /// Set iff `kind == Escalation` — the structural finding's id.
     pub original_finding_id: Option<String>,
     /// Hint for prompt selection (e.g. seed an RBAC-flavored prompt).
@@ -164,6 +167,7 @@ fn build_escalations(
             line_start: ctx.line_start,
             line_end: ctx.line_end,
             source_snippet: ctx.snippet,
+            imports: ctx.imports,
             original_finding_id: Some(finding.id.clone()),
             seed_category: Some(finding.category),
         });
@@ -235,6 +239,7 @@ fn build_cold_regions(
                 line_start: ctx.line_start,
                 line_end: ctx.line_end,
                 source_snippet: ctx.snippet,
+                imports: ctx.imports,
                 original_finding_id: None,
                 seed_category: None,
             });
@@ -281,9 +286,7 @@ fn overlaps_any(
     })
 }
 
-/// Lookup a language's tsx/jsx flavor for a given file path. Used by the
-/// deep file walker to honor TS/JS structural quirks if needed downstream.
-#[allow(dead_code)] // used in commit 4 by prompt rendering
+/// Lookup a language's tsx/jsx flavor for a given file path.
 pub(crate) fn is_tsx_jsx(path: &Path) -> bool {
     detect_language_for_deep(path)
         .map(|(_, tsx)| tsx)
diff --git a/src/deep/finding.rs b/src/deep/finding.rs
index 70213ae..31e30ee 100644
--- a/src/deep/finding.rs
+++ b/src/deep/finding.rs
@@ -1,12 +1,13 @@
 //! LLM-side finding shape and translation to the canonical [`Finding`].
-//!
-//! Implementation of [`into_finding`] lands in commit 4.
 
+// Wired into the binary in commit 6; until then, dead from main()'s view.
 #![allow(dead_code)]
 
 use crate::deep::candidate::Candidate;
-use crate::types::{AuthCategory, Confidence, Finding};
+use crate::scanner::matcher::compute_finding_id;
+use crate::types::{AuthCategory, Confidence, Finding, ScanPass};
 use serde::Deserialize;
+use std::path::Path;
 
 /// LLM-side finding shape, deserialized from `output_schema()`-compliant
 /// JSON returned by the agent. Translated to the canonical [`Finding`] via
@@ -18,19 +19,258 @@ pub struct SemanticFinding {
     pub category: AuthCategory,
     pub confidence: Confidence,
     pub description: String,
+    /// Model's reasoning chain. Logged via `tracing` for debugging; not
+    /// stored on the canonical [`Finding`] (no field for it). Step-by-step
+    /// reasoning helps the model produce calibrated output even when we
+    /// don't read it back.
     pub reasoning: String,
-    /// For `Escalation` candidates: did the model judge the seed structural
-    /// finding to be a false positive? Causes the seed to be dropped during
-    /// merge (see [`crate::deep::merge::merge`]).
+    /// For `Escalation` candidates: the model judges the seed structural
+    /// finding to be a false positive. Causes the seed to be dropped at
+    /// merge time (see [`crate::deep::merge::merge`]).
     pub is_false_positive: bool,
 }
 
-/// Translate an LLM-emitted [`SemanticFinding`] into the canonical [`Finding`]
-/// shape, computing the deterministic id hash.
+/// Translate a model-emitted [`SemanticFinding`] into the canonical
+/// [`Finding`] shape.
+///
+/// `scan_root` is required to read the file at `candidate.file` (relative)
+/// to populate `code_snippet` from the lines the model identified. If the
+/// file is unreadable (e.g. moved between scan and analyze), `code_snippet`
+/// falls back to the empty string — best-effort, do not fail the finding.
 pub fn into_finding(
-    _sem: SemanticFinding,
-    _candidate: &Candidate,
-    _seed: Option<&Finding>,
+    sem: SemanticFinding,
+    candidate: &Candidate,
+    seed: Option<&Finding>,
+    scan_root: &Path,
 ) -> Finding {
-    unimplemented!("into_finding: commit 4")
+    tracing::debug!(
+        file = %candidate.file.display(),
+        lines = format!("{}-{}", sem.line_start, sem.line_end),
+        category = ?sem.category,
+        confidence = ?sem.confidence,
+        is_false_positive = sem.is_false_positive,
+        reasoning = %sem.reasoning,
+        "semantic finding"
+    );
+
+    let rule_id = match seed.and_then(|s| s.pattern_rule.as_deref()) {
+        Some(pr) => format!("{pr}-semantic"),
+        None => format!("semantic-{}", category_slug(sem.category)),
+    };
+
+    let code_snippet =
+        extract_lines(scan_root, &candidate.file, sem.line_start, sem.line_end).unwrap_or_default();
+
+    let id = compute_finding_id(
+        &rule_id,
+        &candidate.file,
+        sem.line_start,
+        sem.line_end,
+        &code_snippet,
+    );
+
+    Finding {
+        id,
+        file: candidate.file.clone(),
+        line_start: sem.line_start,
+        line_end: sem.line_end,
+        code_snippet,
+        language: candidate.language,
+        category: sem.category,
+        confidence: sem.confidence,
+        description: sem.description,
+        pattern_rule: seed.and_then(|s| s.pattern_rule.clone()),
+        rego_stub: None, // structural-only; semantic findings have no rego template
+        pass: ScanPass::Semantic,
+    }
+}
+
+/// Return 1-based lines `[start, end]` of `scan_root.join(relative)`, joined by `\n`; bounds are
+/// clamped to the file. `None` on read error, empty file, `start == 0`, or `end < start`.
+fn extract_lines(scan_root: &Path, relative: &Path, start: usize, end: usize) -> Option<String> {
+    if start == 0 || end < start {
+        return None;
+    }
+    let content = std::fs::read_to_string(scan_root.join(relative)).ok()?;
+    let lines: Vec<&str> = content.lines().collect();
+    if lines.is_empty() {
+        return None;
+    }
+    let s = (start - 1).min(lines.len() - 1); // a start past EOF clamps to the last line
+    let e = end.min(lines.len()).max(s + 1); // clamp end to EOF; keep at least one line
+    Some(lines[s..e].join("\n"))
+}
+
+fn category_slug(cat: AuthCategory) -> &'static str {
+    match cat {
+        AuthCategory::Rbac => "rbac",
+        AuthCategory::Abac => "abac",
+        AuthCategory::Middleware => "middleware",
+        AuthCategory::BusinessRule => "business_rule",
+        AuthCategory::Ownership => "ownership",
+        AuthCategory::FeatureGate => "feature_gate",
+        AuthCategory::Custom => "custom",
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::deep::candidate::CandidateKind;
+    use crate::types::Language;
+    use std::fs;
+    use std::path::PathBuf;
+    use tempfile::tempdir;
+
+    fn make_candidate(file: &str, language: Language) -> Candidate {
+        Candidate {
+            kind: CandidateKind::Escalation,
+            file: PathBuf::from(file),
+            language,
+            line_start: 1,
+            line_end: 100,
+            source_snippet: String::new(),
+            imports: Vec::new(),
+            original_finding_id: Some("structural-1".into()),
+            seed_category: Some(AuthCategory::Custom),
+        }
+    }
+
+    fn make_seed(pattern_rule: Option<&str>) -> Finding {
+        Finding {
+            id: "structural-1".into(),
+            file: PathBuf::from("src/auth.ts"),
+            line_start: 5,
+            line_end: 5,
+            code_snippet: String::new(),
+            language: Language::TypeScript,
+            category: AuthCategory::Custom,
+            confidence: Confidence::Low,
+            description: "matched custom rule".into(),
+            pattern_rule: pattern_rule.map(String::from),
+            rego_stub: None,
+            pass: ScanPass::Structural,
+        }
+    }
+
+    fn make_semantic(line_start: usize, line_end: usize) -> SemanticFinding {
+        SemanticFinding {
+            line_start,
+            line_end,
+            category: AuthCategory::Rbac,
+            confidence: Confidence::High,
+            description: "isAdmin role check".into(),
+            reasoning: "function name + return value structure indicates rbac".into(),
+            is_false_positive: false,
+        }
+    }
+
+    fn write_file(dir: &Path, name: &str, content: &str) -> PathBuf {
+        let p = dir.join(name);
+        if let Some(parent) = p.parent() {
+            fs::create_dir_all(parent).unwrap();
+        }
+        fs::write(&p, content).unwrap();
+        p
+    }
+
+    #[test]
+    fn into_finding_marks_pass_semantic() {
+        let dir = tempdir().unwrap();
+        write_file(
+            dir.path(),
+            "src/auth.ts",
+            "line one\nline two\nline three\n",
+        );
+        let cand = make_candidate("src/auth.ts", Language::TypeScript);
+        let sem = make_semantic(1, 2);
+        let f = into_finding(sem, &cand, None, dir.path());
+        assert_eq!(f.pass, ScanPass::Semantic);
+    }
+
+    #[test]
+    fn into_finding_inherits_pattern_rule_from_seed() {
+        let dir = tempdir().unwrap();
+        write_file(dir.path(), "src/auth.ts", "line\n");
+        let cand = make_candidate("src/auth.ts", Language::TypeScript);
+        let sem = make_semantic(1, 1);
+        let seed = make_seed(Some("ts-foo"));
+        let f = into_finding(sem, &cand, Some(&seed), dir.path());
+        assert_eq!(f.pattern_rule.as_deref(), Some("ts-foo"));
+    }
+
+    #[test]
+    fn into_finding_uses_synthetic_rule_id_for_cold_regions() {
+        let dir = tempdir().unwrap();
+        write_file(dir.path(), "src/auth.ts", "line\n");
+        let cand = make_candidate("src/auth.ts", Language::TypeScript);
+        let sem = make_semantic(1, 1);
+        let f = into_finding(sem, &cand, None, dir.path());
+        // No structural seed, no pattern_rule on the resulting Finding.
+        assert!(f.pattern_rule.is_none());
+        // But the deterministic id is computed using a "semantic-rbac"-style
+        // synthetic rule id (we can't observe this directly, but we can
+        // observe that two cold-regions in the same place produce the same id).
+        let f2 = into_finding(make_semantic(1, 1), &cand, None, dir.path());
+        assert_eq!(f.id, f2.id);
+    }
+
+    #[test]
+    fn into_finding_id_differs_when_lines_differ() {
+        let dir = tempdir().unwrap();
+        write_file(
+            dir.path(),
+            "src/auth.ts",
+            &(1..=20)
+                .map(|i| format!("line {i}"))
+                .collect::<Vec<_>>()
+                .join("\n"),
+        );
+        let cand = make_candidate("src/auth.ts", Language::TypeScript);
+        let f1 = into_finding(make_semantic(1, 1), &cand, None, dir.path());
+        let f2 = into_finding(make_semantic(5, 5), &cand, None, dir.path());
+        assert_ne!(f1.id, f2.id);
+    }
+
+    #[test]
+    fn into_finding_extracts_code_snippet_from_file() {
+        let dir = tempdir().unwrap();
+        let content = (1..=10)
+            .map(|i| format!("line {i}"))
+            .collect::<Vec<_>>()
+            .join("\n");
+        write_file(dir.path(), "src/auth.ts", &content);
+        let cand = make_candidate("src/auth.ts", Language::TypeScript);
+        let f = into_finding(make_semantic(3, 5), &cand, None, dir.path());
+        assert!(f.code_snippet.contains("line 3"));
+        assert!(f.code_snippet.contains("line 4"));
+        assert!(f.code_snippet.contains("line 5"));
+        assert!(!f.code_snippet.contains("line 2"));
+        assert!(!f.code_snippet.contains("line 6"));
+    }
+
+    #[test]
+    fn into_finding_falls_back_to_empty_snippet_on_read_error() {
+        let dir = tempdir().unwrap();
+        // File doesn't exist.
+        let cand = make_candidate("nonexistent.ts", Language::TypeScript);
+        let f = into_finding(make_semantic(1, 5), &cand, None, dir.path());
+        assert_eq!(f.code_snippet, "");
+        // Other fields are still populated.
+        assert_eq!(f.pass, ScanPass::Semantic);
+        assert_eq!(f.line_start, 1);
+        assert_eq!(f.line_end, 5);
+    }
+
+    #[test]
+    fn category_slugs_round_trip() {
+        // Slugs match output_schema enum values.
+        assert_eq!(category_slug(AuthCategory::Rbac), "rbac");
+        assert_eq!(category_slug(AuthCategory::Abac), "abac");
+        assert_eq!(category_slug(AuthCategory::Middleware), "middleware");
+        assert_eq!(category_slug(AuthCategory::BusinessRule), "business_rule");
+        assert_eq!(category_slug(AuthCategory::Ownership), "ownership");
+        assert_eq!(category_slug(AuthCategory::FeatureGate), "feature_gate");
+        assert_eq!(category_slug(AuthCategory::Custom), "custom");
+    }
 }
diff --git a/src/deep/prompt.rs b/src/deep/prompt.rs
index cf780a6..7a858e9 100644
--- a/src/deep/prompt.rs
+++ b/src/deep/prompt.rs
@@ -1,19 +1,60 @@
 //! Prompt rendering and JSON output schema for the deep scan.
 //!
-//! Both `SYSTEM_PROMPT` and [`output_schema`] are exported for reuse by
-//! PR 2 (MCP server) and PR 3 (subprocess hook). They are the canonical
-//! contract that every transport binds to.
+//! [`SYSTEM_PROMPT`] and [`output_schema`] are exported for reuse by PR 2
+//! (MCP server) and PR 3 (subprocess hook). Every transport binds to this
+//! contract.
 //!
-//! Implementation lands in commit 4.
+//! Tone: terse. Local 7B-14B models do better with structured prompts than
+//! prose. Token economy matters when the user pays per million tokens.
+//!
+//! Framework guidance is injected per-call only when a known framework is
+//! detected in the candidate's imports — keeps the base prompt small for
+//! the common case, adds targeted hints when relevant.
 
+// Wired into the binary in commit 6; until then, dead from main()'s view.
 #![allow(dead_code)]
 
 use crate::deep::candidate::Candidate;
-use crate::types::Finding;
+use crate::types::{Finding, Language};
+
+/// System prompt sent on every deep-scan request. The seven `category` and
+/// three `confidence` enums match the `output_schema()` and the canonical
+/// [`crate::types::AuthCategory`] / [`crate::types::Confidence`] enums.
+pub const SYSTEM_PROMPT: &str = r#"You identify authorization logic in source code.
+
+AUTHZ:
+- Role checks (hasRole, isAdmin, requires X)
+- Attribute checks (user.tenant, user.plan)
+- Ownership (user X owns resource Y)
+- Route guards / middleware / decorators
+- Feature gates (plan-based, tenant-based, flag-based)
+- Business rules that gate access by user
+
+NOT AUTHZ:
+- Input validation, null checks
+- Rate limits not user-conditioned
+- Retry / idempotency / caching
+- Factory / service-locator / DI patterns
+- Logging or audit trails (the action, not the gate)
+
+CATEGORIES:
+- rbac: role-based
+- abac: attribute-based
+- middleware: route/handler-level guards
+- business_rule: domain-specific access rules
+- ownership: resource-owner checks
+- feature_gate: plan/tenant/flag-based
+- custom: doesn't fit the above
 
-/// System prompt sent on every deep-scan request. Defines the authz
-/// taxonomy, calibration guidance, and the structured-output contract.
-pub const SYSTEM_PROMPT: &str = ""; // commit 4
+CONFIDENCE:
+- high: unambiguous authz check
+- medium: likely authz, reasonable alternative interpretation exists
+- low: could be authz, depends on context not shown
+
+OUTPUT: JSON matching the supplied schema. No prose, no markdown fences.
+Empty findings array if no authz logic is present.
+For escalations: set is_false_positive=true ONLY when you reject the seed flag.
+Use line numbers from the supplied snippet."#;
 
 #[derive(Debug, Clone)]
 pub struct PromptInputs<'a> {
@@ -29,13 +70,433 @@ pub struct RenderedPrompt {
 }
 
 /// Build the per-candidate prompt + schema bundle.
-pub fn render(_inputs: &PromptInputs) -> RenderedPrompt {
-    unimplemented!("prompt::render: commit 4")
+pub fn render(inputs: &PromptInputs) -> RenderedPrompt {
+    let frameworks = detect_frameworks(&inputs.candidate.imports, inputs.candidate.language);
+
+    let mut user = String::with_capacity(inputs.candidate.source_snippet.len() + 512);
+    user.push_str("File: ");
+    user.push_str(&inputs.candidate.file.display().to_string());
+    user.push_str("\nLanguage: ");
+    user.push_str(&inputs.candidate.language.to_string());
+    user.push_str(&format!(
+        "\nLines: {}-{}\n",
+        inputs.candidate.line_start, inputs.candidate.line_end
+    ));
+
+    if let Some(seed) = inputs.structural_finding {
+        user.push_str(&format!(
+            "\nA structural rule flagged this region as {} ({}). Confirm or reject.\n",
+            seed.category, seed.confidence,
+        ));
+    }
+
+    if !frameworks.is_empty() {
+        user.push_str("\nFramework hints:\n");
+        for fw in &frameworks {
+            user.push_str("- ");
+            user.push_str(fw.name);
+            user.push_str(": ");
+            user.push_str(fw.guidance);
+            user.push('\n');
+        }
+    }
+
+    user.push_str("\n```");
+    user.push_str(language_fence(inputs.candidate.language));
+    user.push('\n');
+    user.push_str(&inputs.candidate.source_snippet);
+    if !inputs.candidate.source_snippet.ends_with('\n') {
+        user.push('\n');
+    }
+    user.push_str(
+        "```\n\nIdentify all authorization decisions in the snippet. Use line numbers from the snippet.",
+    );
+
+    RenderedPrompt {
+        system: SYSTEM_PROMPT.to_string(),
+        user,
+        schema: output_schema(),
+    }
 }
 
 /// JSON Schema the model must emit. Matches [`SemanticFinding`] field-for-field.
 ///
 /// [`SemanticFinding`]: crate::deep::finding::SemanticFinding
 pub fn output_schema() -> serde_json::Value {
-    unimplemented!("output_schema: commit 4")
+    serde_json::json!({
+        "type": "object",
+        "properties": {
+            "findings": {
+                "type": "array",
+                "items": {
+                    "type": "object",
+                    "properties": {
+                        "line_start":        { "type": "integer", "minimum": 1 },
+                        "line_end":          { "type": "integer", "minimum": 1 },
+                        "category":          {
+                            "type": "string",
+                            "enum": ["rbac", "abac", "middleware", "business_rule",
+                                     "ownership", "feature_gate", "custom"]
+                        },
+                        "confidence":        {
+                            "type": "string",
+                            "enum": ["low", "medium", "high"]
+                        },
+                        "description":       { "type": "string", "maxLength": 280 },
+                        "reasoning":         { "type": "string", "maxLength": 800 },
+                        "is_false_positive": { "type": "boolean" }
+                    },
+                    "required": ["line_start", "line_end", "category", "confidence",
+                                 "description", "reasoning", "is_false_positive"],
+                    "additionalProperties": false
+                }
+            }
+        },
+        "required": ["findings"],
+        "additionalProperties": false
+    })
+}
+
+// -- Framework detection ---------------------------------------------------
+
+struct Framework {
+    name: &'static str,
+    languages: &'static [Language],
+    /// Substrings to look for in the candidate's `imports` slice.
+    signatures: &'static [&'static str],
+    /// 1-2 sentence guidance injected into the user prompt when detected.
+    guidance: &'static str,
+}
+
+const FRAMEWORKS: &[Framework] = &[
+    Framework {
+        name: "Express",
+        languages: &[Language::TypeScript, Language::JavaScript],
+        signatures: &[
+            "from 'express'",
+            "from \"express\"",
+            "require('express')",
+            "require(\"express\")",
+        ],
+        guidance: "Express middleware in app.use(...) or app.METHOD(..., handler, ...) chains often gates access; flag as middleware. Common: requireAuth, passport.authenticate, role-checking middleware.",
+    },
+    Framework {
+        name: "NestJS",
+        languages: &[Language::TypeScript, Language::JavaScript],
+        signatures: &["@nestjs/", "from '@nestjs", "from \"@nestjs"],
+        guidance: "NestJS @UseGuards(...) decorators are middleware-category. @Roles(...) and @Permissions(...) are typically rbac.",
+    },
+    Framework {
+        name: "Next.js",
+        languages: &[Language::TypeScript, Language::JavaScript],
+        signatures: &["from 'next/", "from \"next/", "next-auth"],
+        guidance: "Next.js middleware.ts or route handlers calling getServerSession are often middleware. NextAuth session checks are middleware/rbac.",
+    },
+    Framework {
+        name: "Django",
+        languages: &[Language::Python],
+        signatures: &["from django.", "import django"],
+        guidance: "Django @login_required, @permission_required, @user_passes_test are middleware. request.user.has_perm(...) and request.user.groups are rbac. django-guardian object-level perms are abac/ownership.",
+    },
+    Framework {
+        name: "Flask",
+        languages: &[Language::Python],
+        signatures: &["from flask", "import flask"],
+        guidance: "Flask custom decorators using functools.wraps + flask.g.user are middleware. flask-login's @login_required is middleware.",
+    },
+    Framework {
+        name: "FastAPI",
+        languages: &[Language::Python],
+        signatures: &["from fastapi", "import fastapi"],
+        guidance: "FastAPI Depends(...) on auth-y functions is middleware. OAuth2PasswordBearer + Depends is rbac/middleware.",
+    },
+    Framework {
+        name: "Spring Security",
+        languages: &[Language::Java, Language::Kotlin],
+        signatures: &[
+            "org.springframework.security",
+            "import org.springframework.security",
+        ],
+        guidance: "Spring Security @PreAuthorize / @PostAuthorize / @Secured / @RolesAllowed are rbac. SecurityContextHolder.getContext().getAuthentication() reads current user. SecurityFilterChain / WebSecurityConfigurerAdapter are middleware.",
+    },
+    Framework {
+        name: "Rails",
+        languages: &[Language::Ruby],
+        signatures: &[
+            "ApplicationController",
+            "ActionController",
+            "Rails.application",
+            "before_action",
+        ],
+        guidance: "Rails before_action :auth_method is middleware. Pundit's authorize @resource and CanCanCan's can?/cannot? are rbac/abac. current_user is the universal user accessor.",
+    },
+    Framework {
+        name: "Gin",
+        languages: &[Language::Go],
+        signatures: &["github.com/gin-gonic/gin"],
+        guidance: "Gin gin.HandlerFunc returned from auth-y constructors are middleware. c.Set(\"user\", ...) followed by c.MustGet is the user-flow.",
+    },
+    Framework {
+        name: "Echo",
+        languages: &[Language::Go],
+        signatures: &["github.com/labstack/echo", "labstack/echo"],
+        guidance: "Echo middleware.JWT and middleware.BasicAuth are middleware. Custom MiddlewareFunc with role checks is rbac middleware.",
+    },
+    Framework {
+        name: "ASP.NET Core",
+        languages: &[Language::CSharp],
+        signatures: &["Microsoft.AspNetCore", "using Microsoft.AspNetCore"],
+        guidance: "[Authorize] / [Authorize(Roles=\"...\")] / [Authorize(Policy=\"...\")] attributes are rbac. User.IsInRole(...) and ClaimsPrincipal checks are rbac/abac. AuthorizationHandler<T> is custom.",
+    },
+    Framework {
+        name: "Laravel",
+        languages: &[Language::Php],
+        signatures: &["Illuminate\\", "use Illuminate"],
+        guidance: "Laravel middleware in routes (auth, can:, role:) is middleware. Gate::define and Gate::allows are abac. $user->can(...) is rbac/abac.",
+    },
+];
+
+fn detect_frameworks(imports: &[String], language: Language) -> Vec<&'static Framework> {
+    let combined = imports.join("\n");
+    FRAMEWORKS
+        .iter()
+        .filter(|fw| fw.languages.contains(&language))
+        .filter(|fw| fw.signatures.iter().any(|s| combined.contains(s)))
+        .collect()
+}
+
+fn language_fence(lang: Language) -> &'static str {
+    match lang {
+        Language::TypeScript => "typescript",
+        Language::JavaScript => "javascript",
+        Language::Java => "java",
+        Language::Python => "python",
+        Language::Go => "go",
+        Language::CSharp => "csharp",
+        Language::Kotlin => "kotlin",
+        Language::Ruby => "ruby",
+        Language::Php => "php",
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::deep::candidate::{Candidate, CandidateKind};
+    use crate::types::{AuthCategory, Confidence, ScanPass};
+    use std::path::PathBuf;
+
+    fn candidate_with_imports(language: Language, imports: Vec<String>) -> Candidate {
+        Candidate {
+            kind: CandidateKind::ColdRegion,
+            file: PathBuf::from("src/auth.ts"),
+            language,
+            line_start: 10,
+            line_end: 25,
+            source_snippet: "function isAdmin() { return user.role === 'admin'; }".into(),
+            imports,
+            original_finding_id: None,
+            seed_category: None,
+        }
+    }
+
+    fn finding_seed() -> Finding {
+        Finding {
+            id: "structural-1".into(),
+            file: PathBuf::from("src/auth.ts"),
+            line_start: 10,
+            line_end: 25,
+            code_snippet: String::new(),
+            language: Language::TypeScript,
+            category: AuthCategory::Custom,
+            confidence: Confidence::Low,
+            description: "matched custom rule".into(),
+            pattern_rule: Some("ts-custom-1".into()),
+            rego_stub: None,
+            pass: ScanPass::Structural,
+        }
+    }
+
+    #[test]
+    fn system_prompt_is_non_empty_and_concise() {
+        assert!(!SYSTEM_PROMPT.is_empty());
+        // Token-budget sanity: keep system prompt under ~2k chars (~500 tokens).
+        // Local 7B-14B models need room for the user prompt + framework hints.
+        assert!(SYSTEM_PROMPT.len() < 2_000, "SYSTEM_PROMPT is too verbose");
+    }
+
+    #[test]
+    fn system_prompt_lists_all_seven_categories() {
+        for cat in [
+            "rbac",
+            "abac",
+            "middleware",
+            "business_rule",
+            "ownership",
+            "feature_gate",
+            "custom",
+        ] {
+            assert!(
+                SYSTEM_PROMPT.contains(cat),
+                "SYSTEM_PROMPT missing category: {cat}"
+            );
+        }
+    }
+
+    #[test]
+    fn output_schema_has_required_shape() {
+        let schema = output_schema();
+        assert_eq!(schema["type"], "object");
+        assert_eq!(schema["required"][0], "findings");
+        let item_required = &schema["properties"]["findings"]["items"]["required"];
+        assert!(
+            item_required
+                .as_array()
+                .unwrap()
+                .contains(&serde_json::Value::String("line_start".into()))
+        );
+        assert!(
+            item_required
+                .as_array()
+                .unwrap()
+                .contains(&serde_json::Value::String("is_false_positive".into()))
+        );
+    }
+
+    #[test]
+    fn output_schema_categories_match_authcategory_enum() {
+        let schema = output_schema();
+        let categories =
+            schema["properties"]["findings"]["items"]["properties"]["category"]["enum"]
+                .as_array()
+                .unwrap();
+        let names: Vec<&str> = categories.iter().filter_map(|v| v.as_str()).collect();
+        assert_eq!(
+            names,
+            vec![
+                "rbac",
+                "abac",
+                "middleware",
+                "business_rule",
+                "ownership",
+                "feature_gate",
+                "custom"
+            ]
+        );
+    }
+
+    #[test]
+    fn render_includes_file_language_and_lines() {
+        let cand = candidate_with_imports(Language::TypeScript, vec![]);
+        let inputs = PromptInputs {
+            candidate: &cand,
+            structural_finding: None,
+        };
+        let rendered = render(&inputs);
+        assert!(rendered.user.contains("File: src/auth.ts"));
+        assert!(rendered.user.contains("Language: typescript"));
+        assert!(rendered.user.contains("Lines: 10-25"));
+        assert!(rendered.user.contains("```typescript"));
+    }
+
+    #[test]
+    fn render_includes_seed_when_escalation() {
+        let cand = candidate_with_imports(Language::TypeScript, vec![]);
+        let seed = finding_seed();
+        let inputs = PromptInputs {
+            candidate: &cand,
+            structural_finding: Some(&seed),
+        };
+        let rendered = render(&inputs);
+        assert!(rendered.user.contains("structural rule flagged"));
+        assert!(rendered.user.contains("Custom"));
+        assert!(rendered.user.contains("low"));
+    }
+
+    #[test]
+    fn render_omits_framework_section_when_none_detected() {
+        let cand =
+            candidate_with_imports(Language::TypeScript, vec!["// no framework here".into()]);
+        let inputs = PromptInputs {
+            candidate: &cand,
+            structural_finding: None,
+        };
+        let rendered = render(&inputs);
+        assert!(!rendered.user.contains("Framework hints:"));
+    }
+
+    #[test]
+    fn render_includes_framework_hints_when_detected() {
+        let cand = candidate_with_imports(
+            Language::TypeScript,
+            vec!["import express from 'express';".into()],
+        );
+        let inputs = PromptInputs {
+            candidate: &cand,
+            structural_finding: None,
+        };
+        let rendered = render(&inputs);
+        assert!(rendered.user.contains("Framework hints:"));
+        assert!(rendered.user.contains("Express"));
+    }
+
+    #[test]
+    fn detect_frameworks_respects_language() {
+        // Django import text inside a TS file (here, a comment) must NOT trigger the Django hint.
+        let imports = vec!["// from django.contrib.auth import login".into()];
+        let py = detect_frameworks(&imports, Language::Python);
+        let ts = detect_frameworks(&imports, Language::TypeScript);
+        assert!(py.iter().any(|fw| fw.name == "Django"));
+        assert!(!ts.iter().any(|fw| fw.name == "Django"));
+    }
+
+    #[test]
+    fn detect_frameworks_finds_spring_in_java() {
+        let imports =
+            vec!["import org.springframework.security.access.prepost.PreAuthorize;".into()];
+        let found = detect_frameworks(&imports, Language::Java);
+        assert!(found.iter().any(|fw| fw.name == "Spring Security"));
+    }
+
+    #[test]
+    fn detect_frameworks_finds_django_via_either_signature() {
+        for sig in ["from django.contrib.auth import login", "import django"] {
+            let imports = vec![sig.into()];
+            let found = detect_frameworks(&imports, Language::Python);
+            assert!(
+                found.iter().any(|fw| fw.name == "Django"),
+                "missed Django for: {sig}"
+            );
+        }
+    }
+
+    #[test]
+    fn detect_frameworks_finds_multiple() {
+        let imports = vec![
+            "import express from 'express';".into(),
+            "import { Module } from '@nestjs/common';".into(),
+        ];
+        let found = detect_frameworks(&imports, Language::TypeScript);
+        let names: Vec<&str> = found.iter().map(|fw| fw.name).collect();
+        assert!(names.contains(&"Express"));
+        assert!(names.contains(&"NestJS"));
+    }
+
+    #[test]
+    fn language_fence_covers_all_languages() {
+        for lang in [
+            Language::TypeScript,
+            Language::JavaScript,
+            Language::Java,
+            Language::Python,
+            Language::Go,
+            Language::CSharp,
+            Language::Kotlin,
+            Language::Ruby,
+            Language::Php,
+        ] {
+            let fence = language_fence(lang);
+            assert!(!fence.is_empty(), "language_fence empty for {lang:?}");
+        }
+    }
 }
diff --git a/src/scanner/discovery.rs b/src/scanner/discovery.rs
index f9c5826..14ec968 100644
--- a/src/scanner/discovery.rs
+++ b/src/scanner/discovery.rs
@@ -31,6 +31,7 @@ pub fn detect_language(path: &Path) -> Option<(Language, bool)> {
 /// C#, Kotlin, Ruby, PHP). Used by the deep (semantic) scan, which can run
 /// regex-based cold-region detection on any language regardless of grammar
 /// availability.
+#[allow(dead_code)] // wired into the binary in commit 6
 pub fn detect_language_for_deep(path: &Path) -> Option<(Language, bool)> {
     let ext = path.extension()?.to_str()?.to_ascii_lowercase();
     match ext.as_str() {
@@ -60,6 +61,7 @@ pub fn discover_files(
 /// Discover source files for the deep (semantic) scan. Behaves identically
 /// to [`discover_files`] but emits files in **all** languages from the
 /// [`Language`] enum, not only structurally-supported ones.
+#[allow(dead_code)] // wired into the binary in commit 6
 pub fn discover_files_for_deep(
     root: &Path,
     exclude_patterns: &[String],

From a343d1720e67672d9099dcb7008113b064648869 Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@enforceauth.com>
Date: Tue, 28 Apr 2026 21:31:46 -0400
Subject: [PATCH 07/18] feat(deep): OpenAI-compatible HTTP client + cost
 tracker
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Lands the network layer of the deep scan. After this commit, the deep
module can actually talk to a model — only the orchestrator wiring
(commit 6) remains to take it end-to-end.

OpenAiCompatibleClient
- POSTs to {base_url}/chat/completions with OpenAI-dialect bodies:
  model, messages, response_format (strict json_schema), temperature.
- Bearer auth from runtime.api_key when present.
- Retry-once-without-response_format on parse failure. Fixes the
  common case of older Ollama / llama.cpp servers ignoring the
  structured-output directive. Tests verify both the retry-succeeds
  and retry-also-fails paths.
- Markdown-fence stripping: some local models wrap JSON in ```json
  fences despite the system prompt saying not to. Stripped before parse.
- Auth errors (401/403) surface as DeepError::Config with a clear
  "auth rejected by {url}" message; all other HTTP errors are generic.
- request_timeout_secs honored.

CostTracker
- AtomicU64 spending counter in micro-USD precision; safe across
  concurrent requests when commit 6 fans out to max_concurrent.
- No-op when both per-1k rates are unset or zero — the local-model
  default never trips a cap.
- Cap-exceeded errors include exact spend at trip time.
- Thread-safety verified via a 10-thread concurrent-record test.

Plan deviation: split src/main.rs into src/lib.rs + thin main.rs
shim so integration tests in tests/ can `use zift::deep::*`.
Future-proofs PR 2 (the MCP server can depend on zift as a library).

Cargo additions:
  reqwest 0.12 (blocking + json + rustls-tls — hermetic, no OpenSSL)
  mockito 1    (dev-dep, sync)

23 new tests (7 cost + 6 client unit + 10 integration). 185 total.

Refs plans/todo/01-pr1-deep-http-transport.md §3, §8, §9, §10
---
 Cargo.lock                     | 1306 +++++++++++++++++++++++++++++++-
 Cargo.toml                     |    2 +
 src/deep/client.rs             |  229 +++++-
 src/deep/cost.rs               |  181 ++++-
 src/deep/error.rs              |    5 +-
 src/lib.rs                     |   17 +
 src/main.rs                    |   15 +-
 tests/deep_http_integration.rs |  336 ++++++++
 8 files changed, 2040 insertions(+), 51 deletions(-)
 create mode 100644 src/lib.rs
 create mode 100644 tests/deep_http_integration.rs

diff --git a/Cargo.lock b/Cargo.lock
index 3399069..7a8b676 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -47,7 +47,7 @@ version = "1.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "40c48f72fd53cd289104fc64099abca73db4166ad86ea0b4341abe65af83dadc"
 dependencies = [
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -58,7 +58,7 @@ checksum = "291e6a250ff86cd4a820112fb8898808a366d8f9f58ce16d1f538353ad55747d"
 dependencies = [
  "anstyle",
  "once_cell_polyfill",
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -67,12 +67,34 @@ version = "1.0.102"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7f202df86484c868dbad7eaa557ef785d5c66295e41b460ef922eca0723b842c"
 
+[[package]]
+name = "assert-json-diff"
+version = "2.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "47e4f2b81832e72834d7518d8487a0396a28cc408186a2e8854c0f98011faf12"
+dependencies = [
+ "serde",
+ "serde_json",
+]
+
+[[package]]
+name = "atomic-waker"
+version = "1.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1505bd5d3d116872e7271a6d4e16d81d0c8570876c8de68093a09ac269d8aac0"
+
 [[package]]
 name = "autocfg"
 version = "1.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c08606f8c3cbf4ce6ec8e28fb0014a2c086708fe954eaa885384a6165172e7e8"
 
+[[package]]
+name = "base64"
+version = "0.22.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "72b3254f16251a8381aa12e40e3c4d2f0199f8c6508fbecb9d91f575e0fbb8c6"
+
 [[package]]
 name = "bitflags"
 version = "2.11.1"
@@ -98,6 +120,18 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "bumpalo"
+version = "3.20.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5d20789868f4b01b2f2caec9f5c4e0213b41e3e5702a50157d699ae31ced2fcb"
+
+[[package]]
+name = "bytes"
+version = "1.11.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e748733b7cbc798e1434b6ac524f0c1ff2ab456fe201501e6497c8417a4fc33"
+
 [[package]]
 name = "cc"
 version = "1.2.60"
@@ -114,6 +148,12 @@ version = "1.0.4"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "9330f8b2ff13f34540b44e946ef35111825727b38d33286ef986142615121801"
 
+[[package]]
+name = "cfg_aliases"
+version = "0.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "613afe47fcd5fac7ccf1db93babcb082c5994d996f20b8b159f2ad1658eb5724"
+
 [[package]]
 name = "clap"
 version = "4.6.1"
@@ -160,6 +200,15 @@ version = "1.0.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "1d07550c9036bf2ae0c684c4297d503f838287c83c53686d05370d0e139ae570"
 
+[[package]]
+name = "colored"
+version = "3.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "faf9468729b8cbcea668e36183cb69d317348c2e08e994829fb56ebfdfbaac34"
+dependencies = [
+ "windows-sys 0.61.2",
+]
+
 [[package]]
 name = "cpufeatures"
 version = "0.2.17"
@@ -214,6 +263,17 @@ dependencies = [
  "crypto-common",
 ]
 
+[[package]]
+name = "displaydoc"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "97369cbbc041bc366949bc74d34658d6cda5621039731c6310521892a3a20ae0"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "equivalent"
 version = "1.0.2"
@@ -227,7 +287,7 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "39cab71617ae0d63f51a36d69f866391735b51691dbda63cf6f96d042b63efeb"
 dependencies = [
  "libc",
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -242,12 +302,76 @@ version = "0.1.9"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5baebc0774151f905a1a2cc41989300b1e6fbb29aff0ceffa1064fdd3088d582"
 
+[[package]]
+name = "fnv"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3f9eec918d3f24069decb9af1554cad7c880e2da24a9afd88aca000531ab82c1"
+
 [[package]]
 name = "foldhash"
 version = "0.1.5"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "d9c4f5dac5e15c24eb999c26181a6ca40b39fe946cbe4c263c7209467bc83af2"
 
+[[package]]
+name = "form_urlencoded"
+version = "1.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb4cb245038516f5f85277875cdaa4f7d2c9a0fa0468de06ed190163b1581fcf"
+dependencies = [
+ "percent-encoding",
+]
+
+[[package]]
+name = "futures-channel"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "07bbe89c50d7a535e539b8c17bc0b49bdb77747034daa8087407d655f3f7cc1d"
+dependencies = [
+ "futures-core",
+ "futures-sink",
+]
+
+[[package]]
+name = "futures-core"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "7e3450815272ef58cec6d564423f6e755e25379b217b0bc688e295ba24df6b1d"
+
+[[package]]
+name = "futures-io"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cecba35d7ad927e23624b22ad55235f2239cfa44fd10428eecbeba6d6a717718"
+
+[[package]]
+name = "futures-sink"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c39754e157331b013978ec91992bde1ac089843443c49cbc7f46150b0fad0893"
+
+[[package]]
+name = "futures-task"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "037711b3d59c33004d3856fbdc83b99d4ff37a24768fa1be9ce3538a1cde4393"
+
+[[package]]
+name = "futures-util"
+version = "0.3.32"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "389ca41296e6190b48053de0321d02a77f32f8a5d2461dd38762c0593805c6d6"
+dependencies = [
+ "futures-core",
+ "futures-io",
+ "futures-sink",
+ "futures-task",
+ "memchr",
+ "pin-project-lite",
+ "slab",
+]
+
 [[package]]
 name = "generic-array"
 version = "0.14.7"
@@ -258,6 +382,33 @@ dependencies = [
  "version_check",
 ]
 
+[[package]]
+name = "getrandom"
+version = "0.2.17"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff2abc00be7fca6ebc474524697ae276ad847ad0a6b3faa4bcb027e9a4614ad0"
+dependencies = [
+ "cfg-if",
+ "js-sys",
+ "libc",
+ "wasi",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "getrandom"
+version = "0.3.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "899def5c37c4fd7b2664648c28120ecec138e4d395b459e5ca34f9cce2dd77fd"
+dependencies = [
+ "cfg-if",
+ "js-sys",
+ "libc",
+ "r-efi 5.3.0",
+ "wasip2",
+ "wasm-bindgen",
+]
+
 [[package]]
 name = "getrandom"
 version = "0.4.2"
@@ -266,7 +417,7 @@ checksum = "0de51e6874e94e7bf76d726fc5d13ba782deca734ff60d5bb2fb2607c7406555"
 dependencies = [
  "cfg-if",
  "libc",
- "r-efi",
+ "r-efi 6.0.0",
  "wasip2",
  "wasip3",
 ]
@@ -284,6 +435,25 @@ dependencies = [
  "regex-syntax",
 ]
 
+[[package]]
+name = "h2"
+version = "0.4.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2f44da3a8150a6703ed5d34e164b875fd14c2cdab9af1252a9a1020bde2bdc54"
+dependencies = [
+ "atomic-waker",
+ "bytes",
+ "fnv",
+ "futures-core",
+ "futures-sink",
+ "http",
+ "indexmap",
+ "slab",
+ "tokio",
+ "tokio-util",
+ "tracing",
+]
+
 [[package]]
 name = "hashbrown"
 version = "0.15.5"
@@ -305,12 +475,221 @@ version = "0.5.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "2304e00983f87ffb38b55b444b5e3b60a884b5d30c0fca7d82fe33449bbe55ea"
 
+[[package]]
+name = "http"
+version = "1.4.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e3ba2a386d7f85a81f119ad7498ebe444d2e22c2af0b86b069416ace48b3311a"
+dependencies = [
+ "bytes",
+ "itoa",
+]
+
+[[package]]
+name = "http-body"
+version = "1.0.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1efedce1fb8e6913f23e0c92de8e62cd5b772a67e7b3946df930a62566c93184"
+dependencies = [
+ "bytes",
+ "http",
+]
+
+[[package]]
+name = "http-body-util"
+version = "0.1.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b021d93e26becf5dc7e1b75b1bed1fd93124b374ceb73f43d4d4eafec896a64a"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "http",
+ "http-body",
+ "pin-project-lite",
+]
+
+[[package]]
+name = "httparse"
+version = "1.10.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6dbf3de79e51f3d586ab4cb9d5c3e2c14aa28ed23d180cf89b4df0454a69cc87"
+
+[[package]]
+name = "httpdate"
+version = "1.0.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df3b46402a9d5adb4c86a0cf463f42e19994e3ee891101b1841f30a545cb49a9"
+
+[[package]]
+name = "hyper"
+version = "1.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6299f016b246a94207e63da54dbe807655bf9e00044f73ded42c3ac5305fbcca"
+dependencies = [
+ "atomic-waker",
+ "bytes",
+ "futures-channel",
+ "futures-core",
+ "h2",
+ "http",
+ "http-body",
+ "httparse",
+ "httpdate",
+ "itoa",
+ "pin-project-lite",
+ "smallvec",
+ "tokio",
+ "want",
+]
+
+[[package]]
+name = "hyper-rustls"
+version = "0.27.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "33ca68d021ef39cf6463ab54c1d0f5daf03377b70561305bb89a8f83aab66e0f"
+dependencies = [
+ "http",
+ "hyper",
+ "hyper-util",
+ "rustls",
+ "tokio",
+ "tokio-rustls",
+ "tower-service",
+ "webpki-roots",
+]
+
+[[package]]
+name = "hyper-util"
+version = "0.1.20"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "96547c2556ec9d12fb1578c4eaf448b04993e7fb79cbaad930a656880a6bdfa0"
+dependencies = [
+ "base64",
+ "bytes",
+ "futures-channel",
+ "futures-util",
+ "http",
+ "http-body",
+ "hyper",
+ "ipnet",
+ "libc",
+ "percent-encoding",
+ "pin-project-lite",
+ "socket2",
+ "tokio",
+ "tower-service",
+ "tracing",
+]
+
+[[package]]
+name = "icu_collections"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2984d1cd16c883d7935b9e07e44071dca8d917fd52ecc02c04d5fa0b5a3f191c"
+dependencies = [
+ "displaydoc",
+ "potential_utf",
+ "utf8_iter",
+ "yoke",
+ "zerofrom",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_locale_core"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92219b62b3e2b4d88ac5119f8904c10f8f61bf7e95b640d25ba3075e6cac2c29"
+dependencies = [
+ "displaydoc",
+ "litemap",
+ "tinystr",
+ "writeable",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_normalizer"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c56e5ee99d6e3d33bd91c5d85458b6005a22140021cc324cea84dd0e72cff3b4"
+dependencies = [
+ "icu_collections",
+ "icu_normalizer_data",
+ "icu_properties",
+ "icu_provider",
+ "smallvec",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_normalizer_data"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "da3be0ae77ea334f4da67c12f149704f19f81d1adf7c51cf482943e84a2bad38"
+
+[[package]]
+name = "icu_properties"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bee3b67d0ea5c2cca5003417989af8996f8604e34fb9ddf96208a033901e70de"
+dependencies = [
+ "icu_collections",
+ "icu_locale_core",
+ "icu_properties_data",
+ "icu_provider",
+ "zerotrie",
+ "zerovec",
+]
+
+[[package]]
+name = "icu_properties_data"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e2bbb201e0c04f7b4b3e14382af113e17ba4f63e2c9d2ee626b720cbce54a14"
+
+[[package]]
+name = "icu_provider"
+version = "2.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "139c4cf31c8b5f33d7e199446eff9c1e02decfc2f0eec2c8d71f65befa45b421"
+dependencies = [
+ "displaydoc",
+ "icu_locale_core",
+ "writeable",
+ "yoke",
+ "zerofrom",
+ "zerotrie",
+ "zerovec",
+]
+
 [[package]]
 name = "id-arena"
 version = "2.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "3d3067d79b975e8844ca9eb072e16b31c3c1c36928edf9c6789548c524d0d954"
 
+[[package]]
+name = "idna"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3b0875f23caa03898994f6ddc501886a45c7d3d62d04d2d90788d47be1b1e4de"
+dependencies = [
+ "idna_adapter",
+ "smallvec",
+ "utf8_iter",
+]
+
+[[package]]
+name = "idna_adapter"
+version = "1.2.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "cb68373c0d6620ef8105e855e7745e18b0d00d3bdb07fb532e434244cdb9a714"
+dependencies = [
+ "icu_normalizer",
+ "icu_properties",
+]
+
 [[package]]
 name = "ignore"
 version = "0.4.25"
@@ -339,6 +718,22 @@ dependencies = [
  "serde_core",
 ]
 
+[[package]]
+name = "ipnet"
+version = "2.12.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d98f6fed1fde3f8c21bc40a1abb88dd75e67924f9cffc3ef95607bad8017f8e2"
+
+[[package]]
+name = "iri-string"
+version = "0.7.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "25e659a4bb38e810ebc252e53b5814ff908a8c58c2a9ce2fae1bbec24cbf4e20"
+dependencies = [
+ "memchr",
+ "serde",
+]
+
 [[package]]
 name = "is_terminal_polyfill"
 version = "1.70.2"
@@ -351,6 +746,18 @@ version = "1.0.18"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "8f42a60cbdf9a97f5d2305f08a87dc4e09308d1276d28c869c684d7777685682"
 
+[[package]]
+name = "js-sys"
+version = "0.3.97"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a1840c94c045fbcf8ba2812c95db44499f7c64910a912551aaaa541decebcacf"
+dependencies = [
+ "cfg-if",
+ "futures-util",
+ "once_cell",
+ "wasm-bindgen",
+]
+
 [[package]]
 name = "lazy_static"
 version = "1.5.0"
@@ -375,12 +782,33 @@ version = "0.12.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "32a66949e030da00e8c7d4434b251670a91556f4144941d37452769c25d58a53"
 
+[[package]]
+name = "litemap"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "92daf443525c4cce67b150400bc2316076100ce0b3686209eb8cf3c31612e6f0"
+
+[[package]]
+name = "lock_api"
+version = "0.4.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "224399e74b87b5f3557511d98dff8b14089b3dadafcab6bb93eab67d3aace965"
+dependencies = [
+ "scopeguard",
+]
+
 [[package]]
 name = "log"
 version = "0.4.29"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5e5032e24019045c762d3c0f28f5b6b8bbf38563a65908389bf7978758920897"
 
+[[package]]
+name = "lru-slab"
+version = "0.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "112b39cec0b298b6c1999fee3e31427f74f676e4cb9879ed1a121b43661a4154"
+
 [[package]]
 name = "matchers"
 version = "0.2.0"
@@ -396,13 +824,49 @@ version = "2.8.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f8ca58f447f06ed17d5fc4043ce1b10dd205e060fb3ce5b979b8ed8e59ff3f79"
 
+[[package]]
+name = "mio"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "50b7e5b27aa02a74bac8c3f23f448f8d87ff11f92d3aac1a6ed369ee08cc56c1"
+dependencies = [
+ "libc",
+ "wasi",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "mockito"
+version = "1.7.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90820618712cab19cfc46b274c6c22546a82affcb3c3bdf0f29e3db8e1bb92c0"
+dependencies = [
+ "assert-json-diff",
+ "bytes",
+ "colored",
+ "futures-core",
+ "http",
+ "http-body",
+ "http-body-util",
+ "hyper",
+ "hyper-util",
+ "log",
+ "pin-project-lite",
+ "rand",
+ "regex",
+ "serde_json",
+ "serde_urlencoded",
+ "similar",
+ "tokio",
+]
+
 [[package]]
 name = "nu-ansi-term"
 version = "0.50.3"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7957b9740744892f114936ab4a57b3f487491bbeafaf8083688b16841a4240e5"
 dependencies = [
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -445,12 +909,59 @@ version = "1.70.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "384b8ab6d37215f3c5301a95a4accb5d64aa607f1fcb26a11b5303878451b4fe"
 
+[[package]]
+name = "parking_lot"
+version = "0.12.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "93857453250e3077bd71ff98b6a65ea6621a19bb0f559a85248955ac12c45a1a"
+dependencies = [
+ "lock_api",
+ "parking_lot_core",
+]
+
+[[package]]
+name = "parking_lot_core"
+version = "0.9.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2621685985a2ebf1c516881c026032ac7deafcda1a2c9b7850dc81e3dfcb64c1"
+dependencies = [
+ "cfg-if",
+ "libc",
+ "redox_syscall",
+ "smallvec",
+ "windows-link",
+]
+
+[[package]]
+name = "percent-encoding"
+version = "2.3.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b4f627cb1b25917193a259e49bdad08f671f8d9708acfd5fe0a8c1455d87220"
+
 [[package]]
 name = "pin-project-lite"
 version = "0.2.17"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "a89322df9ebe1c1578d689c92318e070967d1042b512afbe49518723f4e6d5cd"
 
+[[package]]
+name = "potential_utf"
+version = "0.1.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0103b1cef7ec0cf76490e969665504990193874ea05c85ff9bab8b911d0a0564"
+dependencies = [
+ "zerovec",
+]
+
+[[package]]
+name = "ppv-lite86"
+version = "0.2.21"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "85eae3c4ed2f50dcfe72643da4befc30deadb458a9b590d720cde2f2b1e97da9"
+dependencies = [
+ "zerocopy",
+]
+
 [[package]]
 name = "prettyplease"
 version = "0.2.37"
@@ -471,20 +982,119 @@ dependencies = [
 ]
 
 [[package]]
-name = "quote"
-version = "1.0.45"
+name = "quinn"
+version = "0.11.9"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9e20a958963c291dc322d98411f541009df2ced7b5a4f2bd52337638cfccf20"
+dependencies = [
+ "bytes",
+ "cfg_aliases",
+ "pin-project-lite",
+ "quinn-proto",
+ "quinn-udp",
+ "rustc-hash",
+ "rustls",
+ "socket2",
+ "thiserror",
+ "tokio",
+ "tracing",
+ "web-time",
+]
+
+[[package]]
+name = "quinn-proto"
+version = "0.11.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "434b42fec591c96ef50e21e886936e66d3cc3f737104fdb9b737c40ffb94c098"
+dependencies = [
+ "bytes",
+ "getrandom 0.3.4",
+ "lru-slab",
+ "rand",
+ "ring",
+ "rustc-hash",
+ "rustls",
+ "rustls-pki-types",
+ "slab",
+ "thiserror",
+ "tinyvec",
+ "tracing",
+ "web-time",
+]
+
+[[package]]
+name = "quinn-udp"
+version = "0.5.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "addec6a0dcad8a8d96a771f815f0eaf55f9d1805756410b39f5fa81332574cbd"
+dependencies = [
+ "cfg_aliases",
+ "libc",
+ "once_cell",
+ "socket2",
+ "tracing",
+ "windows-sys 0.60.2",
+]
+
+[[package]]
+name = "quote"
+version = "1.0.45"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "41f2619966050689382d2b44f664f4bc593e129785a36d6ee376ddf37259b924"
 dependencies = [
  "proc-macro2",
 ]
 
+[[package]]
+name = "r-efi"
+version = "5.3.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69cdb34c158ceb288df11e18b4bd39de994f6657d83847bdffdbd7f346754b0f"
+
 [[package]]
 name = "r-efi"
 version = "6.0.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f8dcc9c7d52a811697d2151c701e0d08956f92b0e24136cf4cf27b57a6a0d9bf"
 
+[[package]]
+name = "rand"
+version = "0.9.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "44c5af06bb1b7d3216d91932aed5265164bf384dc89cd6ba05cf59a35f5f76ea"
+dependencies = [
+ "rand_chacha",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_chacha"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3022b5f1df60f26e1ffddd6c66e8aa15de382ae63b3a0c1bfc0e4d3e3f325cb"
+dependencies = [
+ "ppv-lite86",
+ "rand_core",
+]
+
+[[package]]
+name = "rand_core"
+version = "0.9.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "76afc826de14238e6e8c374ddcc1fa19e374fd8dd986b0d2af0d02377261d83c"
+dependencies = [
+ "getrandom 0.3.4",
+]
+
+[[package]]
+name = "redox_syscall"
+version = "0.5.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ed2bf2547551a7053d6fdfafda3f938979645c44812fbfcda098faae3f1a362d"
+dependencies = [
+ "bitflags",
+]
+
 [[package]]
 name = "regex"
 version = "1.12.3"
@@ -530,6 +1140,66 @@ dependencies = [
  "thiserror",
 ]
 
+[[package]]
+name = "reqwest"
+version = "0.12.28"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eddd3ca559203180a307f12d114c268abf583f59b03cb906fd0b3ff8646c1147"
+dependencies = [
+ "base64",
+ "bytes",
+ "futures-channel",
+ "futures-core",
+ "futures-util",
+ "http",
+ "http-body",
+ "http-body-util",
+ "hyper",
+ "hyper-rustls",
+ "hyper-util",
+ "js-sys",
+ "log",
+ "percent-encoding",
+ "pin-project-lite",
+ "quinn",
+ "rustls",
+ "rustls-pki-types",
+ "serde",
+ "serde_json",
+ "serde_urlencoded",
+ "sync_wrapper",
+ "tokio",
+ "tokio-rustls",
+ "tower",
+ "tower-http",
+ "tower-service",
+ "url",
+ "wasm-bindgen",
+ "wasm-bindgen-futures",
+ "web-sys",
+ "webpki-roots",
+]
+
+[[package]]
+name = "ring"
+version = "0.17.14"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a4689e6c2294d81e88dc6261c768b63bc4fcdb852be6d1352498b114f61383b7"
+dependencies = [
+ "cc",
+ "cfg-if",
+ "getrandom 0.2.17",
+ "libc",
+ "untrusted",
+ "windows-sys 0.52.0",
+]
+
+[[package]]
+name = "rustc-hash"
+version = "2.1.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94300abf3f1ae2e2b8ffb7b58043de3d399c73fa6f4b73826402a5c457614dbe"
+
 [[package]]
 name = "rustix"
 version = "1.1.4"
@@ -540,9 +1210,56 @@ dependencies = [
  "errno",
  "libc",
  "linux-raw-sys",
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
+[[package]]
+name = "rustls"
+version = "0.23.40"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ef86cd5876211988985292b91c96a8f2d298df24e75989a43a3c73f2d4d8168b"
+dependencies = [
+ "once_cell",
+ "ring",
+ "rustls-pki-types",
+ "rustls-webpki",
+ "subtle",
+ "zeroize",
+]
+
+[[package]]
+name = "rustls-pki-types"
+version = "1.14.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "30a7197ae7eb376e574fe940d068c30fe0462554a3ddbe4eca7838e049c937a9"
+dependencies = [
+ "web-time",
+ "zeroize",
+]
+
+[[package]]
+name = "rustls-webpki"
+version = "0.103.13"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "61c429a8649f110dddef65e2a5ad240f747e85f7758a6bccc7e5777bd33f756e"
+dependencies = [
+ "ring",
+ "rustls-pki-types",
+ "untrusted",
+]
+
+[[package]]
+name = "rustversion"
+version = "1.0.22"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b39cdef0fa800fc44525c84ccb54a029961a8215f9619753635a9c0d2538d46d"
+
+[[package]]
+name = "ryu"
+version = "1.0.23"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9774ba4a74de5f7b1c1451ed6cd5285a32eddb5cccb8cc655a4e50009e06477f"
+
 [[package]]
 name = "same-file"
 version = "1.0.6"
@@ -552,6 +1269,12 @@ dependencies = [
  "winapi-util",
 ]
 
+[[package]]
+name = "scopeguard"
+version = "1.2.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "94143f37725109f92c262ed2cf5e59bce7498c01bcc1502d7b9afe439a4e9f49"
+
 [[package]]
 name = "semver"
 version = "1.0.28"
@@ -610,6 +1333,18 @@ dependencies = [
  "serde",
 ]
 
+[[package]]
+name = "serde_urlencoded"
+version = "0.7.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d3491c14715ca2294c4d6a88f15e84739788c1d030eed8c110436aafdaa2f3fd"
+dependencies = [
+ "form_urlencoded",
+ "itoa",
+ "ryu",
+ "serde",
+]
+
 [[package]]
 name = "sha2"
 version = "0.10.9"
@@ -636,18 +1371,46 @@ version = "1.3.0"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "0fda2ff0d084019ba4d7c6f371c95d8fd75ce3524c3cb8fb653a3023f6323e64"
 
+[[package]]
+name = "similar"
+version = "2.7.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bbbb5d9659141646ae647b42fe094daf6c6192d1620870b449d9557f748b2daa"
+
+[[package]]
+name = "slab"
+version = "0.4.12"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0c790de23124f9ab44544d7ac05d60440adc586479ce501c1d6d7da3cd8c9cf5"
+
 [[package]]
 name = "smallvec"
 version = "1.15.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "67b1b7a3b5fe4f1376887184045fcf45c69e92af734b7aaddc05fb777b6fbd03"
 
+[[package]]
+name = "socket2"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3a766e1110788c36f4fa1c2b71b387a7815aa65f88ce0229841826633d93723e"
+dependencies = [
+ "libc",
+ "windows-sys 0.61.2",
+]
+
 [[package]]
 name = "spin"
 version = "0.9.8"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "6980e8d7511241f8acf4aebddbb1ff938df5eebe98691418c4468d0b72a96a67"
 
+[[package]]
+name = "stable_deref_trait"
+version = "1.2.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "6ce2be8dc25455e1f91df71bfa12ad37d7af1092ae736f3a6cd0e37bc7810596"
+
 [[package]]
 name = "streaming-iterator"
 version = "0.1.9"
@@ -660,6 +1423,12 @@ version = "0.11.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "7da8b5736845d9f2fcb837ea5d9e2628564b3b043a70948a3f0b778838c5fb4f"
 
+[[package]]
+name = "subtle"
+version = "2.6.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "13c2bddecc57b384dee18652358fb23172facb8a2c51ccc10d74c157bdea3292"
+
 [[package]]
 name = "syn"
 version = "2.0.117"
@@ -671,6 +1440,26 @@ dependencies = [
  "unicode-ident",
 ]
 
+[[package]]
+name = "sync_wrapper"
+version = "1.0.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0bf256ce5efdfa370213c1dabab5935a12e49f2c58d15e9eac2870d3b4f27263"
+dependencies = [
+ "futures-core",
+]
+
+[[package]]
+name = "synstructure"
+version = "0.13.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "728a70f3dbaf5bab7f0c4b1ac8d7ae5ea60a4b5549c8a5914361c99147a709d2"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "tempfile"
 version = "3.27.0"
@@ -678,10 +1467,10 @@ source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "32497e9a4c7b38532efcdebeef879707aa9f794296a4f0244f6f69e9bc8574bd"
 dependencies = [
  "fastrand",
- "getrandom",
+ "getrandom 0.4.2",
  "once_cell",
  "rustix",
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -713,6 +1502,69 @@ dependencies = [
  "cfg-if",
 ]
 
+[[package]]
+name = "tinystr"
+version = "0.8.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "c8323304221c2a851516f22236c5722a72eaa19749016521d6dff0824447d96d"
+dependencies = [
+ "displaydoc",
+ "zerovec",
+]
+
+[[package]]
+name = "tinyvec"
+version = "1.11.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "3e61e67053d25a4e82c844e8424039d9745781b3fc4f32b8d55ed50f5f667ef3"
+dependencies = [
+ "tinyvec_macros",
+]
+
+[[package]]
+name = "tinyvec_macros"
+version = "0.1.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1f3ccbac311fea05f86f61904b462b55fb3df8837a366dfc601a0161d0532f20"
+
+[[package]]
+name = "tokio"
+version = "1.52.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b67dee974fe86fd92cc45b7a95fdd2f99a36a6d7b0d431a231178d3d670bbcc6"
+dependencies = [
+ "bytes",
+ "libc",
+ "mio",
+ "parking_lot",
+ "pin-project-lite",
+ "socket2",
+ "windows-sys 0.61.2",
+]
+
+[[package]]
+name = "tokio-rustls"
+version = "0.26.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1729aa945f29d91ba541258c8df89027d5792d85a8841fb65e8bf0f4ede4ef61"
+dependencies = [
+ "rustls",
+ "tokio",
+]
+
+[[package]]
+name = "tokio-util"
+version = "0.7.18"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9ae9cec805b01e8fc3fd2fe289f89149a9b66dd16786abd8b19cfa7b48cb0098"
+dependencies = [
+ "bytes",
+ "futures-core",
+ "futures-sink",
+ "pin-project-lite",
+ "tokio",
+]
+
 [[package]]
 name = "toml"
 version = "0.8.23"
@@ -754,6 +1606,51 @@ version = "0.1.2"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "5d99f8c9a7727884afe522e9bd5edbfc91a3312b36a77b5fb8926e4c31a41801"
 
+[[package]]
+name = "tower"
+version = "0.5.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ebe5ef63511595f1344e2d5cfa636d973292adc0eec1f0ad45fae9f0851ab1d4"
+dependencies = [
+ "futures-core",
+ "futures-util",
+ "pin-project-lite",
+ "sync_wrapper",
+ "tokio",
+ "tower-layer",
+ "tower-service",
+]
+
+[[package]]
+name = "tower-http"
+version = "0.6.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d4e6559d53cc268e5031cd8429d05415bc4cb4aefc4aa5d6cc35fbf5b924a1f8"
+dependencies = [
+ "bitflags",
+ "bytes",
+ "futures-util",
+ "http",
+ "http-body",
+ "iri-string",
+ "pin-project-lite",
+ "tower",
+ "tower-layer",
+ "tower-service",
+]
+
+[[package]]
+name = "tower-layer"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "121c2a6cda46980bb0fcd1647ffaf6cd3fc79a013de288782836f6df9c48780e"
+
+[[package]]
+name = "tower-service"
+version = "0.3.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8df9b6e13f2d32c91b9bd719c00d1958837bc7dec474d94952798cc8e69eeec3"
+
 [[package]]
 name = "tracing"
 version = "0.1.44"
@@ -864,6 +1761,12 @@ dependencies = [
  "tree-sitter-language",
 ]
 
+[[package]]
+name = "try-lock"
+version = "0.2.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "e421abadd41a4225275504ea4d6566923418b7f05506fbc9c0fe86ba7396114b"
+
 [[package]]
 name = "typenum"
 version = "1.20.0"
@@ -882,6 +1785,30 @@ version = "0.2.6"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "ebc1c04c71510c7f702b52b7c350734c9ff1295c464a03335b00bb84fc54f853"
 
+[[package]]
+name = "untrusted"
+version = "0.9.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8ecb6da28b8a351d773b68d5825ac39017e680750f980f3a1a85cd8dd28a47c1"
+
+[[package]]
+name = "url"
+version = "2.5.8"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ff67a8a4397373c3ef660812acab3268222035010ab8680ec4215f38ba3d0eed"
+dependencies = [
+ "form_urlencoded",
+ "idna",
+ "percent-encoding",
+ "serde",
+]
+
+[[package]]
+name = "utf8_iter"
+version = "1.0.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b6c140620e7ffbb22c2dee59cafe6084a59b5ffc27a8859a5f0d494b5d52b6be"
+
 [[package]]
 name = "utf8parse"
 version = "0.2.2"
@@ -910,6 +1837,21 @@ dependencies = [
  "winapi-util",
 ]
 
+[[package]]
+name = "want"
+version = "0.3.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "bfa7760aed19e106de2c7c0b581b509f2f25d3dacaf737cb82ac61bc6d760b0e"
+dependencies = [
+ "try-lock",
+]
+
+[[package]]
+name = "wasi"
+version = "0.11.1+wasi-snapshot-preview1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "ccf3ec651a847eb01de73ccad15eb7d99f80485de043efb2f370cd654f4ea44b"
+
 [[package]]
 name = "wasip2"
 version = "1.0.3+wasi-0.2.9"
@@ -928,6 +1870,61 @@ dependencies = [
  "wit-bindgen 0.51.0",
 ]
 
+[[package]]
+name = "wasm-bindgen"
+version = "0.2.120"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "df52b6d9b87e0c74c9edfa1eb2d9bf85e5d63515474513aa50fa181b3c4f5db1"
+dependencies = [
+ "cfg-if",
+ "once_cell",
+ "rustversion",
+ "wasm-bindgen-macro",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-futures"
+version = "0.4.70"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "af934872acec734c2d80e6617bbb5ff4f12b052dd8e6332b0817bce889516084"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "wasm-bindgen-macro"
+version = "0.2.120"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "78b1041f495fb322e64aca85f5756b2172e35cd459376e67f2a6c9dffcedb103"
+dependencies = [
+ "quote",
+ "wasm-bindgen-macro-support",
+]
+
+[[package]]
+name = "wasm-bindgen-macro-support"
+version = "0.2.120"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9dcd0ff20416988a18ac686d4d4d0f6aae9ebf08a389ff5d29012b05af2a1b41"
+dependencies = [
+ "bumpalo",
+ "proc-macro2",
+ "quote",
+ "syn",
+ "wasm-bindgen-shared",
+]
+
+[[package]]
+name = "wasm-bindgen-shared"
+version = "0.2.120"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "49757b3c82ebf16c57d69365a142940b384176c24df52a087fb748e2085359ea"
+dependencies = [
+ "unicode-ident",
+]
+
 [[package]]
 name = "wasm-encoder"
 version = "0.244.0"
@@ -962,13 +1959,42 @@ dependencies = [
  "semver",
 ]
 
+[[package]]
+name = "web-sys"
+version = "0.3.97"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "2eadbac71025cd7b0834f20d1fe8472e8495821b4e9801eb0a60bd1f19827602"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "web-time"
+version = "1.1.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "5a6580f308b1fad9207618087a65c04e7a10bc77e02c8e84e9b00dd4b12fa0bb"
+dependencies = [
+ "js-sys",
+ "wasm-bindgen",
+]
+
+[[package]]
+name = "webpki-roots"
+version = "1.0.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "52f5ee44c96cf55f1b349600768e3ece3a8f26010c05265ab73f945bb1a2eb9d"
+dependencies = [
+ "rustls-pki-types",
+]
+
 [[package]]
 name = "winapi-util"
 version = "0.1.11"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "c2a7b1c03c876122aa43f3020e6c3c3ee5c05081c9a00739faf7503aeba10d22"
 dependencies = [
- "windows-sys",
+ "windows-sys 0.61.2",
 ]
 
 [[package]]
@@ -977,6 +2003,24 @@ version = "0.2.1"
 source = "registry+https://github.com/rust-lang/crates.io-index"
 checksum = "f0805222e57f7521d6a62e36fa9163bc891acd422f971defe97d64e70d0a4fe5"
 
+[[package]]
+name = "windows-sys"
+version = "0.52.0"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "282be5f36a8ce781fad8c8ae18fa3f9beff57ec1b52cb3de0789201425d9a33d"
+dependencies = [
+ "windows-targets 0.52.6",
+]
+
+[[package]]
+name = "windows-sys"
+version = "0.60.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "f2f500e4d28234f72040990ec9d39e3a6b950f9f22d3dba18416c35882612bcb"
+dependencies = [
+ "windows-targets 0.53.5",
+]
+
 [[package]]
 name = "windows-sys"
 version = "0.61.2"
@@ -986,6 +2030,135 @@ dependencies = [
  "windows-link",
 ]
 
+[[package]]
+name = "windows-targets"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9b724f72796e036ab90c1021d4780d4d3d648aca59e491e6b98e725b84e99973"
+dependencies = [
+ "windows_aarch64_gnullvm 0.52.6",
+ "windows_aarch64_msvc 0.52.6",
+ "windows_i686_gnu 0.52.6",
+ "windows_i686_gnullvm 0.52.6",
+ "windows_i686_msvc 0.52.6",
+ "windows_x86_64_gnu 0.52.6",
+ "windows_x86_64_gnullvm 0.52.6",
+ "windows_x86_64_msvc 0.52.6",
+]
+
+[[package]]
+name = "windows-targets"
+version = "0.53.5"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "4945f9f551b88e0d65f3db0bc25c33b8acea4d9e41163edf90dcd0b19f9069f3"
+dependencies = [
+ "windows-link",
+ "windows_aarch64_gnullvm 0.53.1",
+ "windows_aarch64_msvc 0.53.1",
+ "windows_i686_gnu 0.53.1",
+ "windows_i686_gnullvm 0.53.1",
+ "windows_i686_msvc 0.53.1",
+ "windows_x86_64_gnu 0.53.1",
+ "windows_x86_64_gnullvm 0.53.1",
+ "windows_x86_64_msvc 0.53.1",
+]
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "32a4622180e7a0ec044bb555404c800bc9fd9ec262ec147edd5989ccd0c02cd3"
+
+[[package]]
+name = "windows_aarch64_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "a9d8416fa8b42f5c947f8482c43e7d89e73a173cead56d044f6a56104a6d1b53"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "09ec2a7bb152e2252b53fa7803150007879548bc709c039df7627cabbd05d469"
+
+[[package]]
+name = "windows_aarch64_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b9d782e804c2f632e395708e99a94275910eb9100b2114651e04744e9b125006"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "8e9b5ad5ab802e97eb8e295ac6720e509ee4c243f69d781394014ebfe8bbfa0b"
+
+[[package]]
+name = "windows_i686_gnu"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "960e6da069d81e09becb0ca57a65220ddff016ff2d6af6a223cf372a506593a3"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0eee52d38c090b3caa76c563b86c3a4bd71ef1a819287c19d586d7334ae8ed66"
+
+[[package]]
+name = "windows_i686_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "fa7359d10048f68ab8b09fa71c3daccfb0e9b559aed648a8f95469c27057180c"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "240948bc05c5e7c6dabba28bf89d89ffce3e303022809e73deaefe4f6ec56c66"
+
+[[package]]
+name = "windows_i686_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1e7ac75179f18232fe9c285163565a57ef8d3c89254a30685b57d83a38d326c2"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "147a5c80aabfbf0c7d901cb5895d1de30ef2907eb21fbbab29ca94c5b08b1a78"
+
+[[package]]
+name = "windows_x86_64_gnu"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "9c3842cdd74a865a8066ab39c8a7a473c0778a3f29370b5fd6b4b9aa7df4a499"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "24d5b23dc417412679681396f2b49f3de8c1473deb516bd34410872eff51ed0d"
+
+[[package]]
+name = "windows_x86_64_gnullvm"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0ffa179e2d07eee8ad8f57493436566c7cc30ac536a3379fdf008f47f6bb7ae1"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.52.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "589f6da84c646204747d1270a2a5661ea66ed1cced2631d546fdfb155959f9ec"
+
+[[package]]
+name = "windows_x86_64_msvc"
+version = "0.53.1"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "d6bbff5f0aada427a1e5a6da5f1f98158182f26556f345ac9e04d36d0ebed650"
+
 [[package]]
 name = "winnow"
 version = "0.7.15"
@@ -1089,14 +2262,125 @@ dependencies = [
  "wasmparser",
 ]
 
+[[package]]
+name = "writeable"
+version = "0.6.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "1ffae5123b2d3fc086436f8834ae3ab053a283cfac8fe0a0b8eaae044768a4c4"
+
+[[package]]
+name = "yoke"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "abe8c5fda708d9ca3df187cae8bfb9ceda00dd96231bed36e445a1a48e66f9ca"
+dependencies = [
+ "stable_deref_trait",
+ "yoke-derive",
+ "zerofrom",
+]
+
+[[package]]
+name = "yoke-derive"
+version = "0.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "de844c262c8848816172cef550288e7dc6c7b7814b4ee56b3e1553f275f1858e"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "synstructure",
+]
+
+[[package]]
+name = "zerocopy"
+version = "0.8.48"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "eed437bf9d6692032087e337407a86f04cd8d6a16a37199ed57949d415bd68e9"
+dependencies = [
+ "zerocopy-derive",
+]
+
+[[package]]
+name = "zerocopy-derive"
+version = "0.8.48"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "70e3cd084b1788766f53af483dd21f93881ff30d7320490ec3ef7526d203bad4"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
+[[package]]
+name = "zerofrom"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "69faa1f2a1ea75661980b013019ed6687ed0e83d069bc1114e2cc74c6c04c4df"
+dependencies = [
+ "zerofrom-derive",
+]
+
+[[package]]
+name = "zerofrom-derive"
+version = "0.1.7"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "11532158c46691caf0f2593ea8358fed6bbf68a0315e80aae9bd41fbade684a1"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+ "synstructure",
+]
+
+[[package]]
+name = "zeroize"
+version = "1.8.2"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "b97154e67e32c85465826e8bcc1c59429aaaf107c1e4a9e53c8d8ccd5eff88d0"
+
+[[package]]
+name = "zerotrie"
+version = "0.2.4"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "0f9152d31db0792fa83f70fb2f83148effb5c1f5b8c7686c3459e361d9bc20bf"
+dependencies = [
+ "displaydoc",
+ "yoke",
+ "zerofrom",
+]
+
+[[package]]
+name = "zerovec"
+version = "0.11.6"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "90f911cbc359ab6af17377d242225f4d75119aec87ea711a880987b18cd7b239"
+dependencies = [
+ "yoke",
+ "zerofrom",
+ "zerovec-derive",
+]
+
+[[package]]
+name = "zerovec-derive"
+version = "0.11.3"
+source = "registry+https://github.com/rust-lang/crates.io-index"
+checksum = "625dc425cab0dca6dc3c3319506e6593dcb08a9f387ea3b284dbd52a92c40555"
+dependencies = [
+ "proc-macro2",
+ "quote",
+ "syn",
+]
+
 [[package]]
 name = "zift"
 version = "0.1.2"
 dependencies = [
  "clap",
  "ignore",
+ "mockito",
  "regex",
  "regorus",
+ "reqwest",
  "serde",
  "serde_json",
  "sha2",
diff --git a/Cargo.toml b/Cargo.toml
index 2a1d6f1..4d59f87 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -23,9 +23,11 @@ sha2 = "0.10"
 regex = "1"
 streaming-iterator = "0.1"
 regorus = { version = "0.9", default-features = false, features = ["arc"] }
+reqwest = { version = "0.12", default-features = false, features = ["blocking", "json", "rustls-tls"] }
 
 [dev-dependencies]
 tempfile = "3"
+mockito = "1"
 
 [package.metadata.binstall]
 pkg-url = "{ repo }/releases/download/v{ version }/zift-{ target }{ archive-suffix }"
diff --git a/src/deep/client.rs b/src/deep/client.rs
index 9134c91..ae39be1 100644
--- a/src/deep/client.rs
+++ b/src/deep/client.rs
@@ -5,14 +5,20 @@
 //! backend that exposes the OpenAI dialect (Ollama, LM Studio, llama.cpp,
 //! vLLM, OpenRouter, OpenAI itself, Anthropic-via-proxy, …).
 //!
-//! Implementation lands in commit 5 (where reqwest enters the build).
+//! On parse failure of the structured-output response, the client retries
+//! once **without** `response_format` — many local servers (older Ollama,
+//! llama.cpp's `server`) ignore that field and fall back to plain text or
+//! emit JSON in the message body anyway. The retry strips the directive
+//! and re-parses; if that still fails, we return [`DeepError::BadResponse`].
 
-#![allow(dead_code)]
+#![allow(dead_code)] // wired into the binary in commit 6
 
 use crate::deep::config::DeepRuntime;
 use crate::deep::error::DeepError;
 use crate::deep::finding::SemanticFinding;
 use crate::deep::prompt::RenderedPrompt;
+use serde::Deserialize;
+use std::time::Duration;
 
 #[derive(Debug, Clone, Copy, Default, PartialEq, Eq)]
 pub struct TokenUsage {
@@ -26,21 +32,220 @@ pub struct AnalyzeResponse {
     pub usage: TokenUsage,
 }
 
-/// HTTP client for an OpenAI-compatible chat-completions endpoint.
-///
-/// Fields land in commit 5 (`reqwest::blocking::Client`, base_url, api_key,
-/// model, temperature).
 pub struct OpenAiCompatibleClient {
-    // Body lands in commit 5.
+    http: reqwest::blocking::Client,
+    base_url: String,
+    api_key: Option<String>,
+    model: String,
+    temperature: f32,
 }
 
 impl OpenAiCompatibleClient {
-    pub fn new(_runtime: &DeepRuntime) -> Result<Self, DeepError> {
-        unimplemented!("OpenAiCompatibleClient::new: commit 5")
+    pub fn new(runtime: &DeepRuntime) -> Result<Self, DeepError> {
+        let http = reqwest::blocking::Client::builder()
+            .timeout(Duration::from_secs(runtime.request_timeout_secs))
+            .build()
+            .map_err(|e| DeepError::Config(format!("failed to build HTTP client: {e}")))?;
+
+        Ok(Self {
+            http,
+            base_url: runtime.base_url.trim_end_matches('/').to_string(),
+            api_key: runtime.api_key.clone(),
+            model: runtime.model.clone(),
+            temperature: runtime.temperature,
+        })
+    }
+
+    /// Send one prompt to the endpoint and parse the response. Retries once
+    /// without `response_format` if the first attempt's content fails to
+    /// parse as our findings schema.
+    pub fn analyze(&self, prompt: &RenderedPrompt) -> Result<AnalyzeResponse, DeepError> {
+        match self.try_analyze(prompt, true) {
+            Ok(resp) => Ok(resp),
+            Err(DeepError::BadResponse(msg)) => {
+                tracing::debug!("deep: retrying without response_format after bad JSON: {msg}");
+                self.try_analyze(prompt, false)
+            }
+            Err(other) => Err(other),
+        }
+    }
+
+    fn try_analyze(
+        &self,
+        prompt: &RenderedPrompt,
+        with_response_format: bool,
+    ) -> Result<AnalyzeResponse, DeepError> {
+        let url = format!("{}/chat/completions", self.base_url);
+
+        let mut body = serde_json::json!({
+            "model": self.model,
+            "messages": [
+                {"role": "system", "content": prompt.system},
+                {"role": "user",   "content": prompt.user}
+            ],
+            "temperature": self.temperature,
+        });
+        if with_response_format {
+            body["response_format"] = serde_json::json!({
+                "type": "json_schema",
+                "json_schema": {
+                    "name": "zift_findings",
+                    "strict": true,
+                    "schema": prompt.schema,
+                }
+            });
+        }
+
+        let mut req = self.http.post(&url).json(&body);
+        if let Some(key) = &self.api_key {
+            req = req.bearer_auth(key);
+        }
+
+        let response = req.send()?;
+        let status = response.status();
+        if !status.is_success() {
+            // Auth errors get distinct surfacing; everything else is generic.
+            if status.as_u16() == 401 || status.as_u16() == 403 {
+                return Err(DeepError::Config(format!(
+                    "auth rejected by {} ({})",
+                    self.base_url, status
+                )));
+            }
+            return Err(DeepError::Config(format!(
+                "HTTP {} from {}",
+                status, self.base_url
+            )));
+        }
+
+        let body: ChatCompletionResponse = response
+            .json()
+            .map_err(|e| DeepError::BadResponse(format!("response was not valid JSON: {e}")))?;
+
+        let content = body
+            .choices
+            .into_iter()
+            .next()
+            .and_then(|c| c.message.content)
+            .ok_or_else(|| DeepError::BadResponse("response had no message content".into()))?;
+
+        // Try to parse the message content as our findings envelope.
+        // Some servers wrap JSON in markdown fences; strip those if present.
+        let content_clean = strip_markdown_fence(&content);
+        let parsed: FindingsEnvelope = serde_json::from_str(content_clean).map_err(|e| {
+            DeepError::BadResponse(format!(
+                "content was not valid findings JSON: {e}; got: {}",
+                truncate_for_log(&content)
+            ))
+        })?;
+
+        let usage = TokenUsage {
+            input_tokens: body.usage.as_ref().map(|u| u.prompt_tokens).unwrap_or(0),
+            output_tokens: body
+                .usage
+                .as_ref()
+                .map(|u| u.completion_tokens)
+                .unwrap_or(0),
+        };
+
+        Ok(AnalyzeResponse {
+            findings: parsed.findings,
+            usage,
+        })
+    }
+}
+
+/// Strip a leading/trailing ```json``` (or ```) markdown fence if present.
+/// Some local models wrap JSON in fences despite system-prompt instructions
+/// not to.
+fn strip_markdown_fence(s: &str) -> &str {
+    let trimmed = s.trim();
+    let after_fence = trimmed
+        .strip_prefix("```json")
+        .or_else(|| trimmed.strip_prefix("```"))
+        .unwrap_or(trimmed);
+    after_fence
+        .trim()
+        .strip_suffix("```")
+        .unwrap_or(after_fence)
+        .trim()
+}
+
+fn truncate_for_log(s: &str) -> String {
+    const MAX: usize = 200;
+    // Clip to MAX bytes for logs; cut on a char boundary since `&s[..MAX]` can panic.
+    match (0..=MAX).rev().find(|&i| s.is_char_boundary(i)) {
+        Some(end) if s.len() > MAX => format!("{}...", &s[..end]),
+        _ => s.to_string(),
+    }
+}
+
+// -- OpenAI response types -------------------------------------------------
+
+#[derive(Deserialize)]
+struct ChatCompletionResponse {
+    choices: Vec<ChatChoice>,
+    usage: Option<UsageStats>,
+}
+
+#[derive(Deserialize)]
+struct ChatChoice {
+    message: ChatMessage,
+}
+
+#[derive(Deserialize)]
+struct ChatMessage {
+    content: Option<String>,
+}
+
+#[derive(Deserialize)]
+struct UsageStats {
+    prompt_tokens: u32,
+    completion_tokens: u32,
+}
+
+#[derive(Deserialize)]
+struct FindingsEnvelope {
+    findings: Vec<SemanticFinding>,
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    #[test]
+    fn strip_fence_handles_json_fence() {
+        let raw = "```json\n{\"findings\": []}\n```";
+        assert_eq!(strip_markdown_fence(raw), "{\"findings\": []}");
+    }
+
+    #[test]
+    fn strip_fence_handles_plain_fence() {
+        let raw = "```\n{\"findings\": []}\n```";
+        assert_eq!(strip_markdown_fence(raw), "{\"findings\": []}");
+    }
+
+    #[test]
+    fn strip_fence_passes_through_when_absent() {
+        let raw = "{\"findings\": []}";
+        assert_eq!(strip_markdown_fence(raw), raw);
+    }
+
+    #[test]
+    fn strip_fence_handles_leading_whitespace() {
+        let raw = "  \n```json\n{\"findings\": []}\n```\n  ";
+        assert_eq!(strip_markdown_fence(raw), "{\"findings\": []}");
+    }
+
+    #[test]
+    fn truncate_for_log_short_string_passthrough() {
+        assert_eq!(truncate_for_log("hello"), "hello");
     }
 
-    /// Send one prompt to the endpoint, return the parsed findings + usage.
-    pub fn analyze(&self, _prompt: &RenderedPrompt) -> Result<AnalyzeResponse, DeepError> {
-        unimplemented!("OpenAiCompatibleClient::analyze: commit 5")
+    #[test]
+    fn truncate_for_log_long_string_clipped() {
+        let long = "x".repeat(500);
+        let truncated = truncate_for_log(&long);
+        assert!(truncated.ends_with("..."));
+        assert!(truncated.len() < long.len());
     }
 }
diff --git a/src/deep/cost.rs b/src/deep/cost.rs
index 6fa5122..5d47960 100644
--- a/src/deep/cost.rs
+++ b/src/deep/cost.rs
@@ -1,34 +1,189 @@
 //! Token-based USD cost ceiling for deep-scan calls.
 //!
-//! See plans/todo/01-pr1-deep-http-transport.md §10. Implementation lands
-//! in commit 5.
+//! Spend is tracked in micro-USD (millionths of a dollar) using
+//! [`AtomicU64`] so the tracker is safe to share across threads when
+//! running concurrent requests. If both per-1k rates are unset (or zero),
+//! tracking is a no-op and `record` always returns `Ok`.
 
-#![allow(dead_code)]
+#![allow(dead_code)] // wired into the binary in commit 6
 
 use crate::deep::client::TokenUsage;
 use crate::deep::config::DeepRuntime;
 use crate::deep::error::DeepError;
+use std::sync::atomic::{AtomicU64, Ordering};
+
+const MICRO_PER_USD: u64 = 1_000_000;
 
 /// Tracks cumulative USD spend across deep-scan requests; errors via
 /// [`DeepError::CostExceeded`] when the cap is reached.
-///
-/// If both rates are `None`, tracking is a no-op (spent stays 0).
 pub struct CostTracker {
-    // Fields land in commit 5 (atomic spend counter, cap, rates).
+    spent_micro_usd: AtomicU64,
+    cap_micro_usd: Option<u64>,
+    in_rate_per_1k: f64,
+    out_rate_per_1k: f64,
 }
 
 impl CostTracker {
-    pub fn new(_runtime: &DeepRuntime) -> Self {
-        unimplemented!("CostTracker::new: commit 5")
+    pub fn new(runtime: &DeepRuntime) -> Self {
+        Self {
+            spent_micro_usd: AtomicU64::new(0),
+            cap_micro_usd: runtime
+                .max_cost_usd
+                .filter(|c| c.is_finite() && *c >= 0.0)
+                .map(|c| (c * MICRO_PER_USD as f64) as u64),
+            in_rate_per_1k: runtime.cost_per_1k_input.unwrap_or(0.0),
+            out_rate_per_1k: runtime.cost_per_1k_output.unwrap_or(0.0),
+        }
     }
 
-    /// Record token usage from one response; return Err if cap exceeded.
-    pub fn record(&self, _usage: &TokenUsage) -> Result<(), DeepError> {
-        unimplemented!("CostTracker::record: commit 5")
+    /// Add this request's token usage to the running total. Returns
+    /// [`DeepError::CostExceeded`] if the new total exceeds the cap.
+    ///
+    /// If both per-1k rates are zero (default for local models), this is
+    /// a no-op — there's no concept of cost without rates.
+    pub fn record(&self, usage: &TokenUsage) -> Result<(), DeepError> {
+        if self.in_rate_per_1k == 0.0 && self.out_rate_per_1k == 0.0 {
+            return Ok(());
+        }
+
+        let delta_usd = (usage.input_tokens as f64 / 1000.0) * self.in_rate_per_1k
+            + (usage.output_tokens as f64 / 1000.0) * self.out_rate_per_1k;
+        let delta_micro = (delta_usd * MICRO_PER_USD as f64).round() as u64;
+
+        // Saturate instead of wrapping: an overflowed total must still trip the cap.
+        let bump = |cur: u64| Some(cur.saturating_add(delta_micro));
+        let prior = self.spent_micro_usd.fetch_update(Ordering::Relaxed, Ordering::Relaxed, bump);
+        let new_total = prior.unwrap_or(u64::MAX).saturating_add(delta_micro);
+
+        if let Some(cap) = self.cap_micro_usd
+            && new_total > cap
+        {
+            let spent = new_total as f64 / MICRO_PER_USD as f64;
+            return Err(DeepError::CostExceeded { spent });
+        }
+        Ok(())
+    }
 
-    /// Cumulative USD spent so far.
+    /// Cumulative USD spent so far. Useful for end-of-run logging.
     pub fn spent_usd(&self) -> f64 {
-        unimplemented!("CostTracker::spent_usd: commit 5")
+        self.spent_micro_usd.load(Ordering::Relaxed) as f64 / MICRO_PER_USD as f64
+    }
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+
+    fn rt(cap: Option<f64>, in_rate: Option<f64>, out_rate: Option<f64>) -> DeepRuntime {
+        DeepRuntime {
+            base_url: "http://x/v1".into(),
+            model: "m".into(),
+            api_key: None,
+            max_cost_usd: cap,
+            cost_per_1k_input: in_rate,
+            cost_per_1k_output: out_rate,
+            request_timeout_secs: 60,
+            max_candidates: 50,
+            max_concurrent: 1,
+            temperature: 0.0,
+            max_prompt_chars: 16_000,
+        }
+    }
+
+    fn usage(in_tokens: u32, out_tokens: u32) -> TokenUsage {
+        TokenUsage {
+            input_tokens: in_tokens,
+            output_tokens: out_tokens,
+        }
+    }
+
+    #[test]
+    fn no_rates_means_no_tracking() {
+        let tracker = CostTracker::new(&rt(Some(0.01), None, None));
+        // Massive usage, but no rates → no spend recorded → no cap trigger.
+        for _ in 0..1000 {
+            tracker.record(&usage(10_000, 10_000)).unwrap();
+        }
+        assert_eq!(tracker.spent_usd(), 0.0);
+    }
+
+    #[test]
+    fn under_cap_records_without_error() {
+        // 1k input @ $0.001/k = $0.001 spent
+        let tracker = CostTracker::new(&rt(Some(1.0), Some(0.001), Some(0.001)));
+        tracker.record(&usage(1_000, 0)).unwrap();
+        let spent = tracker.spent_usd();
+        assert!(
+            (spent - 0.001).abs() < 1e-6,
+            "expected ~$0.001, got {spent}"
+        );
+    }
+
+    #[test]
+    fn cap_exceeded_triggers_error() {
+        // Cap $0.01; one request @ $0.10 → trips cap.
+        let tracker = CostTracker::new(&rt(Some(0.01), Some(0.10), None));
+        let err = tracker.record(&usage(1_000, 0)).unwrap_err();
+        assert!(matches!(err, DeepError::CostExceeded { .. }));
+    }
+
+    #[test]
+    fn cap_exceeded_after_multiple_records() {
+        // Cap $1.00; 10 requests @ $0.20 each → trips on the 6th.
+        let tracker = CostTracker::new(&rt(Some(1.00), Some(0.20), None));
+        for i in 0..10 {
+            let result = tracker.record(&usage(1_000, 0));
+            if i < 5 {
+                assert!(result.is_ok(), "request {i} should be under cap");
+            } else if i == 5 {
+                assert!(matches!(
+                    result.unwrap_err(),
+                    DeepError::CostExceeded { .. }
+                ));
+                break;
+            }
+        }
+    }
+
+    #[test]
+    fn no_cap_never_errors() {
+        // Rates set but cap not — no error regardless of spend.
+        let tracker = CostTracker::new(&rt(None, Some(100.0), Some(100.0)));
+        for _ in 0..100 {
+            tracker.record(&usage(10_000, 10_000)).unwrap();
+        }
+        assert!(tracker.spent_usd() > 0.0);
+    }
+
+    #[test]
+    fn input_and_output_rates_both_apply() {
+        // 1k input @ $0.01/k + 2k output @ $0.05/k = $0.01 + $0.10 = $0.11
+        let tracker = CostTracker::new(&rt(None, Some(0.01), Some(0.05)));
+        tracker.record(&usage(1_000, 2_000)).unwrap();
+        let spent = tracker.spent_usd();
+        assert!((spent - 0.11).abs() < 1e-6, "expected ~$0.11, got {spent}");
+    }
+
+    #[test]
+    fn thread_safe_concurrent_records() {
+        use std::sync::Arc;
+        use std::thread;
+
+        let tracker = Arc::new(CostTracker::new(&rt(None, Some(0.001), None)));
+        let mut handles = Vec::new();
+        for _ in 0..10 {
+            let t = Arc::clone(&tracker);
+            handles.push(thread::spawn(move || {
+                for _ in 0..100 {
+                    t.record(&usage(1_000, 0)).unwrap();
+                }
+            }));
+        }
+        for h in handles {
+            h.join().unwrap();
+        }
+        // 10 threads × 100 records × $0.001 = $1.00
+        let spent = tracker.spent_usd();
+        assert!((spent - 1.0).abs() < 1e-3, "expected ~$1.00, got {spent}");
     }
 }
diff --git a/src/deep/error.rs b/src/deep/error.rs
index 6cd0862..a51efe1 100644
--- a/src/deep/error.rs
+++ b/src/deep/error.rs
@@ -23,6 +23,7 @@ pub enum DeepError {
 
     #[error("request timed out after {secs}s")]
     Timeout { secs: u64 },
-    // Http(#[from] reqwest::Error) is added in commit 5 alongside the HTTP
-    // client, so we don't drag reqwest into the build before it's needed.
+
+    #[error("HTTP error: {0}")]
+    Http(#[from] reqwest::Error),
 }
diff --git a/src/lib.rs b/src/lib.rs
new file mode 100644
index 0000000..7b02d7f
--- /dev/null
+++ b/src/lib.rs
@@ -0,0 +1,17 @@
+//! Zift — static analysis for embedded authorization logic.
+//!
+//! This crate is published as both a binary (`zift`) and a library. The
+//! binary at `src/main.rs` is a thin shim over the library; downstream
+//! consumers (e.g. the MCP server in PR 2) can depend on the library.
+
+pub mod cli;
+pub mod commands;
+pub mod config;
+pub mod deep;
+pub mod error;
+pub mod logging;
+pub mod output;
+pub mod rego;
+pub mod rules;
+pub mod scanner;
+pub mod types;
diff --git a/src/main.rs b/src/main.rs
index 0bff422..2dfb69d 100644
--- a/src/main.rs
+++ b/src/main.rs
@@ -1,18 +1,7 @@
-mod cli;
-mod commands;
-mod config;
-mod deep;
-mod error;
-mod logging;
-mod output;
-mod rego;
-mod rules;
-mod scanner;
-mod types;
-
 use clap::Parser;
 
-use cli::Cli;
+use zift::cli::Cli;
+use zift::{commands, config, error, logging};
 
 fn main() {
     let cli = Cli::parse();
diff --git a/tests/deep_http_integration.rs b/tests/deep_http_integration.rs
new file mode 100644
index 0000000..6cc9b67
--- /dev/null
+++ b/tests/deep_http_integration.rs
@@ -0,0 +1,336 @@
+//! Integration tests for the deep-pass HTTP client against a mocked
+//! OpenAI-compatible endpoint (mockito).
+//!
+//! These tests exercise the full client path: request body shape, response
+//! parsing, retry-on-bad-JSON, cost cap enforcement, and auth errors.
+
+use mockito::Server;
+use serde_json::json;
+
+use zift::deep::candidate::{Candidate, CandidateKind};
+use zift::deep::client::{OpenAiCompatibleClient, TokenUsage};
+use zift::deep::config::DeepRuntime;
+use zift::deep::cost::CostTracker;
+use zift::deep::error::DeepError;
+use zift::deep::prompt::{PromptInputs, render};
+use zift::types::{AuthCategory, Confidence, Language};
+
+fn runtime_for(server_url: &str) -> DeepRuntime {
+    DeepRuntime {
+        base_url: server_url.to_string(),
+        model: "test-model".into(),
+        api_key: Some("test-key".into()),
+        max_cost_usd: None,
+        cost_per_1k_input: None,
+        cost_per_1k_output: None,
+        request_timeout_secs: 10,
+        max_candidates: 10,
+        max_concurrent: 1,
+        temperature: 0.0,
+        max_prompt_chars: 16_000,
+    }
+}
+
+fn synth_candidate() -> Candidate {
+    Candidate {
+        kind: CandidateKind::Escalation,
+        file: std::path::PathBuf::from("src/auth.ts"),
+        language: Language::TypeScript,
+        line_start: 10,
+        line_end: 15,
+        source_snippet: "function isAdmin() { return user.role === 'admin'; }".into(),
+        imports: Vec::new(),
+        original_finding_id: Some("structural-1".into()),
+        seed_category: Some(AuthCategory::Custom),
+    }
+}
+
+fn ok_response(content: &str, prompt_tokens: u32, completion_tokens: u32) -> String {
+    json!({
+        "id": "chatcmpl-test",
+        "object": "chat.completion",
+        "created": 1234567890,
+        "model": "test-model",
+        "choices": [{
+            "index": 0,
+            "message": {
+                "role": "assistant",
+                "content": content,
+            },
+            "finish_reason": "stop"
+        }],
+        "usage": {
+            "prompt_tokens": prompt_tokens,
+            "completion_tokens": completion_tokens,
+            "total_tokens": prompt_tokens + completion_tokens,
+        }
+    })
+    .to_string()
+}
+
+fn findings_content_one() -> String {
+    json!({
+        "findings": [{
+            "line_start": 10,
+            "line_end": 12,
+            "category": "rbac",
+            "confidence": "high",
+            "description": "isAdmin role check",
+            "reasoning": "function name + return value structure indicates rbac",
+            "is_false_positive": false
+        }]
+    })
+    .to_string()
+}
+
+#[test]
+fn happy_path_returns_findings_and_usage() {
+    let mut server = Server::new();
+    let m = server
+        .mock("POST", "/chat/completions")
+        .match_header("authorization", "Bearer test-key")
+        .with_status(200)
+        .with_header("content-type", "application/json")
+        .with_body(ok_response(&findings_content_one(), 100, 50))
+        .create();
+
+    let runtime = runtime_for(&server.url());
+    let client = OpenAiCompatibleClient::new(&runtime).unwrap();
+    let cand = synth_candidate();
+    let prompt = render(&PromptInputs {
+        candidate: &cand,
+        structural_finding: None,
+    });
+
+    let response = client.analyze(&prompt).unwrap();
+    assert_eq!(response.findings.len(), 1);
+    assert_eq!(response.findings[0].line_start, 10);
+    assert_eq!(response.findings[0].category, AuthCategory::Rbac);
+    assert_eq!(response.findings[0].confidence, Confidence::High);
+    assert_eq!(response.usage.input_tokens, 100);
+    assert_eq!(response.usage.output_tokens, 50);
+    m.assert();
+}
+
+#[test]
+fn empty_findings_array_is_valid() {
+    let mut server = Server::new();
+    let m = server
+        .mock("POST", "/chat/completions")
+        .with_status(200)
+        .with_body(ok_response(r#"{"findings": []}"#, 80, 20))
+        .create();
+
+    let runtime = runtime_for(&server.url());
+    let client = OpenAiCompatibleClient::new(&runtime).unwrap();
+    let prompt = render(&PromptInputs {
+        candidate: &synth_candidate(),
+        structural_finding: None,
+    });
+
+    let response = client.analyze(&prompt).unwrap();
+    assert!(response.findings.is_empty());
+    assert_eq!(response.usage.input_tokens, 80);
+    m.assert();
+}
+
+#[test]
+fn malformed_json_returns_bad_response_after_retry() {
+    let mut server = Server::new();
+    // Both the structured-output attempt and the fallback retry return
+    // garbage. Two hits total.
+    let m = server
+        .mock("POST", "/chat/completions")
+        .with_status(200)
+        .with_body(ok_response("this is definitely not json", 50, 10))
+        .expect(2)
+        .create();
+
+    let runtime = runtime_for(&server.url());
+    let client = OpenAiCompatibleClient::new(&runtime).unwrap();
+    let prompt = render(&PromptInputs {
+        candidate: &synth_candidate(),
+        structural_finding: None,
+    });
+
+    let err = client.analyze(&prompt).unwrap_err();
+    assert!(
+        matches!(err, DeepError::BadResponse(_)),
+        "expected BadResponse, got: {err:?}"
+    );
+    m.assert();
+}
+
+#[test]
+fn fallback_retry_succeeds_when_first_attempt_returns_bad_json() {
+    let mut server = Server::new();
+
+    // First attempt (with response_format) returns garbage.
+    let _bad = server
+        .mock("POST", "/chat/completions")
+        .match_body(mockito::Matcher::PartialJsonString(
+            r#"{"response_format": {}}"#.into(),
+        ))
+        .with_status(200)
+        .with_body(ok_response("not json", 50, 10))
+        .create();
+
+    // Second attempt (without response_format) returns valid findings.
+    let _good = server
+        .mock("POST", "/chat/completions")
+        .with_status(200)
+        .with_body(ok_response(&findings_content_one(), 60, 30))
+        .create();
+
+    let runtime = runtime_for(&server.url());
+    let client = OpenAiCompatibleClient::new(&runtime).unwrap();
+    let prompt = render(&PromptInputs {
+        candidate: &synth_candidate(),
+        structural_finding: None,
+    });
+
+    let response = client.analyze(&prompt).unwrap();
+    assert_eq!(response.findings.len(), 1);
+}
+
+#[test]
+fn json_wrapped_in_markdown_fence_is_accepted() {
+    let mut server = Server::new();
+    let fenced = format!("```json\n{}\n```", findings_content_one());
+    let m = server
+        .mock("POST", "/chat/completions")
+        .with_status(200)
+        .with_body(ok_response(&fenced, 50, 10))
+        .create();
+
+    let runtime = runtime_for(&server.url());
+    let client = OpenAiCompatibleClient::new(&runtime).unwrap();
+    let prompt = render(&PromptInputs {
+        candidate: &synth_candidate(),
+        structural_finding: None,
+    });
+
+    let response = client.analyze(&prompt).unwrap();
+    assert_eq!(response.findings.len(), 1);
+    m.assert();
+}
+
+#[test]
+fn http_401_surfaces_as_config_error() {
+    let mut server = Server::new();
+    let m = server
+        .mock("POST", "/chat/completions")
+        .with_status(401)
+        .with_body("{\"error\": \"unauthorized\"}")
+        .create();
+
+    let runtime = runtime_for(&server.url());
+    let client = OpenAiCompatibleClient::new(&runtime).unwrap();
+    let prompt = render(&PromptInputs {
+        candidate: &synth_candidate(),
+        structural_finding: None,
+    });
+
+    let err = client.analyze(&prompt).unwrap_err();
+    let msg = format!("{err}");
+    assert!(msg.contains("auth rejected"), "got: {msg}");
+    m.assert();
+}
+
+#[test]
+fn http_500_surfaces_as_config_error() {
+    let mut server = Server::new();
+    let m = server
+        .mock("POST", "/chat/completions")
+        .with_status(500)
+        .with_body("internal server error")
+        .create();
+
+    let runtime = runtime_for(&server.url());
+    let client = OpenAiCompatibleClient::new(&runtime).unwrap();
+    let prompt = render(&PromptInputs {
+        candidate: &synth_candidate(),
+        structural_finding: None,
+    });
+
+    let err = client.analyze(&prompt).unwrap_err();
+    let msg = format!("{err}");
+    assert!(msg.contains("500"), "got: {msg}");
+    m.assert();
+}
+
+#[test]
+fn cost_tracker_caps_and_errors() {
+    let mut runtime = runtime_for("http://unused");
+    runtime.max_cost_usd = Some(0.01);
+    runtime.cost_per_1k_input = Some(0.10); // 1k input = $0.10 → exceeds $0.01 cap
+
+    let tracker = CostTracker::new(&runtime);
+    let usage = TokenUsage {
+        input_tokens: 1_000,
+        output_tokens: 0,
+    };
+    let err = tracker.record(&usage).unwrap_err();
+    assert!(matches!(err, DeepError::CostExceeded { .. }));
+}
+
+#[test]
+fn request_body_includes_model_and_messages() {
+    let mut server = Server::new();
+    // Use mockito's body matcher to assert the request shape.
+    let m = server
+        .mock("POST", "/chat/completions")
+        .match_body(mockito::Matcher::AllOf(vec![
+            mockito::Matcher::PartialJsonString(r#"{"model": "test-model"}"#.into()),
+            mockito::Matcher::PartialJsonString(
+                r#"{"messages": [{"role": "system"}, {"role": "user"}]}"#.into(),
+            ),
+            mockito::Matcher::PartialJsonString(r#"{"temperature": 0.0}"#.into()),
+        ]))
+        .with_status(200)
+        .with_body(ok_response(r#"{"findings": []}"#, 10, 5))
+        .create();
+
+    let runtime = runtime_for(&server.url());
+    let client = OpenAiCompatibleClient::new(&runtime).unwrap();
+    let prompt = render(&PromptInputs {
+        candidate: &synth_candidate(),
+        structural_finding: None,
+    });
+
+    client.analyze(&prompt).unwrap();
+    m.assert();
+}
+
+#[test]
+fn missing_usage_field_defaults_to_zero() {
+    // Some local servers don't return usage at all.
+    let mut server = Server::new();
+    let response_without_usage = json!({
+        "id": "chatcmpl-test",
+        "model": "test-model",
+        "choices": [{
+            "index": 0,
+            "message": {"role": "assistant", "content": r#"{"findings": []}"#},
+            "finish_reason": "stop"
+        }]
+    })
+    .to_string();
+    let m = server
+        .mock("POST", "/chat/completions")
+        .with_status(200)
+        .with_body(response_without_usage)
+        .create();
+
+    let runtime = runtime_for(&server.url());
+    let client = OpenAiCompatibleClient::new(&runtime).unwrap();
+    let prompt = render(&PromptInputs {
+        candidate: &synth_candidate(),
+        structural_finding: None,
+    });
+
+    let response = client.analyze(&prompt).unwrap();
+    assert_eq!(response.usage.input_tokens, 0);
+    assert_eq!(response.usage.output_tokens, 0);
+    m.assert();
+}

From db3102fbff2ae629b9036439782f716c1b3b2368 Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@enforceauth.com>
Date: Tue, 28 Apr 2026 21:36:26 -0400
Subject: [PATCH 08/18] feat(deep): wire orchestrator end-to-end and merge
 semantic findings
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Final piece of PR 1. After this commit, --deep is fully functional:

  zift scan ./repo --deep \
    --base-url http://localhost:11434/v1 \
    --model qwen2.5-coder:14b

…produces a structural scan augmented with semantic findings (pass:
ScanPass::Semantic) merged into the JSON output.

Orchestrator (src/deep/mod.rs run()):
- Takes ownership of the structural finding set so it can drop entries
  the model identifies as false positives.
- Builds candidates via select_candidates, builds an HTTP client and
  cost tracker, then iterates: render prompt, call analyze, record
  cost, translate findings.
- Per-candidate Http/BadResponse/Timeout errors are logged and the
  candidate is skipped — best-effort enrichment, not all-or-nothing.
- Config / CostExceeded / Io errors are hard fails (propagate).
- Concurrency is sequential in this commit (TODO: std::thread::scope
  fan-out to runtime.max_concurrent in a follow-up).
- Drops false-positive structural findings before merge; emits
  semantic findings only for non-false-positive returns.

Merge logic (src/deep/merge.rs):
- Semantic finding overlapping a structural one (>= 50% range
  overlap, same file) replaces the structural iff
  semantic.confidence >= structural.confidence.
- Overlap fraction = intersection / max(range_a, range_b) — the
  conservative choice prevents tiny semantic findings from
  collapsing huge structural ranges.
- Non-overlapping semantic findings are appended.

Cleanup:
- All #![allow(dead_code)] annotations removed from deep submodules
  and per-function allows from scanner::discovery — everything is
  now reachable from the binary.
- Unused is_tsx_jsx helper deleted from candidate.rs.

3 new end-to-end integration tests in tests/deep_http_integration.rs:
- deep::run produces semantic findings from cold-region discovery
- false-positive flag drops the structural seed
- empty input (no candidates) skips HTTP entirely

11 new merge unit tests. 199 total tests (186 lib + 13 integration).
Refs plans/todo/01-pr1-deep-http-transport.md
---
 src/commands/scan.rs           |   5 +-
 src/deep/candidate.rs          |  14 +--
 src/deep/client.rs             |   2 -
 src/deep/config.rs             |   4 -
 src/deep/context.rs            |   4 -
 src/deep/cost.rs               |   2 -
 src/deep/error.rs              |   3 -
 src/deep/finding.rs            |   3 -
 src/deep/merge.rs              | 193 ++++++++++++++++++++++++++++++---
 src/deep/mod.rs                | 134 ++++++++++++++++++-----
 src/deep/prompt.rs             |   3 -
 src/scanner/discovery.rs       |   2 -
 tests/deep_http_integration.rs | 134 +++++++++++++++++++++++
 13 files changed, 422 insertions(+), 81 deletions(-)

diff --git a/src/commands/scan.rs b/src/commands/scan.rs
index e7e5ab6..3d688cd 100644
--- a/src/commands/scan.rs
+++ b/src/commands/scan.rs
@@ -50,10 +50,7 @@ pub fn execute(args: ScanArgs, config: ZiftConfig) -> Result<()> {
             runtime.model,
             runtime.max_concurrent
         );
-        let semantic = deep::run(&result.findings, &path, runtime)?;
-        if !semantic.is_empty() {
-            result.findings = deep::merge::merge(result.findings, semantic);
-        }
+        result.findings = deep::run(result.findings, &path, runtime)?;
     }
 
     let stdout = std::io::stdout();
diff --git a/src/deep/candidate.rs b/src/deep/candidate.rs
index 5631c11..1f52b8b 100644
--- a/src/deep/candidate.rs
+++ b/src/deep/candidate.rs
@@ -14,15 +14,10 @@
 //!
 //! Candidates are sorted deterministically by `(file, line_start)`.
 
-// The deep module is wired into the binary only when `deep::run` actually
-// does work (commit 6). Until then it's "dead" from the binary's perspective
-// even though tests cover it. This allow goes away in commit 6.
-#![allow(dead_code)]
-
 use crate::deep::config::DeepRuntime;
 use crate::deep::context::{expand_finding, expand_region};
 use crate::deep::error::DeepError;
-use crate::scanner::discovery::{detect_language_for_deep, discover_files_for_deep};
+use crate::scanner::discovery::discover_files_for_deep;
 use crate::types::{AuthCategory, Confidence, Finding, Language};
 use regex::Regex;
 use std::collections::HashSet;
@@ -286,13 +281,6 @@ fn overlaps_any(
     })
 }
 
-/// Lookup a language's tsx/jsx flavor for a given file path.
-pub(crate) fn is_tsx_jsx(path: &Path) -> bool {
-    detect_language_for_deep(path)
-        .map(|(_, tsx)| tsx)
-        .unwrap_or(false)
-}
-
 #[cfg(test)]
 mod tests {
     use super::*;
diff --git a/src/deep/client.rs b/src/deep/client.rs
index ae39be1..672b550 100644
--- a/src/deep/client.rs
+++ b/src/deep/client.rs
@@ -11,8 +11,6 @@
 //! emit JSON in the message body anyway. The retry strips the directive
 //! and re-parses; if that still fails, we return [`DeepError::BadResponse`].
 
-#![allow(dead_code)] // wired into the binary in commit 6
-
 use crate::deep::config::DeepRuntime;
 use crate::deep::error::DeepError;
 use crate::deep::finding::SemanticFinding;
diff --git a/src/deep/config.rs b/src/deep/config.rs
index 07a6431..bac81b9 100644
--- a/src/deep/config.rs
+++ b/src/deep/config.rs
@@ -7,10 +7,6 @@
 //!   from `.zift.toml` — keys belong in env or CLI, not source-controlled
 //!   files.
 
-// Most DeepRuntime fields are read by the HTTP client (commit 5) and
-// orchestrator (commit 6); skipped here until those land.
-#![allow(dead_code)]
-
 use crate::cli::ScanArgs;
 use crate::config::ZiftConfig;
 use crate::deep::error::DeepError;
diff --git a/src/deep/context.rs b/src/deep/context.rs
index 59e039f..94ea683 100644
--- a/src/deep/context.rs
+++ b/src/deep/context.rs
@@ -10,10 +10,6 @@
 //!   sufficient for v1. Most local 7B-14B models can figure out function
 //!   boundaries from a generous line window with imports included.
 
-// ExpandedContext.imports/file_relative/language are populated here but
-// consumed by prompt rendering in commit 4. The allow goes away then.
-#![allow(dead_code)]
-
 use crate::deep::error::DeepError;
 use crate::types::{Finding, Language};
 use std::path::{Path, PathBuf};
diff --git a/src/deep/cost.rs b/src/deep/cost.rs
index 5d47960..a975180 100644
--- a/src/deep/cost.rs
+++ b/src/deep/cost.rs
@@ -5,8 +5,6 @@
 //! running concurrent requests. If both per-1k rates are unset (or zero),
 //! tracking is a no-op and `record` always returns `Ok`.
 
-#![allow(dead_code)] // wired into the binary in commit 6
-
 use crate::deep::client::TokenUsage;
 use crate::deep::config::DeepRuntime;
 use crate::deep::error::DeepError;
diff --git a/src/deep/error.rs b/src/deep/error.rs
index a51efe1..0bcafc5 100644
--- a/src/deep/error.rs
+++ b/src/deep/error.rs
@@ -1,6 +1,3 @@
-// Some variants are constructed only by code that lands in commits 5/6.
-#![allow(dead_code)]
-
 use thiserror::Error;
 
 /// Errors produced by the deep (semantic) scan pipeline.
diff --git a/src/deep/finding.rs b/src/deep/finding.rs
index 31e30ee..9203725 100644
--- a/src/deep/finding.rs
+++ b/src/deep/finding.rs
@@ -1,8 +1,5 @@
 //! LLM-side finding shape and translation to the canonical [`Finding`].
 
-// Wired into the binary in commit 6; until then, dead from main()'s view.
-#![allow(dead_code)]
-
 use crate::deep::candidate::Candidate;
 use crate::scanner::matcher::compute_finding_id;
 use crate::types::{AuthCategory, Confidence, Finding, ScanPass};
diff --git a/src/deep/merge.rs b/src/deep/merge.rs
index ae744e9..51843cc 100644
--- a/src/deep/merge.rs
+++ b/src/deep/merge.rs
@@ -1,24 +1,185 @@
 //! Merge semantic findings into the structural-pass finding set.
 //!
-//! Real merge logic lands in commit 6:
+//! Rules:
 //!
-//! - Semantic finding overlapping a structural finding's range (>= 50%
-//!   overlap, same file) replaces the structural one **iff** semantic
-//!   confidence ≥ structural confidence.
-//! - `is_false_positive: true` from a `SemanticFinding` drops the seed
-//!   structural finding entirely.
-//! - Non-overlapping semantic findings are appended.
+//! - A semantic finding overlapping a structural finding's range (>= 50%
+//!   overlap, same file) **replaces** the structural one iff
+//!   `semantic.confidence >= structural.confidence`.
+//! - A semantic finding with no overlap is appended.
 //!
-//! In commit 2 this is a trivial concat — semantic findings are appended
-//! verbatim. Sufficient because [`crate::deep::run`] is itself a no-op stub
-//! that returns an empty vec.
+//! Overlap is computed as `intersection / max(range_a, range_b)`. Dividing
+//! by the larger range (rather than the smaller) is the conservative choice:
+//! a tiny semantic finding that lands inside a sprawling structural one
+//! scores below the 50% threshold, so it is appended instead of replacing it.
+//!
+//! False-positive drops happen *before* merge in the orchestrator
+//! ([`crate::deep::run`]), so the structural findings arriving here have
+//! already had model-rejected entries removed.
 
 use crate::types::Finding;
 
-pub fn merge(structural: Vec<Finding>, semantic: Vec<Finding>) -> Vec<Finding> {
-    // TODO(commit 6): overlap detection + confidence-based replacement +
-    // false-positive drops.
-    let mut all = structural;
-    all.extend(semantic);
-    all
+pub fn merge(mut structural: Vec<Finding>, semantic: Vec<Finding>) -> Vec<Finding> {
+    for sem in semantic {
+        let replace_idx = structural.iter().position(|s| {
+            s.file == sem.file
+                && range_overlap_fraction(s.line_start, s.line_end, sem.line_start, sem.line_end)
+                    >= 0.5
+                && sem.confidence >= s.confidence
+        });
+        match replace_idx {
+            Some(idx) => {
+                tracing::debug!(
+                    "merge: semantic finding replaces structural at {}:{}-{}",
+                    sem.file.display(),
+                    sem.line_start,
+                    sem.line_end
+                );
+                structural[idx] = sem;
+            }
+            None => structural.push(sem),
+        }
+    }
+    structural
+}
+
+fn range_overlap_fraction(a_start: usize, a_end: usize, b_start: usize, b_end: usize) -> f32 {
+    let overlap_start = a_start.max(b_start);
+    let overlap_end = a_end.min(b_end);
+    if overlap_start > overlap_end {
+        return 0.0;
+    }
+    let overlap_lines = (overlap_end - overlap_start + 1) as f32;
+    let max_range = (a_end - a_start + 1).max(b_end - b_start + 1) as f32;
+    overlap_lines / max_range
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::types::{AuthCategory, Confidence, Language, ScanPass};
+    use std::path::PathBuf;
+
+    fn finding(
+        file: &str,
+        start: usize,
+        end: usize,
+        confidence: Confidence,
+        pass: ScanPass,
+    ) -> Finding {
+        Finding {
+            id: format!("{file}-{start}-{end}-{pass:?}"),
+            file: PathBuf::from(file),
+            line_start: start,
+            line_end: end,
+            code_snippet: String::new(),
+            language: Language::TypeScript,
+            category: AuthCategory::Custom,
+            confidence,
+            description: String::new(),
+            pattern_rule: None,
+            rego_stub: None,
+            pass,
+        }
+    }
+
+    #[test]
+    fn non_overlapping_findings_both_kept() {
+        let s = finding("a.ts", 10, 15, Confidence::Medium, ScanPass::Structural);
+        let sem = finding("a.ts", 50, 60, Confidence::High, ScanPass::Semantic);
+        let merged = merge(vec![s], vec![sem]);
+        assert_eq!(merged.len(), 2);
+    }
+
+    #[test]
+    fn overlapping_higher_confidence_replaces() {
+        let s = finding("a.ts", 10, 15, Confidence::Low, ScanPass::Structural);
+        let sem = finding("a.ts", 10, 15, Confidence::High, ScanPass::Semantic);
+        let merged = merge(vec![s], vec![sem]);
+        assert_eq!(merged.len(), 1);
+        assert_eq!(merged[0].pass, ScanPass::Semantic);
+        assert_eq!(merged[0].confidence, Confidence::High);
+    }
+
+    #[test]
+    fn overlapping_equal_confidence_replaces() {
+        // Equal confidence still replaces — semantic has more reasoning attached.
+        let s = finding("a.ts", 10, 15, Confidence::Medium, ScanPass::Structural);
+        let sem = finding("a.ts", 10, 15, Confidence::Medium, ScanPass::Semantic);
+        let merged = merge(vec![s], vec![sem]);
+        assert_eq!(merged.len(), 1);
+        assert_eq!(merged[0].pass, ScanPass::Semantic);
+    }
+
+    #[test]
+    fn overlapping_lower_confidence_keeps_both() {
+        let s = finding("a.ts", 10, 15, Confidence::High, ScanPass::Structural);
+        let sem = finding("a.ts", 10, 15, Confidence::Low, ScanPass::Semantic);
+        let merged = merge(vec![s], vec![sem]);
+        assert_eq!(merged.len(), 2);
+        assert!(merged.iter().any(|f| f.pass == ScanPass::Structural));
+        assert!(merged.iter().any(|f| f.pass == ScanPass::Semantic));
+    }
+
+    #[test]
+    fn different_files_never_merge() {
+        let s = finding("a.ts", 10, 15, Confidence::Low, ScanPass::Structural);
+        let sem = finding("b.ts", 10, 15, Confidence::High, ScanPass::Semantic);
+        let merged = merge(vec![s], vec![sem]);
+        assert_eq!(merged.len(), 2);
+    }
+
+    #[test]
+    fn partial_overlap_below_threshold_keeps_both() {
+        // Structural: lines 10-30 (21 lines)
+        // Semantic: lines 28-32 (5 lines)
+        // Overlap: 28-30 = 3 lines, max range = 21 → 3/21 ≈ 14% — keeps both.
+        let s = finding("a.ts", 10, 30, Confidence::Low, ScanPass::Structural);
+        let sem = finding("a.ts", 28, 32, Confidence::High, ScanPass::Semantic);
+        let merged = merge(vec![s], vec![sem]);
+        assert_eq!(merged.len(), 2);
+    }
+
+    #[test]
+    fn substantial_overlap_above_threshold_replaces() {
+        // Structural: lines 10-20 (11 lines)
+        // Semantic: lines 11-19 (9 lines)
+        // Overlap: 11-19 = 9 lines, max range = 11 → 9/11 ≈ 82% — replaces.
+        let s = finding("a.ts", 10, 20, Confidence::Low, ScanPass::Structural);
+        let sem = finding("a.ts", 11, 19, Confidence::High, ScanPass::Semantic);
+        let merged = merge(vec![s], vec![sem]);
+        assert_eq!(merged.len(), 1);
+        assert_eq!(merged[0].pass, ScanPass::Semantic);
+    }
+
+    #[test]
+    fn empty_inputs_return_empty() {
+        assert!(merge(vec![], vec![]).is_empty());
+    }
+
+    #[test]
+    fn semantic_only_returns_semantic() {
+        let sem = finding("a.ts", 10, 15, Confidence::High, ScanPass::Semantic);
+        let merged = merge(vec![], vec![sem]);
+        assert_eq!(merged.len(), 1);
+        assert_eq!(merged[0].pass, ScanPass::Semantic);
+    }
+
+    #[test]
+    fn structural_only_returns_structural() {
+        let s = finding("a.ts", 10, 15, Confidence::Medium, ScanPass::Structural);
+        let merged = merge(vec![s], vec![]);
+        assert_eq!(merged.len(), 1);
+        assert_eq!(merged[0].pass, ScanPass::Structural);
+    }
+
+    #[test]
+    fn overlap_fraction_computation() {
+        // Identical ranges → 1.0
+        assert!((range_overlap_fraction(10, 20, 10, 20) - 1.0).abs() < 1e-6);
+        // No overlap → 0.0
+        assert_eq!(range_overlap_fraction(10, 20, 30, 40), 0.0);
+        // 50% overlap, equal-sized: 5/10 = 0.5
+        let f = range_overlap_fraction(10, 19, 15, 24);
+        assert!((f - 0.5).abs() < 1e-6, "expected 0.5, got {f}");
+    }
 }
diff --git a/src/deep/mod.rs b/src/deep/mod.rs
index c8ca55c..50e5e49 100644
--- a/src/deep/mod.rs
+++ b/src/deep/mod.rs
@@ -1,14 +1,6 @@
 //! Deep (LLM-assisted) semantic scan.
 //!
 //! See [`plans/todo/01-pr1-deep-http-transport.md`] for the full design.
-//! This module is being built incrementally across six commits:
-//!
-//! 1. CLI/config refactor (done in PR-prep commit)
-//! 2. Module skeleton + config + error types (this commit)
-//! 3. Candidate selection + context expansion
-//! 4. Prompt rendering + JSON output schema
-//! 5. OpenAI-compatible HTTP client + cost tracker
-//! 6. Result merge + end-to-end wiring
 //!
 //! The primitives in this module are intentionally transport-agnostic so
 //! that PR 2 (MCP server) and PR 3 (subprocess hook) can reuse them.
@@ -23,30 +15,122 @@ pub mod finding;
 pub mod merge;
 pub mod prompt;
 
-// Convenience re-exports (DeepRuntime, DeepError, SemanticFinding) will be
-// added in commit 6 when end-to-end wiring lands and external callers
-// actually use them. Adding them now triggers unused-import warnings.
+pub use config::DeepRuntime;
+pub use error::DeepError;
+pub use finding::SemanticFinding;
 
-use crate::deep::config::DeepRuntime;
-use crate::deep::error::DeepError;
 use crate::types::Finding;
+use std::collections::{HashMap, HashSet};
 use std::path::Path;
 
 /// Run the deep (semantic) scan over a set of structural findings.
 ///
-/// Returns additional findings with `pass: ScanPass::Semantic`. Merging into
-/// the master findings vec is the caller's responsibility (use
-/// [`merge::merge`]).
+/// Takes ownership of `structural` because the deep pass may drop entries
+/// the model identifies as false positives. Returns the **merged** finding
+/// set (filtered structural ∪ semantic, with overlap dedup applied).
+///
+/// Errors:
+/// - `DeepError::Config`: missing config or HTTP client construction failure (hard fail)
+/// - `DeepError::CostExceeded`: cap reached mid-run; returns immediately (hard fail)
+/// - `DeepError::Io`: filesystem error reading source files (hard fail)
 ///
-/// In commit 2 this is a no-op stub. Subsequent commits add candidate
-/// selection, context expansion, prompt rendering, HTTP analyze, and result
-/// merging.
+/// Per-candidate `Http`, `BadResponse`, and `Timeout` errors are logged
+/// and the candidate is skipped — best-effort enrichment, not all-or-nothing.
 pub fn run(
-    _structural: &[Finding],
-    _scan_root: &Path,
-    _runtime: &DeepRuntime,
+    structural: Vec<Finding>,
+    scan_root: &Path,
+    runtime: &DeepRuntime,
 ) -> Result<Vec<Finding>, DeepError> {
-    // TODO(commits 3-6): candidate selection -> context expansion ->
-    // prompt rendering -> HTTP analyze -> finding merge.
-    Ok(Vec::new())
+    let candidates = candidate::select_candidates(&structural, scan_root, runtime)?;
+    if candidates.is_empty() {
+        tracing::info!("deep: no candidates to analyze; returning structural findings as-is");
+        return Ok(structural);
+    }
+    tracing::info!(
+        "deep: analyzing {} candidate(s) (cap: {})",
+        candidates.len(),
+        runtime.max_candidates
+    );
+
+    let client = client::OpenAiCompatibleClient::new(runtime)?;
+    let cost_tracker = cost::CostTracker::new(runtime);
+
+    // Borrowing index of structural findings by id, used to look up the seed
+    // Finding for escalation candidates; `structural` itself keeps scan order.
+    let structural_by_id: HashMap<&str, &Finding> =
+        structural.iter().map(|f| (f.id.as_str(), f)).collect();
+
+    let mut semantic_findings: Vec<Finding> = Vec::new();
+    let mut false_positive_seeds: HashSet<String> = HashSet::new();
+
+    for candidate in &candidates {
+        let seed = candidate
+            .original_finding_id
+            .as_deref()
+            .and_then(|id| structural_by_id.get(id).copied());
+
+        let prompt = prompt::render(&prompt::PromptInputs {
+            candidate,
+            structural_finding: seed,
+        });
+
+        let response = match client.analyze(&prompt) {
+            Ok(r) => r,
+            Err(DeepError::Http(e)) => {
+                tracing::warn!(
+                    "deep: HTTP error on {}:{} (skipping): {e}",
+                    candidate.file.display(),
+                    candidate.line_start
+                );
+                continue;
+            }
+            Err(DeepError::BadResponse(msg)) => {
+                tracing::warn!(
+                    "deep: bad response on {}:{} (skipping): {msg}",
+                    candidate.file.display(),
+                    candidate.line_start
+                );
+                continue;
+            }
+            Err(DeepError::Timeout { secs }) => {
+                tracing::warn!(
+                    "deep: timeout ({}s) on {}:{} (skipping)",
+                    secs,
+                    candidate.file.display(),
+                    candidate.line_start
+                );
+                continue;
+            }
+            // Config / CostExceeded / Io are hard fails — propagate.
+            Err(other) => return Err(other),
+        };
+
+        cost_tracker.record(&response.usage)?;
+
+        for sem in response.findings {
+            if sem.is_false_positive {
+                if let Some(seed_id) = &candidate.original_finding_id {
+                    false_positive_seeds.insert(seed_id.clone());
+                }
+                continue;
+            }
+            let f = finding::into_finding(sem, candidate, seed, scan_root);
+            semantic_findings.push(f);
+        }
+    }
+
+    tracing::info!(
+        "deep: {} semantic finding(s); {} structural false-positive(s); spent ${:.4}",
+        semantic_findings.len(),
+        false_positive_seeds.len(),
+        cost_tracker.spent_usd()
+    );
+
+    // Drop structural findings the model rejected, then merge semantic in.
+    // Filtering the Vec in place keeps the structural scan order deterministic.
+    drop(structural_by_id); // ends the index's borrow of `structural`
+    let mut filtered_structural = structural;
+    filtered_structural.retain(|f| !false_positive_seeds.contains(&f.id));
+
+    Ok(merge::merge(filtered_structural, semantic_findings))
 }
diff --git a/src/deep/prompt.rs b/src/deep/prompt.rs
index 7a858e9..e2c418b 100644
--- a/src/deep/prompt.rs
+++ b/src/deep/prompt.rs
@@ -11,9 +11,6 @@
 //! detected in the candidate's imports — keeps the base prompt small for
 //! the common case, adds targeted hints when relevant.
 
-// Wired into the binary in commit 6; until then, dead from main()'s view.
-#![allow(dead_code)]
-
 use crate::deep::candidate::Candidate;
 use crate::types::{Finding, Language};
 
diff --git a/src/scanner/discovery.rs b/src/scanner/discovery.rs
index 14ec968..f9c5826 100644
--- a/src/scanner/discovery.rs
+++ b/src/scanner/discovery.rs
@@ -31,7 +31,6 @@ pub fn detect_language(path: &Path) -> Option<(Language, bool)> {
 /// C#, Kotlin, Ruby, PHP). Used by the deep (semantic) scan, which can run
 /// regex-based cold-region detection on any language regardless of grammar
 /// availability.
-#[allow(dead_code)] // wired into the binary in commit 6
 pub fn detect_language_for_deep(path: &Path) -> Option<(Language, bool)> {
     let ext = path.extension()?.to_str()?.to_ascii_lowercase();
     match ext.as_str() {
@@ -61,7 +60,6 @@ pub fn discover_files(
 /// Discover source files for the deep (semantic) scan. Behaves identically
 /// to [`discover_files`] but emits files in **all** languages from the
 /// [`Language`] enum, not only structurally-supported ones.
-#[allow(dead_code)] // wired into the binary in commit 6
 pub fn discover_files_for_deep(
     root: &Path,
     exclude_patterns: &[String],
diff --git a/tests/deep_http_integration.rs b/tests/deep_http_integration.rs
index 6cc9b67..4883941 100644
--- a/tests/deep_http_integration.rs
+++ b/tests/deep_http_integration.rs
@@ -334,3 +334,137 @@ fn missing_usage_field_defaults_to_zero() {
     assert_eq!(response.usage.output_tokens, 0);
     m.assert();
 }
+
+// -- End-to-end deep::run tests --------------------------------------------
+
+use std::fs;
+use std::path::PathBuf;
+use tempfile::tempdir;
+use zift::types::{Finding, ScanPass};
+
+fn structural_finding(file: &str, line: usize) -> Finding {
+    Finding {
+        id: format!("structural-{file}-{line}"),
+        file: PathBuf::from(file),
+        line_start: line,
+        line_end: line + 2,
+        code_snippet: String::new(),
+        language: Language::TypeScript,
+        category: AuthCategory::Custom,
+        confidence: Confidence::Low,
+        description: "matched custom rule".into(),
+        pattern_rule: Some("ts-custom".into()),
+        rego_stub: None,
+        pass: ScanPass::Structural,
+    }
+}
+
+#[test]
+fn deep_run_end_to_end_produces_semantic_finding() {
+    let dir = tempdir().unwrap();
+    // Write a source file containing an auth-y function so cold-region picks it up.
+    fs::write(
+        dir.path().join("auth.ts"),
+        "// imports here\nfunction isAdmin(user) {\n  return user.role === 'admin';\n}\n",
+    )
+    .unwrap();
+
+    let mut server = Server::new();
+    let _m = server
+        .mock("POST", "/chat/completions")
+        .with_status(200)
+        .with_body(ok_response(
+            &json!({
+                "findings": [{
+                    "line_start": 2,
+                    "line_end": 4,
+                    "category": "rbac",
+                    "confidence": "high",
+                    "description": "isAdmin role check",
+                    "reasoning": "function name + role comparison",
+                    "is_false_positive": false
+                }]
+            })
+            .to_string(),
+            120,
+            40,
+        ))
+        .expect_at_least(1)
+        .create();
+
+    let runtime = runtime_for(&server.url());
+
+    // No structural findings — cold-region scan should pick up isAdmin.
+    let merged = zift::deep::run(Vec::new(), dir.path(), &runtime).unwrap();
+
+    assert!(!merged.is_empty(), "expected at least one semantic finding");
+    let semantic: Vec<&Finding> = merged
+        .iter()
+        .filter(|f| f.pass == ScanPass::Semantic)
+        .collect();
+    assert_eq!(semantic.len(), 1);
+    assert_eq!(semantic[0].category, AuthCategory::Rbac);
+    assert_eq!(semantic[0].confidence, Confidence::High);
+}
+
+#[test]
+fn deep_run_drops_structural_when_model_flags_false_positive() {
+    let dir = tempdir().unwrap();
+    // Write a source file with auth-y content so the structural finding can resolve.
+    fs::write(
+        dir.path().join("auth.ts"),
+        "function maybeAuth() {\n  // not actually authz\n  return true;\n}\n",
+    )
+    .unwrap();
+
+    let mut server = Server::new();
+    let _m = server
+        .mock("POST", "/chat/completions")
+        .with_status(200)
+        .with_body(ok_response(
+            &json!({
+                "findings": [{
+                    "line_start": 1,
+                    "line_end": 3,
+                    "category": "custom",
+                    "confidence": "low",
+                    "description": "not really auth",
+                    "reasoning": "function name is misleading; no actual authz logic",
+                    "is_false_positive": true
+                }]
+            })
+            .to_string(),
+            80,
+            20,
+        ))
+        .expect_at_least(1)
+        .create();
+
+    let runtime = runtime_for(&server.url());
+    let structural = vec![structural_finding("auth.ts", 1)];
+
+    let merged = zift::deep::run(structural, dir.path(), &runtime).unwrap();
+
+    // The structural finding was the only input; the model rejected it.
+    // Result should be empty (no semantic finding emitted, no structural retained).
+    assert!(merged.is_empty(), "expected empty result, got: {merged:?}");
+}
+
+#[test]
+fn deep_run_returns_structural_unchanged_when_no_candidates() {
+    let dir = tempdir().unwrap();
+    // No source files; no auth-y content; no structural findings.
+    // deep::run should return the empty input as-is without making HTTP calls.
+
+    let mut server = Server::new();
+    let m = server
+        .mock("POST", "/chat/completions")
+        .with_status(500) // would fail if called; we shouldn't call it
+        .expect(0)
+        .create();
+
+    let runtime = runtime_for(&server.url());
+    let merged = zift::deep::run(Vec::new(), dir.path(), &runtime).unwrap();
+    assert!(merged.is_empty());
+    m.assert();
+}

From 5c79743a74be21edfd5fa796160296acdd3f263e Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@enforceauth.com>
Date: Tue, 28 Apr 2026 21:37:20 -0400
Subject: [PATCH 09/18] docs(plans): mark PR 1 shipped, move to plans/done/

Adds a "Shipped" section listing all 9 commits, the 8 deviations from
the original plan (each with one-line rationale), and the open
follow-ups carried over to future work. Also enumerates the shared
primitives now exported for PR 2 (MCP server) to wrap.
---
 .../01-pr1-deep-http-transport.md             | 56 +++++++++++++++++++
 1 file changed, 56 insertions(+)
 rename plans/{todo => done}/01-pr1-deep-http-transport.md (83%)

diff --git a/plans/todo/01-pr1-deep-http-transport.md b/plans/done/01-pr1-deep-http-transport.md
similarity index 83%
rename from plans/todo/01-pr1-deep-http-transport.md
rename to plans/done/01-pr1-deep-http-transport.md
index fc9324c..fabb899 100644
--- a/plans/todo/01-pr1-deep-http-transport.md
+++ b/plans/done/01-pr1-deep-http-transport.md
@@ -457,3 +457,59 @@ Each commit ~150-400 lines of diff, reviewable independently. PR title for the m
 - `/Users/brad/dev/zift/src/types.rs`
 - `/Users/brad/dev/zift/src/scanner/matcher.rs` (expose finding_id)
 - `/Users/brad/dev/zift/Cargo.toml`
+
+---
+
+## 14. Shipped
+
+**Branch**: `feat/deep-http`
+**Test count**: 199 passing (186 lib unit + 13 integration); clippy clean with `-D warnings`.
+
+### Commits (in order)
+
+| Commit | Title |
+|---|---|
+| `10f2643` | docs: add plans/ tree with PR 1-3 plan for --deep |
+| `5743690` | docs(plans): lock three decisions for PR 1 deep-mode design |
+| `e29eb42` | refactor(cli): replace closed LlmProvider enum with --base-url |
+| `08ad940` | feat(deep): add deep module skeleton with config + error types |
+| `4a1a110` | feat(deep): candidate selection and context expansion |
+| `0b0ecef` | feat(deep): prompt rendering and JSON schema |
+| `fd2683a` | feat(deep): OpenAI-compatible HTTP client + cost tracker |
+| `c9a5004` | feat(deep): wire orchestrator end-to-end and merge semantic findings |
+
+The plan called for 6 implementation commits; we shipped 6, plus 2 doc commits up front and 1 commit (this one, not listed in the table above) to move this plan to `done/`.
+
+### Plan deviations (all flagged in commit messages)
+
+1. **`api_key` excluded from `.zift.toml`.** Originally §2 said precedence was CLI > env > config; security review during commit 1 dropped the config-file step — keys belong in env or CLI, not source-controlled files. Plan §2 was updated in commit 1.
+2. **Env var rename**: `ZIFT_API_KEY` → `ZIFT_AGENT_API_KEY` (commit 1). Decided mid-implementation; the new name is namespaced and more descriptive.
+3. **`Candidate.imports` field added.** §2 didn't specify it; needed by `prompt::render` for per-call framework detection. Populated from `ExpandedContext.imports` in `select_candidates`.
+4. **Smart-path tree-sitter expansion deferred.** Plan §7 specced both fast-path and smart-path for commit 3; only fast-path shipped. The line-window with imports is sufficient for the model to figure out function boundaries on the languages we support, and adding tree-sitter walking can land later if measurement says it matters. Smart-path comments preserved as TODOs in `src/deep/context.rs`.
+5. **Concurrency is sequential, not fan-out.** Plan §4 mentioned `std::thread::scope` over `reqwest::blocking::Client` to honor `runtime.max_concurrent`. Commit 6 ships sequential dispatch with a TODO in `src/deep/mod.rs::run`. Local servers (localhost auto-capped to 1) wouldn't benefit anyway, and remote endpoints can have this added later without API changes.
+6. **`src/lib.rs` split added in commit 5.** Required to let `tests/deep_http_integration.rs` reach internal modules. `src/main.rs` is now a thin shim. Future-proofs PR 2 (the MCP server can depend on `zift` as a library).
+7. **Markdown-fence stripping added to client.** Not in original plan; shipped after observing that some local models wrap JSON in ` ```json ` fences despite system-prompt instructions. `strip_markdown_fence` in `src/deep/client.rs`.
+8. **`AUTH_NAME_REGEX` tweak**: pattern is `authori[sz]\w*` not `authori[sz]e\w*`. The plan-suggested regex would have missed "authorization" (no `e` between `z` and `ation`). Caught by tests in commit 3.
+
+### Open follow-ups (from §12 "Open issues" + new ones)
+
+- **Concurrency fan-out** — implement `std::thread::scope` parallelism for non-localhost backends (commit 6 TODO).
+- **Smart-path tree-sitter expansion** — walk to enclosing function for TS/JS/Java findings. Useful when fast-path snippet is < 8 lines after window (commit 3 TODO in `context.rs`).
+- **`response_format` capability detection at startup** — the current approach is "send it, retry without on parse failure"; this could be a one-off probe at startup instead.
+- **HTTP 5xx exponential backoff** — currently any 5xx is a hard skip; plan §9 specced 3 attempts at 1s/4s/16s. Worth adding for flaky remote endpoints.
+- **`compute_finding_id` move** — currently `pub(crate)` in `scanner/matcher.rs`; cleaner home would be `types::compute_finding_id`.
+
+### Ready for PR 2
+
+The shared primitives are stable and exported:
+
+- `crate::deep::prompt::SYSTEM_PROMPT`
+- `crate::deep::prompt::output_schema()`
+- `crate::deep::prompt::render(...)`
+- `crate::deep::candidate::select_candidates(...)`
+- `crate::deep::context::expand_finding(...)`, `expand_region(...)`
+- `crate::deep::finding::SemanticFinding`, `into_finding(...)`
+- `crate::deep::merge::merge(...)`
+- `crate::deep::cost::CostTracker`
+
+PR 2 (MCP server) can wrap these without reimplementing.

From 37bda349861272d23bd38b8b666d80794803b3e8 Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@enforceauth.com>
Date: Tue, 28 Apr 2026 21:46:24 -0400
Subject: [PATCH 10/18] docs: update README for functional --deep and current
 language support
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- Drop "Design phase — not yet functional" status banner; replace with
  accurate v0.1 status (structural ready for TS/JS/Java; --deep mode
  functional via OpenAI-compatible endpoints).
- Add a Deep mode section with two concrete examples: local (Ollama)
  and hosted (OpenAI). Document --base-url, --model, --max-cost,
  ZIFT_AGENT_API_KEY env var, and the localhost concurrency auto-cap.
- Document the [deep] config file schema, with the security note that
  api_key is intentionally not readable from .zift.toml.
- Replace the static language list with a matrix showing structural
  support vs. deep cold-region coverage. The cold-region pass works on
  all 9 Language enum entries today, so users running --deep against
  Python/Go/C#/Kotlin/Ruby/PHP get value before structural ships.
---
 README.md | 65 ++++++++++++++++++++++++++++++++++++++++++++++++++-----
 1 file changed, 60 insertions(+), 5 deletions(-)

diff --git a/README.md b/README.md
index 7a99249..b8df0f1 100644
--- a/README.md
+++ b/README.md
@@ -2,6 +2,8 @@
 
 Sift through your codebase for embedded authorization logic. Extract it into Rego for [OPA](https://www.openpolicyagent.org/).
 
+> **Status:** v0.1 — structural scanning ready for TypeScript, JavaScript, and Java. `--deep` (LLM-assisted) mode functional via any OpenAI-compatible endpoint.
+
 ## What is zift?
 
 Most applications embed authorization decisions directly in application code: role checks in `if` statements, permission guards in middleware, business rules that act as access control. This scattered auth logic is hard to audit, hard to test, and impossible to enforce consistently.
@@ -10,9 +12,9 @@ Most applications embed authorization decisions directly in application code: ro
 
 ## How it works
 
-```
-zift .                          # scan current directory
-zift --deep .                   # include LLM-assisted semantic analysis
+```bash
+zift .                          # structural scan of current directory (fast, free)
+zift scan ./src --deep ...      # also run LLM-assisted semantic analysis
 zift extract ./findings.json    # generate Rego from scan findings
 zift report .                   # detailed findings report
 ```
@@ -21,11 +23,64 @@ zift report .                   # detailed findings report
 
 1. **Structural scan** (tree-sitter) — fast, deterministic, zero-cost. Finds known authorization patterns: role checks, permission guards, auth middleware, security annotations.
 
-2. **Semantic scan** (LLM-assisted, opt-in) — analyzes candidate code regions for authorization logic that doesn't use explicit auth vocabulary. Catches business rules that implicitly encode access control.
+2. **Semantic scan** (`--deep`, opt-in) — sends candidate code regions to an LLM that classifies authorization logic the structural pass missed or misjudged. Useful for business rules that implicitly encode access control, and for languages where structural support hasn't shipped yet (Python, Go, etc.).
+
+## Deep mode (`--deep`)
+
+`--deep` talks to **any OpenAI-compatible chat-completions endpoint** — one client speaks to Ollama, LM Studio, llama.cpp, vLLM, OpenRouter, OpenAI, and Anthropic-via-proxy. Pick where you want your bytes to go.
+
+### Local model (Ollama, LM Studio, llama.cpp)
+
+```bash
+ollama pull qwen2.5-coder:14b
+zift scan ./src --deep \
+  --base-url http://localhost:11434/v1 \
+  --model qwen2.5-coder:14b
+```
+
+No API key needed. Concurrency auto-caps to 1 for localhost endpoints — single-GPU servers serialize internally, so parallelism > 1 just adds queueing.
+
+### Hosted model (OpenAI, OpenRouter, etc.)
+
+```bash
+export ZIFT_AGENT_API_KEY=sk-...
+zift scan ./src --deep \
+  --base-url https://api.openai.com/v1 \
+  --model gpt-4o-mini \
+  --max-cost 5.00
+```
+
+`--max-cost` enforces a USD spend ceiling using token rates supplied via `.zift.toml` (see below). With no rates configured, spend tracking is a no-op and the ceiling never triggers.
+
+### Configuration file
+
+Most settings can live in `.zift.toml`:
+
+```toml
+[deep]
+base_url          = "http://localhost:11434/v1"
+model             = "qwen2.5-coder:14b"
+max_cost          = 5.00
+cost_per_1k_input  = 0.0   # hosted models: e.g. 0.00015 for gpt-4o-mini input
+cost_per_1k_output = 0.0   #                e.g. 0.0006  for gpt-4o-mini output
+```
+
+`api_key` is intentionally **not** readable from `.zift.toml` — keys belong in `$ZIFT_AGENT_API_KEY` or `--api-key`, not in source-controlled files.
 
 ## Supported languages
 
-TypeScript, JavaScript, and Java (Python, Go, C#, Kotlin, Ruby, PHP planned).
+| Language | Structural | Deep (cold-region) | Framework hints (deep) |
+|----------|-----------|---------------------|------------------------|
+| TypeScript / JavaScript | yes (v0.1) | yes (v0.1) | Express, NestJS, Next.js |
+| Java | yes (v0.1) | yes (v0.1) | Spring Security, Jakarta Security |
+| Python | planned (v0.2) | yes (v0.1) | Django, Flask, FastAPI |
+| Go | planned (v0.2) | yes (v0.1) | Gin, Echo |
+| C# | planned (v0.3) | yes (v0.1) | ASP.NET Core |
+| Kotlin | planned (v0.3) | yes (v0.1) | Spring (Kotlin) |
+| Ruby | planned (v0.3) | yes (v0.1) | Rails |
+| PHP | planned (v0.3) | yes (v0.1) | Laravel |
+
+Deep mode walks the full source tree by extension and detects auth-y function names with regex — so it produces useful results for every language in the table above, well before structural support lands.
 
 ## Installation
 

From 9c3f203e927d61ac4d140e79512264989bdb424f Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@enforceauth.com>
Date: Tue, 28 Apr 2026 22:02:19 -0400
Subject: [PATCH 11/18] fix(deep): plumb excludes/cost rates, UTF-8 safety,
 deterministic order

Address review feedback for PR 1:

- Honor --exclude / --language in cold-region discovery (was hardcoded
  to &[], &[]). Threaded through DeepRuntime so deep mode respects the
  same scope as the structural pass.
- Load cost_per_1k_input / cost_per_1k_output from .zift.toml so
  --max-cost actually binds. Warn when cap is set without rates.
  README documented these but DeepConfig didn't carry them.
- Use floor_char_boundary in context.rs::truncate and client.rs's log
  truncator to avoid panics on multi-byte UTF-8 chars in source files
  or model responses.
- Sort deep::run output by (file, line_start, line_end) so HashMap
  iteration order doesn't leak into user-visible finding ordering.
- Classify reqwest timeouts as DeepError::Timeout (was unreachable;
  all timeouts surfaced as opaque "HTTP error: ...").
- Strip markdown fences with any language tag, not just ```json.
- Drop unused max_concurrent from scan-start log line; mark concurrency
  TODO next to the candidate loop.
- Tighten max_candidates_cap_respected from <= 5 to == 1 (the cap
  actually binds at 1 cold-region candidate). Surfaced a fixture bug:
  is_admin_5 doesn't match AUTH_NAME_REGEX (no word boundary after _).
---
 src/commands/init.rs           |  4 +-
 src/commands/scan.rs           |  3 +-
 src/config.rs                  | 10 +++++
 src/deep/candidate.rs          | 53 ++++++++++++++++++++++--
 src/deep/client.rs             | 65 ++++++++++++++++++++++++++----
 src/deep/config.rs             | 73 +++++++++++++++++++++++++++++++++-
 src/deep/context.rs            | 24 ++++++++++-
 src/deep/cost.rs               |  2 +
 src/deep/mod.rs                | 16 +++++++-
 tests/deep_http_integration.rs | 42 +++++++++++++++++++
 10 files changed, 273 insertions(+), 19 deletions(-)

diff --git a/src/commands/init.rs b/src/commands/init.rs
index 2966029..2950403 100644
--- a/src/commands/init.rs
+++ b/src/commands/init.rs
@@ -9,7 +9,9 @@ exclude = ["vendor/**", "node_modules/**", "target/**"]
 # [deep]
 # base_url = "http://localhost:11434/v1"   # Ollama, LM Studio, OpenAI-compatible
 # model    = "your-model-name"
-# max_cost = 5.00
+# max_cost = 5.00                           # USD spend ceiling (requires rates below)
+# cost_per_1k_input  = 0.00015              # e.g. gpt-4o-mini input
+# cost_per_1k_output = 0.0006               # e.g. gpt-4o-mini output
 # # API key: set $ZIFT_AGENT_API_KEY in your environment, or pass --api-key.
 # # Do NOT put the key in this file — it gets checked into source control.
 
diff --git a/src/commands/scan.rs b/src/commands/scan.rs
index 3d688cd..1bac2a4 100644
--- a/src/commands/scan.rs
+++ b/src/commands/scan.rs
@@ -45,10 +45,9 @@ pub fn execute(args: ScanArgs, config: ZiftConfig) -> Result<()> {
 
     if let Some(runtime) = deep_runtime.as_ref() {
         tracing::info!(
-            "running deep scan: base_url={} model={} concurrency={}",
+            "running deep scan: base_url={} model={}",
             runtime.base_url,
             runtime.model,
-            runtime.max_concurrent
         );
         result.findings = deep::run(result.findings, &path, runtime)?;
     }
diff --git a/src/config.rs b/src/config.rs
index 265fc99..59354f7 100644
--- a/src/config.rs
+++ b/src/config.rs
@@ -30,6 +30,12 @@ pub struct DeepConfig {
     pub model: Option<String>,
     /// Maximum spend limit in USD.
     pub max_cost: Option<f64>,
+    /// USD cost per 1k input tokens. Required for `max_cost` to bind on
+    /// hosted models — without this (and `cost_per_1k_output`), the spend
+    /// tracker is a no-op.
+    pub cost_per_1k_input: Option<f64>,
+    /// USD cost per 1k output tokens. See `cost_per_1k_input`.
+    pub cost_per_1k_output: Option<f64>,
     // NOTE: api_key is intentionally NOT readable from this file — keys belong
     // in $ZIFT_AGENT_API_KEY or --api-key, not checked into source control.
 }
@@ -87,6 +93,8 @@ min_confidence = "medium"
 base_url = "http://localhost:11434/v1"
 model = "qwen2.5-coder:14b"
 max_cost = 5.00
+cost_per_1k_input = 0.00015
+cost_per_1k_output = 0.0006
 
 [extract]
 package_prefix = "app.authz"
@@ -104,6 +112,8 @@ additional = ["./custom-rules"]
         );
         assert_eq!(config.deep.model.as_deref(), Some("qwen2.5-coder:14b"));
         assert_eq!(config.deep.max_cost, Some(5.0));
+        assert_eq!(config.deep.cost_per_1k_input, Some(0.00015));
+        assert_eq!(config.deep.cost_per_1k_output, Some(0.0006));
         assert_eq!(config.extract.package_prefix.as_deref(), Some("app.authz"));
         assert_eq!(config.rules.additional, vec!["./custom-rules"]);
     }
diff --git a/src/deep/candidate.rs b/src/deep/candidate.rs
index 1f52b8b..6005f72 100644
--- a/src/deep/candidate.rs
+++ b/src/deep/candidate.rs
@@ -180,7 +180,8 @@ fn build_cold_regions(
         return Ok(Vec::new());
     }
 
-    let discovered = discover_files_for_deep(scan_root, &[], &[]);
+    let discovered =
+        discover_files_for_deep(scan_root, &runtime.excludes, &runtime.language_filter);
     let mut out: Vec<Candidate> = Vec::new();
 
     for file in discovered {
@@ -318,6 +319,8 @@ mod tests {
             max_concurrent: 1,
             temperature: 0.0,
             max_prompt_chars: 16_000,
+            excludes: Vec::new(),
+            language_filter: Vec::new(),
         }
     }
 
@@ -525,17 +528,59 @@ mod tests {
     #[test]
     fn max_candidates_cap_respected() {
         let dir = tempdir().unwrap();
-        // 20 files, each with one auth-y name.
+        // 20 files, each with one auth-y name. Use `is_admin()` (rather than
+        // `is_admin_{i}`) — the regex's trailing \b can't match before a
+        // `_<digit>` suffix because `_` is a word char.
         for i in 0..20 {
             fs::write(
                 dir.path().join(format!("f{i}.py")),
-                format!("def is_admin_{i}():\n    pass\n"),
+                "def is_admin():\n    pass\n",
             )
             .unwrap();
+            // Use the suffix only to vary file names, not the auth-y token.
+            let _ = i;
         }
         let mut runtime = rt();
         runtime.max_candidates = 5;
         let candidates = select_candidates(&[], dir.path(), &runtime).unwrap();
-        assert!(candidates.len() <= 5);
+        // cold_budget = 5 * 0.3 → 1 candidate. Cap binds: we should get
+        // exactly 1, not 20 (the count of available cold-region hits).
+        assert_eq!(candidates.len(), 1);
+        assert!(candidates.len() <= runtime.max_candidates);
+    }
+
+    #[test]
+    fn cold_region_respects_excludes() {
+        let dir = tempdir().unwrap();
+        fs::create_dir_all(dir.path().join("vendor")).unwrap();
+        fs::write(
+            dir.path().join("vendor/legacy.py"),
+            "def is_admin():\n    pass\n",
+        )
+        .unwrap();
+        fs::write(dir.path().join("app.py"), "def has_role(u, r):\n    pass\n").unwrap();
+
+        let mut runtime = rt();
+        runtime.excludes = vec!["vendor/**".into()];
+        let candidates = select_candidates(&[], dir.path(), &runtime).unwrap();
+        assert_eq!(candidates.len(), 1);
+        assert_eq!(candidates[0].file, PathBuf::from("app.py"));
+    }
+
+    #[test]
+    fn cold_region_respects_language_filter() {
+        let dir = tempdir().unwrap();
+        fs::write(dir.path().join("a.py"), "def is_admin():\n    pass\n").unwrap();
+        fs::write(
+            dir.path().join("b.go"),
+            "func IsAdmin() bool { return true }\n",
+        )
+        .unwrap();
+
+        let mut runtime = rt();
+        runtime.language_filter = vec![Language::Python];
+        let candidates = select_candidates(&[], dir.path(), &runtime).unwrap();
+        assert_eq!(candidates.len(), 1);
+        assert_eq!(candidates[0].language, Language::Python);
     }
 }
diff --git a/src/deep/client.rs b/src/deep/client.rs
index 672b550..9198339 100644
--- a/src/deep/client.rs
+++ b/src/deep/client.rs
@@ -36,6 +36,9 @@ pub struct OpenAiCompatibleClient {
     api_key: Option<String>,
     model: String,
     temperature: f32,
+    /// Echoed into `DeepError::Timeout` so the user-visible error states the
+    /// configured limit, not just "HTTP error".
+    timeout_secs: u64,
 }
 
 impl OpenAiCompatibleClient {
@@ -51,6 +54,7 @@ impl OpenAiCompatibleClient {
             api_key: runtime.api_key.clone(),
             model: runtime.model.clone(),
             temperature: runtime.temperature,
+            timeout_secs: runtime.request_timeout_secs,
         })
     }
 
@@ -99,7 +103,18 @@ impl OpenAiCompatibleClient {
             req = req.bearer_auth(key);
         }
 
-        let response = req.send()?;
+        // Distinguish timeouts from generic HTTP errors so the orchestrator
+        // can surface a specific message ("request timed out after Ns")
+        // rather than the opaque "HTTP error: ...".
+        let response = match req.send() {
+            Ok(r) => r,
+            Err(e) if e.is_timeout() => {
+                return Err(DeepError::Timeout {
+                    secs: self.timeout_secs,
+                });
+            }
+            Err(e) => return Err(DeepError::Http(e)),
+        };
         let status = response.status();
         if !status.is_success() {
             // Auth errors get distinct surfacing; everything else is generic.
@@ -152,17 +167,25 @@ impl OpenAiCompatibleClient {
     }
 }
 
-/// Strip a leading/trailing ```json``` (or ```) markdown fence if present.
+/// Strip a leading/trailing markdown fence if present, regardless of the
+/// optional language tag (` ```json `, ` ```javascript `, plain ` ``` `, …).
 /// Some local models wrap JSON in fences despite system-prompt instructions
 /// not to.
 fn strip_markdown_fence(s: &str) -> &str {
     let trimmed = s.trim();
-    let after_fence = trimmed
-        .strip_prefix("```json")
-        .or_else(|| trimmed.strip_prefix("```"))
-        .unwrap_or(trimmed);
+    let after_fence = match trimmed.strip_prefix("```") {
+        Some(rest) => {
+            // Drop the language tag (everything up to the first newline) if
+            // any, then continue with the remaining content.
+            match rest.find('\n') {
+                Some(nl) => &rest[nl + 1..],
+                None => rest,
+            }
+        }
+        None => trimmed,
+    };
     after_fence
-        .trim()
+        .trim_end()
         .strip_suffix("```")
         .unwrap_or(after_fence)
         .trim()
@@ -173,7 +196,10 @@ fn truncate_for_log(s: &str) -> String {
     if s.len() <= MAX {
         s.to_string()
     } else {
-        format!("{}...", &s[..MAX])
+        // Round down to a UTF-8 char boundary so we never panic on a
+        // multi-byte char straddling MAX (likely on garbage model output).
+        let cut = s.floor_char_boundary(MAX);
+        format!("{}...", &s[..cut])
     }
 }
 
@@ -234,6 +260,18 @@ mod tests {
         assert_eq!(strip_markdown_fence(raw), "{\"findings\": []}");
     }
 
+    #[test]
+    fn strip_fence_handles_alternative_language_tags() {
+        for lang in ["javascript", "ts", "rust", "yaml"] {
+            let raw = format!("```{lang}\n{{\"findings\": []}}\n```");
+            assert_eq!(
+                strip_markdown_fence(&raw),
+                "{\"findings\": []}",
+                "fence stripper failed for tag: {lang}"
+            );
+        }
+    }
+
     #[test]
     fn truncate_for_log_short_string_passthrough() {
         assert_eq!(truncate_for_log("hello"), "hello");
@@ -246,4 +284,15 @@ mod tests {
         assert!(truncated.ends_with("..."));
         assert!(truncated.len() < long.len());
     }
+
+    #[test]
+    fn truncate_for_log_handles_multibyte_at_boundary() {
+        // 198 ascii + a 4-byte emoji crossing byte 200. Naive [..200] would panic.
+        let mut s = "x".repeat(198);
+        s.push('🦀'); // 4 bytes
+        s.push_str(&"y".repeat(50));
+        let truncated = truncate_for_log(&s);
+        // No panic, and we didn't slice mid-codepoint.
+        assert!(truncated.ends_with("..."));
+    }
 }
diff --git a/src/deep/config.rs b/src/deep/config.rs
index bac81b9..eca827e 100644
--- a/src/deep/config.rs
+++ b/src/deep/config.rs
@@ -10,6 +10,7 @@
 use crate::cli::ScanArgs;
 use crate::config::ZiftConfig;
 use crate::deep::error::DeepError;
+use crate::types::Language;
 
 /// Resolved runtime configuration for the deep (semantic) scan.
 #[derive(Debug, Clone)]
@@ -25,6 +26,13 @@ pub struct DeepRuntime {
     pub max_concurrent: usize,
     pub temperature: f32,
     pub max_prompt_chars: usize,
+    /// Glob exclude patterns merged from `--exclude` and `[scan].exclude`.
+    /// Forwarded to cold-region file discovery so deep mode honors the same
+    /// scope users set for the structural pass.
+    pub excludes: Vec<String>,
+    /// Language filter from `--language`. Empty == all languages. Forwarded
+    /// to cold-region file discovery.
+    pub language_filter: Vec<Language>,
 }
 
 const DEFAULT_REQUEST_TIMEOUT_SECS: u64 = 120;
@@ -80,6 +88,20 @@ pub fn build(args: &ScanArgs, config: &ZiftConfig) -> Result<DeepRuntime, DeepEr
 
     let api_key = args.api_key.clone().filter(|s| !s.is_empty());
     let max_cost_usd = args.max_cost.or(config.deep.max_cost);
+    let cost_per_1k_input = config.deep.cost_per_1k_input;
+    let cost_per_1k_output = config.deep.cost_per_1k_output;
+
+    // Warn if a cap is set but no rates are configured — the tracker
+    // short-circuits when both rates are 0, so the cap would never bind.
+    let no_rates =
+        cost_per_1k_input.unwrap_or(0.0) == 0.0 && cost_per_1k_output.unwrap_or(0.0) == 0.0;
+    if max_cost_usd.is_some() && no_rates {
+        eprintln!(
+            "warning: --max-cost is set but [deep] cost_per_1k_input / \
+             cost_per_1k_output are not configured in .zift.toml — spend \
+             tracking is a no-op without rates"
+        );
+    }
 
     let max_concurrent = if is_localhost(&base_url) {
         DEFAULT_LOCAL_CONCURRENCY
@@ -87,18 +109,24 @@ pub fn build(args: &ScanArgs, config: &ZiftConfig) -> Result<DeepRuntime, DeepEr
         DEFAULT_REMOTE_CONCURRENCY
     };
 
+    // Merge excludes from config + CLI; preserve CLI ordering after config.
+    let mut excludes = config.scan.exclude.clone();
+    excludes.extend(args.exclude.iter().cloned());
+
     Ok(DeepRuntime {
         base_url,
         model,
         api_key,
         max_cost_usd,
-        cost_per_1k_input: None,
-        cost_per_1k_output: None,
+        cost_per_1k_input,
+        cost_per_1k_output,
         request_timeout_secs: DEFAULT_REQUEST_TIMEOUT_SECS,
         max_candidates: DEFAULT_MAX_CANDIDATES,
         max_concurrent,
         temperature: DEFAULT_TEMPERATURE,
         max_prompt_chars: DEFAULT_MAX_PROMPT_CHARS,
+        excludes,
+        language_filter: args.language.clone(),
     })
 }
 
@@ -137,6 +165,7 @@ mod tests {
             base_url: Some("http://config/v1".into()),
             model: Some("config-model".into()),
             max_cost: Some(1.0),
+            ..DeepConfig::default()
         });
         let runtime = build(&args, &config).unwrap();
         assert_eq!(runtime.base_url, "http://cli/v1");
@@ -150,6 +179,7 @@ mod tests {
             base_url: Some("http://config/v1".into()),
             model: Some("config-model".into()),
             max_cost: Some(2.5),
+            ..DeepConfig::default()
         });
         let runtime = build(&args, &config).unwrap();
         assert_eq!(runtime.base_url, "http://config/v1");
@@ -164,11 +194,50 @@ mod tests {
             base_url: None,
             model: None,
             max_cost: Some(10.0),
+            ..DeepConfig::default()
         });
         let runtime = build(&args, &config).unwrap();
         assert_eq!(runtime.max_cost_usd, Some(0.5));
     }
 
+    #[test]
+    fn cost_rates_loaded_from_config() {
+        let args = args_with(Some("http://x/v1"), Some("m"), None, Some(1.0));
+        let config = config_with(DeepConfig {
+            cost_per_1k_input: Some(0.0002),
+            cost_per_1k_output: Some(0.0008),
+            ..DeepConfig::default()
+        });
+        let runtime = build(&args, &config).unwrap();
+        assert_eq!(runtime.cost_per_1k_input, Some(0.0002));
+        assert_eq!(runtime.cost_per_1k_output, Some(0.0008));
+    }
+
+    #[test]
+    fn excludes_merged_from_cli_and_config() {
+        let mut args = args_with(Some("http://x/v1"), Some("m"), None, None);
+        args.exclude = vec!["cli/**".into()];
+        let mut zcfg = ZiftConfig::default();
+        zcfg.scan.exclude = vec!["config/**".into()];
+        zcfg.deep = DeepConfig {
+            ..DeepConfig::default()
+        };
+        let runtime = build(&args, &zcfg).unwrap();
+        assert_eq!(runtime.excludes, vec!["config/**", "cli/**"]);
+    }
+
+    #[test]
+    fn language_filter_passed_through() {
+        use crate::types::Language;
+        let mut args = args_with(Some("http://x/v1"), Some("m"), None, None);
+        args.language = vec![Language::Java, Language::Python];
+        let runtime = build(&args, &ZiftConfig::default()).unwrap();
+        assert_eq!(
+            runtime.language_filter,
+            vec![Language::Java, Language::Python]
+        );
+    }
+
     #[test]
     fn missing_base_url_errors() {
         let args = args_with(None, Some("m"), None, None);
diff --git a/src/deep/context.rs b/src/deep/context.rs
index 94ea683..8ea7cfd 100644
--- a/src/deep/context.rs
+++ b/src/deep/context.rs
@@ -105,8 +105,11 @@ fn expand_inner(
 
     // Truncate at max_chars (favors keeping the head — the part most likely
     // to contain the actual auth check; trailing context is more discardable).
+    // Round down to a UTF-8 char boundary to avoid `String::truncate` panics
+    // on multi-byte chars (e.g. Unicode comments/identifiers in source).
     if snippet.len() > max_chars {
-        snippet.truncate(max_chars);
+        let cut = snippet.floor_char_boundary(max_chars);
+        snippet.truncate(cut);
         snippet.push_str("\n// [truncated by zift deep-mode max_prompt_chars]");
     }
 
@@ -262,6 +265,25 @@ mod tests {
         assert!(ctx.snippet.contains("[truncated"));
     }
 
+    #[test]
+    fn truncation_does_not_panic_on_multibyte_boundary() {
+        // Build a snippet whose byte length exceeds max_chars and whose
+        // truncation point lands inside a multi-byte char. Naive truncate
+        // would panic.
+        let dir = tempdir().unwrap();
+        let mut content = String::new();
+        // 198 ascii bytes, then a 4-byte emoji that crosses byte 200.
+        content.push_str(&"a".repeat(198));
+        content.push('🦀');
+        content.push_str(&"b".repeat(200));
+        write_file(dir.path(), "a.ts", &content);
+        let finding = make_finding(PathBuf::from("a.ts"), 1, 1);
+
+        // No panic — boundary-rounded truncate keeps us valid.
+        let ctx = expand_finding(&finding, dir.path(), 200).unwrap();
+        assert!(ctx.snippet.contains("[truncated"));
+    }
+
     #[test]
     fn expand_region_uses_relative_path_in_output() {
         let dir = tempdir().unwrap();
diff --git a/src/deep/cost.rs b/src/deep/cost.rs
index a975180..3efab92 100644
--- a/src/deep/cost.rs
+++ b/src/deep/cost.rs
@@ -85,6 +85,8 @@ mod tests {
             max_concurrent: 1,
             temperature: 0.0,
             max_prompt_chars: 16_000,
+            excludes: Vec::new(),
+            language_filter: Vec::new(),
         }
     }
 
diff --git a/src/deep/mod.rs b/src/deep/mod.rs
index 50e5e49..7453bc8 100644
--- a/src/deep/mod.rs
+++ b/src/deep/mod.rs
@@ -63,6 +63,9 @@ pub fn run(
     let mut semantic_findings: Vec<Finding> = Vec::new();
     let mut false_positive_seeds: HashSet<String> = HashSet::new();
 
+    // TODO(deep-concurrency): honor `runtime.max_concurrent` via
+    // `std::thread::scope` over `reqwest::blocking::Client` (clone-cheap).
+    // Localhost endpoints auto-cap to 1 anyway; remote fan-out is the win.
     for candidate in &candidates {
         let seed = candidate
             .original_finding_id
@@ -127,10 +130,21 @@ pub fn run(
     );
 
     // Drop structural findings the model rejected, then merge semantic in.
+    // HashMap iteration order is randomized, so we must re-sort the merged
+    // result to match the deterministic (file, line_start) ordering the
+    // structural pass establishes — otherwise `--deep` produces different
+    // output orderings between runs over the same input.
     let filtered_structural: Vec<Finding> = structural_by_id
         .into_values()
         .filter(|f| !false_positive_seeds.contains(&f.id))
         .collect();
 
-    Ok(merge::merge(filtered_structural, semantic_findings))
+    let mut merged = merge::merge(filtered_structural, semantic_findings);
+    merged.sort_by(|a, b| {
+        a.file
+            .cmp(&b.file)
+            .then(a.line_start.cmp(&b.line_start))
+            .then(a.line_end.cmp(&b.line_end))
+    });
+    Ok(merged)
 }
diff --git a/tests/deep_http_integration.rs b/tests/deep_http_integration.rs
index 4883941..51eb146 100644
--- a/tests/deep_http_integration.rs
+++ b/tests/deep_http_integration.rs
@@ -28,6 +28,8 @@ fn runtime_for(server_url: &str) -> DeepRuntime {
         max_concurrent: 1,
         temperature: 0.0,
         max_prompt_chars: 16_000,
+        excludes: Vec::new(),
+        language_filter: Vec::new(),
     }
 }
 
@@ -450,6 +452,46 @@ fn deep_run_drops_structural_when_model_flags_false_positive() {
     assert!(merged.is_empty(), "expected empty result, got: {merged:?}");
 }
 
+#[test]
+fn deep_run_emits_findings_in_deterministic_order() {
+    // Three structural findings across two files; deep::run must return
+    // them sorted by (file, line_start, line_end), regardless of the
+    // randomized HashMap iteration internally.
+    let dir = tempdir().unwrap();
+    fs::write(dir.path().join("a.ts"), "x\n".repeat(100)).unwrap();
+    fs::write(dir.path().join("b.ts"), "x\n".repeat(100)).unwrap();
+
+    let mut server = Server::new();
+    // Model returns no findings — keeps focus on the structural ordering.
+    let _m = server
+        .mock("POST", "/chat/completions")
+        .with_status(200)
+        .with_body(ok_response(r#"{"findings": []}"#, 10, 5))
+        .expect_at_least(1)
+        .create();
+    let runtime = runtime_for(&server.url());
+
+    let structural = vec![
+        structural_finding("b.ts", 50),
+        structural_finding("a.ts", 80),
+        structural_finding("a.ts", 10),
+    ];
+    let merged = zift::deep::run(structural, dir.path(), &runtime).unwrap();
+
+    let order: Vec<(String, usize)> = merged
+        .iter()
+        .map(|f| (f.file.display().to_string(), f.line_start))
+        .collect();
+    assert_eq!(
+        order,
+        vec![
+            ("a.ts".to_string(), 10),
+            ("a.ts".to_string(), 80),
+            ("b.ts".to_string(), 50),
+        ]
+    );
+}
+
 #[test]
 fn deep_run_returns_structural_unchanged_when_no_candidates() {
     let dir = tempdir().unwrap();

From 606898a1803d144c4eec66e196dcf0fd5fdedd5d Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@enforceauth.com>
Date: Tue, 28 Apr 2026 22:02:24 -0400
Subject: [PATCH 12/18] =?UTF-8?q?docs:=20drop=20project-level=20"Hard=20Ru?=
 =?UTF-8?q?les"=20=E2=80=94=20moved=20to=20user=20global?=
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

The Co-Authored-By rule lives in ~/.claude/CLAUDE.md now; no need to
duplicate it in the project file.
---
 CLAUDE.md | 4 ----
 1 file changed, 4 deletions(-)

diff --git a/CLAUDE.md b/CLAUDE.md
index a1beb48..5d646cd 100644
--- a/CLAUDE.md
+++ b/CLAUDE.md
@@ -54,7 +54,3 @@ Skipped prefixes (no version bump):
 - `docs:`, `test:`, `ci:`, `chore:`, `style:`, `build:`
 
 PR titles must use a conventional commit prefix.
-
-## Hard Rules
-
-- Never include `Co-Authored-By` in commit messages

From 4934af26da5bed6113380628a11068bb11df35af Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@enforceauth.com>
Date: Tue, 28 Apr 2026 22:22:41 -0400
Subject: [PATCH 13/18] fix(deep): address PR review feedback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

CodeRabbit + Amazon Q review feedback on PR #17:

- Implement Debug for DeepRuntime manually with api_key redaction
  (defense in depth — derive(Debug) would let the secret leak through
  any tracing::debug!("{runtime:?}") call site).
- Validate max_cost / cost_per_1k_input / cost_per_1k_output as
  finite, non-negative numbers in deep::config::build (NaN/Inf/
  negative now error early instead of corrupting spend tracking).
- Route 400/422 from response_format-rejecting servers into the
  no-schema retry path. Previously these surfaced as DeepError::Config
  and short-circuited the documented fallback.
- Stop echoing model output in DeepError::BadResponse messages — the
  model can mirror prompt content (potentially user source) back to
  us. Generic message returned; truncated payload sample emitted at
  debug level only.
- Validate model-reported finding ranges: drop findings with
  line_start == 0, reversed ranges (line_end < line_start), and ranges
  entirely outside the candidate window; clamp partial overlaps to the
  window. Prevents bogus findings from flowing into
  merge/sort/snippet extraction.
- Add `requires = "deep"` to --base-url, --model, --max-cost so clap
  rejects them at parse time when --deep is absent, instead of the
  flags being silently ignored. (--api-key is intentionally excluded —
  a ZIFT_AGENT_API_KEY set in the shell environment would otherwise
  fail every non-deep invocation.)
- Switch eprintln! to tracing::warn! for cost-rate warning
  (consistency with the rest of the deep module).
- Remove dead `let _ = i;` from max_candidates_cap_respected test.
- Add ```bash / ```text language tags to four markdown code fences
  flagged by markdownlint (MD040).
---
 plans/done/01-pr1-deep-http-transport.md |   2 +-
 plans/todo/00-deep-mode-overview.md      |   2 +-
 plans/todo/02-pr2-mcp-server.md          |   2 +-
 plans/todo/03-pr3-subprocess-hook.md     |   2 +-
 src/cli.rs                               |  10 +-
 src/deep/candidate.rs                    |   9 +-
 src/deep/client.rs                       |  35 +++++--
 src/deep/config.rs                       | 104 +++++++++++++++++--
 src/deep/mod.rs                          | 127 +++++++++++++++++++++++
 tests/deep_http_integration.rs           |  59 +++++++++++
 10 files changed, 326 insertions(+), 26 deletions(-)

diff --git a/plans/done/01-pr1-deep-http-transport.md b/plans/done/01-pr1-deep-http-transport.md
index fabb899..1895433 100644
--- a/plans/done/01-pr1-deep-http-transport.md
+++ b/plans/done/01-pr1-deep-http-transport.md
@@ -6,7 +6,7 @@ Companion to [00-deep-mode-overview.md](./00-deep-mode-overview.md). This PR mak
 
 End-to-end working `--deep` flag using a single HTTP client that speaks the OpenAI `/v1/chat/completions` shape. After this PR:
 
-```
+```bash
 zift scan ./repo --deep \
   --base-url http://localhost:11434/v1 \
   --model qwen2.5-coder:14b \
diff --git a/plans/todo/00-deep-mode-overview.md b/plans/todo/00-deep-mode-overview.md
index 0591af4..b5ac450 100644
--- a/plans/todo/00-deep-mode-overview.md
+++ b/plans/todo/00-deep-mode-overview.md
@@ -27,7 +27,7 @@ User picks explicitly via `[deep] mode = "mcp" | "http" | "subprocess"`. No prov
 
 We build PR 1 first even though MCP (PR 2) is the strategically headline answer. Reason: MCP needs the prompt library, candidate selection, context expansion, and structured-output schema *anyway*. Building HTTP first forces those primitives into a clean shape; the MCP server in PR 2 is then a thin transport layer over them. The reverse order means writing the primitives for MCP, then refactoring when HTTP shows up.
 
-```
+```text
                                ┌─────────────────────────┐
                                │     src/deep/           │
                                │  candidate · context    │
diff --git a/plans/todo/02-pr2-mcp-server.md b/plans/todo/02-pr2-mcp-server.md
index 462fb15..3053001 100644
--- a/plans/todo/02-pr2-mcp-server.md
+++ b/plans/todo/02-pr2-mcp-server.md
@@ -12,7 +12,7 @@ Out of scope: HTTP-transport MCP (stdio is the universal default), authenticatio
 
 ## 2. Subcommand
 
-```
+```bash
 zift mcp [--rules-dir DIR] [--scan-root DIR]
 ```
 
diff --git a/plans/todo/03-pr3-subprocess-hook.md b/plans/todo/03-pr3-subprocess-hook.md
index fffd60d..6896fe1 100644
--- a/plans/todo/03-pr3-subprocess-hook.md
+++ b/plans/todo/03-pr3-subprocess-hook.md
@@ -17,7 +17,7 @@ Out of scope: process pooling, IPC beyond stdin/stdout, environment-variable inj
 
 ## 2. CLI surface
 
-```
+```bash
 zift scan ./repo --deep --agent-cmd "claude -p --output-format json"
 ```
 
diff --git a/src/cli.rs b/src/cli.rs
index 39a6d46..ef4143d 100644
--- a/src/cli.rs
+++ b/src/cli.rs
@@ -90,18 +90,22 @@ pub struct ScanArgs {
     ///
     /// Examples: http://localhost:11434/v1 (Ollama), http://localhost:1234/v1 (LM Studio),
     /// https://api.openai.com/v1, https://openrouter.ai/api/v1
-    #[arg(long)]
+    #[arg(long, requires = "deep")]
     pub base_url: Option<String>,
 
     /// Model name to send to the agent endpoint (requires --deep)
-    #[arg(long)]
+    #[arg(long, requires = "deep")]
     pub model: Option<String>,
 
     /// Maximum spend limit in USD (requires --deep)
-    #[arg(long)]
+    #[arg(long, requires = "deep")]
     pub max_cost: Option<f64>,
 
     /// API key for the agent endpoint (or set ZIFT_AGENT_API_KEY)
+    ///
+    /// NOTE: no `requires = "deep"` here — `ZIFT_AGENT_API_KEY` may live in
+    /// the shell environment and would otherwise fail every non-deep
+    /// invocation. Build-time validation in `deep::config::build` is enough.
     #[arg(long, env = "ZIFT_AGENT_API_KEY")]
     pub api_key: Option<String>,
 }
diff --git a/src/deep/candidate.rs b/src/deep/candidate.rs
index 6005f72..cf092b1 100644
--- a/src/deep/candidate.rs
+++ b/src/deep/candidate.rs
@@ -528,17 +528,16 @@ mod tests {
     #[test]
     fn max_candidates_cap_respected() {
         let dir = tempdir().unwrap();
-        // 20 files, each with one auth-y name. Use `is_admin()` (rather than
-        // `is_admin_{i}`) — the regex's trailing \b doesn't fire after
-        // `_<digit>` because `_` is a word char.
+        // 20 files, each with one auth-y name. Use `is_admin()` (not
+        // `is_admin_{i}`) because the regex's trailing `\b` doesn't fire
+        // after `_<digit>` (`_` is a word char). Suffix the *file name*
+        // to keep them unique without changing the auth-y token.
         for i in 0..20 {
             fs::write(
                 dir.path().join(format!("f{i}.py")),
                 "def is_admin():\n    pass\n",
             )
             .unwrap();
-            // Use the suffix only to vary file names, not the auth-y token.
-            let _ = i;
         }
         let mut runtime = rt();
         runtime.max_candidates = 5;
diff --git a/src/deep/client.rs b/src/deep/client.rs
index 9198339..0d8cd7d 100644
--- a/src/deep/client.rs
+++ b/src/deep/client.rs
@@ -117,13 +117,25 @@ impl OpenAiCompatibleClient {
         };
         let status = response.status();
         if !status.is_success() {
-            // Auth errors get distinct surfacing; everything else is generic.
-            if status.as_u16() == 401 || status.as_u16() == 403 {
+            let code = status.as_u16();
+            // Auth errors get distinct surfacing.
+            if code == 401 || code == 403 {
                 return Err(DeepError::Config(format!(
                     "auth rejected by {} ({})",
                     self.base_url, status
                 )));
             }
+            // 400/422 on a request that included `response_format` is the
+            // signature of a backend that hard-fails unsupported structured
+            // output (vs. the more common case of silently ignoring it).
+            // Surface as `BadResponse` so `analyze()`'s retry path strips
+            // the schema and tries again. On the no-schema retry, this same
+            // status code falls through to the generic Config error below.
+            if with_response_format && (code == 400 || code == 422) {
+                return Err(DeepError::BadResponse(format!(
+                    "server rejected response_format ({status}); retrying without schema"
+                )));
+            }
             return Err(DeepError::Config(format!(
                 "HTTP {} from {}",
                 status, self.base_url
@@ -141,14 +153,21 @@ impl OpenAiCompatibleClient {
             .and_then(|c| c.message.content)
             .ok_or_else(|| DeepError::BadResponse("response had no message content".into()))?;
 
-        // Try to parse the message content as our findings envelope.
-        // Some servers wrap JSON in markdown fences; strip those if present.
+        // Parse the message content as our findings envelope. Strip any
+        // markdown fence first (some local models add them despite the
+        // system prompt). Keep the returned error generic — the model may
+        // have mirrored prompt content back, and we don't want user source
+        // code (or other sensitive content) embedded in every BadResponse
+        // error string. The truncated payload sample is emitted at debug
+        // level instead, behind the operator's tracing filter.
         let content_clean = strip_markdown_fence(&content);
         let parsed: FindingsEnvelope = serde_json::from_str(content_clean).map_err(|e| {
-            DeepError::BadResponse(format!(
-                "content was not valid findings JSON: {e}; got: {}",
-                truncate_for_log(&content)
-            ))
+            tracing::debug!(
+                error = %e,
+                preview = %truncate_for_log(&content),
+                "deep: model response was not valid findings JSON",
+            );
+            DeepError::BadResponse("content was not valid findings JSON".into())
         })?;
 
         let usage = TokenUsage {
diff --git a/src/deep/config.rs b/src/deep/config.rs
index eca827e..246d958 100644
--- a/src/deep/config.rs
+++ b/src/deep/config.rs
@@ -13,7 +13,11 @@ use crate::deep::error::DeepError;
 use crate::types::Language;
 
 /// Resolved runtime configuration for the deep (semantic) scan.
-#[derive(Debug, Clone)]
+///
+/// `Debug` is implemented manually to redact `api_key` — derive(Debug) would
+/// allow the secret to leak through any `tracing::debug!("{runtime:?}")`
+/// call site (none today, but defense in depth).
+#[derive(Clone)]
 pub struct DeepRuntime {
     pub base_url: String,
     pub model: String,
@@ -35,6 +39,40 @@ pub struct DeepRuntime {
     pub language_filter: Vec<Language>,
 }
 
+impl std::fmt::Debug for DeepRuntime {
+    fn fmt(&self, f: &mut std::fmt::Formatter<'_>) -> std::fmt::Result {
+        f.debug_struct("DeepRuntime")
+            .field("base_url", &self.base_url)
+            .field("model", &self.model)
+            .field("api_key", &self.api_key.as_ref().map(|_| "<redacted>"))
+            .field("max_cost_usd", &self.max_cost_usd)
+            .field("cost_per_1k_input", &self.cost_per_1k_input)
+            .field("cost_per_1k_output", &self.cost_per_1k_output)
+            .field("request_timeout_secs", &self.request_timeout_secs)
+            .field("max_candidates", &self.max_candidates)
+            .field("max_concurrent", &self.max_concurrent)
+            .field("temperature", &self.temperature)
+            .field("max_prompt_chars", &self.max_prompt_chars)
+            .field("excludes", &self.excludes)
+            .field("language_filter", &self.language_filter)
+            .finish()
+    }
+}
+
+/// Reject NaN, infinite, or negative values in cost-related config so
+/// downstream spend tracking cannot receive nonsense (e.g. `f64::NAN`
+/// silently propagates through arithmetic and breaks the cap).
+fn validate_non_negative_finite(name: &str, v: Option<f64>) -> Result<Option<f64>, DeepError> {
+    if let Some(x) = v
+        && (!x.is_finite() || x < 0.0)
+    {
+        return Err(DeepError::Config(format!(
+            "{name} must be a non-negative finite number (got {x})"
+        )));
+    }
+    Ok(v)
+}
+
 const DEFAULT_REQUEST_TIMEOUT_SECS: u64 = 120;
 const DEFAULT_MAX_CANDIDATES: usize = 50;
 const DEFAULT_MAX_PROMPT_CHARS: usize = 16_000;
@@ -87,17 +125,20 @@ pub fn build(args: &ScanArgs, config: &ZiftConfig) -> Result<DeepRuntime, DeepEr
         })?;
 
     let api_key = args.api_key.clone().filter(|s| !s.is_empty());
-    let max_cost_usd = args.max_cost.or(config.deep.max_cost);
-    let cost_per_1k_input = config.deep.cost_per_1k_input;
-    let cost_per_1k_output = config.deep.cost_per_1k_output;
+    let max_cost_usd =
+        validate_non_negative_finite("max_cost", args.max_cost.or(config.deep.max_cost))?;
+    let cost_per_1k_input =
+        validate_non_negative_finite("cost_per_1k_input", config.deep.cost_per_1k_input)?;
+    let cost_per_1k_output =
+        validate_non_negative_finite("cost_per_1k_output", config.deep.cost_per_1k_output)?;
 
     // Warn if a cap is set but no rates are configured — the tracker
     // short-circuits when both rates are 0, so the cap would never bind.
     let no_rates =
         cost_per_1k_input.unwrap_or(0.0) == 0.0 && cost_per_1k_output.unwrap_or(0.0) == 0.0;
     if max_cost_usd.is_some() && no_rates {
-        eprintln!(
-            "warning: --max-cost is set but [deep] cost_per_1k_input / \
+        tracing::warn!(
+            "--max-cost is set but [deep] cost_per_1k_input / \
              cost_per_1k_output are not configured in .zift.toml — spend \
              tracking is a no-op without rates"
         );
@@ -200,6 +241,57 @@ mod tests {
         assert_eq!(runtime.max_cost_usd, Some(0.5));
     }
 
+    #[test]
+    fn negative_cost_field_rejected() {
+        let args = args_with(Some("http://x/v1"), Some("m"), None, Some(-1.0));
+        let err = build(&args, &ZiftConfig::default()).unwrap_err();
+        assert!(matches!(err, DeepError::Config(_)));
+    }
+
+    #[test]
+    fn nan_cost_rate_rejected() {
+        let args = args_with(Some("http://x/v1"), Some("m"), None, None);
+        let config = config_with(DeepConfig {
+            cost_per_1k_input: Some(f64::NAN),
+            ..DeepConfig::default()
+        });
+        let err = build(&args, &config).unwrap_err();
+        assert!(matches!(err, DeepError::Config(_)));
+    }
+
+    #[test]
+    fn infinite_cost_rate_rejected() {
+        let args = args_with(Some("http://x/v1"), Some("m"), None, None);
+        let config = config_with(DeepConfig {
+            cost_per_1k_output: Some(f64::INFINITY),
+            ..DeepConfig::default()
+        });
+        let err = build(&args, &config).unwrap_err();
+        assert!(matches!(err, DeepError::Config(_)));
+    }
+
+    #[test]
+    fn debug_format_redacts_api_key() {
+        let runtime = DeepRuntime {
+            base_url: "http://x/v1".into(),
+            model: "m".into(),
+            api_key: Some("sk-supersecret".into()),
+            max_cost_usd: None,
+            cost_per_1k_input: None,
+            cost_per_1k_output: None,
+            request_timeout_secs: 60,
+            max_candidates: 50,
+            max_concurrent: 1,
+            temperature: 0.0,
+            max_prompt_chars: 16_000,
+            excludes: Vec::new(),
+            language_filter: Vec::new(),
+        };
+        let formatted = format!("{runtime:?}");
+        assert!(!formatted.contains("sk-supersecret"));
+        assert!(formatted.contains("<redacted>"));
+    }
+
     #[test]
     fn cost_rates_loaded_from_config() {
         let args = args_with(Some("http://x/v1"), Some("m"), None, Some(1.0));
diff --git a/src/deep/mod.rs b/src/deep/mod.rs
index 7453bc8..b4d1e60 100644
--- a/src/deep/mod.rs
+++ b/src/deep/mod.rs
@@ -117,6 +117,14 @@ pub fn run(
                 }
                 continue;
             }
+            // Validate model-reported ranges against the candidate window.
+            // Even with a strict JSON schema, the model can return reversed
+            // ranges or numbers outside the analyzed snippet — we don't
+            // want those flowing into merge/sort/snippet extraction as
+            // bogus findings.
+            let Some(sem) = clamp_to_candidate(sem, candidate) else {
+                continue;
+            };
             let f = finding::into_finding(sem, candidate, seed, scan_root);
             semantic_findings.push(f);
         }
@@ -148,3 +156,122 @@ pub fn run(
     });
     Ok(merged)
 }
+
+/// Clamp a [`SemanticFinding`]'s line range to the candidate's analyzed
+/// window. Drops the finding entirely when:
+///
+/// - `line_start == 0` (schema requires `>= 1`, but be defensive),
+/// - `line_end < line_start`,
+/// - the entire range falls outside the candidate's window.
+///
+/// Otherwise pulls the range into `[candidate.line_start, candidate.line_end]`,
+/// logs the clamp, and returns the normalized finding.
+fn clamp_to_candidate(
+    sem: SemanticFinding,
+    candidate: &candidate::Candidate,
+) -> Option<SemanticFinding> {
+    if sem.line_start == 0 || sem.line_end < sem.line_start {
+        tracing::warn!(
+            file = %candidate.file.display(),
+            reported = format!("{}-{}", sem.line_start, sem.line_end),
+            "deep: dropping finding with invalid line range",
+        );
+        return None;
+    }
+    // Whole range outside the candidate window? Drop.
+    if sem.line_end < candidate.line_start || sem.line_start > candidate.line_end {
+        tracing::warn!(
+            file = %candidate.file.display(),
+            reported = format!("{}-{}", sem.line_start, sem.line_end),
+            window = format!("{}-{}", candidate.line_start, candidate.line_end),
+            "deep: dropping finding outside candidate window",
+        );
+        return None;
+    }
+    let line_start = sem.line_start.max(candidate.line_start);
+    let line_end = sem.line_end.min(candidate.line_end).max(line_start);
+    if line_start != sem.line_start || line_end != sem.line_end {
+        tracing::debug!(
+            file = %candidate.file.display(),
+            reported = format!("{}-{}", sem.line_start, sem.line_end),
+            clamped = format!("{line_start}-{line_end}"),
+            "deep: clamped finding range to candidate window",
+        );
+    }
+    Some(SemanticFinding {
+        line_start,
+        line_end,
+        ..sem
+    })
+}
+
+#[cfg(test)]
+mod tests {
+    use super::*;
+    use crate::deep::candidate::CandidateKind;
+    use crate::types::{AuthCategory, Confidence, Language};
+    use std::path::PathBuf;
+
+    fn cand(line_start: usize, line_end: usize) -> candidate::Candidate {
+        candidate::Candidate {
+            kind: CandidateKind::ColdRegion,
+            file: PathBuf::from("a.ts"),
+            language: Language::TypeScript,
+            line_start,
+            line_end,
+            source_snippet: String::new(),
+            imports: Vec::new(),
+            original_finding_id: None,
+            seed_category: None,
+        }
+    }
+
+    fn sem(line_start: usize, line_end: usize) -> SemanticFinding {
+        SemanticFinding {
+            line_start,
+            line_end,
+            category: AuthCategory::Rbac,
+            confidence: Confidence::High,
+            description: "x".into(),
+            reasoning: "y".into(),
+            is_false_positive: false,
+        }
+    }
+
+    #[test]
+    fn clamp_drops_reversed_range() {
+        assert!(clamp_to_candidate(sem(20, 10), &cand(1, 100)).is_none());
+    }
+
+    #[test]
+    fn clamp_drops_zero_line_start() {
+        assert!(clamp_to_candidate(sem(0, 5), &cand(1, 100)).is_none());
+    }
+
+    #[test]
+    fn clamp_drops_range_entirely_outside_window() {
+        assert!(clamp_to_candidate(sem(200, 250), &cand(1, 100)).is_none());
+        assert!(clamp_to_candidate(sem(1, 5), &cand(50, 100)).is_none());
+    }
+
+    #[test]
+    fn clamp_pulls_overshooting_range_into_window() {
+        let out = clamp_to_candidate(sem(50, 200), &cand(40, 80)).unwrap();
+        assert_eq!(out.line_start, 50);
+        assert_eq!(out.line_end, 80);
+    }
+
+    #[test]
+    fn clamp_pulls_undershooting_range_into_window() {
+        let out = clamp_to_candidate(sem(5, 60), &cand(40, 80)).unwrap();
+        assert_eq!(out.line_start, 40);
+        assert_eq!(out.line_end, 60);
+    }
+
+    #[test]
+    fn clamp_passes_through_in_window_range() {
+        let out = clamp_to_candidate(sem(50, 60), &cand(40, 80)).unwrap();
+        assert_eq!(out.line_start, 50);
+        assert_eq!(out.line_end, 60);
+    }
+}
diff --git a/tests/deep_http_integration.rs b/tests/deep_http_integration.rs
index 51eb146..8faabd9 100644
--- a/tests/deep_http_integration.rs
+++ b/tests/deep_http_integration.rs
@@ -217,6 +217,65 @@ fn json_wrapped_in_markdown_fence_is_accepted() {
     m.assert();
 }
 
+#[test]
+fn http_400_with_response_format_triggers_retry() {
+    // First attempt (with response_format) returns 400 — typical of a server
+    // that hard-fails unsupported structured output rather than ignoring it.
+    // Second attempt (without response_format) returns valid findings.
+    let mut server = Server::new();
+
+    let _bad = server
+        .mock("POST", "/chat/completions")
+        .match_body(mockito::Matcher::PartialJsonString(
+            r#"{"response_format": {}}"#.into(),
+        ))
+        .with_status(400)
+        .with_body(r#"{"error": "response_format unsupported"}"#)
+        .create();
+
+    let _good = server
+        .mock("POST", "/chat/completions")
+        .with_status(200)
+        .with_body(ok_response(&findings_content_one(), 60, 30))
+        .create();
+
+    let runtime = runtime_for(&server.url());
+    let client = OpenAiCompatibleClient::new(&runtime).unwrap();
+    let prompt = render(&PromptInputs {
+        candidate: &synth_candidate(),
+        structural_finding: None,
+    });
+
+    let response = client.analyze(&prompt).unwrap();
+    assert_eq!(response.findings.len(), 1);
+}
+
+#[test]
+fn http_400_without_response_format_surfaces_as_config_error() {
+    // After retry, 400/422 should fall through to Config — no infinite loop.
+    let mut server = Server::new();
+    let m = server
+        .mock("POST", "/chat/completions")
+        .with_status(400)
+        .with_body("bad request")
+        .expect_at_least(2)
+        .create();
+
+    let runtime = runtime_for(&server.url());
+    let client = OpenAiCompatibleClient::new(&runtime).unwrap();
+    let prompt = render(&PromptInputs {
+        candidate: &synth_candidate(),
+        structural_finding: None,
+    });
+
+    let err = client.analyze(&prompt).unwrap_err();
+    assert!(
+        matches!(err, DeepError::Config(_)),
+        "expected Config after retry, got: {err:?}"
+    );
+    m.assert();
+}
+
 #[test]
 fn http_401_surfaces_as_config_error() {
     let mut server = Server::new();

From 6a442f1b83427b73372a6863d9afde2a553b57be Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@enforceauth.com>
Date: Tue, 28 Apr 2026 22:39:09 -0400
Subject: [PATCH 14/18] fix(deep): address PR review round-2 feedback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Round-2 CodeRabbit findings on the deep-mode pipeline:

- candidate: cold-region budget uses ceiling (not floor cast) so small
  --max-candidates (1-3) still leave one cold slot. Previously rounded
  to zero and silently disabled cold scanning under tight caps.
- context: enforce scan_root containment via canonicalize() in
  expand_finding so absolute paths, ../ traversal, or symlinks pointing
  outside the scanned tree can't leak local files into deep-mode prompts.
- context: snippet truncation reserves room for the marker AND the
  imports payload so the combined size never exceeds max_chars. Imports
  are also clamped per-line and capped at 25% of the budget so a single
  giant generated line can't crowd out the snippet.
- finding: drop verbatim `reasoning` from the tracing::debug log; emit
  reasoning_len instead. The canonical Finding already drops it; the
  debug log was undoing that and could persist mirrored source/secrets
  to log files.
- mod: cost cap breach mid-run no longer discards accumulated semantic
  findings — orchestrator stops dispatching new requests but merges the
  in-flight response and returns what was collected.
- prompt: prefix snippet lines with their absolute file line number so
  the model emits findings in the same coordinate system the rest of
  the pipeline interprets them in. Without the prefix, model line
  numbers got mapped to wrong absolute file lines whenever the snippet
  didn't start at line 1.
- plans/done: update test totals to reflect what actually shipped.

Tests: 227 passing (210 lib unit + 17 integration); clippy clean.
---
 plans/done/01-pr1-deep-http-transport.md |   2 +-
 src/deep/candidate.rs                    |  42 +++++-
 src/deep/context.rs                      | 178 +++++++++++++++++++++--
 src/deep/finding.rs                      |   6 +-
 src/deep/mod.rs                          |  37 ++++-
 src/deep/prompt.rs                       |  62 +++++++-
 tests/deep_http_integration.rs           |  60 ++++++++
 7 files changed, 362 insertions(+), 25 deletions(-)

diff --git a/plans/done/01-pr1-deep-http-transport.md b/plans/done/01-pr1-deep-http-transport.md
index 1895433..ec26c15 100644
--- a/plans/done/01-pr1-deep-http-transport.md
+++ b/plans/done/01-pr1-deep-http-transport.md
@@ -463,7 +463,7 @@ Each commit ~150-400 lines of diff, reviewable independently. PR title for the m
 ## 14. Shipped
 
 **Branch**: `feat/deep-http`
-**Test count**: 199 passing (186 lib unit + 13 integration); clippy clean with `-D warnings`.
+**Test count**: 227 passing (210 lib unit + 17 integration); clippy clean with `-D warnings`.
 
 ### Commits (in order)
 
diff --git a/src/deep/candidate.rs b/src/deep/candidate.rs
index cf092b1..bc76419 100644
--- a/src/deep/candidate.rs
+++ b/src/deep/candidate.rs
@@ -108,7 +108,16 @@ pub fn select_candidates(
     let mut escalations = build_escalations(structural, scan_root, runtime)?;
     escalations.truncate(runtime.max_candidates);
 
-    let cold_budget = (runtime.max_candidates as f32 * COLD_REGION_FRACTION) as usize;
+    // Use ceiling so small `max_candidates` (1-3) still leave at least one
+    // cold slot when no escalations consume the budget. Plain floor cast
+    // rounded `1 * 0.3 → 0`, which silently disabled cold scanning under
+    // tight caps and made `--deep` look like a no-op.
+    let cold_budget = if runtime.max_candidates == 0 {
+        0
+    } else {
+        let scaled = (runtime.max_candidates as f32 * COLD_REGION_FRACTION).ceil() as usize;
+        scaled.max(1)
+    };
     let cold_budget = cold_budget.min(runtime.max_candidates.saturating_sub(escalations.len()));
 
     let cold = if cold_budget == 0 {
@@ -542,12 +551,37 @@ mod tests {
         let mut runtime = rt();
         runtime.max_candidates = 5;
         let candidates = select_candidates(&[], dir.path(), &runtime).unwrap();
-        // cold_budget = 5 * 0.3 → 1 candidate. Cap binds: we should get
-        // exactly 1, not 20 (the count of available cold-region hits).
-        assert_eq!(candidates.len(), 1);
+        // cold_budget = ceil(5 * 0.3) = 2 candidates. Cap binds: we should
+        // get exactly 2, not 20 (the count of available cold-region hits).
+        assert_eq!(candidates.len(), 2);
         assert!(candidates.len() <= runtime.max_candidates);
     }
 
+    #[test]
+    fn small_max_candidates_still_yields_cold_slot() {
+        // Regression: floor cast turned `1 * 0.3 → 0`, so `--deep` with a
+        // tight cap silently disabled cold-region analysis. Ceiling + max(1)
+        // guarantees at least one cold slot when nothing is escalated.
+        let dir = tempdir().unwrap();
+        for i in 0..3 {
+            fs::write(
+                dir.path().join(format!("f{i}.py")),
+                "def is_admin():\n    pass\n",
+            )
+            .unwrap();
+        }
+        for cap in [1, 2, 3] {
+            let mut runtime = rt();
+            runtime.max_candidates = cap;
+            let candidates = select_candidates(&[], dir.path(), &runtime).unwrap();
+            assert!(
+                !candidates.is_empty(),
+                "cap={cap} produced no candidates; cold-region budget rounded to zero?"
+            );
+            assert!(candidates.len() <= cap);
+        }
+    }
+
     #[test]
     fn cold_region_respects_excludes() {
         let dir = tempdir().unwrap();
diff --git a/src/deep/context.rs b/src/deep/context.rs
index 8ea7cfd..93e034a 100644
--- a/src/deep/context.rs
+++ b/src/deep/context.rs
@@ -17,6 +17,38 @@ use std::path::{Path, PathBuf};
 const LINES_BEFORE: usize = 5;
 const LINES_AFTER: usize = 15;
 const IMPORT_LINES: usize = 20;
+/// Per-import-line cap so a single 100KB minified line can't dominate the
+/// imports payload.
+const IMPORT_LINE_MAX_CHARS: usize = 200;
+/// Cap the imports payload at this fraction of `max_chars` so it can never
+/// crowd out the actual snippet. The remaining budget goes to snippet + marker.
+const IMPORTS_BUDGET_FRACTION: f32 = 0.25;
+const TRUNCATION_MARKER: &str = "\n// [truncated by zift deep-mode max_prompt_chars]";
+
+/// Build at most `IMPORT_LINES` import strings whose combined length stays
+/// within `total_budget`. Each line is also clamped to
+/// `IMPORT_LINE_MAX_CHARS` so a single huge line can't consume the whole
+/// budget. Truncation is rounded down to a UTF-8 char boundary so multi-byte
+/// chars never split.
+fn build_bounded_imports(lines: &[&str], total_budget: usize) -> Vec<String> {
+    let mut out: Vec<String> = Vec::with_capacity(IMPORT_LINES.min(lines.len()));
+    let mut spent: usize = 0;
+    for raw in lines.iter().take(IMPORT_LINES) {
+        let mut line = (*raw).to_string();
+        if line.len() > IMPORT_LINE_MAX_CHARS {
+            let cut = line.floor_char_boundary(IMPORT_LINE_MAX_CHARS);
+            line.truncate(cut);
+        }
+        // +1 accounts for the "\n" separator the caller adds when joining.
+        let added = line.len() + 1;
+        if spent.saturating_add(added) > total_budget {
+            break;
+        }
+        spent += added;
+        out.push(line);
+    }
+    out
+}
 
 #[derive(Debug, Clone)]
 pub struct ExpandedContext {
@@ -31,12 +63,17 @@ pub struct ExpandedContext {
 /// Expand a structural finding's snippet to include surrounding lines and
 /// file-level imports. `finding.file` is interpreted as relative to
 /// `scan_root`.
+///
+/// Verifies that the resolved file path stays inside `scan_root` after
+/// canonicalization — defense against absolute paths, `..` traversal, or
+/// symlinks pointing outside the scanned tree leaking arbitrary local
+/// files into deep-mode prompts.
 pub fn expand_finding(
     finding: &Finding,
     scan_root: &Path,
     max_chars: usize,
 ) -> Result<ExpandedContext, DeepError> {
-    let abs_path = scan_root.join(&finding.file);
+    let abs_path = ensure_within_scan_root(scan_root, &finding.file)?;
     expand_inner(
         &abs_path,
         finding.file.clone(),
@@ -47,6 +84,25 @@ pub fn expand_finding(
     )
 }
 
+/// Resolve `scan_root.join(relative)` and verify the canonical result is a
+/// descendant of canonical `scan_root`. Returns the canonical absolute path
+/// on success; [`DeepError::Config`] on traversal attempts (so the error is
+/// distinguishable from genuine I/O failures and the user-facing message
+/// names the offending path).
+fn ensure_within_scan_root(scan_root: &Path, relative: &Path) -> Result<PathBuf, DeepError> {
+    let candidate = scan_root.join(relative);
+    let canonical_root = scan_root.canonicalize()?;
+    let canonical_path = candidate.canonicalize()?;
+    if !canonical_path.starts_with(&canonical_root) {
+        return Err(DeepError::Config(format!(
+            "finding path {} resolves outside scan_root {}",
+            canonical_path.display(),
+            canonical_root.display(),
+        )));
+    }
+    Ok(canonical_path)
+}
+
 /// Expand an arbitrary file region (used for `ColdRegion` candidates that
 /// have no structural finding behind them). `file_absolute` must be readable;
 /// `file_relative` is the path used in [`ExpandedContext::file_relative`].
@@ -99,26 +155,33 @@ fn expand_inner(
     let window_start = start_1based.saturating_sub(LINES_BEFORE).max(1);
     let window_end = (end_1based + LINES_AFTER).min(total);
 
+    // Build imports first so we know how much budget they consume against
+    // `max_chars`. Cap each line at `IMPORT_LINE_MAX_CHARS` and the total at
+    // `IMPORTS_BUDGET_FRACTION * max_chars` so a file full of giant generated
+    // lines (minified bundles, codegen) can't blow the prompt size budget.
+    let imports_budget = (max_chars as f32 * IMPORTS_BUDGET_FRACTION) as usize;
+    let imports = build_bounded_imports(&lines, imports_budget);
+    let imports_len: usize = imports.iter().map(|s| s.len()).sum::<usize>() + imports.len(); // +1 per line for the "\n" join
+
     // 0-based indexing into `lines`.
     let snippet_slice = &lines[(window_start - 1)..window_end];
     let mut snippet = snippet_slice.join("\n");
 
     // Truncate at max_chars (favors keeping the head — the part most likely
     // to contain the actual auth check; trailing context is more discardable).
+    // Reserve space for both the truncation marker and the imports payload so
+    // the combined `snippet + imports + marker` cannot exceed `max_chars`.
     // Round down to a UTF-8 char boundary to avoid `String::truncate` panics
     // on multi-byte chars (e.g. Unicode comments/identifiers in source).
-    if snippet.len() > max_chars {
-        let cut = snippet.floor_char_boundary(max_chars);
+    let snippet_budget = max_chars
+        .saturating_sub(TRUNCATION_MARKER.len())
+        .saturating_sub(imports_len);
+    if snippet.len() > snippet_budget {
+        let cut = snippet.floor_char_boundary(snippet_budget);
         snippet.truncate(cut);
-        snippet.push_str("\n// [truncated by zift deep-mode max_prompt_chars]");
+        snippet.push_str(TRUNCATION_MARKER);
     }
 
-    let imports: Vec<String> = lines
-        .iter()
-        .take(IMPORT_LINES)
-        .map(|s| (*s).to_string())
-        .collect();
-
     Ok(ExpandedContext {
         file_relative,
         language,
@@ -261,10 +324,67 @@ mod tests {
         let finding = make_finding(PathBuf::from("a.ts"), 100, 100);
 
         let ctx = expand_finding(&finding, dir.path(), 500).unwrap();
-        assert!(ctx.snippet.len() < 600); // 500 + tail marker
+        // snippet + imports + marker is the full prompt-payload budget.
+        let imports_len: usize =
+            ctx.imports.iter().map(|s| s.len()).sum::<usize>() + ctx.imports.len();
+        assert!(
+            ctx.snippet.len() + imports_len <= 500,
+            "snippet({}) + imports({}) exceeded max_chars=500",
+            ctx.snippet.len(),
+            imports_len,
+        );
         assert!(ctx.snippet.contains("[truncated"));
     }
 
+    #[test]
+    fn combined_budget_includes_marker_and_imports() {
+        // Snippet truncation must reserve room for the marker AND the
+        // imports payload — otherwise concatenated payload busts max_chars.
+        let dir = tempdir().unwrap();
+        // Long imports + long snippet, both pressuring the budget.
+        let mut content = String::new();
+        for i in 1..=20 {
+            content.push_str(&format!("import line {i} ").repeat(30));
+            content.push('\n');
+        }
+        content.push_str(&"x".repeat(5_000));
+        write_file(dir.path(), "a.ts", &content);
+        let finding = make_finding(PathBuf::from("a.ts"), 21, 21);
+
+        let max = 1_000;
+        let ctx = expand_finding(&finding, dir.path(), max).unwrap();
+        let imports_len: usize =
+            ctx.imports.iter().map(|s| s.len()).sum::<usize>() + ctx.imports.len();
+        assert!(
+            ctx.snippet.len() + imports_len <= max,
+            "snippet({}) + imports({}) > max_chars={max}",
+            ctx.snippet.len(),
+            imports_len,
+        );
+    }
+
+    #[test]
+    fn long_imports_clamped_per_line() {
+        // A single 100KB minified line in the imports region must not
+        // explode the prompt size.
+        let dir = tempdir().unwrap();
+        let mut content = String::new();
+        content.push_str(&"x".repeat(100_000));
+        content.push('\n');
+        content.push_str(&numbered_lines(50));
+        write_file(dir.path(), "a.ts", &content);
+        let finding = make_finding(PathBuf::from("a.ts"), 30, 30);
+
+        let ctx = expand_finding(&finding, dir.path(), 16_000).unwrap();
+        for (i, imp) in ctx.imports.iter().enumerate() {
+            assert!(
+                imp.len() <= IMPORT_LINE_MAX_CHARS,
+                "import[{i}] length {} > {IMPORT_LINE_MAX_CHARS}",
+                imp.len(),
+            );
+        }
+    }
+
     #[test]
     fn truncation_does_not_panic_on_multibyte_boundary() {
         // Build a snippet whose byte length exceeds max_chars and whose
@@ -284,6 +404,42 @@ mod tests {
         assert!(ctx.snippet.contains("[truncated"));
     }
 
+    #[test]
+    fn expand_finding_rejects_dotdot_traversal() {
+        // Layout: scan_root/inner/, with secret outside scan_root that the
+        // attacker tries to read via `../secret.txt`.
+        let dir = tempdir().unwrap();
+        let scan_root = dir.path().join("inner");
+        fs::create_dir_all(&scan_root).unwrap();
+        write_file(dir.path(), "secret.txt", "leaked");
+        // Need a file inside scan_root for canonicalize to succeed at all,
+        // otherwise the test fails for the wrong reason.
+        write_file(&scan_root, "ok.ts", "x");
+
+        let finding = make_finding(PathBuf::from("../secret.txt"), 1, 1);
+        let err = expand_finding(&finding, &scan_root, 16_000).unwrap_err();
+        assert!(
+            matches!(err, DeepError::Config(ref msg) if msg.contains("outside scan_root")),
+            "expected Config error, got: {err:?}",
+        );
+    }
+
+    #[test]
+    fn expand_finding_rejects_absolute_path_outside_scan_root() {
+        let dir = tempdir().unwrap();
+        let scan_root = dir.path().join("inner");
+        fs::create_dir_all(&scan_root).unwrap();
+        let outside = write_file(dir.path(), "outside.ts", "x");
+        write_file(&scan_root, "ok.ts", "x");
+
+        let finding = make_finding(outside.clone(), 1, 1);
+        let err = expand_finding(&finding, &scan_root, 16_000).unwrap_err();
+        assert!(
+            matches!(err, DeepError::Config(ref msg) if msg.contains("outside scan_root")),
+            "expected Config error, got: {err:?}",
+        );
+    }
+
     #[test]
     fn expand_region_uses_relative_path_in_output() {
         let dir = tempdir().unwrap();
diff --git a/src/deep/finding.rs b/src/deep/finding.rs
index 9203725..9cd5c91 100644
--- a/src/deep/finding.rs
+++ b/src/deep/finding.rs
@@ -40,13 +40,17 @@ pub fn into_finding(
     seed: Option<&Finding>,
     scan_root: &Path,
 ) -> Finding {
+    // `reasoning` can mirror back scanned source or secrets the model saw in
+    // the snippet. The canonical `Finding` already drops it; persisting the
+    // verbatim text in tracing logs would undo that. Log only the length so
+    // operators can still spot suspicious blank/oversize reasoning chains.
     tracing::debug!(
         file = %candidate.file.display(),
         lines = format!("{}-{}", sem.line_start, sem.line_end),
         category = ?sem.category,
         confidence = ?sem.confidence,
         is_false_positive = sem.is_false_positive,
-        reasoning = %sem.reasoning,
+        reasoning_len = sem.reasoning.len(),
         "semantic finding"
     );
 
diff --git a/src/deep/mod.rs b/src/deep/mod.rs
index b4d1e60..5795723 100644
--- a/src/deep/mod.rs
+++ b/src/deep/mod.rs
@@ -31,9 +31,13 @@ use std::path::Path;
 ///
 /// Errors:
 /// - `DeepError::Config`: missing config or HTTP client construction failure (hard fail)
-/// - `DeepError::CostExceeded`: cap reached mid-run; returns immediately (hard fail)
 /// - `DeepError::Io`: filesystem error reading source files (hard fail)
 ///
+/// `DeepError::CostExceeded` is **not** propagated as an error — when the cap
+/// trips mid-run we stop dispatching new candidates but still merge the
+/// already-collected semantic findings back into the structural set, so the
+/// user keeps the work paid for. The cap breach is logged at `warn`.
+///
 /// Per-candidate `Http`, `BadResponse`, and `Timeout` errors are logged
 /// and the candidate is skipped — best-effort enrichment, not all-or-nothing.
 pub fn run(
@@ -104,11 +108,38 @@ pub fn run(
                 );
                 continue;
             }
-            // Config / CostExceeded / Io are hard fails — propagate.
+            // Config / Io are hard fails — propagate.
+            // (CostExceeded comes from cost_tracker.record below, not from
+            // analyze, so it's handled separately to preserve in-flight findings.)
             Err(other) => return Err(other),
         };
 
-        cost_tracker.record(&response.usage)?;
+        // Cap breach stops new dispatch, but the findings already merged in
+        // earlier iterations (and the ones in this very response) are still
+        // worth surfacing — the user paid for them. Break out of the loop
+        // instead of returning the error and discarding the work.
+        if let Err(DeepError::CostExceeded { spent }) = cost_tracker.record(&response.usage) {
+            tracing::warn!(
+                "deep: cost ceiling reached after ${spent:.4} USD — stopping new requests; \
+                 returning {} semantic finding(s) collected so far",
+                semantic_findings.len() + response.findings.len(),
+            );
+            // Drain the in-flight response too — same candidate window.
+            for sem in response.findings {
+                if sem.is_false_positive {
+                    if let Some(seed_id) = &candidate.original_finding_id {
+                        false_positive_seeds.insert(seed_id.clone());
+                    }
+                    continue;
+                }
+                let Some(sem) = clamp_to_candidate(sem, candidate) else {
+                    continue;
+                };
+                let f = finding::into_finding(sem, candidate, seed, scan_root);
+                semantic_findings.push(f);
+            }
+            break;
+        }
 
         for sem in response.findings {
             if sem.is_false_positive {
diff --git a/src/deep/prompt.rs b/src/deep/prompt.rs
index e2c418b..375ca26 100644
--- a/src/deep/prompt.rs
+++ b/src/deep/prompt.rs
@@ -101,12 +101,19 @@ pub fn render(inputs: &PromptInputs) -> RenderedPrompt {
     user.push_str("\n```");
     user.push_str(language_fence(inputs.candidate.language));
     user.push('\n');
-    user.push_str(&inputs.candidate.source_snippet);
-    if !inputs.candidate.source_snippet.ends_with('\n') {
-        user.push('\n');
-    }
+    // Prefix every snippet line with its absolute file line number so the
+    // model emits `line_start`/`line_end` in the same coordinate system the
+    // rest of the pipeline (and `finding::into_finding`) interprets them in.
+    // Without this, the model counts from 1 inside the snippet and findings
+    // get mapped to wrong absolute file lines whenever the snippet doesn't
+    // start at line 1.
+    push_numbered_snippet(
+        &mut user,
+        &inputs.candidate.source_snippet,
+        inputs.candidate.line_start,
+    );
     user.push_str(
-        "```\n\nIdentify all authorization decisions in the snippet. Use line numbers from the snippet.",
+        "```\n\nIdentify all authorization decisions in the snippet. Use the absolute file line numbers shown in the `NNNN: ` prefix on each line — not snippet-relative offsets.",
     );
 
     RenderedPrompt {
@@ -262,6 +269,24 @@ fn detect_frameworks(imports: &[String], language: Language) -> Vec<&'static Fra
         .collect()
 }
 
+/// Append `snippet` to `out`, prefixing each line with its absolute file line
+/// number right-aligned to width 4 + ": ". Always ends with `\n` so the
+/// caller's closing fence lands on its own line. Empty snippet → just `\n`.
+fn push_numbered_snippet(out: &mut String, snippet: &str, first_line: usize) {
+    if snippet.is_empty() {
+        out.push('\n');
+        return;
+    }
+    // `lines()` drops the trailing newline (if any); we add one per emitted
+    // line below so the snippet always ends with a newline before the fence.
+    for (i, line) in snippet.lines().enumerate() {
+        let n = first_line + i;
+        out.push_str(&format!("{n:>4}: "));
+        out.push_str(line);
+        out.push('\n');
+    }
+}
+
 fn language_fence(lang: Language) -> &'static str {
     match lang {
         Language::TypeScript => "typescript",
@@ -396,6 +421,33 @@ mod tests {
         assert!(rendered.user.contains("```typescript"));
     }
 
+    #[test]
+    fn render_prefixes_snippet_lines_with_absolute_file_numbers() {
+        // Candidate window starts at line 10 — every snippet line should
+        // be prefixed with `10:`, `11:`, etc., not `1:`, `2:`. Without the
+        // prefix the model emits snippet-relative offsets and findings
+        // get mapped to wrong absolute lines.
+        let cand = candidate_with_imports(Language::TypeScript, vec![]);
+        let inputs = PromptInputs {
+            candidate: &cand,
+            structural_finding: None,
+        };
+        let rendered = render(&inputs);
+        // First line of candidate.source_snippet should appear with line 10
+        // prefix. Padding is 4 chars right-aligned.
+        assert!(
+            rendered.user.contains("  10: function isAdmin"),
+            "expected absolute-line prefix '  10:', got user prompt:\n{}",
+            rendered.user,
+        );
+        // The user prompt must instruct the model to use the prefixed
+        // numbers, not snippet offsets.
+        assert!(
+            rendered.user.contains("absolute file line numbers"),
+            "user prompt missing line-number guidance",
+        );
+    }
+
     #[test]
     fn render_includes_seed_when_escalation() {
         let cand = candidate_with_imports(Language::TypeScript, vec![]);
diff --git a/tests/deep_http_integration.rs b/tests/deep_http_integration.rs
index 8faabd9..5e23a4c 100644
--- a/tests/deep_http_integration.rs
+++ b/tests/deep_http_integration.rs
@@ -551,6 +551,66 @@ fn deep_run_emits_findings_in_deterministic_order() {
     );
 }
 
+#[test]
+fn deep_run_preserves_findings_when_cost_cap_trips_mid_run() {
+    // Two cold-region candidates. Tight cap + high rates → first response
+    // tips us over the cap. The orchestrator should keep that first
+    // semantic finding and the surviving structural set (none here),
+    // not propagate CostExceeded as an error.
+    let dir = tempdir().unwrap();
+    fs::write(
+        dir.path().join("a.ts"),
+        "function isAdmin(u) { return u.role === 'admin'; }\n",
+    )
+    .unwrap();
+    fs::write(
+        dir.path().join("b.ts"),
+        "function hasPermission(u) { return u.perms.includes('x'); }\n",
+    )
+    .unwrap();
+
+    let mut server = Server::new();
+    let _m = server
+        .mock("POST", "/chat/completions")
+        .with_status(200)
+        .with_body(ok_response(
+            &json!({
+                "findings": [{
+                    "line_start": 1,
+                    "line_end": 1,
+                    "category": "rbac",
+                    "confidence": "high",
+                    "description": "role check",
+                    "reasoning": "isAdmin role comparison",
+                    "is_false_positive": false
+                }]
+            })
+            .to_string(),
+            10_000, // huge usage so the very first record() trips the cap
+            5_000,
+        ))
+        .expect_at_least(1)
+        .create();
+
+    let mut runtime = runtime_for(&server.url());
+    runtime.max_cost_usd = Some(0.01);
+    runtime.cost_per_1k_input = Some(1.00); // 10k input @ $1/k = $10 → way over $0.01 cap
+    runtime.cost_per_1k_output = Some(1.00);
+
+    // Should NOT return Err(CostExceeded) — should return what was collected.
+    let merged = zift::deep::run(Vec::new(), dir.path(), &runtime)
+        .expect("cap breach must not propagate as error");
+    let semantic: Vec<&Finding> = merged
+        .iter()
+        .filter(|f| f.pass == ScanPass::Semantic)
+        .collect();
+    assert_eq!(
+        semantic.len(),
+        1,
+        "expected to keep the in-flight semantic finding, got: {merged:?}",
+    );
+}
+
 #[test]
 fn deep_run_returns_structural_unchanged_when_no_candidates() {
     let dir = tempdir().unwrap();

From ef1e474b6ed517f477050af51a3f2f2c193f1c37 Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@enforceauth.com>
Date: Tue, 28 Apr 2026 23:02:32 -0400
Subject: [PATCH 15/18] chore(skills): reorder address-pr-feedback so bot
 replies follow push
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Adopts the ordering fix from the ea repo so "Fixed" replies land after
the fix commit is on the remote (otherwise the bot reads the remote SHA
when verifying and re-flags the comment as unfixed).

Additional improvements:

- Step 2a: compare PR head SHA to each bot's last review commit_id
  before fetching comments. Catches the case where a previous /address
  pass pushed a fix and the bot's response review hasn't landed yet —
  which would otherwise cause the next pass to miss new findings.

- Step 3c: explicitly handle Amazon Q's PR-level "Critical Issue"
  summary. The headline security/severity callouts here aren't always
  duplicated as inline comments and are easy to miss.

- Step 11: proactively remind the user that the push will trigger
  another review pass, so they should re-run in a few minutes if
  more findings come back.

Quality gates updated to use cargo (fmt + clippy --all-targets
-D warnings + test) instead of the ea repo's bun toolchain.
---
 .claude/commands/address-pr-feedback.md | 180 ++++++++++++++++--------
 1 file changed, 125 insertions(+), 55 deletions(-)

diff --git a/.claude/commands/address-pr-feedback.md b/.claude/commands/address-pr-feedback.md
index 32b1a3a..c65da5e 100644
--- a/.claude/commands/address-pr-feedback.md
+++ b/.claude/commands/address-pr-feedback.md
@@ -38,6 +38,24 @@ gh api "repos/{owner}/{repo}/pulls/{pr}/comments" --paginate \
 
 **If either bot hasn't finished, stop here.** Do not proceed to fixing issues with incomplete feedback.
 
+#### 2a. Confirm the latest bot review covers the latest commit
+
+Bots re-review on every push. If you ran a previous round of `/address-pr-feedback` and pushed a fix commit, but the bot's review of that push hasn't landed yet, the next round will only see the stale review and miss any new findings. This check exists to catch that case.
+
+```bash
+# Compare the head SHA of the PR to the most recent CodeRabbit review's commit_id
+HEAD_SHA=$(gh api "repos/{owner}/{repo}/pulls/{pr}" --jq .head.sha)
+LATEST_CR_COMMIT=$(gh api "repos/{owner}/{repo}/pulls/{pr}/reviews" --paginate \
+  --jq '[.[] | select(.user.login == "coderabbitai[bot]")] | sort_by(.submitted_at) | last | .commit_id')
+echo "PR head:  $HEAD_SHA"
+echo "Last CR review commit: $LATEST_CR_COMMIT"
+```
+
+If `$LATEST_CR_COMMIT` does not match `$HEAD_SHA`, CodeRabbit hasn't reviewed the latest commit yet. Tell the user:
+> "CodeRabbit's latest review is on commit `<short-SHA>` but PR head is `<short-SHA>`. Wait a few minutes for the new review to land, then re-run."
+
+Do the same check for Amazon Q by filtering the same reviews list on `amazon-q-developer[bot]` (each of its reviews carries a `commit_id` too). Don't proceed until both bots have caught up to head.
+
 ### 3. Fetch review comments (token-efficient two-pass approach)
 
 **CRITICAL: Always use `--paginate` with `gh api` for review comments.** The default page size is 30, which is easily exceeded when bots post 16+ inline comments plus replies. Without `--paginate`, you will miss comments from later review passes.
@@ -50,7 +68,6 @@ Fetch all bot inline comments with bodies truncated to 300 chars. This is enough
 
 ```bash
 # Get bot inline comments — TRUNCATED bodies (saves ~80% tokens)
-# MUST use --paginate to get all comments across pages
 gh api "repos/{owner}/{repo}/pulls/{pr}/comments" --paginate \
   --jq '.[] | select(.in_reply_to_id == null)
   | select(.user.login == "coderabbitai[bot]" or .user.login == "amazon-q-developer[bot]")
@@ -61,13 +78,18 @@ gh api "repos/{owner}/{repo}/pulls/{pr}/comments" --paginate \
   --jq '[.[] | select(.in_reply_to_id != null) | select(.user.login != "coderabbitai[bot]" and .user.login != "amazon-q-developer[bot]") | .in_reply_to_id] | unique'
 ```
 
-To identify **new unaddressed root comments**, filter by:
-- `in_reply_to_id == null` (root comment, not a reply)
-- `user.login` is a bot (`coderabbitai[bot]` or `amazon-q-developer[bot]`)
-- No reply from the PR author exists with matching `in_reply_to_id`
-
 Cross-reference to find **unreplied** bot comments only.
 
+Useful shortcut to see how many review batches exist:
+
+```bash
+gh api "repos/{owner}/{repo}/pulls/{pr}/comments" --paginate \
+  --jq '.[] | select(.user.login == "coderabbitai[bot]") | select(.in_reply_to_id == null) | .created_at' \
+  | sort | uniq -c | sort -rn
+```
+
+Each unique timestamp cluster represents one review pass.
+
 #### 3b. Pass 1 — Scan CodeRabbit review bodies (extract counts only)
 
 CodeRabbit review bodies are the largest token consumers (3-8KB each). First extract just the actionable metadata:
@@ -80,10 +102,11 @@ gh api "repos/{owner}/{repo}/pulls/{pr}/reviews" --paginate \
      actionable: ((.body | try capture("Actionable comments posted: (?<n>[0-9]+)") catch null | .n) // "0"),
      has_nitpicks: (.body | test("Nitpick comments")),
      has_duplicates: (.body | test("Duplicate comments")),
+     has_outside_diff: (.body | test("Outside diff range")),
      has_agent_prompt: (.body | test("Prompt for AI Agents"))}'
 ```
 
-**Only fetch the full review body** if `has_nitpicks`, `has_duplicates`, or `has_agent_prompt` is true AND the review is from the latest round (i.e., after your last push). For earlier rounds where inline comments were already replied to, skip the full body fetch.
+**Only fetch the full review body** if `has_nitpicks`, `has_duplicates`, `has_outside_diff`, or `has_agent_prompt` is true AND the review is from the latest round (i.e., after your last push). For earlier rounds where inline comments were already replied to, skip the full body fetch.
 
 ```bash
 # Fetch full body ONLY for reviews that need it (one at a time)
@@ -92,18 +115,29 @@ gh api "repos/{owner}/{repo}/pulls/{pr}/reviews/{review_id}" \
 ```
 
 Parse each fetched review body for:
-- **"Nitpick comments (N)"** — valid code quality items; fix them
-- **"Duplicate comments (N)"** — re-raised from prior reviews; fix them
-- **"Outside diff range comments (N)"** — comments on code outside the changed lines; triage these the same as inline comments
-- **"Prompt for AI Agents"** — structured fix instructions with file paths and line numbers
+- **"🧹 Nitpick comments (N)"** — valid code quality items; fix them
+- **"♻️ Duplicate comments (N)"** — re-raised from prior reviews; fix them
+- **"⚠️ Outside diff range comments (N)"** — comments on code not in the current diff but related to the change; these contain file paths, line numbers, and the same format as inline comments. **Easy to miss** — always check for this section
+- **"🤖 Prompt for AI Agents"** — structured fix instructions with file paths and line numbers
 
 **The inline comments (3a) are only the Critical/Major items. Nitpicks, duplicates, and outside-diff-range items stay in the review body (3b).** If the user says "5 comments and 3 comments", those numbers come from "Actionable comments posted: N" in separate review bodies.
 
-#### 3c. Amazon Q general comments (if any)
+#### 3c. General PR comments (issues endpoint)
+
+Bots may also post general PR-level comments (not inline on code). Fetch these with pagination:
 
 ```bash
-gh pr view --json comments \
-  --jq '.comments[] | select(.author.login == "amazon-q-developer") | {bodyPreview: (.body[:300])}'
+gh api "repos/{owner}/{repo}/issues/{pr}/comments" --paginate \
+  --jq '.[] | select(.user.login == "coderabbitai[bot]" or .user.login == "amazon-q-developer[bot]")
+  | {id, author: .user.login, created_at, bodyPreview: (.body[:300])}'
+```
+
+**Amazon Q "Critical Issue" callouts.** Amazon Q posts a single PR-level comment summarizing severity counts ("Critical Issue: …", "Recommendations: …"). The headline items here are NOT always duplicated in the inline comments — they may only exist in this summary. Always read the full body of the latest Amazon Q PR-level comment and triage each callout as a separate item:
+
+```bash
+# Fetch the latest Amazon Q PR-level comment in full
+gh api "repos/{owner}/{repo}/issues/{pr}/comments" --paginate \
+  --jq '[.[] | select(.user.login == "amazon-q-developer[bot]")] | sort_by(.created_at) | last | .body'
 ```
 
 #### 3d. CodeRabbit summary comment — SKIP unless user asks
@@ -123,10 +157,10 @@ gh api "repos/{owner}/{repo}/pulls/comments/{comment_id}" --jq '.body'
 
 ### 5. Identify actionable feedback
 
-Collect ALL feedback from inline comments (3a), review body items (3b), and Amazon Q comments. **Do not skip nitpicks or duplicate items from the review body**; they get the same triage treatment as inline review comments:
+Collect ALL feedback from inline comments (3a), review body items (3b), and general PR comments (3c). **Do not skip nitpicks, duplicate items, or outside-diff-range items from the review body**; they get the same triage treatment as inline review comments:
 
-- **CodeRabbit** (`coderabbitai[bot]`): Inline review comments (3a) + review body nitpicks/duplicates/actionable items (3b)
-- **Amazon Q** (`amazon-q-developer[bot]`): Inline review comments (3a)
+- **CodeRabbit** (`coderabbitai[bot]`): Inline review comments (3a) + review body nitpicks/duplicates/outside-diff-range/actionable items (3b) + general PR comments (3c)
+- **Amazon Q** (`amazon-q-developer[bot]`): Inline review comments (3a) + general PR comments (3c, including the "Critical Issue" summary)
 
 **Before applying any fix**, first verify the finding against the current code and decide whether a code change is actually needed. If the finding is not valid or no change is required, do not modify code for that item and briefly explain why it was skipped.
 
@@ -143,8 +177,10 @@ For each comment, determine:
 | # | Source | File | Line | Comment Summary | Decision | Rationale |
 |---|--------|------|------|-----------------|----------|-----------|
 | 1 | CR inline | `path/to/file.rs` | 42 | Brief summary | Fix / Dismiss / Stale | Why |
-| 2 | CR review body | `path/to/file.rs` | 10 | Brief summary | Fix / Dismiss / Stale | Why |
-| 3 | AQ inline | `path/to/file.rs` | 55 | Brief summary | Fix / Dismiss / Stale | Why |
+| 2 | CR nitpick | `path/to/file.rs` | 10 | Brief summary | Fix / Dismiss / Stale | Why |
+| 3 | CR outside-diff | `path/to/file.rs` | 78 | Brief summary | Fix / Dismiss / Stale | Why |
+| 4 | AQ inline | `path/to/file.rs` | 55 | Brief summary | Fix / Dismiss / Stale | Why |
+| 5 | AQ critical (PR-level) | `path/to/file.rs` | n/a | Brief summary | Fix / Dismiss / Stale | Why |
 
 Wait for the user to:
 - **Approve all** — proceed with all decisions as proposed
@@ -153,50 +189,27 @@ Wait for the user to:
 
 **Do not proceed to step 7 until the user approves.**
 
-### 7. Address each item
+### 7. Apply fixes locally (do NOT reply to bots yet)
 
-**CRITICAL — Reply rules:**
+**CRITICAL — DO NOT reply to bot threads in this step.** Bot replies must happen *after* push so the bot can verify against the actual remote. Replying with "Fixed" before the commit is on the remote causes the bot to re-flag the comment as unfixed (it reads the remote, not your working tree).
 
-1. **ALWAYS reply on the conversation thread** — use `gh api .../comments/{comment_id}/replies`. NEVER use `gh pr comment` to post a top-level PR comment. Bots track conversations by thread, not by scanning all PR comments.
+For each item that needs a code change:
 
-2. **Bot reply prefixes** — replies MUST start with the correct prefix:
-   - **Amazon Q**: `/q` (e.g., `/q Fixed — <explanation>`)
-   - **CodeRabbit**: `@coderabbitai` (e.g., `@coderabbitai Fixed — <explanation>`)
+1. Read the file and understand the context around the flagged line.
+2. Apply the fix.
 
-Without thread replies and correct prefixes, the bots will NOT see your reply and the comment won't be resolved.
-
-For valid concerns:
-1. Read the file and understand the context around the flagged line
-2. Apply the fix
-3. Reply to the review comment thread explaining what was fixed:
-   ```bash
-   # For Amazon Q comments — MUST start with /q
-   gh api "repos/{owner}/{repo}/pulls/{pr}/comments/{comment_id}/replies" \
-     -X POST -f body="/q Fixed — <brief explanation>"
-
-   # For CodeRabbit comments — MUST start with @coderabbitai
-   gh api "repos/{owner}/{repo}/pulls/{pr}/comments/{comment_id}/replies" \
-     -X POST -f body="@coderabbitai Fixed — <brief explanation>"
-   ```
-
-For false positives:
-1. Reply to the review comment thread explaining why it's not an issue:
-   ```bash
-   # For Amazon Q comments — MUST start with /q
-   gh api "repos/{owner}/{repo}/pulls/{pr}/comments/{comment_id}/replies" \
-     -X POST -f body="/q <explanation of why this is safe>"
+Do not post any thread replies, PR comments, or "Fixed" messages yet. Just edit code.
 
-   # For CodeRabbit comments — MUST start with @coderabbitai
-   gh api "repos/{owner}/{repo}/pulls/{pr}/comments/{comment_id}/replies" \
-     -X POST -f body="@coderabbitai <explanation of why this is safe>"
-   ```
+For dismissals and stale items, also hold off on replies — batch them with the post-push replies in step 10. Sending dismissal replies early is technically safe (no code dependency), but mixing early dismissals with late "Fixed" replies makes the timeline confusing for bots and humans, and easy to get wrong.
 
-### 8. Run checks
+### 8. Run quality gates
 
 After all fixes are applied:
 
 ```bash
-cargo check && cargo clippy -- -D warnings && cargo test
+cargo fmt
+cargo clippy --all-targets -- -D warnings
+cargo test
 ```
 
 All must pass before committing.
@@ -211,7 +224,59 @@ git commit -m "fix: address PR review feedback"
 git push
 ```
 
-### 10. Report summary
+**Verify the push landed before moving on.** Confirm `git log origin/<branch> -1` matches your local HEAD, or check `gh api repos/{owner}/{repo}/pulls/{pr} --jq .head.sha`. The bots will read this SHA when re-evaluating, so the reply in step 10 must reference code that is actually on it.
+
+### 10. Reply to bot threads
+
+Now that the fix is on the remote, post replies. Reference the new commit SHA in "Fixed" replies so the bot can verify and so the timeline is auditable later.
+
+**Reply rules:**
+
+1. **Inline review comments (3a)** — reply on the conversation thread using `gh api .../comments/{comment_id}/replies`.
+
+2. **Review-body/outside-diff items (3b)** — no inline thread exists. Post a top-level PR comment (`gh pr comment`) summarizing fixes/dismissals.
+
+3. **General PR comments (3c)** — these are issue-endpoint comments with no inline review thread. Issue comments don't support threaded replies, so post a new PR comment referencing the original (shown for CodeRabbit; start the body with `/q` instead when answering Amazon Q):
+   ```bash
+   gh pr comment {pr} --body "@coderabbitai Re: comment {comment_id} — Fixed in <sha> — <explanation>"
+   ```
+
+4. **Bot reply prefixes** — replies MUST start with the correct prefix:
+   - **Amazon Q**: `/q` (e.g., `/q Fixed in <sha> — <explanation>`)
+   - **CodeRabbit**: `@coderabbitai` (e.g., `@coderabbitai Fixed in <sha> — <explanation>`)
+
+An inline reply that is not posted on the comment's own thread, or any reply that lacks the correct prefix, will NOT be seen by the bots and the comment won't be resolved.
+
+For valid concerns (fixed):
+```bash
+# For Amazon Q comments — MUST start with /q
+gh api "repos/{owner}/{repo}/pulls/{pr}/comments/{comment_id}/replies" \
+  -X POST -f body="/q Fixed in <sha> — <brief explanation>"
+
+# For CodeRabbit comments — MUST start with @coderabbitai
+gh api "repos/{owner}/{repo}/pulls/{pr}/comments/{comment_id}/replies" \
+  -X POST -f body="@coderabbitai Fixed in <sha> — <brief explanation>"
+```
+
+For false positives (dismissed):
+```bash
+# For Amazon Q comments — MUST start with /q
+gh api "repos/{owner}/{repo}/pulls/{pr}/comments/{comment_id}/replies" \
+  -X POST -f body="/q <explanation of why this is safe>"
+
+# For CodeRabbit comments — MUST start with @coderabbitai
+gh api "repos/{owner}/{repo}/pulls/{pr}/comments/{comment_id}/replies" \
+  -X POST -f body="@coderabbitai <explanation of why this is safe>"
+```
+
+For outside-diff-range and review body items (no inline comment ID to reply to):
+```bash
+gh pr comment {pr} --body "@coderabbitai Addressed in <sha>:
+- Fixed: <list of fixes with file:line references>
+- Dismissed: <list with reasoning>"
+```
+
+### 11. Report summary and offer a follow-up pass
 
 Present a summary to the user:
 
@@ -219,4 +284,9 @@ Present a summary to the user:
 - **Dismissed**: List of false positives with reasoning
 - **Stale**: Comments on code that was already changed/removed
 - **Needs input**: Any ambiguous items requiring user decision
-- **Checks**: Pass/fail status
+- **Quality gates**: Pass/fail status
+
+Then **proactively remind the user**:
+> "The push will trigger another bot review pass. If new findings come back, re-run `/address-pr-feedback` in a few minutes to address them."
+
+This is the recurring pattern: every fix push spawns a fresh review, and missing it leaves real findings unaddressed at merge time.

From e7d6d9accc7e839776d60dd6ce82374c30097a8b Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@enforceauth.com>
Date: Tue, 28 Apr 2026 23:02:51 -0400
Subject: [PATCH 16/18] fix(deep): address PR review round-3+4 feedback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

Round-3 CodeRabbit findings:

- client: 5xx responses now return DeepError::BadResponse instead of
  DeepError::Config so the orchestrator's per-candidate skip path takes
  them. Mapping transient server-side failures to Config (a hard fail)
  meant a single upstream 500 aborted the whole deep run.
- config: parse `--base-url` with url::Url::parse during build() so a
  typo hard-fails up front. Without this, malformed URLs surfaced later
  as DeepError::Http and got per-candidate-skipped, masking the
  misconfiguration and quietly falling back to structural-only output.
- plans: fix broken relative links to PR 1 / overview now that PR 1
  moved to plans/done/ (4 files updated).

Round-4 CodeRabbit findings:

- candidate: skip per-candidate I/O errors in build_escalations and
  build_cold_regions instead of propagating DeepError::Io. Deep mode
  is best-effort — one deleted/permission-denied file shouldn't kill
  semantic enrichment for the rest of the scan. Non-Io errors
  (Config from containment violation, anything else) stay fatal.
- tests: add `.assert()` to both retry-test mocks so the tests prove
  the structured-output attempt fired AND the no-schema retry fired,
  not just that *some* request hit the server.
- plans: sync the documented `deep::run` contract to match the shipped
  signature (Vec ownership, returns merged set), and add `text` lang
  tag to the cost::record fenced block.

Amazon Q "Critical Issue" (CWE-532, API key exposure in debug logs)
was already addressed in 4934af2 — verified DeepRuntime's manual
Debug impl redacts api_key, OpenAiCompatibleClient doesn't derive
Debug/Display/Serialize, so no bypass exists.

Tests: 230 passing (213 lib unit + 17 integration); clippy clean.
---
 Cargo.lock                               |  1 +
 Cargo.toml                               |  1 +
 plans/done/01-pr1-deep-http-transport.md | 10 ++--
 plans/todo/00-deep-mode-overview.md      |  4 +-
 plans/todo/02-pr2-mcp-server.md          |  2 +-
 plans/todo/03-pr3-subprocess-hook.md     |  2 +-
 src/deep/candidate.rs                    | 65 ++++++++++++++++++++++--
 src/deep/client.rs                       | 11 ++++
 src/deep/config.rs                       | 37 ++++++++++++++
 tests/deep_http_integration.rs           | 40 ++++++++++++---
 10 files changed, 153 insertions(+), 20 deletions(-)

diff --git a/Cargo.lock b/Cargo.lock
index 7a8b676..fe5a969 100644
--- a/Cargo.lock
+++ b/Cargo.lock
@@ -2394,6 +2394,7 @@ dependencies = [
  "tree-sitter-java",
  "tree-sitter-javascript",
  "tree-sitter-typescript",
+ "url",
 ]
 
 [[package]]
diff --git a/Cargo.toml b/Cargo.toml
index 4d59f87..15190c5 100644
--- a/Cargo.toml
+++ b/Cargo.toml
@@ -24,6 +24,7 @@ regex = "1"
 streaming-iterator = "0.1"
 regorus = { version = "0.9", default-features = false, features = ["arc"] }
 reqwest = { version = "0.12", default-features = false, features = ["blocking", "json", "rustls-tls"] }
+url = "2"
 
 [dev-dependencies]
 tempfile = "3"
diff --git a/plans/done/01-pr1-deep-http-transport.md b/plans/done/01-pr1-deep-http-transport.md
index ec26c15..0d79836 100644
--- a/plans/done/01-pr1-deep-http-transport.md
+++ b/plans/done/01-pr1-deep-http-transport.md
@@ -1,6 +1,6 @@
 # PR 1 — Tier 2 deep scan: OpenAI-compatible HTTP transport
 
-Companion to [00-deep-mode-overview.md](./00-deep-mode-overview.md). This PR makes `--deep` functional end-to-end and lays down the shared primitives that PR 2 and PR 3 reuse.
+Companion to [00-deep-mode-overview.md](../todo/00-deep-mode-overview.md). This PR makes `--deep` functional end-to-end and lays down the shared primitives that PR 2 and PR 3 reuse.
 
 ## 1. Goal & scope
 
@@ -38,13 +38,13 @@ pub mod merge;
 pub mod prompt;
 
 pub fn run(
-    structural: &[Finding],
+    structural: Vec<Finding>,
     scan_root: &Path,
     runtime: &DeepRuntime,
 ) -> Result<Vec<Finding>, DeepError>;
 ```
 
-`run` is the single entry point called from `commands/scan.rs`. Synchronous (see §4). Returns `Vec<Finding>` with `pass: Semantic` already set. Merging into the master vec happens in the caller.
+`run` is the single entry point called from `commands/scan.rs`. Synchronous (see §4). Takes ownership of the structural vector (the deep pass may drop entries the model identifies as false positives) and returns the **merged** structural ∪ semantic findings, already deterministically re-sorted by `(file, line_start, line_end)`. Callers do not perform their own merge. PR 2 (MCP server) and PR 3 (subprocess hook) bind to the same shape.
 
 ### `src/deep/config.rs` — runtime config
 
@@ -411,7 +411,7 @@ cost_per_1k_output = 0.0
 
 Logic in `cost::record`:
 
-```
+```text
 delta = (in_tokens / 1000.0) * in_rate + (out_tokens / 1000.0) * out_rate
 spent += delta
 if cap.is_some_and(|c| spent > c): Err(CostExceeded { spent })
@@ -463,7 +463,7 @@ Each commit ~150-400 lines of diff, reviewable independently. PR title for the m
 ## 14. Shipped
 
 **Branch**: `feat/deep-http`
-**Test count**: 227 passing (210 lib unit + 17 integration); clippy clean with `-D warnings`.
+**Test count**: 230 passing (213 lib unit + 17 integration); clippy clean with `-D warnings`.
 
 ### Commits (in order)
 
diff --git a/plans/todo/00-deep-mode-overview.md b/plans/todo/00-deep-mode-overview.md
index b5ac450..0a60ea3 100644
--- a/plans/todo/00-deep-mode-overview.md
+++ b/plans/todo/00-deep-mode-overview.md
@@ -18,7 +18,7 @@ Make `zift scan --deep` produce semantic findings (`pass: ScanPass::Semantic`) w
 | Tier | Transport | When it's used | PR |
 |------|-----------|----------------|-----|
 | 1 | **MCP server** (`zift mcp`) | User has an agent host (Claude Code, Cursor, Continue, Cline, Zed). Their agent calls Zift tools; their agent calls the model. We never see the model. | [PR 2](./02-pr2-mcp-server.md) |
-| 2 | **OpenAI-compatible HTTP** (`--base-url`) | Headless / CI runs. One client speaks to Ollama, LM Studio, llama.cpp `server`, vLLM, OpenRouter, Together, Groq, Anthropic-via-proxy, OpenAI itself. | [PR 1](./01-pr1-deep-http-transport.md) |
+| 2 | **OpenAI-compatible HTTP** (`--base-url`) | Headless / CI runs. One client speaks to Ollama, LM Studio, llama.cpp `server`, vLLM, OpenRouter, Together, Groq, Anthropic-via-proxy, OpenAI itself. | [PR 1](../done/01-pr1-deep-http-transport.md) |
 | 3 | **Subprocess hook** (`--agent-cmd`) | Anything else — `claude -p`, `aider`, custom shell scripts, agents that don't expose HTTP. Stdin: prompt + JSON. Stdout: JSON matching our schema. | [PR 3](./03-pr3-subprocess-hook.md) |
 
 User picks explicitly via `[deep] mode = "mcp" | "http" | "subprocess"`. No provider auto-detection magic.
@@ -62,6 +62,6 @@ We build PR 1 first even though MCP (PR 2) is the strategically headline answer.
 
 ## Cross-references
 
-- [PR 1 — HTTP transport](./01-pr1-deep-http-transport.md)
+- [PR 1 — HTTP transport](../done/01-pr1-deep-http-transport.md)
 - [PR 2 — MCP server](./02-pr2-mcp-server.md)
 - [PR 3 — Subprocess hook](./03-pr3-subprocess-hook.md)
diff --git a/plans/todo/02-pr2-mcp-server.md b/plans/todo/02-pr2-mcp-server.md
index 3053001..de5dff2 100644
--- a/plans/todo/02-pr2-mcp-server.md
+++ b/plans/todo/02-pr2-mcp-server.md
@@ -1,6 +1,6 @@
 # PR 2 — Tier 1 deep scan: MCP server
 
-Companion to [00-deep-mode-overview.md](./00-deep-mode-overview.md). Builds on the primitives shipped in [PR 1](./01-pr1-deep-http-transport.md). This is the strategically headline transport — it inverts the model relationship so Zift never hosts an LLM client; the user's existing agent host (Claude Code, Cursor, Continue, Cline, Zed, etc.) calls Zift as an MCP tool provider.
+Companion to [00-deep-mode-overview.md](./00-deep-mode-overview.md). Builds on the primitives shipped in [PR 1](../done/01-pr1-deep-http-transport.md). This is the strategically headline transport — it inverts the model relationship so Zift never hosts an LLM client; the user's existing agent host (Claude Code, Cursor, Continue, Cline, Zed, etc.) calls Zift as an MCP tool provider.
 
 **Status**: not started. Depends on PR 1 landing.
 
diff --git a/plans/todo/03-pr3-subprocess-hook.md b/plans/todo/03-pr3-subprocess-hook.md
index 6896fe1..990fca3 100644
--- a/plans/todo/03-pr3-subprocess-hook.md
+++ b/plans/todo/03-pr3-subprocess-hook.md
@@ -1,6 +1,6 @@
 # PR 3 — Tier 3 deep scan: subprocess hook
 
-Companion to [00-deep-mode-overview.md](./00-deep-mode-overview.md). Builds on the primitives shipped in [PR 1](./01-pr1-deep-http-transport.md). The smallest of the three transports — an escape hatch for any agent that doesn't fit Tier 1 (MCP) or Tier 2 (HTTP).
+Companion to [00-deep-mode-overview.md](./00-deep-mode-overview.md). Builds on the primitives shipped in [PR 1](../done/01-pr1-deep-http-transport.md). The smallest of the three transports — an escape hatch for any agent that doesn't fit Tier 1 (MCP) or Tier 2 (HTTP).
 
 **Status**: not started. Depends on PR 1 landing.
 
diff --git a/src/deep/candidate.rs b/src/deep/candidate.rs
index bc76419..a9cdeb8 100644
--- a/src/deep/candidate.rs
+++ b/src/deep/candidate.rs
@@ -163,7 +163,24 @@ fn build_escalations(
         if !should_escalate(finding) {
             continue;
         }
-        let ctx = expand_finding(finding, scan_root, runtime.max_prompt_chars)?;
+        // I/O errors on a single file (deleted between scan and analyze,
+        // permission-denied, etc.) are best-effort: log and skip the
+        // candidate, don't abort the whole deep pass. Containment violations
+        // (`DeepError::Config` from `expand_finding`) and any other variant
+        // remain hard fails — they signal misconfiguration or malicious
+        // input that the operator should see.
+        let ctx = match expand_finding(finding, scan_root, runtime.max_prompt_chars) {
+            Ok(ctx) => ctx,
+            Err(DeepError::Io(e)) => {
+                tracing::warn!(
+                    "deep: skipping escalation for {}:{} — I/O error reading source: {e}",
+                    finding.file.display(),
+                    finding.line_start,
+                );
+                continue;
+            }
+            Err(other) => return Err(other),
+        };
         out.push(Candidate {
             kind: CandidateKind::Escalation,
             file: finding.file.clone(),
@@ -229,14 +246,28 @@ fn build_cold_regions(
             if overlaps_any(&file_relative, start, end, escalation_ranges) {
                 continue;
             }
-            let ctx = expand_region(
+            // Same best-effort policy as `build_escalations`: skip the
+            // cold region on per-file I/O errors, propagate everything else.
+            let ctx = match expand_region(
                 &file.path,
                 file_relative.clone(),
                 file.language,
                 start,
                 end,
                 runtime.max_prompt_chars,
-            )?;
+            ) {
+                Ok(ctx) => ctx,
+                Err(DeepError::Io(e)) => {
+                    tracing::warn!(
+                        "deep: skipping cold region {}:{}-{} — I/O error reading source: {e}",
+                        file_relative.display(),
+                        start,
+                        end,
+                    );
+                    continue;
+                }
+                Err(other) => return Err(other),
+            };
             out.push(Candidate {
                 kind: CandidateKind::ColdRegion,
                 file: file_relative.clone(),
@@ -582,6 +613,34 @@ mod tests {
         }
     }
 
+    #[test]
+    fn missing_escalation_file_is_skipped_not_fatal() {
+        // Regression: a structural finding pointing at a deleted file used to
+        // propagate `DeepError::Io` through `?`, killing the entire deep pass
+        // even though deep mode is otherwise best-effort.
+        use crate::types::{AuthCategory, Confidence, Finding, ScanPass};
+        let dir = tempdir().unwrap();
+        // One escalation finding pointing at a file that doesn't exist.
+        let bad = Finding {
+            id: "x".into(),
+            file: PathBuf::from("does-not-exist.ts"),
+            line_start: 1,
+            line_end: 1,
+            code_snippet: String::new(),
+            language: Language::TypeScript,
+            category: AuthCategory::Custom,
+            confidence: Confidence::Low,
+            description: "x".into(),
+            pattern_rule: None,
+            rego_stub: None,
+            pass: ScanPass::Structural,
+        };
+        // Should NOT propagate Io; should return Ok with the bad escalation
+        // skipped. (No cold-region files either, so result is empty.)
+        let candidates = select_candidates(&[bad], dir.path(), &rt()).unwrap();
+        assert!(candidates.is_empty(), "got: {candidates:?}");
+    }
+
     #[test]
     fn cold_region_respects_excludes() {
         let dir = tempdir().unwrap();
diff --git a/src/deep/client.rs b/src/deep/client.rs
index 0d8cd7d..cba7b58 100644
--- a/src/deep/client.rs
+++ b/src/deep/client.rs
@@ -136,6 +136,17 @@ impl OpenAiCompatibleClient {
                     "server rejected response_format ({status}); retrying without schema"
                 )));
             }
+            // 5xx is a transient/server-side failure, not misconfiguration.
+            // Surface as `BadResponse` so the orchestrator's per-candidate
+            // skip path takes it instead of aborting the entire deep run
+            // (which `Config` would do — that bucket is reserved for
+            // operator-actionable misconfiguration).
+            if status.is_server_error() {
+                return Err(DeepError::BadResponse(format!(
+                    "upstream {} from {}",
+                    status, self.base_url
+                )));
+            }
             return Err(DeepError::Config(format!(
                 "HTTP {} from {}",
                 status, self.base_url
diff --git a/src/deep/config.rs b/src/deep/config.rs
index 246d958..5a99112 100644
--- a/src/deep/config.rs
+++ b/src/deep/config.rs
@@ -110,6 +110,13 @@ pub fn build(args: &ScanArgs, config: &ZiftConfig) -> Result<DeepRuntime, DeepEr
                     .into(),
             )
         })?;
+    // Parse the URL eagerly so a malformed value (missing scheme, invalid host, etc.)
+    // hard-fails at config-build time instead of surfacing later as a
+    // per-candidate `DeepError::Http` skip — which would silently fall back
+    // to structural-only output and hide the misconfiguration from the user.
+    url::Url::parse(&base_url).map_err(|e| {
+        DeepError::Config(format!("--base-url is not a valid URL ({base_url:?}): {e}"))
+    })?;
 
     let model = args
         .model
@@ -351,6 +358,36 @@ mod tests {
         assert!(matches!(err, DeepError::Config(_)));
     }
 
+    #[test]
+    fn malformed_base_url_rejected_at_build_time() {
+        // A typo without a scheme would otherwise reach `client.rs` and fail
+        // there as `DeepError::Http`, which the orchestrator silently skips
+        // per-candidate — masking the misconfiguration. Catch it up front.
+        let args = args_with(Some("not a url"), Some("m"), None, None);
+        let err = build(&args, &ZiftConfig::default()).unwrap_err();
+        assert!(
+            matches!(err, DeepError::Config(ref msg) if msg.contains("not a valid URL")),
+            "expected Config(<not a valid URL>), got: {err:?}",
+        );
+    }
+
+    #[test]
+    fn well_formed_base_urls_accepted() {
+        // Sanity: the validator must not regress on real-world base URLs.
+        for url in [
+            "http://localhost:11434/v1",
+            "https://api.openai.com/v1",
+            "http://127.0.0.1:8080/v1",
+            "http://[::1]:8080/v1",
+        ] {
+            let args = args_with(Some(url), Some("m"), None, None);
+            assert!(
+                build(&args, &ZiftConfig::default()).is_ok(),
+                "validator rejected real-world URL: {url}",
+            );
+        }
+    }
+
     #[test]
     fn empty_api_key_normalized_to_none() {
         let args = args_with(Some("http://x/v1"), Some("m"), Some(""), None);
diff --git a/tests/deep_http_integration.rs b/tests/deep_http_integration.rs
index 5e23a4c..dc40021 100644
--- a/tests/deep_http_integration.rs
+++ b/tests/deep_http_integration.rs
@@ -167,8 +167,10 @@ fn malformed_json_returns_bad_response_after_retry() {
 fn fallback_retry_succeeds_when_first_attempt_returns_bad_json() {
     let mut server = Server::new();
 
-    // First attempt (with response_format) returns garbage.
-    let _bad = server
+    // First attempt (with response_format) returns garbage. PartialJsonString
+    // requires the body to have a `response_format` key, so this mock only
+    // matches the structured-output attempt — not the retry.
+    let bad = server
         .mock("POST", "/chat/completions")
         .match_body(mockito::Matcher::PartialJsonString(
             r#"{"response_format": {}}"#.into(),
@@ -177,8 +179,10 @@ fn fallback_retry_succeeds_when_first_attempt_returns_bad_json() {
         .with_body(ok_response("not json", 50, 10))
         .create();
 
-    // Second attempt (without response_format) returns valid findings.
-    let _good = server
+    // Second attempt (without response_format) returns valid findings. The
+    // first mock won't match this request (no `response_format` field), so
+    // mockito falls through to this one.
+    let good = server
         .mock("POST", "/chat/completions")
         .with_status(200)
         .with_body(ok_response(&findings_content_one(), 60, 30))
@@ -193,6 +197,12 @@ fn fallback_retry_succeeds_when_first_attempt_returns_bad_json() {
 
     let response = client.analyze(&prompt).unwrap();
     assert_eq!(response.findings.len(), 1);
+    // Assert BOTH mocks were consumed exactly once (mockito's default
+    // expectation). This proves the structured-output attempt fired AND the
+    // retry without schema fired — without these asserts the test could pass
+    // by accidentally hitting `good` twice.
+    bad.assert();
+    good.assert();
 }
 
 #[test]
@@ -224,7 +234,7 @@ fn http_400_with_response_format_triggers_retry() {
     // Second attempt (without response_format) returns valid findings.
     let mut server = Server::new();
 
-    let _bad = server
+    let bad = server
         .mock("POST", "/chat/completions")
         .match_body(mockito::Matcher::PartialJsonString(
             r#"{"response_format": {}}"#.into(),
@@ -233,7 +243,7 @@ fn http_400_with_response_format_triggers_retry() {
         .with_body(r#"{"error": "response_format unsupported"}"#)
         .create();
 
-    let _good = server
+    let good = server
         .mock("POST", "/chat/completions")
         .with_status(200)
         .with_body(ok_response(&findings_content_one(), 60, 30))
@@ -248,6 +258,11 @@ fn http_400_with_response_format_triggers_retry() {
 
     let response = client.analyze(&prompt).unwrap();
     assert_eq!(response.findings.len(), 1);
+    // Assert BOTH mocks fired exactly once — see the fallback-retry test
+    // above for why this matters (without it, the structured-output attempt
+    // could be silently skipped and the test would still pass).
+    bad.assert();
+    good.assert();
 }
 
 #[test]
@@ -299,12 +314,17 @@ fn http_401_surfaces_as_config_error() {
 }
 
 #[test]
-fn http_500_surfaces_as_config_error() {
+fn http_500_surfaces_as_bad_response_for_per_candidate_skip() {
+    // 5xx is a transient server-side failure, NOT misconfiguration. It must
+    // surface as `BadResponse` so the orchestrator's per-candidate skip path
+    // takes it; mapping to `Config` would hard-fail the whole deep run on
+    // one upstream blip.
     let mut server = Server::new();
     let m = server
         .mock("POST", "/chat/completions")
         .with_status(500)
         .with_body("internal server error")
+        .expect_at_least(1) // analyze() retries without response_format once
         .create();
 
     let runtime = runtime_for(&server.url());
@@ -315,8 +335,12 @@ fn http_500_surfaces_as_config_error() {
     });
 
     let err = client.analyze(&prompt).unwrap_err();
+    assert!(
+        matches!(err, DeepError::BadResponse(_)),
+        "expected BadResponse for 5xx, got: {err:?}",
+    );
     let msg = format!("{err}");
-    assert!(msg.contains("500"), "got: {msg}");
+    assert!(msg.contains("500"), "msg should reference status: {msg}");
     m.assert();
 }
 

From dafd0cab8d325582f6a5e86b99b0f2bb2b89aff1 Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@enforceauth.com>
Date: Tue, 28 Apr 2026 23:14:31 -0400
Subject: [PATCH 17/18] fix(deep): address PR review round-5 feedback

- client: classify HTTP 429 as transient (BadResponse) instead of Config
  hard-fail, so rate-limited candidates take the per-candidate skip path
- config: validate base_url scheme is http/https at config-build time;
  reject file://, ftp://, ws://, etc. up front instead of letting them
  fail later as opaque transport errors
- candidate: sort discovered files by path before applying the cold-region
  budget so the surviving subset is stable across filesystems and runs
- skill: broaden Amazon Q "review complete" gate to include PR-level
  issue comments (the channel where AQ posts its Critical Issue summary);
  document that AQ does not auto-re-review on push, so its commit_id
  lagging HEAD is expected and should not block the workflow
- tests: add http_429 transport-skip test and non-HTTP-scheme rejection test
---
 .claude/commands/address-pr-feedback.md | 20 ++++++++++++++---
 src/deep/candidate.rs                   |  6 ++++-
 src/deep/client.rs                      |  9 ++++++++
 src/deep/config.rs                      | 27 +++++++++++++++++++++-
 tests/deep_http_integration.rs          | 30 +++++++++++++++++++++++++
 5 files changed, 87 insertions(+), 5 deletions(-)

diff --git a/.claude/commands/address-pr-feedback.md b/.claude/commands/address-pr-feedback.md
index c65da5e..1c7d753 100644
--- a/.claude/commands/address-pr-feedback.md
+++ b/.claude/commands/address-pr-feedback.md
@@ -33,14 +33,28 @@ gh api "repos/{owner}/{repo}/pulls/{pr}/comments" --paginate \
 **CodeRabbit**: Look for a PR comment containing "Walkthrough" or a review with `coderabbitai` as author. If not present, inform the user:
 > "CodeRabbit hasn't reviewed this PR yet. Wait for its review or run `@coderabbitai review` as a PR comment, then re-run this command."
 
-**Amazon Q**: Look for review comments from `amazon-q-developer[bot]`. If not present, inform the user:
+**Amazon Q**: Look for activity from `amazon-q-developer[bot]` in *either* channel:
+- review/inline comments (pulls comments endpoint), or
+- PR-level comments (issues comments endpoint — this is where AQ posts its "Critical Issue" summary).
+
+If neither is present, inform the user:
 > "Amazon Q hasn't reviewed this PR yet. Wait for its review, then re-run this command."
 
+```bash
+# Check both channels for AQ activity
+gh api "repos/{owner}/{repo}/pulls/{pr}/comments" --paginate \
+  --jq '[.[] | select(.user.login == "amazon-q-developer[bot]")] | length'
+gh api "repos/{owner}/{repo}/issues/{pr}/comments" --paginate \
+  --jq '[.[] | select(.user.login == "amazon-q-developer[bot]")] | length'
+```
+
 **If either bot hasn't finished, stop here.** Do not proceed to fixing issues with incomplete feedback.
 
 #### 2a. Confirm the latest bot review covers the latest commit
 
-Bots re-review on every push. If you ran a previous round of `/address-pr-feedback`, pushed a fix commit, and the bot's response to that push hasn't landed yet, the next round will miss the new findings and cause exactly the bug this section exists to prevent.
+CodeRabbit re-reviews on every push. If you ran a previous round of `/address-pr-feedback`, pushed a fix commit, and CodeRabbit's response to that push hasn't landed yet, the next round will miss the new findings and cause exactly the bug this section exists to prevent.
+
+**Amazon Q does NOT re-review automatically on push** — it only reviews on initial PR open (or when explicitly triggered). After any fix push, AQ's `commit_id` will lag HEAD and that is *expected*. Don't block on it.
 
 ```bash
 # Compare the head SHA of the PR to the most recent CodeRabbit review's commit_id
@@ -54,7 +68,7 @@ echo "Last CR review commit: $LATEST_CR_COMMIT"
 If `$LATEST_CR_COMMIT` does not match `$HEAD_SHA`, CodeRabbit hasn't reviewed the latest commit yet. Tell the user:
 > "CodeRabbit's latest review is on commit `<short-SHA>` but PR head is `<short-SHA>`. Wait a few minutes for the new review to land, then re-run."
 
-Do the same check for Amazon Q (its review-author endpoint pins to a `commit_id` too). Don't proceed until both bots have caught up to head.
+For Amazon Q, optionally surface its review `commit_id` for context, but **do not block** on a mismatch: tell the user that AQ's findings (if any) come from its initial review pass, then proceed.
 
 ### 3. Fetch review comments (token-efficient two-pass approach)
 
diff --git a/src/deep/candidate.rs b/src/deep/candidate.rs
index a9cdeb8..b81691e 100644
--- a/src/deep/candidate.rs
+++ b/src/deep/candidate.rs
@@ -206,8 +206,12 @@ fn build_cold_regions(
         return Ok(Vec::new());
     }
 
-    let discovered =
+    let mut discovered =
         discover_files_for_deep(scan_root, &runtime.excludes, &runtime.language_filter);
+    // Sort by path so that under tight `max_candidates`, the surviving cold
+    // subset is stable across filesystems and runs. Without this, the
+    // post-loop sort only orders the items we already happened to pick.
+    discovered.sort_by(|a, b| a.path.cmp(&b.path));
     let mut out: Vec<Candidate> = Vec::new();
 
     for file in discovered {
diff --git a/src/deep/client.rs b/src/deep/client.rs
index cba7b58..20abd40 100644
--- a/src/deep/client.rs
+++ b/src/deep/client.rs
@@ -147,6 +147,15 @@ impl OpenAiCompatibleClient {
                     status, self.base_url
                 )));
             }
+            // 429 Too Many Requests is transient (rate-limit / quota), same
+            // bucket as 5xx — let the orchestrator skip this candidate rather
+            // than abort the whole deep run.
+            if code == 429 {
+                return Err(DeepError::BadResponse(format!(
+                    "upstream rate-limited ({} from {})",
+                    status, self.base_url
+                )));
+            }
             return Err(DeepError::Config(format!(
                 "HTTP {} from {}",
                 status, self.base_url
diff --git a/src/deep/config.rs b/src/deep/config.rs
index 5a99112..a4460c2 100644
--- a/src/deep/config.rs
+++ b/src/deep/config.rs
@@ -114,9 +114,18 @@ pub fn build(args: &ScanArgs, config: &ZiftConfig) -> Result<DeepRuntime, DeepEr
     // hard-fails at config-build time instead of surfacing later as a
     // per-candidate `DeepError::Http` skip — which would silently fall back
     // to structural-only output and hide the misconfiguration from the user.
-    url::Url::parse(&base_url).map_err(|e| {
+    let parsed = url::Url::parse(&base_url).map_err(|e| {
         DeepError::Config(format!("--base-url is not a valid URL ({base_url:?}): {e}"))
     })?;
+    // The deep client speaks HTTP; reject `file://`, `ftp://`, etc. up-front
+    // rather than letting the request fail downstream as an opaque transport
+    // error.
+    if !matches!(parsed.scheme(), "http" | "https") {
+        return Err(DeepError::Config(format!(
+            "--base-url must use http or https (got {:?} in {base_url:?})",
+            parsed.scheme()
+        )));
+    }
 
     let model = args
         .model
@@ -371,6 +380,22 @@ mod tests {
         );
     }
 
+    #[test]
+    fn non_http_scheme_rejected_at_build_time() {
+        // The deep client speaks HTTP. `file://`, `ftp://`, etc. parse as
+        // valid URLs but have no business reaching the request layer — surface
+        // them as `Config` up front so the user gets a clear error instead of
+        // an opaque transport failure.
+        for url in ["file:///etc/passwd", "ftp://example.com/", "ws://x/v1"] {
+            let args = args_with(Some(url), Some("m"), None, None);
+            let err = build(&args, &ZiftConfig::default()).unwrap_err();
+            assert!(
+                matches!(err, DeepError::Config(ref msg) if msg.contains("must use http or https")),
+                "expected Config(<must use http or https>) for {url}, got: {err:?}",
+            );
+        }
+    }
+
     #[test]
     fn well_formed_base_urls_accepted() {
         // Sanity: the validator must not regress on real-world base URLs.
diff --git a/tests/deep_http_integration.rs b/tests/deep_http_integration.rs
index dc40021..ff3b8b3 100644
--- a/tests/deep_http_integration.rs
+++ b/tests/deep_http_integration.rs
@@ -344,6 +344,36 @@ fn http_500_surfaces_as_bad_response_for_per_candidate_skip() {
     m.assert();
 }
 
+#[test]
+fn http_429_surfaces_as_bad_response_for_per_candidate_skip() {
+    // 429 Too Many Requests is transient (rate-limit / quota), same bucket
+    // as 5xx — must hit the per-candidate skip path, NOT abort the whole
+    // deep run via `Config`.
+    let mut server = Server::new();
+    let m = server
+        .mock("POST", "/chat/completions")
+        .with_status(429)
+        .with_body("rate limited")
+        .expect_at_least(1)
+        .create();
+
+    let runtime = runtime_for(&server.url());
+    let client = OpenAiCompatibleClient::new(&runtime).unwrap();
+    let prompt = render(&PromptInputs {
+        candidate: &synth_candidate(),
+        structural_finding: None,
+    });
+
+    let err = client.analyze(&prompt).unwrap_err();
+    assert!(
+        matches!(err, DeepError::BadResponse(_)),
+        "expected BadResponse for 429, got: {err:?}",
+    );
+    let msg = format!("{err}");
+    assert!(msg.contains("429"), "msg should reference status: {msg}");
+    m.assert();
+}
+
 #[test]
 fn cost_tracker_caps_and_errors() {
     let mut runtime = runtime_for("http://unused");

From 15655a12e763eafc0113a42a0ae7325b72792239 Mon Sep 17 00:00:00 2001
From: Brad Anderson <brad@enforceauth.com>
Date: Tue, 28 Apr 2026 23:32:31 -0400
Subject: [PATCH 18/18] fix(deep): address PR review round-6 feedback
MIME-Version: 1.0
Content-Type: text/plain; charset=UTF-8
Content-Transfer-Encoding: 8bit

- error/client/mod: add `DeepError::Transient` variant for upstream
  transient failures (5xx, 429). Previous round mapped these to
  `BadResponse`, which incorrectly triggered the schema-fallback retry
  in `analyze()` — doubling traffic during outages and rate limits with
  zero benefit (removing `response_format` cannot fix a 500 or a 429).
  Orchestrator now skips `Transient` per-candidate just like `BadResponse`,
  but the retry path is gated to `BadResponse` only.
- config: trim `model` before the emptiness check so whitespace-only
  values like "   " are treated as missing and fail fast at config-build
  time instead of as an opaque upstream rejection.
- plans/done/01: sync the API-key handling description with shipped
  behavior (silently accepted, not "warn at startup").
- plans/todo/03: drop the recommendation to print 500 bytes of stdout
  and raw stderr on `agent_cmd` failure; align the planning text with
  the redaction discipline from PR 1 (gate behind debug logging,
  redact + cap snippets) so we don't reintroduce the source/secret-leak
  class we already avoid in `src/deep/client.rs`.
- skill: hyphenate "full-body fetch" (compound modifier).
- tests: tighten the 500/429 tests to assert exactly one upstream request
  (no schema-fallback retry on transient failures) and to expect the new
  `Transient` variant. Add a unit test for whitespace-only `model`
  rejection.
---
 .claude/commands/address-pr-feedback.md  |  2 +-
 plans/done/01-pr1-deep-http-transport.md |  6 ++---
 plans/todo/03-pr3-subprocess-hook.md     |  2 +-
 src/deep/client.rs                       | 15 +++++++-----
 src/deep/config.rs                       | 19 +++++++++++++++
 src/deep/error.rs                        |  8 ++++++
 src/deep/mod.rs                          |  8 ++++++
 tests/deep_http_integration.rs           | 31 ++++++++++++++----------
 8 files changed, 67 insertions(+), 24 deletions(-)

diff --git a/.claude/commands/address-pr-feedback.md b/.claude/commands/address-pr-feedback.md
index 1c7d753..eb4f008 100644
--- a/.claude/commands/address-pr-feedback.md
+++ b/.claude/commands/address-pr-feedback.md
@@ -120,7 +120,7 @@ gh api "repos/{owner}/{repo}/pulls/{pr}/reviews" --paginate \
      has_agent_prompt: (.body | test("Prompt for AI Agents"))}'
 ```
 
-**Only fetch the full review body** if `has_nitpicks`, `has_duplicates`, `has_outside_diff`, or `has_agent_prompt` is true AND the review is from the latest round (i.e., after your last push). For earlier rounds where inline comments were already replied to, skip the full body fetch.
+**Only fetch the full review body** if `has_nitpicks`, `has_duplicates`, `has_outside_diff`, or `has_agent_prompt` is true AND the review is from the latest round (i.e., after your last push). For earlier rounds where inline comments were already replied to, skip the full-body fetch.
 
 ```bash
 # Fetch full body ONLY for reviews that need it (one at a time)
diff --git a/plans/done/01-pr1-deep-http-transport.md b/plans/done/01-pr1-deep-http-transport.md
index 0d79836..6b5f31c 100644
--- a/plans/done/01-pr1-deep-http-transport.md
+++ b/plans/done/01-pr1-deep-http-transport.md
@@ -71,7 +71,7 @@ Resolution precedence:
 - `base_url`, `model`, `max_cost`: CLI flag > `[deep]` config > built-in default.
 - `api_key`: CLI flag (`--api-key`) > env var (`ZIFT_AGENT_API_KEY`) > unset. **Intentionally NOT readable from `.zift.toml`** — keys belong in env vars or CLI to avoid accidental secret commits.
 
-Validation: empty `base_url` is hard error; missing `model` is hard error; missing `api_key` is a warning (not an error — Ollama/llama.cpp accept any value).
+Validation: empty `base_url` is hard error; missing `model` is hard error; missing `api_key` is silently accepted (Ollama/llama.cpp accept any value, so requiring a key — or even warning — would create friction for the local-LLM path that motivated this design).
 
 ### `src/deep/error.rs`
 
@@ -387,7 +387,7 @@ CLI test for `--provider` no longer applies; replace with `--base-url`. Existing
 |---|---|
 | Malformed JSON from model | One retry with degraded prompt; if still bad, log warning + drop candidate, continue |
 | HTTP timeout | Configurable per-request timeout (default 120s); on timeout, log + drop candidate |
-| API key missing | Warn at startup if base_url is non-localhost; allow it (local servers don't need keys) |
+| API key missing | Silently accept (`api_key: None`); local servers don't need keys, and remote endpoints will surface their own 401/403, which we already hard-fail on with a clear "auth rejected by {base_url}" message |
 | Cost ceiling hit mid-run | Stop dispatching new candidates; finalize in-flight; warn with spent total; return findings collected so far |
 | HTTP 401/403 | Hard fail with clear "auth rejected by {base_url}" message |
 | HTTP 5xx | Exponential backoff (3 attempts at 1s, 4s, 16s) then drop |
@@ -425,7 +425,7 @@ CLI `--max-cost` wins over toml; CLI flags for the rates intentionally not added
 
 Six commits, each compiling and passing tests:
 
-1. **`refactor(cli): drop closed LlmProvider enum, add --base-url, rename env var`** — `cli.rs`, `config.rs`, `commands/init.rs`, `docs/DESIGN.md`, CLI tests. Renames `ZIFT_API_KEY` → `ZIFT_AGENT_API_KEY`; `api_key` removed from config-file schema. Stub-only deep scan still prints the warning.
+1. **`refactor(cli): drop closed LlmProvider enum, add --base-url, rename env var`** — `cli.rs`, `config.rs`, `commands/init.rs`, `docs/DESIGN.md`, CLI tests. Renames `ZIFT_API_KEY` → `ZIFT_AGENT_API_KEY`; `api_key` removed from config-file schema.
 2. **`feat(deep): add deep module skeleton with config + error types`** — empty modules with type definitions; `deep::run` returns `Ok(vec![])`; wired into `commands/scan.rs`; tests for `config::build`. Expose `compute_finding_id` from scanner.
 3. **`feat(deep): candidate selection and context expansion`** — `candidate.rs`, `context.rs` with tests. `deep::run` produces candidates but returns empty findings.
 4. **`feat(deep): prompt rendering and JSON schema`** — `prompt.rs`, `finding.rs`. `output_schema()` and `SYSTEM_PROMPT` exported. Tests for prompt validity.
diff --git a/plans/todo/03-pr3-subprocess-hook.md b/plans/todo/03-pr3-subprocess-hook.md
index 990fca3..8066e77 100644
--- a/plans/todo/03-pr3-subprocess-hook.md
+++ b/plans/todo/03-pr3-subprocess-hook.md
@@ -103,5 +103,5 @@ Each commit small and reviewable.
 
 ## 9. Risks
 
-- **Hard to debug.** When a user's `agent_cmd` returns garbage, the failure mode is opaque. Always print the first ~500 bytes of stdout to stderr on parse failure. Always print stderr from the subprocess on nonzero exit.
+- **Hard to debug.** When a user's `agent_cmd` returns garbage, the failure mode is opaque. Surface a generic, non-sensitive error to the user (e.g. "agent_cmd failed to parse output"). Gate verbose stdout/stderr capture behind explicit debug logging (e.g. `RUST_LOG=zift::deep=debug`), and even there cap the snippet length and apply the same redaction discipline as `src/deep/client.rs` — `agent_cmd` output can mirror prompt text and scanned source verbatim, which would re-create the secret/source-leak class we already avoid in the HTTP client.
 - **Security.** Running arbitrary shell commands the user configured is a footgun if `.zift.toml` is checked in to a repo and Zift is run by another user. Document; consider warning when `agent_cmd` is read from a `.zift.toml` not owned by the running user.
diff --git a/src/deep/client.rs b/src/deep/client.rs
index 20abd40..2e6f637 100644
--- a/src/deep/client.rs
+++ b/src/deep/client.rs
@@ -137,21 +137,24 @@ impl OpenAiCompatibleClient {
                 )));
             }
             // 5xx is a transient/server-side failure, not misconfiguration.
-            // Surface as `BadResponse` so the orchestrator's per-candidate
+            // Surface as `Transient` so the orchestrator's per-candidate
             // skip path takes it instead of aborting the entire deep run
             // (which `Config` would do — that bucket is reserved for
-            // operator-actionable misconfiguration).
+            // operator-actionable misconfiguration). NOT `BadResponse`,
+            // because that triggers `analyze()`'s schema-fallback retry —
+            // removing `response_format` cannot fix a 5xx, so retrying
+            // just doubles traffic during outages.
             if status.is_server_error() {
-                return Err(DeepError::BadResponse(format!(
+                return Err(DeepError::Transient(format!(
                     "upstream {} from {}",
                     status, self.base_url
                 )));
             }
             // 429 Too Many Requests is transient (rate-limit / quota), same
-            // bucket as 5xx — let the orchestrator skip this candidate rather
-            // than abort the whole deep run.
+            // bucket as 5xx — skip the candidate, do NOT trigger the
+            // schema-fallback retry (it would just re-hit the rate limit).
             if code == 429 {
-                return Err(DeepError::BadResponse(format!(
+                return Err(DeepError::Transient(format!(
                     "upstream rate-limited ({} from {})",
                     status, self.base_url
                 )));
diff --git a/src/deep/config.rs b/src/deep/config.rs
index a4460c2..33d2f81 100644
--- a/src/deep/config.rs
+++ b/src/deep/config.rs
@@ -127,10 +127,14 @@ pub fn build(args: &ScanArgs, config: &ZiftConfig) -> Result<DeepRuntime, DeepEr
         )));
     }
 
+    // Trim before the emptiness check so whitespace-only values like "   "
+    // are treated as missing — otherwise the failure moves from config-build
+    // (clear, actionable) to request-time as an opaque upstream rejection.
     let model = args
         .model
         .clone()
         .or_else(|| config.deep.model.clone())
+        .map(|s| s.trim().to_string())
         .filter(|s| !s.is_empty())
         .ok_or_else(|| {
             DeepError::Config(
@@ -360,6 +364,21 @@ mod tests {
         assert!(matches!(err, DeepError::Config(_)));
     }
 
+    #[test]
+    fn whitespace_only_model_treated_as_missing() {
+        // Without trim, "   " sneaks past `!is_empty()` and the failure moves
+        // to request time as an opaque upstream rejection — defeating the
+        // fail-fast config contract.
+        for model in ["   ", "\t", "\n", " \t\n "] {
+            let args = args_with(Some("http://x/v1"), Some(model), None, None);
+            let err = build(&args, &ZiftConfig::default()).unwrap_err();
+            assert!(
+                matches!(err, DeepError::Config(_)),
+                "expected Config error for model={model:?}, got: {err:?}",
+            );
+        }
+    }
+
     #[test]
     fn empty_base_url_treated_as_missing() {
         let args = args_with(Some(""), Some("m"), None, None);
diff --git a/src/deep/error.rs b/src/deep/error.rs
index 0bcafc5..e9de078 100644
--- a/src/deep/error.rs
+++ b/src/deep/error.rs
@@ -15,6 +15,14 @@ pub enum DeepError {
     #[error("model returned malformed JSON: {0}")]
     BadResponse(String),
 
+    /// Transient upstream failure (5xx, 429, etc.). The orchestrator skips
+    /// the affected candidate and continues; this is *not* hard-failed as
+    /// `Config`, and unlike `BadResponse` it does NOT trigger the
+    /// schema-fallback retry — removing `response_format` cannot fix a
+    /// rate-limit or server outage, and retrying just doubles traffic.
+    #[error("transient upstream failure: {0}")]
+    Transient(String),
+
     #[error("cost ceiling reached after ${spent:.4} USD")]
     CostExceeded { spent: f64 },
 
diff --git a/src/deep/mod.rs b/src/deep/mod.rs
index 5795723..56b8b5b 100644
--- a/src/deep/mod.rs
+++ b/src/deep/mod.rs
@@ -99,6 +99,14 @@ pub fn run(
                 );
                 continue;
             }
+            Err(DeepError::Transient(msg)) => {
+                tracing::warn!(
+                    "deep: transient upstream failure on {}:{} (skipping): {msg}",
+                    candidate.file.display(),
+                    candidate.line_start
+                );
+                continue;
+            }
             Err(DeepError::Timeout { secs }) => {
                 tracing::warn!(
                     "deep: timeout ({}s) on {}:{} (skipping)",
diff --git a/tests/deep_http_integration.rs b/tests/deep_http_integration.rs
index ff3b8b3..f49d142 100644
--- a/tests/deep_http_integration.rs
+++ b/tests/deep_http_integration.rs
@@ -314,17 +314,20 @@ fn http_401_surfaces_as_config_error() {
 }
 
 #[test]
-fn http_500_surfaces_as_bad_response_for_per_candidate_skip() {
+fn http_500_surfaces_as_transient_for_per_candidate_skip() {
     // 5xx is a transient server-side failure, NOT misconfiguration. It must
-    // surface as `BadResponse` so the orchestrator's per-candidate skip path
-    // takes it; mapping to `Config` would hard-fail the whole deep run on
-    // one upstream blip.
+    // surface as `Transient` so the orchestrator's per-candidate skip path
+    // takes it. Mapping to `Config` would hard-fail the whole deep run on
+    // one upstream blip; mapping to `BadResponse` would (incorrectly) trigger
+    // the schema-fallback retry — pointless during an outage and just doubles
+    // upstream traffic.
     let mut server = Server::new();
     let m = server
         .mock("POST", "/chat/completions")
         .with_status(500)
         .with_body("internal server error")
-        .expect_at_least(1) // analyze() retries without response_format once
+        // EXACTLY one request: `analyze()` must NOT retry transient failures.
+        .expect(1)
         .create();
 
     let runtime = runtime_for(&server.url());
@@ -336,8 +339,8 @@ fn http_500_surfaces_as_bad_response_for_per_candidate_skip() {
 
     let err = client.analyze(&prompt).unwrap_err();
     assert!(
-        matches!(err, DeepError::BadResponse(_)),
-        "expected BadResponse for 5xx, got: {err:?}",
+        matches!(err, DeepError::Transient(_)),
+        "expected Transient for 5xx, got: {err:?}",
     );
     let msg = format!("{err}");
     assert!(msg.contains("500"), "msg should reference status: {msg}");
@@ -345,16 +348,18 @@ fn http_500_surfaces_as_bad_response_for_per_candidate_skip() {
 }
 
 #[test]
-fn http_429_surfaces_as_bad_response_for_per_candidate_skip() {
+fn http_429_surfaces_as_transient_for_per_candidate_skip() {
     // 429 Too Many Requests is transient (rate-limit / quota), same bucket
-    // as 5xx — must hit the per-candidate skip path, NOT abort the whole
-    // deep run via `Config`.
+    // as 5xx — must hit the per-candidate skip path, NOT abort via `Config`,
+    // and NOT trigger the schema-fallback retry (it would just re-hit the
+    // rate limit and worsen the situation).
     let mut server = Server::new();
     let m = server
         .mock("POST", "/chat/completions")
         .with_status(429)
         .with_body("rate limited")
-        .expect_at_least(1)
+        // EXACTLY one request: `analyze()` must NOT retry rate-limited responses.
+        .expect(1)
         .create();
 
     let runtime = runtime_for(&server.url());
@@ -366,8 +371,8 @@ fn http_429_surfaces_as_bad_response_for_per_candidate_skip() {
 
     let err = client.analyze(&prompt).unwrap_err();
     assert!(
-        matches!(err, DeepError::BadResponse(_)),
-        "expected BadResponse for 429, got: {err:?}",
+        matches!(err, DeepError::Transient(_)),
+        "expected Transient for 429, got: {err:?}",
     );
     let msg = format!("{err}");
     assert!(msg.contains("429"), "msg should reference status: {msg}");