diff --git a/package.json b/package.json
index 5fc3dd6..3610080 100644
--- a/package.json
+++ b/package.json
@@ -40,7 +40,7 @@
     "build": "tsc && cp src/voice/ui.html dist/voice/",
     "dev": "tsc --watch",
     "start": "node dist/index.js",
-    "test": "node --test test/*.test.ts --experimental-strip-types"
+    "test": "node --experimental-strip-types --experimental-loader=./test/ts-resolve-hook.mjs --no-warnings --test test/*.test.ts"
   },
   "engines": {
     "node": ">=20"
diff --git a/spec/schemas/workflow.schema.json b/spec/schemas/workflow.schema.json
new file mode 100644
index 0000000..9f0c98b
--- /dev/null
+++ b/spec/schemas/workflow.schema.json
@@ -0,0 +1,69 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "https://gitclaw.dev/spec/workflow.schema.json",
+  "title": "GitClaw SkillFlow Workflow",
+  "description": "A SkillFlow workflow: a named sequence of steps, where each step invokes a skill with a prompt. Runtime semantics are defined by src/workflows.ts.",
+  "type": "object",
+  "additionalProperties": false,
+  "required": ["name", "description", "steps"],
+  "properties": {
+    "name": {
+      "type": "string",
+      "description": "Kebab-case identifier for the workflow. Used as the file name.",
+      "pattern": "^[a-z0-9]+(-[a-z0-9]+)*$"
+    },
+    "description": {
+      "type": "string",
+      "description": "One-line description of what the workflow does.",
+      "minLength": 1
+    },
+    "steps": {
+      "type": "array",
+      "description": "Ordered list of steps. Steps execute top-to-bottom.",
+      "minItems": 1,
+      "items": { "$ref": "#/definitions/step" }
+    }
+  },
+  "definitions": {
+    "step": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["skill", "prompt"],
+      "properties": {
+        "id": {
+          "type": "string",
+          "description": "Optional snake_case identifier for the step. Used when other steps reference this one via depends_on.",
+          "pattern": "^[a-z0-9]+(_[a-z0-9]+)*$"
+        },
+        "skill": {
+          "type": "string",
+          "description": "Name of an installed skill (kebab-case) the step will invoke. Must match an entry in the agent's skills/ directory, or 'approval' for a human-review step.",
+          "pattern": "^[a-z0-9]+(-[a-z0-9]+)*$"
+        },
+        "prompt": {
+          "type": "string",
+          "description": "The natural-language instruction passed to the skill for this step.",
+          "minLength": 1
+        },
+        "channel": {
+          "type": "string",
+          "description": "Optional channel/destination for the step output (e.g. a Slack channel name).",
+          "minLength": 1
+        },
+        "depends_on": {
+          "type": "array",
+          "description": "Optional list of step ids that must complete before this step runs.",
+          "items": {
+            "type": "string",
+            "pattern": "^[a-z0-9]+(_[a-z0-9]+)*$"
+          },
+          "uniqueItems": true
+        },
+        "requires_approval": {
+          "type": "boolean",
+          "description": "If true, the workflow pauses for human approval before this step runs."
+        }
+      }
+    }
+  }
+}
diff --git a/src/commands/workflow.ts b/src/commands/workflow.ts
new file mode 100644
index 0000000..93f6434
--- /dev/null
+++ b/src/commands/workflow.ts
@@ -0,0 +1,188 @@
+import { mkdir, readFile, writeFile } from "fs/promises";
+import { join, resolve } from "path";
+import { discoverSkills } from "../skills.js";
+import { validateWorkflow } from "../utils/schemas.js";
+import { generateWorkflow, type LlmClient } from "../utils/workflow-generator.js";
+
+interface GenerateFlags { // parsed options for `gitclaw workflow generate`
+	dir: string; // agent directory; parseFlags defaults it to process.cwd()
+	prompt?: string; // natural-language description, or the refinement instruction when `refine` is set
+	refine?: string; // path of an existing workflow YAML; runGenerate resolves it against `dir`
+	model?: string; // "provider:model" spec forwarded to generateWorkflow
+	apiKey?: string; // explicit API key forwarded to generateWorkflow
+	dryRun: boolean; // when true the YAML is written to stdout and no file is created
+}
+
+const RED = (s: string) => `\x1b[31m${s}\x1b[0m`; // ANSI SGR 31 (red), then reset
+const GREEN = (s: string) => `\x1b[32m${s}\x1b[0m`; // ANSI SGR 32 (green), then reset
+const DIM = (s: string) => `\x1b[2m${s}\x1b[0m`; // ANSI SGR 2 (faint), then reset
+const BOLD = (s: string) => `\x1b[1m${s}\x1b[0m`; // ANSI SGR 1 (bold), then reset
+
+const MAX_RETRIES = 2; // re-generation attempts allowed after the first attempt fails validation
+
+function printHelp(): void { // print usage for the `workflow` command to stdout
+	console.log(`${BOLD("gitclaw workflow")} — generate SkillFlow workflows from natural language
+
+Usage:
+  gitclaw workflow generate [options]
+
+Options:
+  -d, --dir <path>        Agent directory (default: current directory)
+  -p, --prompt <text>     Natural-language description of the workflow (required)
+      --refine <file>     Refine an existing workflow YAML by applying --prompt as an instruction
+  -m, --model <spec>      LLM model in provider:model form (default: openai:gpt-4o)
+      --api-key <key>     API key for the provider (falls back to OPENAI_API_KEY or <PROVIDER>_API_KEY)
+      --dry-run           Print the generated YAML to stdout instead of writing a file
+  -h, --help              Show this help message
+
+Examples:
+  gitclaw workflow generate -p "every morning summarize unread emails and post to Slack"
+  gitclaw workflow generate -p "add a human approval step before the Slack post" --refine workflows/morning-digest.yaml
+`);
+}
+
+// Parse the tokens that follow `workflow generate` into GenerateFlags.
+// The first bare (non-dash) token is accepted as the prompt when -p was not
+// given. Exits with status 2 on an unknown option, and also when a
+// value-taking flag is the last token: that case used to store `undefined`,
+// which for --dir only surfaced later as a TypeError from path.resolve().
+function parseFlags(argv: string[]): GenerateFlags {
+	const flags: GenerateFlags = { dir: process.cwd(), dryRun: false };
+	// Flags that consume the next token, mapped to the field they populate.
+	type ValueField = "dir" | "prompt" | "refine" | "model" | "apiKey";
+	const valueFlags = new Map<string, ValueField>([
+		["-d", "dir"], ["--dir", "dir"],
+		["-p", "prompt"], ["--prompt", "prompt"],
+		["--refine", "refine"],
+		["-m", "model"], ["--model", "model"],
+		["--api-key", "apiKey"],
+	]);
+	// Single left-to-right pass; value flags advance `i` past their argument.
+	for (let i = 0; i < argv.length; i++) {
+		const a = argv[i];
+		const field = valueFlags.get(a);
+		if (field !== undefined) {
+			const value = argv[++i];
+			if (value === undefined) {
+				console.error(RED(`Missing value for ${a}`));
+				process.exit(2);
+			}
+			flags[field] = value;
+		} else if (a === "--dry-run") {
+			flags.dryRun = true;
+		} else if (a === "-h" || a === "--help") {
+			printHelp();
+			process.exit(0);
+		} else if (!a.startsWith("-") && flags.prompt === undefined) {
+			// Bare token with no prompt set yet: treat it as the prompt.
+			flags.prompt = a;
+		} else {
+			console.error(RED(`Unknown option: ${a}`));
+			process.exit(2);
+		}
+	}
+	return flags;
+}
+
+// Turn a workflow name into a safe file stem: lower-case it, replace every run
+// of characters outside [a-z0-9-] with "-", then tidy the hyphens.
+function slugify(name: string): string {
+	const lowered = name.toLowerCase().trim();
+	const hyphenated = lowered.replace(/[^a-z0-9-]+/g, "-");
+	const edgesStripped = hyphenated.replace(/^-+|-+$/g, "");
+	const collapsed = edgesStripped.replace(/-+/g, "-");
+	return collapsed === "" ? "workflow" : collapsed; // never yield an empty file name
+}
+
+export interface RunGenerateOptions { // input to runGenerate
+	flags: GenerateFlags; // parsed CLI options
+	llm?: LlmClient; // optional LLM client passed through to generateWorkflow
+}
+
+// Generate a workflow from flags.prompt (refining flags.refine when set),
+// validating each attempt and re-prompting with the validator's errors for up
+// to MAX_RETRIES extra attempts. Prints the YAML to stdout when flags.dryRun,
+// otherwise writes <dir>/workflows/<name>.yaml. Throws when the prompt is blank
+// or no attempt validates; file-system and LLM errors propagate to the caller.
+export async function runGenerate(opts: RunGenerateOptions): Promise<{ filePath?: string; yaml: string; }> {
+	const { flags } = opts;
+	const userPrompt = flags.prompt?.trim();
+	if (!userPrompt) throw new Error("--prompt is required");
+
+	const agentDir = resolve(flags.dir);
+	const skills = await discoverSkills(agentDir);
+	const previousWorkflow = flags.refine ? await readFile(resolve(agentDir, flags.refine), "utf-8") : undefined;
+
+	let promptForLlm = userPrompt;
+	let lastErrors: string[] = [];
+	let yaml = "";
+	let validatedName: string | undefined;
+
+	for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
+		console.error(DIM(attempt === 0 ? "Generating workflow..." : `Retry ${attempt}/${MAX_RETRIES} — fixing validation errors...`));
+		yaml = await generateWorkflow({
+			prompt: promptForLlm,
+			skills,
+			previousWorkflow,
+			model: flags.model,
+			apiKey: flags.apiKey,
+			llm: opts.llm,
+		});
+		const result = validateWorkflow(yaml);
+		if (result.valid) {
+			lastErrors = [];
+			validatedName = result.data?.name; // keep it: avoids re-parsing the YAML later
+			break;
+		}
+		lastErrors = result.errors;
+		// generateWorkflow builds a fresh message list on every call, so the
+		// retry prompt has to carry the rejected YAML as well as the errors.
+		promptForLlm =
+			`${userPrompt}\n\nThe previous attempt failed schema validation. Fix these errors and return the full YAML again:\n` +
+			result.errors.map((e) => `- ${e}`).join("\n") +
+			`\n\nPrevious attempt:\n${yaml}`;
+	}
+
+	if (lastErrors.length > 0) {
+		console.error(RED("\nWorkflow validation failed after retries:"));
+		for (const e of lastErrors) console.error(RED(`  - ${e}`));
+		console.error(DIM("\nLast generated YAML:\n"));
+		console.error(yaml);
+		throw new Error("Validation failed after retries");
+	}
+
+	const output = yaml.endsWith("\n") ? yaml : yaml + "\n";
+	if (flags.dryRun) {
+		process.stdout.write(output);
+		return { yaml };
+	}
+
+	const workflowsDir = join(agentDir, "workflows");
+	await mkdir(workflowsDir, { recursive: true });
+	const filePath = join(workflowsDir, `${slugify(validatedName ?? "workflow")}.yaml`);
+	await writeFile(filePath, output, "utf-8");
+	console.error(GREEN(`\nWrote workflow to ${filePath}`));
+	return { filePath, yaml };
+}
+
+export async function handleWorkflowCommand(argv: string[]): Promise<void> {
+	// Entry point for `gitclaw workflow ...`. argv is the raw process.argv tail
+	// starting at the 'workflow' token: argv[0] === "workflow", argv[1] is the sub-command.
+	const sub = argv[1];
+	if (!sub || sub === "-h" || sub === "--help") {
+		printHelp();
+		return;
+	}
+	if (sub !== "generate") { // "generate" is the only sub-command handled here
+		console.error(RED(`Unknown subcommand: ${sub}`));
+		printHelp();
+		process.exit(2);
+	}
+	const flags = parseFlags(argv.slice(2));
+	try {
+		await runGenerate({ flags });
+	} catch (err: any) {
+		console.error(RED(`\nError: ${err?.message ?? String(err)}`)); // report the failure and exit 1 instead of rethrowing
+		process.exit(1);
+	}
+}
diff --git a/src/index.ts b/src/index.ts
index 03e8c55..b5a0396 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -22,6 +22,7 @@ import { initLocalSession } from "./session.js";
 import type { LocalSession } from "./session.js";
 import { startVoiceServer } from "./voice/server.js";
 import { handlePluginCommand } from "./plugin-cli.js";
+import { handleWorkflowCommand } from "./commands/workflow.js";
 import { context as otelContext } from "@opentelemetry/api";
 import {
 	initTelemetry,
@@ -301,6 +302,12 @@ async function ensureRepo(dir: string, model?: string): Promise<string> {
 }
 
 async function main(): Promise<void> {
+	// Handle workflow subcommand: gitclaw workflow <generate|...>
+	if (process.argv[2] === "workflow") {
+		await handleWorkflowCommand(process.argv.slice(2));
+		return;
+	}
+
 	// Handle plugin subcommand: gitclaw plugin <install|list|remove|...>
 	if (process.argv[2] === "plugin") {
 		const allArgs = process.argv.slice(3);
diff --git a/src/utils/schemas.ts b/src/utils/schemas.ts
new file mode 100644
index 0000000..8e04e57
--- /dev/null
+++ b/src/utils/schemas.ts
@@ -0,0 +1,209 @@
+import { readFileSync, existsSync } from "fs";
+import { dirname, join, resolve } from "path";
+import { fileURLToPath } from "url";
+import yaml from "js-yaml";
+
+export interface WorkflowStep { // one entry of a workflow's `steps` list
+	id?: string; // optional identifier that other steps name in depends_on
+	skill: string; // skill the step invokes
+	prompt: string; // instruction passed to the skill
+	channel?: string; // optional output destination
+	depends_on?: string[]; // ids of the steps this one depends on
+	requires_approval?: boolean; // whether the step is gated on human approval
+}
+
+export interface WorkflowDef { // shape of a workflow document that passed validateWorkflow
+	name: string; // workflow identifier; runGenerate derives the output file name from it
+	description: string; // description of what the workflow does
+	steps: WorkflowStep[]; // steps in document order
+}
+
+export interface ValidationResult { // outcome of validateWorkflow
+	valid: boolean; // true when no issue was found
+	errors: string[]; // rendered issues; empty when `valid` is true
+	data?: WorkflowDef; // the parsed document; set only when `valid` is true
+}
+
+let cachedSchema: any = null; // parsed schema object, filled by loadWorkflowSchema()
+let cachedSchemaText: string | null = null; // raw schema file text, filled alongside cachedSchema
+
+// Locate workflow.schema.json on disk, starting from this module's directory.
+// Two fixed guesses are tried first (<here>/../../spec/schemas and
+// <here>/../../../spec/schemas); failing that, each of up to six directories
+// from <here> upward is probed for spec/schemas/. Throws when nothing exists.
+function resolveSchemaPath(): string {
+	const SCHEMA_PARTS = ["spec", "schemas", "workflow.schema.json"];
+	const here = dirname(fileURLToPath(import.meta.url));
+	const fixedGuesses = [
+		resolve(here, "..", "..", ...SCHEMA_PARTS),
+		resolve(here, "..", "..", "..", ...SCHEMA_PARTS),
+	];
+	const hit = fixedGuesses.find((p) => existsSync(p));
+	if (hit !== undefined) return hit;
+
+	let dir = here;
+	for (let depth = 0; depth < 6; depth++) {
+		const probe = join(dir, ...SCHEMA_PARTS);
+		if (existsSync(probe)) return probe;
+		const up = dirname(dir);
+		if (up === dir) break; // dirname() stopped changing: filesystem root reached
+		dir = up;
+	}
+	throw new Error(`Could not locate spec/schemas/workflow.schema.json relative to ${here}`);
+}
+
+export function loadWorkflowSchema(): any { // parsed schema, read from disk once and then cached
+	if (!cachedSchema) {
+		cachedSchemaText = readFileSync(resolveSchemaPath(), "utf-8");
+		cachedSchema = JSON.parse(cachedSchemaText);
+	}
+	return cachedSchema;
+}
+
+export function getWorkflowSchemaText(): string { // raw schema file text, loading it on first use
+	if (!cachedSchemaText) {
+		loadWorkflowSchema();
+	}
+	return cachedSchemaText!;
+}
+
+// Like `typeof`, except null is reported as "null" and arrays as "array".
+function typeOf(v: any): string {
+	if (v === null) return "null";
+	return Array.isArray(v) ? "array" : typeof v;
+}
+
+// True when v's typeOf() kind is in `expected`; "integer" additionally accepts whole numbers.
+function matchesType(v: any, expected: string | string[]): boolean {
+	const allowed = typeof expected === "string" ? [expected] : expected;
+	return allowed.includes(typeOf(v)) || (allowed.includes("integer") && Number.isInteger(v));
+}
+
+interface Issue { // one validation problem collected by validateAgainst / validateWorkflow
+	path: string; // location in the document, e.g. "steps[0].skill"; an empty path is omitted when errors are rendered
+	message: string; // human-readable description of the problem
+}
+
+// Minimal JSON Schema checker covering the keywords the workflow schema uses:
+// local $ref, type, required, properties, additionalProperties, minItems,
+// items, uniqueItems, minLength and pattern. Findings are appended to `issues`;
+// nothing is returned. `path` locates `data` in the document ("" at the root)
+// and `root` is the full schema against which "#/..." refs are resolved.
+function validateAgainst(data: any, schema: any, path: string, root: any, issues: Issue[]): void {
+	// Own-key test. Plain `in` / `props[key]` also see Object.prototype, which let document
+	// keys such as "constructor" or "toString" bypass `additionalProperties: false`.
+	const owns = (obj: any, key: string): boolean => Object.prototype.hasOwnProperty.call(obj, key);
+	// Resolve $ref
+	if (schema && typeof schema === "object" && schema.$ref) {
+		const ref = schema.$ref as string;
+		if (!ref.startsWith("#/")) {
+			issues.push({ path, message: `unsupported $ref "${ref}" (only local refs are supported)` });
+			return;
+		}
+		let resolved: any = root;
+		for (const seg of ref.slice(2).split("/")) {
+			resolved = resolved?.[seg];
+		}
+		if (!resolved) {
+			issues.push({ path, message: `cannot resolve $ref "${ref}"` });
+			return;
+		}
+		validateAgainst(data, resolved, path, root, issues);
+		return;
+	}
+	if (!schema || typeof schema !== "object") return;
+
+	if (schema.type && !matchesType(data, schema.type)) {
+		issues.push({ path, message: `expected type ${Array.isArray(schema.type) ? schema.type.join("|") : schema.type}, got ${typeOf(data)}` });
+		return;
+	}
+
+	const kind = typeOf(data);
+	const where = path || "(root)";
+	if (kind === "object") {
+		const required: string[] = Array.isArray(schema.required) ? schema.required : [];
+		for (const key of required) {
+			if (!owns(data, key)) issues.push({ path: where, message: `missing required property "${key}"` });
+		}
+
+		const props = schema.properties ?? {};
+		const additionalAllowed = schema.additionalProperties !== false;
+		for (const key of Object.keys(data)) {
+			if (owns(props, key) && props[key]) {
+				validateAgainst(data[key], props[key], path ? `${path}.${key}` : key, root, issues);
+			} else if (!additionalAllowed) {
+				issues.push({ path: where, message: `unknown property "${key}"` });
+			}
+		}
+	} else if (kind === "array") {
+		if (schema.minItems != null && data.length < schema.minItems) {
+			issues.push({ path: where, message: `array must have at least ${schema.minItems} item(s), got ${data.length}` });
+		}
+		if (schema.items) {
+			for (let i = 0; i < data.length; i++) {
+				validateAgainst(data[i], schema.items, `${path}[${i}]`, root, issues);
+			}
+		}
+		if (schema.uniqueItems === true) {
+			const seen = new Set<string>();
+			for (let i = 0; i < data.length; i++) {
+				const key = JSON.stringify(data[i]);
+				if (seen.has(key)) issues.push({ path: `${path}[${i}]`, message: `duplicate item` });
+				seen.add(key);
+			}
+		}
+	} else if (kind === "string") {
+		if (schema.minLength != null && data.length < schema.minLength) {
+			issues.push({ path: where, message: `string must be at least ${schema.minLength} character(s)` });
+		}
+		if (schema.pattern && !new RegExp(schema.pattern).test(data)) {
+			issues.push({ path: where, message: `value "${data}" does not match pattern ${schema.pattern}` });
+		}
+	}
+}
+
+// Parse `yamlText` and validate it as a SkillFlow workflow: YAML syntax first,
+// then the JSON Schema, then cross-step rules (step ids are unique, every
+// depends_on entry names a declared id). Invalid input is reported through
+// `errors` rather than thrown; `data` is set only when `valid` is true.
+export function validateWorkflow(yamlText: string): ValidationResult {
+	let parsed: unknown;
+	try {
+		parsed = yaml.load(yamlText);
+	} catch (err: any) {
+		return { valid: false, errors: [`YAML parse error: ${err?.message ?? String(err)}`] };
+	}
+	if (parsed === null || parsed === undefined) {
+		return { valid: false, errors: ["workflow is empty"] };
+	}
+	if (typeof parsed !== "object" || Array.isArray(parsed)) {
+		return { valid: false, errors: [`workflow must be an object, got ${typeOf(parsed)}`] };
+	}
+
+	const schema = loadWorkflowSchema();
+	const issues: Issue[] = [];
+	validateAgainst(parsed, schema, "", schema, issues);
+
+	const data = parsed as any;
+	if (Array.isArray(data.steps)) {
+		// Pass 1 collects ids, rejecting repeats (they would make depends_on ambiguous).
+		const declared = new Set<string>();
+		data.steps.forEach((step: any, i: number) => {
+			if (!step || typeof step.id !== "string") return;
+			if (declared.has(step.id)) {
+				issues.push({ path: `steps[${i}].id`, message: `duplicate step id "${step.id}"` });
+			}
+			declared.add(step.id);
+		});
+		// Pass 2 checks references, so a step may depend on one declared later.
+		data.steps.forEach((step: any, i: number) => {
+			if (!step || !Array.isArray(step.depends_on)) return;
+			for (const dep of step.depends_on) {
+				if (typeof dep === "string" && !declared.has(dep)) {
+					issues.push({ path: `steps[${i}].depends_on`, message: `references unknown step id "${dep}"` });
+				}
+			}
+		});
+	}
+	if (issues.length === 0) return { valid: true, errors: [], data: parsed as WorkflowDef };
+	return { valid: false, errors: issues.map((i) => (i.path ? `${i.path}: ${i.message}` : i.message)) };
+}
diff --git a/src/utils/workflow-generator.ts b/src/utils/workflow-generator.ts
new file mode 100644
index 0000000..63f6ca5
--- /dev/null
+++ b/src/utils/workflow-generator.ts
@@ -0,0 +1,197 @@
+import type { SkillMetadata } from "../skills.js";
+import { getWorkflowSchemaText } from "./schemas.js";
+
+export type LlmRole = "system" | "user" | "assistant";
+
+export interface LlmMessage { // one turn of the chat transcript handed to an LlmClient
+	role: LlmRole; // who is speaking: "system", "user" or "assistant"
+	content: string; // the turn's text
+}
+
+export interface LlmCallOptions { // per-call settings handed to an LlmClient
+	model: string; // "provider:model-id" spec, e.g. "openai:gpt-4o"
+	temperature?: number; // sampling temperature; defaultLlmClient uses 0 when omitted
+	apiKey?: string; // explicit API key; defaultLlmClient otherwise reads it from the environment
+}
+
+export type LlmClient = (messages: LlmMessage[], opts: LlmCallOptions) => Promise<string>; // sends a transcript, resolves with the model's raw text reply
+
+export interface GenerateWorkflowOptions { // input to generateWorkflow / buildMessages
+	prompt: string; // user request, or the refinement instruction when previousWorkflow is set
+	skills: SkillMetadata[]; // skills listed in the system prompt
+	previousWorkflow?: string; // existing workflow YAML; a non-blank value switches buildMessages to refine mode
+	model?: string; // "provider:model-id"; generateWorkflow falls back to DEFAULT_MODEL
+	apiKey?: string; // explicit API key forwarded to the LLM client
+	llm?: LlmClient; // client override; generateWorkflow falls back to defaultLlmClient
+}
+
+const DEFAULT_MODEL = "openai:gpt-4o";
+
+const SYSTEM_RULES = `Rules (MUST follow):
+- Output ONLY valid YAML. No markdown fences, no prose, no commentary before or after.
+- The output MUST validate against the schema above.
+- "name" must be kebab-case (lowercase letters, digits, and single hyphens).
+- Every step "skill" must reference an installed skill from the list below, unless the step is human approval — in that case use skill: "approval" and set requires_approval: true.
+- When multiple steps need ordering beyond top-to-bottom, give them snake_case "id" values and use "depends_on" to express the dependency.
+- Keep "prompt" fields concrete and self-contained — a downstream agent will read them verbatim.
+- Do not invent fields not in the schema.`;
+
+const FEW_SHOT_USER_1 = "Every morning, summarize my unread emails and post the summary to Slack.";
+
+const FEW_SHOT_ASSISTANT_1 = `name: morning-email-digest
+description: Summarize unread emails and post the digest to Slack each morning.
+steps:
+  - skill: gmail
+    prompt: Fetch all unread emails from the last 24 hours and return subject, sender, and a one-sentence summary for each.
+  - skill: summarize
+    prompt: Compose a single-paragraph digest of the unread emails, grouped by sender priority.
+  - skill: slack
+    prompt: Post the digest to the configured channel.
+    channel: "#daily-digest"
+`;
+
+
+const FEW_SHOT_USER_2 = "Pull yesterday's sales data, get sign-off, then send the report to the team.";
+
+const FEW_SHOT_ASSISTANT_2 = `name: daily-sales-report
+description: Pull sales data, require human approval, then distribute the report.
+steps:
+  - id: pull_data
+    skill: analytics
+    prompt: Pull yesterday's sales totals broken down by region and product line.
+  - id: approve
+    skill: approval
+    prompt: Review the pulled sales data for accuracy and approve distribution.
+    requires_approval: true
+    depends_on: [pull_data]
+  - id: send_report
+    skill: email
+    prompt: Send the approved sales report to the sales-leadership distribution list.
+    depends_on: [approve]
+`;
+
+function formatSkillsForPrompt(skills: SkillMetadata[]): string { // "- name: description" lines for the system prompt
+	const bullets = skills.map((skill) => `- ${skill.name}: ${skill.description}`);
+	// With nothing installed, tell the model to fall back to generic skill names.
+	if (bullets.length === 0) return "(no installed skills detected — use generic skill names that match the user's intent, e.g. gmail, slack, summarize)";
+	return bullets.join("\n");
+}
+
+export function buildSystemPrompt(skills: SkillMetadata[]): string { // system prompt: role, schema text, skill list, rules
+	return [
+		"You are a workflow builder for GitClaw SkillFlow.",
+		"",
+		"Your job is to translate the user's natural-language description into a YAML workflow that conforms exactly to this JSON Schema:",
+		"",
+		"<schema>",
+		getWorkflowSchemaText(),
+		"</schema>",
+		"",
+		"Installed skills the workflow may invoke:",
+		"<skills>",
+		formatSkillsForPrompt(skills),
+		"</skills>",
+		"",
+		SYSTEM_RULES,
+	].join("\n");
+}
+
+// Assemble the chat transcript sent to the LLM: the system prompt, the two
+// few-shot user/assistant exchanges, then the real request as the last user turn.
+export function buildMessages(opts: GenerateWorkflowOptions): LlmMessage[] {
+	const system: LlmMessage = { role: "system", content: buildSystemPrompt(opts.skills) };
+
+	// Refine mode (a non-blank previousWorkflow): embed the current YAML and
+	// present the prompt as an instruction to apply to it.
+	const current = opts.previousWorkflow?.trim();
+	const request = current
+		? `Here is the current workflow:\n\n${current}\n\nApply this refinement: ${opts.prompt}\n\nReturn the complete updated workflow as YAML — not a diff.`
+		: opts.prompt;
+	return [
+		system,
+		{ role: "user", content: FEW_SHOT_USER_1 },
+		{ role: "assistant", content: FEW_SHOT_ASSISTANT_1 },
+		{ role: "user", content: FEW_SHOT_USER_2 },
+		{ role: "assistant", content: FEW_SHOT_ASSISTANT_2 },
+		{ role: "user", content: request },
+	];
+}
+
+// Matches text that is wholly one fenced block (optionally tagged yaml/yml); group 1 is the fence body.
+const FENCE_RE = /^\s*```(?:ya?ml)?\s*\n([\s\S]*?)\n```\s*$/i;
+export function stripCodeFences(raw: string): string { // fence body if wholly fenced, else `raw` trimmed
+	const fenced = FENCE_RE.exec(raw);
+	return fenced === null ? raw.trim() : fenced[1];
+}
+
+// Default LlmClient: sends one composed prompt through @mariozechner/pi-agent-core
+// and returns the concatenated assistant text. `opts.model` must be
+// "provider:model-id". Key precedence: opts.apiKey, <PROVIDER>_API_KEY, OPENAI_API_KEY.
+async function defaultLlmClient(messages: LlmMessage[], opts: LlmCallOptions): Promise<string> {
+	const [providerRaw, ...modelParts] = opts.model.split(":");
+	const provider = providerRaw?.trim();
+	const modelId = modelParts.join(":").trim();
+	if (!provider || !modelId) {
+		throw new Error(`Invalid model spec "${opts.model}". Expected "provider:model-id" (e.g. "openai:gpt-4o").`);
+	}
+	const envVar = `${provider.toUpperCase()}_API_KEY`;
+	const apiKey = opts.apiKey || process.env[envVar] || process.env.OPENAI_API_KEY;
+	if (!apiKey) {
+		throw new Error(
+			`No API key found. Pass --api-key or set ${envVar} (or OPENAI_API_KEY) in your environment.`,
+		);
+	}
+
+	const [{ getModel }, { Agent }] = await Promise.all([
+		import("@mariozechner/pi-ai" as any) as Promise<any>,
+		import("@mariozechner/pi-agent-core" as any) as Promise<any>,
+	]);
+	// apiKey is not passed to getModel/Agent; the env var is the only hand-off
+	// visible here. Always export the resolved key: the old "only if unset"
+	// guard made an explicit opts.apiKey lose to a pre-existing env value.
+	process.env[envVar] = apiKey;
+
+	const model = getModel(provider as any, modelId as any);
+	const systemMessage = messages.find((m) => m.role === "system")?.content ?? "";
+	const conversation = messages.filter((m) => m.role !== "system");
+
+	const agent = new Agent({
+		initialState: {
+			systemPrompt: systemMessage,
+			model,
+			tools: [],
+			temperature: opts.temperature ?? 0,
+			maxTokens: 4096,
+		},
+	});
+
+	let collected = "";
+	agent.subscribe((event: any) => {
+		if (event.type === "message_end" && event.message?.role === "assistant") {
+			for (const block of event.message.content) {
+				if (block.type === "text") collected += block.text;
+			}
+		}
+	});
+
+	// Replay prior assistant/user turns as a single composed prompt so we don't
+	// have to drive Agent through multiple chat turns. The few-shot pairs are
+	// preserved as part of the prompt text so the model still sees them.
+	const composed = conversation
+		.map((m) => `[${m.role.toUpperCase()}]\n${m.content}`)
+		.join("\n\n");
+
+	await agent.prompt(composed);
+	return collected;
+}
+
+// Ask the LLM (opts.llm, else the built-in client) for a workflow and return
+// its reply with code fences stripped. Rejects when opts.prompt is blank.
+export async function generateWorkflow(opts: GenerateWorkflowOptions): Promise<string> {
+	if (!opts.prompt?.trim()) throw new Error("generateWorkflow: prompt is required");
+
+	const callLlm = opts.llm ?? defaultLlmClient;
+	const callOptions = { model: opts.model ?? DEFAULT_MODEL, apiKey: opts.apiKey, temperature: 0 };
+	const reply = await callLlm(buildMessages(opts), callOptions);
+	return stripCodeFences(reply);
+}
diff --git a/test/ts-resolve-hook.mjs b/test/ts-resolve-hook.mjs
new file mode 100644
index 0000000..3fa4398
--- /dev/null
+++ b/test/ts-resolve-hook.mjs
@@ -0,0 +1,27 @@
+// ESM resolve hook: if a relative `.js` import has no matching `.js` file
+// on disk, retry with the `.ts` extension. Lets node --experimental-strip-types
+// run TypeScript sources whose internal imports follow the Node16 `.js` style.
+
+import { existsSync } from "node:fs";
+import { fileURLToPath } from "node:url";
+
+export async function resolve(specifier, context, nextResolve) {
+	// Only relative specifiers ending in ".js" are candidates for the .ts retry.
+	const isRelativeJs = specifier.startsWith(".") && specifier.endsWith(".js");
+	if (!isRelativeJs) return nextResolve(specifier, context);
+	let jsMiss;
+	try {
+		return await nextResolve(specifier, context);
+	} catch (err) {
+		if (err?.code !== "ERR_MODULE_NOT_FOUND") throw err;
+		jsMiss = err;
+	}
+	try {
+		const tsResult = await nextResolve(specifier.slice(0, -3) + ".ts", context);
+		const tsUrl = tsResult?.url;
+		if (tsUrl?.startsWith("file://") && existsSync(fileURLToPath(tsUrl))) return tsResult;
+	} catch {
+		// the .ts lookup failed too; report the original .js miss below
+	}
+	throw jsMiss;
+}
diff --git a/test/workflow-generator.test.ts b/test/workflow-generator.test.ts
new file mode 100644
index 0000000..a9f201f
--- /dev/null
+++ b/test/workflow-generator.test.ts
@@ -0,0 +1,222 @@
+// Unit tests for src/utils/workflow-generator.ts and the retry loop in
+// src/commands/workflow.ts. The LLM is fully mocked — no network calls.
+
+import test from "node:test";
+import assert from "node:assert/strict";
+import { mkdtemp, readFile, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+
+import {
+	buildMessages,
+	buildSystemPrompt,
+	stripCodeFences,
+	generateWorkflow,
+	type LlmClient,
+	type LlmMessage,
+} from "../src/utils/workflow-generator.ts";
+import { runGenerate } from "../src/commands/workflow.ts";
+import type { SkillMetadata } from "../src/skills.ts";
+
+const SKILLS: SkillMetadata[] = [
+	{ name: "gmail", description: "Read and send email", directory: "/x/skills/gmail", filePath: "/x/skills/gmail/SKILL.md" },
+	{ name: "slack", description: "Post to Slack", directory: "/x/skills/slack", filePath: "/x/skills/slack/SKILL.md" },
+	{ name: "summarize", description: "Summarize text", directory: "/x/skills/summarize", filePath: "/x/skills/summarize/SKILL.md" },
+];
+
+const VALID_YAML = `name: morning-digest
+description: Summarize unread emails and post to Slack each morning.
+steps:
+  - skill: gmail
+    prompt: Fetch unread emails.
+  - skill: summarize
+    prompt: Compose a digest.
+  - skill: slack
+    prompt: Post the digest.
+    channel: "#daily-digest"
+`;
+
+const INVALID_YAML_MISSING_NAME = `description: no name here
+steps:
+  - skill: gmail
+    prompt: hi
+`;
+
+// ── buildSystemPrompt / buildMessages ──────────────────────────────────
+
+test("buildSystemPrompt embeds the schema text and the skill list", () => {
+	const sys = buildSystemPrompt(SKILLS);
+	assert.ok(sys.includes("<schema>"), "system prompt missing <schema> tag");
+	assert.ok(sys.includes('"$id": "https://gitclaw.dev/spec/workflow.schema.json"'), "system prompt missing schema $id");
+	assert.ok(sys.includes("- gmail: Read and send email"), "system prompt missing gmail skill");
+	assert.ok(sys.includes("- slack: Post to Slack"), "system prompt missing slack skill");
+	assert.ok(sys.includes("Output ONLY valid YAML"), "system prompt missing rule about raw YAML");
+});
+
+test("buildSystemPrompt handles empty skill list with a fallback hint", () => {
+	const sys = buildSystemPrompt([]);
+	assert.ok(sys.includes("no installed skills detected"), "system prompt missing empty-skills fallback");
+});
+
+test("buildMessages includes two few-shot pairs and the user prompt", () => {
+	const messages = buildMessages({ prompt: "Do the thing", skills: SKILLS });
+	assert.equal(messages[0].role, "system");
+	assert.equal(messages[1].role, "user");
+	assert.equal(messages[2].role, "assistant");
+	assert.equal(messages[3].role, "user");
+	assert.equal(messages[4].role, "assistant");
+	assert.equal(messages[5].role, "user");
+	assert.equal(messages[5].content, "Do the thing");
+});
+
+test("buildMessages wraps refine-mode prompts with the previous YAML and instruction", () => {
+	const messages = buildMessages({
+		prompt: "Add an approval step before the Slack post.",
+		skills: SKILLS,
+		previousWorkflow: VALID_YAML,
+	});
+	const last = messages[messages.length - 1];
+	assert.equal(last.role, "user");
+	assert.ok(last.content.includes("Here is the current workflow"));
+	assert.ok(last.content.includes("Add an approval step before the Slack post."));
+	assert.ok(last.content.includes("morning-digest"));
+	assert.ok(last.content.includes("Return the complete updated workflow as YAML — not a diff."));
+});
+
+// ── stripCodeFences ────────────────────────────────────────────────────
+
+test("stripCodeFences removes generic fenced code", () => {
+	const input = "```\nname: foo\n```\n";
+	assert.equal(stripCodeFences(input), "name: foo");
+});
+
+test("stripCodeFences removes yaml-tagged fences", () => {
+	const input = "```yaml\nname: foo\n```";
+	assert.equal(stripCodeFences(input), "name: foo");
+});
+
+test("stripCodeFences leaves unfenced YAML alone", () => {
+	const input = "name: foo\n";
+	assert.equal(stripCodeFences(input), "name: foo");
+});
+
+// ── generateWorkflow with an injected LLM ──────────────────────────────
+
+test("generateWorkflow returns the LLM output after stripping fences", async () => {
+	const captured: { messages: LlmMessage[] } = { messages: [] };
+	const llm: LlmClient = async (messages) => {
+		captured.messages = messages;
+		return "```yaml\n" + VALID_YAML + "```";
+	};
+	const out = await generateWorkflow({
+		prompt: "summarize emails and post to slack",
+		skills: SKILLS,
+		llm,
+	});
+	assert.equal(out.trim().startsWith("name: morning-digest"), true);
+	assert.equal(captured.messages.length, 6);
+	assert.equal(captured.messages[0].role, "system");
+});
+
+test("generateWorkflow throws if prompt is empty", async () => {
+	await assert.rejects(
+		() => generateWorkflow({ prompt: "   ", skills: SKILLS, llm: async () => VALID_YAML }),
+		/prompt is required/,
+	);
+});
+
+// ── Retry loop in runGenerate ──────────────────────────────────────────
+
+test("runGenerate retries when validation fails, then writes the file when the second attempt is valid", async () => {
+	const dir = await mkdtemp(join(tmpdir(), "gitclaw-test-"));
+	try {
+		let calls = 0;
+		const llm: LlmClient = async (messages) => {
+			calls++;
+			const userMsg = messages[messages.length - 1].content;
+			if (calls === 1) return INVALID_YAML_MISSING_NAME;
+			// Second attempt: ensure the retry prompt included the validation error.
+			assert.ok(userMsg.includes("schema validation"), `retry user message did not mention validation: ${userMsg}`);
+			return VALID_YAML;
+		};
+		const result = await runGenerate({
+			flags: {
+				dir,
+				prompt: "summarize unread emails and post to Slack",
+				dryRun: false,
+			},
+			llm,
+		});
+		assert.equal(calls, 2);
+		assert.ok(result.filePath, "expected a written file path");
+		assert.equal(result.filePath!.endsWith(join("workflows", "morning-digest.yaml")), true); // join(): the separator is "\" on Windows
+		const written = await readFile(result.filePath!, "utf-8");
+		assert.ok(written.includes("name: morning-digest"));
+	} finally {
+		await rm(dir, { recursive: true, force: true });
+	}
+});
+
+test("runGenerate honours --dry-run by returning YAML without writing", async () => {
+	const dir = await mkdtemp(join(tmpdir(), "gitclaw-test-"));
+	try {
+		const llm: LlmClient = async () => VALID_YAML;
+		const result = await runGenerate({
+			flags: { dir, prompt: "x", dryRun: true },
+			llm,
+		});
+		assert.equal(result.filePath, undefined);
+		assert.ok(result.yaml.includes("name: morning-digest"));
+		// The workflow file must not exist: reading it has to reject.
+		await assert.rejects(() => readFile(join(dir, "workflows", "morning-digest.yaml"), "utf-8"));
+	} finally {
+		await rm(dir, { recursive: true, force: true });
+	}
+});
+
+test("runGenerate gives up after MAX_RETRIES and throws", async () => {
+	const dir = await mkdtemp(join(tmpdir(), "gitclaw-test-"));
+	try {
+		let calls = 0;
+		const llm: LlmClient = async () => {
+			calls++;
+			return INVALID_YAML_MISSING_NAME;
+		};
+		await assert.rejects(
+			() => runGenerate({ flags: { dir, prompt: "x", dryRun: true }, llm }),
+			/Validation failed after retries/,
+		);
+		assert.equal(calls, 3); // 1 initial + 2 retries
+	} finally {
+		await rm(dir, { recursive: true, force: true });
+	}
+});
+
+test("runGenerate refine mode reads previous YAML and passes it to the LLM", async () => {
+	const dir = await mkdtemp(join(tmpdir(), "gitclaw-test-"));
+	try {
+		const { writeFile, mkdir } = await import("node:fs/promises");
+		await mkdir(join(dir, "workflows"), { recursive: true });
+		const refinePath = join(dir, "workflows", "starter.yaml");
+		await writeFile(refinePath, VALID_YAML, "utf-8");
+
+		let observed = "";
+		const llm: LlmClient = async (messages) => {
+			observed = messages[messages.length - 1].content;
+			return VALID_YAML;
+		};
+		await runGenerate({
+			flags: {
+				dir,
+				prompt: "add an approval step before slack",
+				refine: "workflows/starter.yaml",
+				dryRun: true,
+			},
+			llm,
+		});
+		assert.ok(observed.includes("Here is the current workflow"));
+		assert.ok(observed.includes("add an approval step before slack"));
+	} finally {
+		await rm(dir, { recursive: true, force: true });
+	}
+});
diff --git a/test/workflow-validator.test.ts b/test/workflow-validator.test.ts
new file mode 100644
index 0000000..80f9699
--- /dev/null
+++ b/test/workflow-validator.test.ts
@@ -0,0 +1,153 @@
+// Unit tests for src/utils/schemas.ts — the SkillFlow workflow validator.
+
+import test from "node:test";
+import assert from "node:assert/strict";
+
+import { validateWorkflow, loadWorkflowSchema } from "../src/utils/schemas.ts";
+
+const VALID_YAML = `name: morning-digest
+description: Summarize unread emails and post to Slack each morning.
+steps:
+  - skill: gmail
+    prompt: Fetch unread emails from the last 24h.
+  - skill: summarize
+    prompt: Compose a digest grouped by sender priority.
+  - skill: slack
+    prompt: Post the digest to the team channel.
+    channel: "#daily-digest"
+`; // Well-formed fixture: three steps (gmail, summarize, slack); the last one also sets `channel`.
+
+test("loadWorkflowSchema returns the parsed schema with required top-level keys", () => {
+	const loaded = loadWorkflowSchema();
+	assert.equal(typeof loaded, "object");
+	assert.deepEqual(loaded.required, ["name", "description", "steps"]); // top-level required keys
+	const stepRequired = loaded.definitions.step.required; // keys each step must declare
+	assert.deepEqual([stepRequired.includes("skill"), stepRequired.includes("prompt")], [true, true]);
+});
+
+test("validateWorkflow accepts a well-formed workflow", () => {
+	const { valid, errors, data } = validateWorkflow(VALID_YAML);
+	assert.equal(valid, true);
+	assert.deepEqual(errors, []); // a valid document reports no errors at all
+	assert.equal(data?.name, "morning-digest");
+	assert.equal(data?.steps.length, 3); // VALID_YAML declares three steps
+});
+
+test("validateWorkflow rejects missing name", () => {
+	const withoutName = `description: foo
+steps:
+  - skill: gmail
+    prompt: do thing
+`;
+	const { valid, errors } = validateWorkflow(withoutName); // document has no top-level `name`
+	assert.equal(valid, false);
+	assert.ok(errors.some((msg) => msg.includes('missing required property "name"')), `errors: ${JSON.stringify(errors)}`);
+});
+
+test("validateWorkflow rejects non-kebab-case name", () => {
+	const camelCased = `name: MyWorkflow
+description: foo
+steps:
+  - skill: gmail
+    prompt: do thing
+`;
+	const { valid, errors } = validateWorkflow(camelCased); // "MyWorkflow" contains uppercase letters
+	assert.equal(valid, false);
+	assert.ok(errors.some((msg) => msg.includes("pattern")), `errors: ${JSON.stringify(errors)}`);
+});
+
+test("validateWorkflow rejects empty steps array", () => {
+	const noSteps = `name: empty-flow
+description: nothing
+steps: []
+`;
+	const { valid, errors } = validateWorkflow(noSteps); // the schema sets minItems: 1 on steps
+	assert.equal(valid, false);
+	assert.ok(errors.some((msg) => msg.includes("at least 1")), `errors: ${JSON.stringify(errors)}`);
+});
+
+test("validateWorkflow rejects a step missing required prompt", () => {
+	// The single step names a skill but carries no prompt.
+	const promptless = `name: bad-step
+description: missing prompt
+steps:
+  - skill: gmail
+`;
+	const { valid, errors } = validateWorkflow(promptless);
+	assert.equal(valid, false);
+	// At least one reported error has to name the missing property.
+	const mentionsPrompt = errors.some((msg) => msg.includes('missing required property "prompt"'));
+	assert.ok(mentionsPrompt, `errors: ${JSON.stringify(errors)}`);
+});
+
+test("validateWorkflow rejects unknown step property", () => {
+	// The step is otherwise complete; only the `nonsense` key is unexpected.
+	const withExtraKey = `name: extra-prop
+description: bad
+steps:
+  - skill: gmail
+    prompt: do thing
+    nonsense: true
+`;
+	const { valid, errors } = validateWorkflow(withExtraKey);
+	assert.equal(valid, false);
+	// At least one reported error has to name the offending key.
+	const mentionsKey = errors.some((msg) => msg.includes('unknown property "nonsense"'));
+	assert.ok(mentionsKey, `errors: ${JSON.stringify(errors)}`);
+});
+
+test("validateWorkflow flags depends_on referencing a missing id", () => {
+	// Only step id "a" is declared, yet the second step depends on "does_not_exist".
+	const danglingDep = `name: bad-deps
+description: dangling dep
+steps:
+  - id: a
+    skill: gmail
+    prompt: fetch
+  - skill: slack
+    prompt: post
+    depends_on: [does_not_exist]
+`;
+	const { valid, errors } = validateWorkflow(danglingDep);
+	assert.equal(valid, false);
+	// At least one reported error has to name the unresolved id.
+	const mentionsId = errors.some((msg) => msg.includes('references unknown step id "does_not_exist"'));
+	assert.ok(mentionsId, `errors: ${JSON.stringify(errors)}`);
+});
+
+test("validateWorkflow accepts approval step with requires_approval", () => {
+	const approvalFlow = `name: approval-flow
+description: needs sign-off
+steps:
+  - id: pull
+    skill: analytics
+    prompt: Pull data.
+  - id: approve
+    skill: approval
+    prompt: Approve distribution.
+    requires_approval: true
+    depends_on: [pull]
+  - skill: email
+    prompt: Send report.
+    depends_on: [approve]
+`;
+	const outcome = validateWorkflow(approvalFlow); // each depends_on names an id declared by an earlier step
+	assert.equal(outcome.valid, true, `errors: ${JSON.stringify(outcome.errors)}`);
+});
+
+test("validateWorkflow surfaces YAML parse errors", () => {
+	const yaml = `name: bad
+description: : :
+steps:
+  - skill: [unterminated
+`;
+	const r = validateWorkflow(yaml); // input has stray colons and an unclosed `[` flow sequence
+	assert.equal(r.valid, false); // the first reported error is expected to be the parse failure
+	assert.ok(r.errors[0]?.startsWith("YAML parse error"), `errors: ${JSON.stringify(r.errors)}`); // `?.`: an empty list fails the assertion instead of throwing a TypeError
+});
+
+test("validateWorkflow rejects empty document", () => {
+	const r = validateWorkflow(""); // zero-length input
+	assert.equal(r.valid, false); // the first reported error is expected to mention "empty"
+	assert.ok(r.errors[0]?.includes("empty"), `errors: ${JSON.stringify(r.errors)}`); // `?.` plus message: an empty list fails with diagnostics instead of a TypeError
+});