diff --git a/package.json b/package.json
index 5fc3dd6..3610080 100644
--- a/package.json
+++ b/package.json
@@ -40,7 +40,7 @@
     "build": "tsc && cp src/voice/ui.html dist/voice/",
     "dev": "tsc --watch",
     "start": "node dist/index.js",
-    "test": "node --test test/*.test.ts --experimental-strip-types"
+    "test": "node --experimental-strip-types --experimental-loader=./test/ts-resolve-hook.mjs --no-warnings --test test/*.test.ts"
   },
   "engines": {
     "node": ">=20"
diff --git a/spec/schemas/workflow.schema.json b/spec/schemas/workflow.schema.json
new file mode 100644
index 0000000..9f0c98b
--- /dev/null
+++ b/spec/schemas/workflow.schema.json
@@ -0,0 +1,69 @@
+{
+  "$schema": "http://json-schema.org/draft-07/schema#",
+  "$id": "https://gitclaw.dev/spec/workflow.schema.json",
+  "title": "GitClaw SkillFlow Workflow",
+  "description": "A SkillFlow workflow: a named sequence of steps, where each step invokes a skill with a prompt. Runtime semantics are defined by src/workflows.ts.",
+  "type": "object",
+  "additionalProperties": false,
+  "required": ["name", "description", "steps"],
+  "properties": {
+    "name": {
+      "type": "string",
+      "description": "Kebab-case identifier for the workflow. Used as the file name.",
+      "pattern": "^[a-z0-9]+(-[a-z0-9]+)*$"
+    },
+    "description": {
+      "type": "string",
+      "description": "One-line description of what the workflow does.",
+      "minLength": 1
+    },
+    "steps": {
+      "type": "array",
+      "description": "Ordered list of steps. Steps execute top-to-bottom.",
+      "minItems": 1,
+      "items": { "$ref": "#/definitions/step" }
+    }
+  },
+  "definitions": {
+    "step": {
+      "type": "object",
+      "additionalProperties": false,
+      "required": ["skill", "prompt"],
+      "properties": {
+        "id": {
+          "type": "string",
+          "description": "Optional snake_case identifier for the step. Used when other steps reference this one via depends_on.",
+          "pattern": "^[a-z0-9]+(_[a-z0-9]+)*$"
+        },
+        "skill": {
+          "type": "string",
+          "description": "Name of an installed skill (kebab-case) the step will invoke. Must match an entry in the agent's skills/ directory, or 'approval' for a human-review step.",
+          "pattern": "^[a-z0-9]+(-[a-z0-9]+)*$"
+        },
+        "prompt": {
+          "type": "string",
+          "description": "The natural-language instruction passed to the skill for this step.",
+          "minLength": 1
+        },
+        "channel": {
+          "type": "string",
+          "description": "Optional channel/destination for the step output (e.g. a Slack channel name).",
+          "minLength": 1
+        },
+        "depends_on": {
+          "type": "array",
+          "description": "Optional list of step ids that must complete before this step runs.",
+          "items": {
+            "type": "string",
+            "pattern": "^[a-z0-9]+(_[a-z0-9]+)*$"
+          },
+          "uniqueItems": true
+        },
+        "requires_approval": {
+          "type": "boolean",
+          "description": "If true, the workflow pauses for human approval before this step runs."
+        }
+      }
+    }
+  }
+}
diff --git a/src/commands/workflow.ts b/src/commands/workflow.ts
new file mode 100644
index 0000000..93f6434
--- /dev/null
+++ b/src/commands/workflow.ts
@@ -0,0 +1,230 @@
+import { mkdir, readFile, writeFile } from "fs/promises";
+import { join, resolve } from "path";
+import { discoverSkills } from "../skills.js";
+import { validateWorkflow } from "../utils/schemas.js";
+import { generateWorkflow, type LlmClient } from "../utils/workflow-generator.js";
+
+/** Parsed command-line options for `gitclaw workflow generate`. */
+interface GenerateFlags {
+  dir: string;
+  prompt?: string;
+  refine?: string;
+  model?: string;
+  apiKey?: string;
+  dryRun: boolean;
+}
+
+const RED = (s: string) => `\x1b[31m${s}\x1b[0m`;
+const GREEN = (s: string) => `\x1b[32m${s}\x1b[0m`;
+const DIM = (s: string) => `\x1b[2m${s}\x1b[0m`;
+const BOLD = (s: string) => `\x1b[1m${s}\x1b[0m`;
+
+/** Number of extra generation attempts made after the first one fails validation. */
+const MAX_RETRIES = 2;
+
+/** Prints usage information for the `workflow` command to stdout. */
+function printHelp(): void {
+  console.log(`${BOLD("gitclaw workflow")} — generate SkillFlow workflows from natural language
+
+Usage:
+  gitclaw workflow generate [options]
+
+Options:
+  -d, --dir <path>       Agent directory (default: current directory)
+  -p, --prompt <text>    Natural-language description of the workflow (required)
+      --refine <file>    Refine an existing workflow YAML by applying --prompt as an instruction
+  -m, --model <spec>     LLM model in provider:model form (default: openai:gpt-4o)
+      --api-key <key>    API key for the provider (falls back to OPENAI_API_KEY or <PROVIDER>_API_KEY)
+      --dry-run          Print the generated YAML to stdout instead of writing a file
+  -h, --help             Show this help message
+
+Examples:
+  gitclaw workflow generate -p "every morning summarize unread emails and post to Slack"
+  gitclaw workflow generate -p "add a human approval step before the Slack post" --refine workflows/morning-digest.yaml
+`);
+}
+
+/**
+ * Parses the arguments that follow `workflow generate`.
+ *
+ * A first bare (non-dash) argument is accepted as the prompt. Unknown options
+ * and options that are missing their value print an error and exit with
+ * status 2; `-h`/`--help` prints usage and exits with status 0.
+ */
+function parseFlags(argv: string[]): GenerateFlags {
+  const flags: GenerateFlags = { dir: process.cwd(), dryRun: false };
+  // Fetches the value that follows option `name`, so that a trailing
+  // `--dir` cannot silently assign `undefined` to a string field.
+  const takeValue = (name: string, index: number): string => {
+    const value = argv[index];
+    if (value === undefined) {
+      console.error(RED(`Missing value for option: ${name}`));
+      process.exit(2);
+    }
+    return value;
+  };
+  for (let i = 0; i < argv.length; i++) {
+    const a = argv[i];
+    switch (a) {
+      case "-d":
+      case "--dir":
+        flags.dir = takeValue(a, ++i);
+        break;
+      case "-p":
+      case "--prompt":
+        flags.prompt = takeValue(a, ++i);
+        break;
+      case "--refine":
+        flags.refine = takeValue(a, ++i);
+        break;
+      case "-m":
+      case "--model":
+        flags.model = takeValue(a, ++i);
+        break;
+      case "--api-key":
+        flags.apiKey = takeValue(a, ++i);
+        break;
+      case "--dry-run":
+        flags.dryRun = true;
+        break;
+      case "-h":
+      case "--help":
+        printHelp();
+        process.exit(0);
+        break;
+      default:
+        if (!a.startsWith("-") && flags.prompt === undefined) {
+          flags.prompt = a;
+        } else {
+          console.error(RED(`Unknown option: ${a}`));
+          process.exit(2);
+        }
+    }
+  }
+  return flags;
+}
+
+/** Reduces a workflow name to a lowercase, hyphen-separated file-name stem ("workflow" if nothing remains). */
+function slugify(name: string): string {
+  const cleaned = name
+    .toLowerCase()
+    .trim()
+    .replace(/[^a-z0-9-]+/g, "-")
+    .replace(/^-+|-+$/g, "")
+    .replace(/-+/g, "-");
+  return cleaned || "workflow";
+}
+
+/** Options for {@link runGenerate}; `llm` overrides the default LLM client. */
+export interface RunGenerateOptions {
+  flags: GenerateFlags;
+  llm?: LlmClient;
+}
+
+/**
+ * Generates a workflow from `flags.prompt`, validating the result and
+ * re-prompting the LLM with the validation errors up to MAX_RETRIES times.
+ *
+ * With `flags.dryRun` the YAML is written to stdout; otherwise it is saved
+ * as `<dir>/workflows/<slug>.yaml`, where the slug comes from the workflow name.
+ *
+ * @returns The generated YAML, plus `filePath` when a file was written.
+ * @throws If the prompt is blank, or if no attempt passes validation.
+ */
+export async function runGenerate(opts: RunGenerateOptions): Promise<{ filePath?: string; yaml: string; }> {
+  const { flags } = opts;
+  if (!flags.prompt || !flags.prompt.trim()) {
+    throw new Error("--prompt is required");
+  }
+
+  const agentDir = resolve(flags.dir);
+  const skills = await discoverSkills(agentDir);
+
+  let previousWorkflow: string | undefined;
+  if (flags.refine) {
+    const refinePath = resolve(agentDir, flags.refine);
+    previousWorkflow = await readFile(refinePath, "utf-8");
+  }
+
+  let promptForLlm = flags.prompt.trim();
+  let lastErrors: string[] = [];
+  let yaml = "";
+  // Name of the validated workflow; captured in the loop so the YAML does not
+  // have to be parsed a second time just to build the file path.
+  let workflowName = "";
+
+  for (let attempt = 0; attempt <= MAX_RETRIES; attempt++) {
+    console.error(DIM(attempt === 0 ? "Generating workflow..." : `Retry ${attempt}/${MAX_RETRIES} — fixing validation errors...`));
+    yaml = await generateWorkflow({
+      prompt: promptForLlm,
+      skills,
+      previousWorkflow,
+      model: flags.model,
+      apiKey: flags.apiKey,
+      llm: opts.llm,
+    });
+    const result = validateWorkflow(yaml);
+    if (result.valid) {
+      lastErrors = [];
+      workflowName = result.data?.name ?? "";
+      break;
+    }
+    lastErrors = result.errors;
+    if (attempt < MAX_RETRIES) {
+      // Show the model its rejected output next to the errors: every call
+      // rebuilds the transcript from scratch, so it has no memory of it.
+      promptForLlm =
+        `${flags.prompt.trim()}\n\nThe previous attempt failed schema validation. Fix these errors and return the full YAML again:\n` +
+        result.errors.map((e) => `- ${e}`).join("\n") +
+        `\n\nPrevious attempt:\n${yaml}`;
+    }
+  }
+
+  if (lastErrors.length > 0) {
+    console.error(RED("\nWorkflow validation failed after retries:"));
+    for (const e of lastErrors) console.error(RED(`  - ${e}`));
+    console.error(DIM("\nLast generated YAML:\n"));
+    console.error(yaml);
+    throw new Error("Validation failed after retries");
+  }
+
+  if (flags.dryRun) {
+    process.stdout.write(yaml.endsWith("\n") ? yaml : yaml + "\n");
+    return { yaml };
+  }
+
+  const slug = slugify(workflowName);
+  const workflowsDir = join(agentDir, "workflows");
+  await mkdir(workflowsDir, { recursive: true });
+  const filePath = join(workflowsDir, `${slug}.yaml`);
+  await writeFile(filePath, yaml.endsWith("\n") ? yaml : yaml + "\n", "utf-8");
+  console.error(GREEN(`\nWrote workflow to ${filePath}`));
+  return { filePath, yaml };
+}
+
+/**
+ * Entry point for `gitclaw workflow ...`.
+ *
+ * @param argv Tail of `process.argv` starting at the `workflow` token, so
+ *   `argv[0] === "workflow"` and `argv[1]` is the sub-command.
+ */
+export async function handleWorkflowCommand(argv: string[]): Promise<void> {
+  const sub = argv[1];
+  if (!sub || sub === "-h" || sub === "--help") {
+    printHelp();
+    return;
+  }
+  if (sub !== "generate") {
+    console.error(RED(`Unknown subcommand: ${sub}`));
+    printHelp();
+    process.exit(2);
+  }
+  const flags = parseFlags(argv.slice(2));
+  try {
+    await runGenerate({ flags });
+  } catch (err: unknown) {
+    const message = err instanceof Error ? err.message : String(err);
+    console.error(RED(`\nError: ${message}`));
+    process.exit(1);
+  }
+}
diff --git a/src/index.ts b/src/index.ts
index 03e8c55..b5a0396 100644
--- a/src/index.ts
+++ b/src/index.ts
@@ -22,6 +22,7 @@ import { initLocalSession } from "./session.js";
 import type { LocalSession } from "./session.js";
 import { startVoiceServer } from "./voice/server.js";
 import { handlePluginCommand } from "./plugin-cli.js";
+import { handleWorkflowCommand } from "./commands/workflow.js";
 import { context as otelContext } from "@opentelemetry/api";
 import {
   initTelemetry,
@@ -301,6 +302,12 @@ async function ensureRepo(dir: string, model?: string): Promise {
 }
 
 async function main(): Promise<void> {
+  // Handle workflow subcommand: gitclaw workflow <subcommand>
+  if (process.argv[2] === "workflow") {
+    await handleWorkflowCommand(process.argv.slice(2));
+    return;
+  }
+
   // Handle plugin subcommand: gitclaw plugin
   if (process.argv[2] === "plugin") {
     const allArgs = process.argv.slice(3);
diff --git a/src/utils/schemas.ts b/src/utils/schemas.ts
new file mode 100644
index 0000000..8e04e57
--- /dev/null
+++ b/src/utils/schemas.ts
@@ -0,0 +1,237 @@
+import { readFileSync, existsSync } from "fs";
+import { dirname, join, resolve } from "path";
+import { fileURLToPath } from "url";
+import yaml from "js-yaml";
+
+/** One step of a workflow; mirrors `definitions.step` in the JSON Schema. */
+export interface WorkflowStep {
+  id?: string;
+  skill: string;
+  prompt: string;
+  channel?: string;
+  depends_on?: string[];
+  requires_approval?: boolean;
+}
+
+/** A parsed workflow document; mirrors the root object of the JSON Schema. */
+export interface WorkflowDef {
+  name: string;
+  description: string;
+  steps: WorkflowStep[];
+}
+
+/** Outcome of validateWorkflow; `data` is set only when `valid` is true. */
+export interface ValidationResult {
+  valid: boolean;
+  errors: string[];
+  data?: WorkflowDef;
+}
+
+let cachedSchema: any = null;
+let cachedSchemaText: string | null = null;
+
+/**
+ * Locates spec/schemas/workflow.schema.json relative to this module.
+ *
+ * @throws If the schema file cannot be found.
+ */
+function resolveSchemaPath(): string {
+  const here = dirname(fileURLToPath(import.meta.url));
+  // Try candidates relative to this module's location.
+  // 1. Running from src/utils/ in tests: ../../spec/schemas/workflow.schema.json
+  // 2. Running from dist/utils/ after build: ../../spec/schemas/workflow.schema.json
+  // 3. Running from dist/utils/ when spec/ is not packed: walk upward.
+  const candidates = [
+    resolve(here, "..", "..", "spec", "schemas", "workflow.schema.json"),
+    resolve(here, "..", "..", "..", "spec", "schemas", "workflow.schema.json"),
+  ];
+  for (const p of candidates) {
+    if (existsSync(p)) return p;
+  }
+  // Fallback: walk up to 6 levels looking for the schema.
+  let cur = here;
+  for (let i = 0; i < 6; i++) {
+    const guess = join(cur, "spec", "schemas", "workflow.schema.json");
+    if (existsSync(guess)) return guess;
+    const parent = dirname(cur);
+    if (parent === cur) break;
+    cur = parent;
+  }
+  throw new Error(`Could not locate spec/schemas/workflow.schema.json relative to ${here}`);
+}
+
+/** Returns the parsed workflow JSON Schema, reading it from disk on first use. */
+export function loadWorkflowSchema(): any {
+  if (cachedSchema) return cachedSchema;
+  const path = resolveSchemaPath();
+  cachedSchemaText = readFileSync(path, "utf-8");
+  cachedSchema = JSON.parse(cachedSchemaText);
+  return cachedSchema;
+}
+
+/** Returns the raw JSON text of the workflow schema, loading it if necessary. */
+export function getWorkflowSchemaText(): string {
+  if (cachedSchemaText) return cachedSchemaText;
+  loadWorkflowSchema();
+  return cachedSchemaText!;
+}
+
+/** Like `typeof`, but reports "null" and "array" as separate type names. */
+function typeOf(v: any): string {
+  if (v === null) return "null";
+  if (Array.isArray(v)) return "array";
+  return typeof v;
+}
+
+/** True when `v` has one of the type names in `expected` ("integer" matches whole numbers). */
+function matchesType(v: any, expected: string | string[]): boolean {
+  const types = Array.isArray(expected) ? expected : [expected];
+  const actual = typeOf(v);
+  return types.includes(actual) || (types.includes("integer") && actual === "number" && Number.isInteger(v));
+}
+
+/** A single validation failure; `path` is empty for the document root. */
+interface Issue {
+  path: string;
+  message: string;
+}
+
+/**
+ * Validates `data` against `schema`, appending every failure to `issues`.
+ *
+ * Implements only these keywords: local `$ref`, `type`, `required`,
+ * `properties`, `additionalProperties: false`, `minItems`, `items`,
+ * `uniqueItems`, `minLength` and `pattern`. Other keywords are ignored.
+ */
+function validateAgainst(data: any, schema: any, path: string, root: any, issues: Issue[]): void {
+  // Resolve $ref
+  if (schema && typeof schema === "object" && schema.$ref) {
+    const ref = schema.$ref as string;
+    if (!ref.startsWith("#/")) {
+      issues.push({ path, message: `unsupported $ref "${ref}" (only local refs are supported)` });
+      return;
+    }
+    const segments = ref.slice(2).split("/");
+    let resolved: any = root;
+    for (const seg of segments) {
+      resolved = resolved?.[seg];
+    }
+    if (!resolved) {
+      issues.push({ path, message: `cannot resolve $ref "${ref}"` });
+      return;
+    }
+    validateAgainst(data, resolved, path, root, issues);
+    return;
+  }
+
+  if (!schema || typeof schema !== "object") return;
+
+  if (schema.type && !matchesType(data, schema.type)) {
+    issues.push({
+      path,
+      message: `expected type ${Array.isArray(schema.type) ? schema.type.join("|") : schema.type}, got ${typeOf(data)}`,
+    });
+    return;
+  }
+
+  if (typeOf(data) === "object") {
+    const required: string[] = Array.isArray(schema.required) ? schema.required : [];
+    for (const key of required) {
+      if (!Object.prototype.hasOwnProperty.call(data, key)) {
+        issues.push({ path: path || "(root)", message: `missing required property "${key}"` });
+      }
+    }
+
+    const props = schema.properties ?? {};
+    const additionalAllowed = schema.additionalProperties !== false;
+    for (const key of Object.keys(data)) {
+      const childPath = path ? `${path}.${key}` : key;
+      // Own-property lookup only: a key such as "constructor" or "toString"
+      // must not resolve to an Object.prototype member and escape the check.
+      if (Object.prototype.hasOwnProperty.call(props, key)) {
+        validateAgainst(data[key], props[key], childPath, root, issues);
+      } else if (!additionalAllowed) {
+        issues.push({ path: path || "(root)", message: `unknown property "${key}"` });
+      }
+    }
+  } else if (typeOf(data) === "array") {
+    if (schema.minItems != null && data.length < schema.minItems) {
+      issues.push({ path: path || "(root)", message: `array must have at least ${schema.minItems} item(s), got ${data.length}` });
+    }
+    if (schema.items) {
+      for (let i = 0; i < data.length; i++) {
+        validateAgainst(data[i], schema.items, `${path}[${i}]`, root, issues);
+      }
+    }
+    if (schema.uniqueItems === true) {
+      const seen = new Set<string>();
+      for (let i = 0; i < data.length; i++) {
+        const key = JSON.stringify(data[i]);
+        if (seen.has(key)) {
+          issues.push({ path: `${path}[${i}]`, message: `duplicate item` });
+        }
+        seen.add(key);
+      }
+    }
+  } else if (typeOf(data) === "string") {
+    if (schema.minLength != null && data.length < schema.minLength) {
+      issues.push({ path: path || "(root)", message: `string must be at least ${schema.minLength} character(s)` });
+    }
+    if (schema.pattern && !new RegExp(schema.pattern).test(data)) {
+      issues.push({ path: path || "(root)", message: `value "${data}" does not match pattern ${schema.pattern}` });
+    }
+  }
+}
+
+/**
+ * Parses `yamlText` and validates it as a SkillFlow workflow.
+ *
+ * Besides the schema, checks that every `depends_on` entry names a declared
+ * step id. YAML syntax errors are reported in `errors` rather than thrown.
+ */
+export function validateWorkflow(yamlText: string): ValidationResult {
+  let parsed: unknown;
+  try {
+    parsed = yaml.load(yamlText);
+  } catch (err: any) {
+    return { valid: false, errors: [`YAML parse error: ${err?.message ?? String(err)}`] };
+  }
+
+  if (parsed === null || parsed === undefined) {
+    return { valid: false, errors: ["workflow is empty"] };
+  }
+
+  if (typeof parsed !== "object" || Array.isArray(parsed)) {
+    return { valid: false, errors: [`workflow must be an object, got ${typeOf(parsed)}`] };
+  }
+
+  const schema = loadWorkflowSchema();
+  const issues: Issue[] = [];
+  validateAgainst(parsed, schema, "", schema, issues);
+
+  // Cross-field check: depends_on ids must reference declared step ids
+  const data = parsed as any;
+  if (Array.isArray(data.steps)) {
+    const declared = new Set<string>();
+    for (const step of data.steps) {
+      if (step && typeof step.id === "string") declared.add(step.id);
+    }
+    data.steps.forEach((step: any, i: number) => {
+      if (step && Array.isArray(step.depends_on)) {
+        for (const dep of step.depends_on) {
+          if (typeof dep === "string" && !declared.has(dep)) {
+            issues.push({ path: `steps[${i}].depends_on`, message: `references unknown step id "${dep}"` });
+          }
+        }
+      }
+    });
+  }
+
+  if (issues.length === 0) {
+    return { valid: true, errors: [], data: parsed as WorkflowDef };
+  }
+  return {
+    valid: false,
+    errors: issues.map((i) => (i.path ? `${i.path}: ${i.message}` : i.message)),
+  };
+}
diff --git a/src/utils/workflow-generator.ts b/src/utils/workflow-generator.ts
new file mode 100644
index 0000000..63f6ca5
--- /dev/null
+++ b/src/utils/workflow-generator.ts
@@ -0,0 +1,221 @@
+import type { SkillMetadata } from "../skills.js";
+import { getWorkflowSchemaText } from "./schemas.js";
+
+export type LlmRole = "system" | "user" | "assistant";
+
+export interface LlmMessage {
+  role: LlmRole;
+  content: string;
+}
+
+export interface LlmCallOptions {
+  model: string;
+  temperature?: number;
+  apiKey?: string;
+}
+
+/** Sends a chat transcript to a model and resolves with the assistant's text reply. */
+export type LlmClient = (messages: LlmMessage[], opts: LlmCallOptions) => Promise<string>;
+
+/** Inputs for generateWorkflow; `previousWorkflow` selects refine mode, `llm` replaces the default client. */
+export interface GenerateWorkflowOptions {
+  prompt: string;
+  skills: SkillMetadata[];
+  previousWorkflow?: string;
+  model?: string;
+  apiKey?: string;
+  llm?: LlmClient;
+}
+
+const DEFAULT_MODEL = "openai:gpt-4o";
+
+const SYSTEM_RULES = `Rules (MUST follow):
+- Output ONLY valid YAML. No markdown fences, no prose, no commentary before or after.
+- The output MUST validate against the schema above.
+- "name" must be kebab-case (lowercase letters, digits, and single hyphens).
+- Every step "skill" must reference an installed skill from the list below, unless the step is human approval — in that case use skill: "approval" and set requires_approval: true.
+- When multiple steps need ordering beyond top-to-bottom, give them snake_case "id" values and use "depends_on" to express the dependency.
+- Keep "prompt" fields concrete and self-contained — a downstream agent will read them verbatim.
+- Do not invent fields not in the schema.`;
+
+const FEW_SHOT_USER_1 = "Every morning, summarize my unread emails and post the summary to Slack.";
+
+const FEW_SHOT_ASSISTANT_1 = `name: morning-email-digest
+description: Summarize unread emails and post the digest to Slack each morning.
+steps:
+  - skill: gmail
+    prompt: Fetch all unread emails from the last 24 hours and return subject, sender, and a one-sentence summary for each.
+  - skill: summarize
+    prompt: Compose a single-paragraph digest of the unread emails, grouped by sender priority.
+  - skill: slack
+    prompt: Post the digest to the configured channel.
+    channel: "#daily-digest"
+`;
+
+
+const FEW_SHOT_USER_2 = "Pull yesterday's sales data, get sign-off, then send the report to the team.";
+
+const FEW_SHOT_ASSISTANT_2 = `name: daily-sales-report
+description: Pull sales data, require human approval, then distribute the report.
+steps:
+  - id: pull_data
+    skill: analytics
+    prompt: Pull yesterday's sales totals broken down by region and product line.
+  - id: approve
+    skill: approval
+    prompt: Review the pulled sales data for accuracy and approve distribution.
+    requires_approval: true
+    depends_on: [pull_data]
+  - id: send_report
+    skill: email
+    prompt: Send the approved sales report to the sales-leadership distribution list.
+    depends_on: [approve]
+`;
+
+/** Renders the installed skills as a bullet list, or a fallback hint when there are none. */
+function formatSkillsForPrompt(skills: SkillMetadata[]): string {
+  if (skills.length === 0) {
+    return "(no installed skills detected — use generic skill names that match the user's intent, e.g. gmail, slack, summarize)";
+  }
+  return skills.map((s) => `- ${s.name}: ${s.description}`).join("\n");
+}
+
+/** Builds the system prompt: the JSON Schema, the installed skills, and the output rules. */
+export function buildSystemPrompt(skills: SkillMetadata[]): string {
+  const schemaText = getWorkflowSchemaText();
+  const skillList = formatSkillsForPrompt(skills);
+  return `You are a workflow builder for GitClaw SkillFlow.
+
+Your job is to translate the user's natural-language description into a YAML workflow that conforms exactly to this JSON Schema:
+
+<schema>
+${schemaText}
+</schema>
+
+Installed skills the workflow may invoke:
+<skills>
+${skillList}
+</skills>
+
+${SYSTEM_RULES}`;
+}
+
+/**
+ * Builds the chat transcript: system prompt, two few-shot pairs, then the
+ * user's request (wrapped together with the existing YAML when refining).
+ */
+export function buildMessages(opts: GenerateWorkflowOptions): LlmMessage[] {
+  const messages: LlmMessage[] = [
+    { role: "system", content: buildSystemPrompt(opts.skills) },
+    { role: "user", content: FEW_SHOT_USER_1 },
+    { role: "assistant", content: FEW_SHOT_ASSISTANT_1 },
+    { role: "user", content: FEW_SHOT_USER_2 },
+    { role: "assistant", content: FEW_SHOT_ASSISTANT_2 },
+  ];
+
+  if (opts.previousWorkflow && opts.previousWorkflow.trim()) {
+    messages.push({
+      role: "user",
+      content: `Here is the current workflow:\n\n${opts.previousWorkflow.trim()}\n\nApply this refinement: ${opts.prompt}\n\nReturn the complete updated workflow as YAML — not a diff.`,
+    });
+  } else {
+    messages.push({ role: "user", content: opts.prompt });
+  }
+
+  return messages;
+}
+
+/** Matches a reply that consists solely of one fenced code block, optionally tagged yaml/yml. */
+const FENCE_RE = /^\s*```(?:ya?ml)?\s*\n([\s\S]*?)\n```\s*$/i;
+
+/** Returns the contents of a fully fenced reply, or the trimmed reply when it is not fenced. */
+export function stripCodeFences(raw: string): string {
+  const m = raw.match(FENCE_RE);
+  return m ? m[1] : raw.trim();
+}
+
+/**
+ * Default LLM client, backed by `@mariozechner/pi-ai` and
+ * `@mariozechner/pi-agent-core`, which are imported lazily on first call.
+ *
+ * @throws If `opts.model` is not in `provider:model-id` form or no API key is available.
+ */
+async function defaultLlmClient(messages: LlmMessage[], opts: LlmCallOptions): Promise<string> {
+  const [providerRaw, ...modelParts] = opts.model.split(":");
+  const provider = providerRaw?.trim();
+  const modelId = modelParts.join(":").trim();
+  if (!provider || !modelId) {
+    throw new Error(`Invalid model spec "${opts.model}". Expected "provider:model-id" (e.g. "openai:gpt-4o").`);
+  }
+
+  const apiKey =
+    opts.apiKey ||
+    process.env[`${provider.toUpperCase()}_API_KEY`] ||
+    process.env.OPENAI_API_KEY;
+  if (!apiKey) {
+    throw new Error(
+      `No API key found. Pass --api-key or set ${provider.toUpperCase()}_API_KEY (or OPENAI_API_KEY) in your environment.`,
+    );
+  }
+
+  const [{ getModel }, { Agent }] = await Promise.all([
+    import("@mariozechner/pi-ai" as any) as Promise<any>,
+    import("@mariozechner/pi-agent-core" as any) as Promise<any>,
+  ]);
+
+  // Export the resolved key (presumably where the pi-ai packages read it —
+  // confirm). An explicit opts.apiKey must win over an already-exported value.
+  if (opts.apiKey || !process.env[`${provider.toUpperCase()}_API_KEY`]) {
+    process.env[`${provider.toUpperCase()}_API_KEY`] = apiKey;
+  }
+
+  const model = getModel(provider as any, modelId as any);
+  const systemMessage = messages.find((m) => m.role === "system")?.content ?? "";
+  const conversation = messages.filter((m) => m.role !== "system");
+
+  const agent = new Agent({
+    initialState: {
+      systemPrompt: systemMessage,
+      model,
+      tools: [],
+      temperature: opts.temperature ?? 0,
+      maxTokens: 4096,
+    },
+  });
+
+  let collected = "";
+  agent.subscribe((event: any) => {
+    if (event.type === "message_end" && event.message?.role === "assistant") {
+      for (const block of event.message.content) {
+        if (block.type === "text") collected += block.text;
+      }
+    }
+  });
+
+  // Replay prior assistant/user turns as a single composed prompt so we don't
+  // have to drive Agent through multiple chat turns. The few-shot pairs are
+  // preserved as part of the prompt text so the model still sees them.
+  const composed = conversation
+    .map((m) => `[${m.role.toUpperCase()}]\n${m.content}`)
+    .join("\n\n");
+
+  await agent.prompt(composed);
+  return collected;
+}
+
+/**
+ * Generates workflow YAML for `opts.prompt` and strips a surrounding code fence.
+ * The result is not validated here; that is left to the caller.
+ *
+ * @throws If `opts.prompt` is blank.
+ */
+export async function generateWorkflow(opts: GenerateWorkflowOptions): Promise<string> {
+  if (!opts.prompt || !opts.prompt.trim()) {
+    throw new Error("generateWorkflow: prompt is required");
+  }
+  const llm = opts.llm ?? defaultLlmClient;
+  const model = opts.model ?? DEFAULT_MODEL;
+  const messages = buildMessages(opts);
+  const raw = await llm(messages, { model, apiKey: opts.apiKey, temperature: 0 });
+  return stripCodeFences(raw);
+}
diff --git a/test/ts-resolve-hook.mjs b/test/ts-resolve-hook.mjs
new file mode 100644
index 0000000..3fa4398
--- /dev/null
+++ b/test/ts-resolve-hook.mjs
@@ -0,0 +1,27 @@
+// ESM resolve hook: if a relative `.js` import has no matching `.js`
+// on disk, retry with the `.ts` extension. Lets node --experimental-strip-types
+// run TypeScript sources whose internal imports follow the Node16 `.js` style.
+
+import { existsSync } from "node:fs";
+import { fileURLToPath } from "node:url";
+
+export async function resolve(specifier, context, nextResolve) {
+  if (specifier.startsWith(".") && specifier.endsWith(".js")) {
+    try {
+      return await nextResolve(specifier, context);
+    } catch (err) {
+      if (err?.code !== "ERR_MODULE_NOT_FOUND") throw err;
+      const tsSpecifier = specifier.slice(0, -3) + ".ts";
+      try {
+        const candidate = await nextResolve(tsSpecifier, context);
+        if (candidate?.url?.startsWith("file://") && existsSync(fileURLToPath(candidate.url))) {
+          return candidate;
+        }
+      } catch {
+        // fall through and re-throw the original .js miss
+      }
+      throw err;
+    }
+  }
+  return nextResolve(specifier, context);
+}
diff --git a/test/workflow-generator.test.ts b/test/workflow-generator.test.ts
new file mode 100644
index 0000000..a9f201f
--- /dev/null
+++ b/test/workflow-generator.test.ts
@@ -0,0 +1,222 @@
+// Unit tests for src/utils/workflow-generator.ts and the retry loop in
+// src/commands/workflow.ts. The LLM is fully mocked — no network calls.
+
+import test from "node:test";
+import assert from "node:assert/strict";
+import { mkdtemp, readFile, rm } from "node:fs/promises";
+import { tmpdir } from "node:os";
+import { join } from "node:path";
+
+import {
+  buildMessages,
+  buildSystemPrompt,
+  stripCodeFences,
+  generateWorkflow,
+  type LlmClient,
+  type LlmMessage,
+} from "../src/utils/workflow-generator.ts";
+import { runGenerate } from "../src/commands/workflow.ts";
+import type { SkillMetadata } from "../src/skills.ts";
+
+const SKILLS: SkillMetadata[] = [
+  { name: "gmail", description: "Read and send email", directory: "/x/skills/gmail", filePath: "/x/skills/gmail/SKILL.md" },
+  { name: "slack", description: "Post to Slack", directory: "/x/skills/slack", filePath: "/x/skills/slack/SKILL.md" },
+  { name: "summarize", description: "Summarize text", directory: "/x/skills/summarize", filePath: "/x/skills/summarize/SKILL.md" },
+];
+
+const VALID_YAML = `name: morning-digest
+description: Summarize unread emails and post to Slack each morning.
+steps:
+  - skill: gmail
+    prompt: Fetch unread emails.
+  - skill: summarize
+    prompt: Compose a digest.
+  - skill: slack
+    prompt: Post the digest.
+    channel: "#daily-digest"
+`;
+
+const INVALID_YAML_MISSING_NAME = `description: no name here
+steps:
+  - skill: gmail
+    prompt: hi
+`;
+
+// ── buildSystemPrompt / buildMessages ──────────────────────────────────
+
+test("buildSystemPrompt embeds the schema text and the skill list", () => {
+  const sys = buildSystemPrompt(SKILLS);
+  assert.ok(sys.includes('"$schema"'), "system prompt missing embedded schema text");
+  assert.ok(sys.includes('"$id": "https://gitclaw.dev/spec/workflow.schema.json"'), "system prompt missing schema $id");
+  assert.ok(sys.includes("- gmail: Read and send email"), "system prompt missing gmail skill");
+  assert.ok(sys.includes("- slack: Post to Slack"), "system prompt missing slack skill");
+  assert.ok(sys.includes("Output ONLY valid YAML"), "system prompt missing rule about raw YAML");
+});
+
+test("buildSystemPrompt handles empty skill list with a fallback hint", () => {
+  const sys = buildSystemPrompt([]);
+  assert.ok(sys.includes("no installed skills detected"), "system prompt missing empty-skills fallback");
+});
+
+test("buildMessages includes two few-shot pairs and the user prompt", () => {
+  const messages = buildMessages({ prompt: "Do the thing", skills: SKILLS });
+  assert.equal(messages[0].role, "system");
+  assert.equal(messages[1].role, "user");
+  assert.equal(messages[2].role, "assistant");
+  assert.equal(messages[3].role, "user");
+  assert.equal(messages[4].role, "assistant");
+  assert.equal(messages[5].role, "user");
+  assert.equal(messages[5].content, "Do the thing");
+});
+
+test("buildMessages wraps refine-mode prompts with the previous YAML and instruction", () => {
+  const messages = buildMessages({
+    prompt: "Add an approval step before the Slack post.",
+    skills: SKILLS,
+    previousWorkflow: VALID_YAML,
+  });
+  const last = messages[messages.length - 1];
+  assert.equal(last.role, "user");
+  assert.ok(last.content.includes("Here is the current workflow"));
+  assert.ok(last.content.includes("Add an approval step before the Slack post."));
+  assert.ok(last.content.includes("morning-digest"));
+  assert.ok(last.content.includes("Return the complete updated workflow as YAML — not a diff."));
+});
+
+// ── stripCodeFences ────────────────────────────────────────────────────
+
+test("stripCodeFences removes generic fenced code", () => {
+  const input = "```\nname: foo\n```\n";
+  assert.equal(stripCodeFences(input), "name: foo");
+});
+
+test("stripCodeFences removes yaml-tagged fences", () => {
+  const input = "```yaml\nname: foo\n```";
+  assert.equal(stripCodeFences(input), "name: foo");
+});
+
+test("stripCodeFences leaves unfenced YAML alone", () => {
+  const input = "name: foo\n";
+  assert.equal(stripCodeFences(input), "name: foo");
+});
+
+// ── generateWorkflow with an injected LLM ──────────────────────────────
+
+test("generateWorkflow returns the LLM output after stripping fences", async () => {
+  const captured: { messages: LlmMessage[] } = { messages: [] };
+  const llm: LlmClient = async (messages) => {
+    captured.messages = messages;
+    return "```yaml\n" + VALID_YAML + "```";
+  };
+  const out = await generateWorkflow({
+    prompt: "summarize emails and post to slack",
+    skills: SKILLS,
+    llm,
+  });
+  assert.equal(out.trim().startsWith("name: morning-digest"), true);
+  assert.equal(captured.messages.length, 6);
+  assert.equal(captured.messages[0].role, "system");
+});
+
+test("generateWorkflow throws if prompt is empty", async () => {
+  await assert.rejects(
+    () => generateWorkflow({ prompt: "   ", skills: SKILLS, llm: async () => VALID_YAML }),
+    /prompt is required/,
+  );
+});
+
+// ── Retry loop in runGenerate ──────────────────────────────────────────
+
+test("runGenerate retries when validation fails, then writes the file when the second attempt is valid", async () => {
+  const dir = await mkdtemp(join(tmpdir(), "gitclaw-test-"));
+  try {
+    let calls = 0;
+    const llm: LlmClient = async (messages) => {
+      calls++;
+      const userMsg = messages[messages.length - 1].content;
+      if (calls === 1) return INVALID_YAML_MISSING_NAME;
+      // Second attempt: ensure the retry prompt included the validation error.
+      assert.ok(userMsg.includes("schema validation"), `retry user message did not mention validation: ${userMsg}`);
+      return VALID_YAML;
+    };
+    const result = await runGenerate({
+      flags: {
+        dir,
+        prompt: "summarize unread emails and post to Slack",
+        dryRun: false,
+      },
+      llm,
+    });
+    assert.equal(calls, 2);
+    assert.ok(result.filePath, "expected a written file path");
+    assert.equal(result.filePath, join(dir, "workflows", "morning-digest.yaml"));
+    const written = await readFile(result.filePath!, "utf-8");
+    assert.ok(written.includes("name: morning-digest"));
+  } finally {
+    await rm(dir, { recursive: true, force: true });
+  }
+});
+
+test("runGenerate honours --dry-run by returning YAML without writing", async () => {
+  const dir = await mkdtemp(join(tmpdir(), "gitclaw-test-"));
+  try {
+    const llm: LlmClient = async () => VALID_YAML;
+    const result = await runGenerate({
+      flags: { dir, prompt: "x", dryRun: true },
+      llm,
+    });
+    assert.equal(result.filePath, undefined);
+    assert.ok(result.yaml.includes("name: morning-digest"));
+    // The workflow file must not have been written.
+    await assert.rejects(() => readFile(join(dir, "workflows", "morning-digest.yaml"), "utf-8"));
+  } finally {
+    await rm(dir, { recursive: true, force: true });
+  }
+});
+
+test("runGenerate gives up after MAX_RETRIES and throws", async () => {
+  const dir = await mkdtemp(join(tmpdir(), "gitclaw-test-"));
+  try {
+    let calls = 0;
+    const llm: LlmClient = async () => {
+      calls++;
+      return INVALID_YAML_MISSING_NAME;
+    };
+    await assert.rejects(
+      () => runGenerate({ flags: { dir, prompt: "x", dryRun: true }, llm }),
+      /Validation failed after retries/,
+    );
+    assert.equal(calls, 3); // 1 initial + 2 retries
+  } finally {
+    await rm(dir, { recursive: true, force: true });
+  }
+});
+
+test("runGenerate refine mode reads previous YAML and passes it to the LLM", async () => {
+  const dir = await mkdtemp(join(tmpdir(), "gitclaw-test-"));
+  try {
+    const { writeFile, mkdir } = await import("node:fs/promises");
+    await mkdir(join(dir, "workflows"), { recursive: true });
+    const refinePath = join(dir, "workflows", "starter.yaml");
+    await writeFile(refinePath, VALID_YAML, "utf-8");
+
+    let observed = "";
+    const llm: LlmClient = async (messages) => {
+      observed = messages[messages.length - 1].content;
+      return VALID_YAML;
+    };
+    await runGenerate({
+      flags: {
+        dir,
+        prompt: "add an approval step before slack",
+        refine: "workflows/starter.yaml",
+        dryRun: true,
+      },
+      llm,
+    });
+    assert.ok(observed.includes("Here is the current workflow"));
+    assert.ok(observed.includes("add an approval step before slack"));
+  } finally {
+    await rm(dir, { recursive: true, force: true });
+  }
+});
diff --git a/test/workflow-validator.test.ts b/test/workflow-validator.test.ts
new file mode 100644
index 0000000..80f9699
--- /dev/null
+++ b/test/workflow-validator.test.ts
@@ -0,0 +1,153 @@
+// Unit tests for src/utils/schemas.ts — the SkillFlow workflow validator.
import test from "node:test";
import assert from "node:assert/strict";

import { validateWorkflow, loadWorkflowSchema } from "../src/utils/schemas.ts";

// Well-formed fixture: three steps, the last one carrying the optional `channel` key.
const VALID_YAML = `name: morning-digest
description: Summarize unread emails and post to Slack each morning.
steps:
  - skill: gmail
    prompt: Fetch unread emails from the last 24h.
  - skill: summarize
    prompt: Compose a digest grouped by sender priority.
  - skill: slack
    prompt: Post the digest to the team channel.
    channel: "#daily-digest"
`;

/**
 * Renders a validation result's error list for use as an assertion failure
 * message, so a failing test shows what the validator actually reported.
 */
const describeErrors = (result: { errors: readonly string[] }): string =>
  `errors: ${JSON.stringify(result.errors)}`;

test("loadWorkflowSchema returns the parsed schema with required top-level keys", () => {
  const schema = loadWorkflowSchema();
  assert.equal(typeof schema, "object");
  assert.deepEqual(schema.required, ["name", "description", "steps"]);
  assert.ok(schema.definitions.step.required.includes("skill"), 'step schema must require "skill"');
  assert.ok(schema.definitions.step.required.includes("prompt"), 'step schema must require "prompt"');
});

test("validateWorkflow accepts a well-formed workflow", () => {
  const r = validateWorkflow(VALID_YAML);
  assert.equal(r.valid, true, describeErrors(r));
  assert.deepEqual(r.errors, []);
  assert.equal(r.data?.name, "morning-digest");
  assert.equal(r.data?.steps.length, 3);
});

test("validateWorkflow rejects missing name", () => {
  const yaml = `description: foo
steps:
  - skill: gmail
    prompt: do thing
`;
  const r = validateWorkflow(yaml);
  assert.equal(r.valid, false);
  assert.ok(
    r.errors.some((e) => e.includes('missing required property "name"')),
    describeErrors(r),
  );
});

test("validateWorkflow rejects non-kebab-case name", () => {
  const yaml = `name: MyWorkflow
description: foo
steps:
  - skill: gmail
    prompt: do thing
`;
  const r = validateWorkflow(yaml);
  assert.equal(r.valid, false);
  assert.ok(
    r.errors.some((e) => e.includes("pattern")),
    describeErrors(r),
  );
});

test("validateWorkflow rejects empty steps array", () => {
  const yaml = `name: empty-flow
description: nothing
steps: []
`;
  const r = validateWorkflow(yaml);
  assert.equal(r.valid, false);
  assert.ok(
    r.errors.some((e) => e.includes("at least 1")),
    describeErrors(r),
  );
});

test("validateWorkflow rejects a step missing required prompt", () => {
  const yaml = `name: bad-step
description: missing prompt
steps:
  - skill: gmail
`;
  const r = validateWorkflow(yaml);
  assert.equal(r.valid, false);
  assert.ok(
    r.errors.some((e) => e.includes('missing required property "prompt"')),
    describeErrors(r),
  );
});

test("validateWorkflow rejects unknown step property", () => {
  const yaml = `name: extra-prop
description: bad
steps:
  - skill: gmail
    prompt: do thing
    nonsense: true
`;
  const r = validateWorkflow(yaml);
  assert.equal(r.valid, false);
  assert.ok(
    r.errors.some((e) => e.includes('unknown property "nonsense"')),
    describeErrors(r),
  );
});

test("validateWorkflow flags depends_on referencing a missing id", () => {
  // The only declared id is "a"; the second step depends on an id no step defines.
  const yaml = `name: bad-deps
description: dangling dep
steps:
  - id: a
    skill: gmail
    prompt: fetch
  - skill: slack
    prompt: post
    depends_on: [does_not_exist]
`;
  const r = validateWorkflow(yaml);
  assert.equal(r.valid, false);
  assert.ok(
    r.errors.some((e) => e.includes('references unknown step id "does_not_exist"')),
    describeErrors(r),
  );
});

test("validateWorkflow accepts approval step with requires_approval", () => {
  // Every depends_on entry here points at an id declared by an earlier step.
  const yaml = `name: approval-flow
description: needs sign-off
steps:
  - id: pull
    skill: analytics
    prompt: Pull data.
  - id: approve
    skill: approval
    prompt: Approve distribution.
    requires_approval: true
    depends_on: [pull]
  - skill: email
    prompt: Send report.
    depends_on: [approve]
`;
  const r = validateWorkflow(yaml);
  assert.equal(r.valid, true, describeErrors(r));
});

test("validateWorkflow surfaces YAML parse errors", () => {
  const yaml = `name: bad
description: : :
steps:
  - skill: [unterminated
`;
  const r = validateWorkflow(yaml);
  assert.equal(r.valid, false);
  // Optional chaining: an empty error list must fail the assertion (with the
  // diagnostic message) instead of throwing a TypeError on `undefined`.
  assert.ok(r.errors[0]?.startsWith("YAML parse error"), describeErrors(r));
});

test("validateWorkflow rejects empty document", () => {
  const r = validateWorkflow("");
  assert.equal(r.valid, false);
  // Same guard as above; also report the errors, which the bare assert did not.
  assert.ok(r.errors[0]?.includes("empty"), describeErrors(r));
});