diff --git a/.claude/README.md b/.claude/README.md index 74937338..54a41141 100644 --- a/.claude/README.md +++ b/.claude/README.md @@ -83,6 +83,20 @@ Edit `skill-rules.json` to: - Adjust confidence scoring - Update directory mappings +## Knowledge Skills (.claude/skills/knowledge/*.skill.md) + +Domain-specific knowledge files loaded on keyword match. See `skills/knowledge/` for available skills. + +``` +[LOAD]on keyword match from frontmatter `activates_on` → read matching .skill.md +[FALLBACK]if `expires` date passed → use Context7 `context7` field or fetch `sources[]` +[CACHE]loaded skills persist for session|don't re-read same skill +[FORMAT]frontmatter: name,version,domain,expires,activates_on[],sources[],context7 +``` + +Available: `esbuild-esm`, `vitest`, `better-sqlite3`, `mcp-protocol`, `commander-cli` +Plus global skills at `~/.claude/skills/knowledge/`: `stripe-api`, `railway-deploy`, `anthropic-sdk` + ## StackMemory-Specific Patterns Key patterns enforced by hooks and agents: diff --git a/.claude/skills/knowledge/better-sqlite3.skill.md b/.claude/skills/knowledge/better-sqlite3.skill.md new file mode 100644 index 00000000..17a1c337 --- /dev/null +++ b/.claude/skills/knowledge/better-sqlite3.skill.md @@ -0,0 +1,60 @@ +--- +name: better-sqlite3 +version: 2025.12.1 +domain: database +expires: 2026-12-01 +activates_on: [sqlite, better-sqlite3, database, db, query, fts, fts5, pragma, wal, transaction] +sources: + - https://github.com/WiseLibs/better-sqlite3/blob/master/docs/api.md +context7: WiseLibs/better-sqlite3 +--- + +# better-sqlite3 + +## Basics +- Open: `new Database(path)` or `new Database(':memory:')` +- **Synchronous API** — no async/await, no callbacks +- Prepare + run: `db.prepare('SELECT * FROM t WHERE id = ?').get(id)` +- All rows: `.all(params)` | Single row: `.get(params)` | Execute: `.run(params)` +- Params: positional `?` or named `$name` / `:name` / `@name` + +## Transactions +```js +const insert = db.prepare('INSERT INTO t (a, b) VALUES 
(?, ?)'); +const insertMany = db.transaction((items) => { + for (const item of items) insert.run(item.a, item.b); +}); +insertMany(items); // atomic, auto-rollback on error +``` +- `db.transaction()` returns a reusable function — best pattern for batch ops +- Nested transactions: use `.deferred()`, `.immediate()`, `.exclusive()` + +## FTS5 (Full-Text Search) +- Create: `CREATE VIRTUAL TABLE t_fts USING fts5(content, tokenize='porter unicode61')` +- Search: `SELECT * FROM t_fts WHERE t_fts MATCH 'query'` +- Rank: `SELECT *, rank FROM t_fts WHERE t_fts MATCH 'query' ORDER BY rank` +- BM25: `SELECT *, bm25(t_fts) as score FROM t_fts WHERE t_fts MATCH ?` +- Highlight: `highlight(t_fts, 0, '<b>', '</b>')` + +## WAL Mode +- Enable: `db.pragma('journal_mode = WAL')` — concurrent reads, single writer +- Always enable for production — significant performance improvement +- `db.pragma('busy_timeout = 5000')` — wait up to 5s for write lock + +## Performance +- `db.pragma('cache_size = -64000')` — 64MB cache +- `db.pragma('synchronous = NORMAL')` — faster writes (WAL mode safe) +- `db.prepare()` caches the statement plan — reuse prepared statements +- Batch inserts: always wrap in `db.transaction()` — 100x faster than individual inserts + +## ESM Import +- CJS native addon: `import Database from 'better-sqlite3'` +- Mark as `external` in esbuild — can't bundle native `.node` files +- Prebuilt binaries: `npm install` downloads correct platform binary + +## Gotchas +- Synchronous — blocks event loop on large queries; use worker_threads for heavy ops +- `.get()` returns `undefined` if no row (not `null`) +- `.run()` returns `{ changes, lastInsertRowid }` — not the row itself +- Column names are case-sensitive in result objects +- VACUUM: locks entire DB — run during maintenance windows only diff --git a/.claude/skills/knowledge/commander-cli.skill.md b/.claude/skills/knowledge/commander-cli.skill.md new file mode 100644 index 00000000..9b6e4082 --- /dev/null +++ 
b/.claude/skills/knowledge/commander-cli.skill.md @@ -0,0 +1,65 @@ +--- +name: commander-cli +version: 2025.12.1 +domain: cli +expires: 2026-12-01 +activates_on: [commander, cli, command, option, argument, subcommand, parse, program, action] +sources: + - https://github.com/tj/commander.js#readme +context7: tj/commander.js +--- + +# Commander.js CLI Framework + +## Basic Setup +```ts +import { Command } from 'commander'; +const program = new Command(); +program + .name('mycli') + .version('1.0.0') + .description('description'); +``` + +## Commands +```ts +program + .command('serve') + .description('Start server') + .option('-p, --port <number>', 'port', '3000') + .option('-v, --verbose', 'verbose output') + .argument('<dir>', 'directory to serve') + .action((dir, options) => { + console.log(dir, options.port, options.verbose); + }); +``` + +## Subcommands +```ts +const parent = program.command('db'); +parent.command('migrate').action(() => { ... }); +parent.command('seed').action(() => { ... }); +// Usage: mycli db migrate +``` + +## Options +- Required value: `-p, --port <number>` (angle brackets) +- Optional value: `-p, --port [number]` (square brackets) +- Boolean flag: `-v, --verbose` (no value) +- Variadic: `-f, --files <files...>` (collects into array) +- Default: `.option('-p, --port <number>', 'desc', '3000')` +- Choices: `.addOption(new Option('--env <env>').choices(['dev', 'prod']))` +- Negatable: `--no-color` (sets `options.color = false`) + +## Patterns for This Project +- 70+ commands — use subcommand groups (`skill`, `frame`, `session`, `linear`, etc.) 
+- Action handlers: async functions with try/catch + `process.exit(1)` on error +- Global options: define on `program` before subcommands +- Help: auto-generated — add `.addHelpText('after', text)` for examples + +## Gotchas +- Option values are strings by default — parse with `.argParser(parseInt)` or coerce in action +- `program.parse()` must be called last (or `program.parseAsync()` for async actions) +- `.command('*')` for catch-all unknown commands +- Negative options: `--no-foo` creates `options.foo = false` — conflicts if `--foo` also defined +- ESM: Commander works fine, but ensure `#!/usr/bin/env node` in bin entry diff --git a/.claude/skills/knowledge/esbuild-esm.skill.md b/.claude/skills/knowledge/esbuild-esm.skill.md new file mode 100644 index 00000000..867738b5 --- /dev/null +++ b/.claude/skills/knowledge/esbuild-esm.skill.md @@ -0,0 +1,43 @@ +--- +name: esbuild-esm +version: 2025.12.1 +domain: build +expires: 2026-12-01 +activates_on: [esbuild, build, bundle, esm, import, export, module, cjs, commonjs, dist, entry] +sources: + - https://esbuild.github.io/api/ + - https://nodejs.org/api/esm.html +context7: evanw/esbuild +--- + +# esbuild + ESM + +## esbuild Config +- Entry: `esbuild.build({ entryPoints, bundle, platform, format, outdir })` +- Platform: `'node'` for CLI tools (excludes node builtins from bundle) +- Format: `'esm'` — this project is ESM-first (`"type": "module"` in package.json) +- External: mark `better-sqlite3`, native addons as external (can't bundle .node files) +- Sourcemaps: `sourcemap: true` for debugging + +## ESM Rules (Node.js) +- **Always** add `.js` extension to relative imports: `import { foo } from './bar.js'` +- `__dirname` / `__filename` not available — use `import.meta.url` + `fileURLToPath()` +- `require()` not available — use `createRequire(import.meta.url)` for CJS interop +- Top-level `await` works in ESM +- JSON imports: `import data from './file.json' with { type: 'json' }` (or createRequire) + +## CJS Interop +- 
Import CJS from ESM: default import works (`import pkg from 'cjs-pkg'`) +- Named exports: may need `import pkg from 'pkg'; const { named } = pkg;` +- `better-sqlite3`: CJS native addon — import as default, mark external in esbuild + +## Package.json +- `"type": "module"` — all .js files are ESM +- `"exports"` field for package entry points (not just "main") +- `"bin"` field for CLI executables — ensure shebang `#!/usr/bin/env node` + +## Gotchas +- Missing `.js` extension → `ERR_MODULE_NOT_FOUND` (most common error) +- esbuild `bundle: true` inlines deps — use `external` for native modules +- `--packages=external` excludes all node_modules (useful for dev builds) +- Watch mode: `esbuild.context().then(ctx => ctx.watch())` — not `--watch` flag in API diff --git a/.claude/skills/knowledge/mcp-protocol.skill.md b/.claude/skills/knowledge/mcp-protocol.skill.md new file mode 100644 index 00000000..a07e9734 --- /dev/null +++ b/.claude/skills/knowledge/mcp-protocol.skill.md @@ -0,0 +1,69 @@ +--- +name: mcp-protocol +version: 2025.12.1 +domain: protocol +expires: 2026-06-01 +activates_on: [mcp, model context protocol, tool, resource, prompt, server, transport, stdio, sse, streamable] +sources: + - https://modelcontextprotocol.io/docs + - https://github.com/modelcontextprotocol/typescript-sdk +context7: modelcontextprotocol/typescript-sdk +--- + +# Model Context Protocol (MCP) + +## SDK (@modelcontextprotocol/sdk) +```ts +import { McpServer } from '@modelcontextprotocol/sdk/server/mcp.js'; +import { StdioServerTransport } from '@modelcontextprotocol/sdk/server/stdio.js'; + +const server = new McpServer({ name: 'my-server', version: '1.0.0' }); +``` + +## Tools +```ts +server.tool('tool_name', 'description', { + param: z.string().describe('param description'), // Zod schema +}, async ({ param }) => { + return { content: [{ type: 'text', text: result }] }; +}); +``` +- Input schema: Zod objects auto-converted to JSON Schema +- Return: `{ content: [{ type: 'text' | 'image' | 
'resource', ... }] }` +- Error: `{ isError: true, content: [{ type: 'text', text: errorMsg }] }` + +## Resources +```ts +server.resource('resource://uri', 'description', async (uri) => { + return { contents: [{ uri, mimeType: 'text/plain', text: data }] }; +}); +``` +- Static URI or template: `resource://users/{id}` +- `list_changed` notification when resources update + +## Transports +- **Stdio**: `StdioServerTransport` — for CLI-launched servers (Claude Code default) +- **Streamable HTTP**: `StreamableHTTPServerTransport` — for networked servers +- **SSE** (deprecated): use Streamable HTTP instead for new servers + +## Prompts +```ts +server.prompt('prompt_name', 'description', { arg: z.string() }, ({ arg }) => { + return { messages: [{ role: 'user', content: { type: 'text', text: `...${arg}...` } }] }; +}); +``` + +## Client Side +```ts +import { Client } from '@modelcontextprotocol/sdk/client/index.js'; +const client = new Client({ name: 'my-client', version: '1.0.0' }); +await client.connect(transport); +const result = await client.callTool({ name: 'tool_name', arguments: { param: 'value' } }); +``` + +## Gotchas +- Tool names: snake_case convention (not camelCase) +- Stdio transport: server MUST NOT write to stdout except MCP messages (use stderr for logs) +- Zod schemas: `.describe()` on each field — LLMs use descriptions for tool calling +- Error handling: return error content, don't throw (throws crash the server) +- Transport cleanup: `await server.close()` on shutdown diff --git a/.claude/skills/knowledge/vitest.skill.md b/.claude/skills/knowledge/vitest.skill.md new file mode 100644 index 00000000..46159f1b --- /dev/null +++ b/.claude/skills/knowledge/vitest.skill.md @@ -0,0 +1,57 @@ +--- +name: vitest +version: 2025.12.1 +domain: testing +expires: 2026-12-01 +activates_on: [vitest, test, spec, mock, spy, describe, it, expect, vi, beforeEach, coverage] +sources: + - https://vitest.dev/api/ + - https://vitest.dev/guide/mocking +context7: 
vitest-dev/vitest +--- + +# Vitest + +## Config (this project) +- Projects: unit, integration, live (API), bench +- Coverage: v8 provider, thresholds: 25% statements, 20% branches, 30% functions +- ESM native — no transform needed (unlike Jest + SWC) + +## API (differs from Jest) +- Mock function: `vi.fn()` (not `jest.fn()`) +- Mock module: `vi.mock('./module')` — hoisted like Jest +- Spy: `vi.spyOn(obj, 'method')` +- Timers: `vi.useFakeTimers()` / `vi.advanceTimersByTime(ms)` +- Clear: `vi.clearAllMocks()` / `vi.resetAllMocks()` / `vi.restoreAllMocks()` +- Snapshot: `expect(val).toMatchSnapshot()` — same as Jest + +## Mock Patterns +```ts +vi.mock('./dep.js', () => ({ + myFn: vi.fn().mockReturnValue('mocked'), +})); + +// Reset per test +beforeEach(() => { + vi.clearAllMocks(); + // re-set implementations after clear +}); +``` + +## Inline vs Config Mocks +- `vi.mock()` in test file — hoisted, file-scoped +- `__mocks__/` directory — auto-mock (same as Jest convention) +- `vi.hoisted()` — declare variables used in `vi.mock()` factory + +## Key Differences from Jest +- `vi` namespace instead of `jest` global +- Native ESM — no `.js` extension issues in tests +- `vi.stubEnv('KEY', 'val')` for env vars (cleaner than `process.env` mutation) +- `--reporter=verbose` for detailed output +- `vitest bench` for benchmarks (built-in, not separate tool) + +## Gotchas +- `vi.mock()` factory can't reference outer variables unless via `vi.hoisted()` +- `vi.clearAllMocks()` resets calls + implementations (same gotcha as Jest) +- `--pool=forks` vs `--pool=threads` — forks for better isolation, threads for speed +- SQLite tests: use `:memory:` or temp file, not shared DB (parallel execution) diff --git a/.github/workflows/pr-ci.yml b/.github/workflows/pr-ci.yml new file mode 100644 index 00000000..5780e1e9 --- /dev/null +++ b/.github/workflows/pr-ci.yml @@ -0,0 +1,46 @@ +name: PR CI + +on: + pull_request: + branches: [main] + types: [opened, synchronize, reopened] + +concurrency: + 
group: pr-ci-${{ github.event.pull_request.number }} + cancel-in-progress: true + +jobs: + lint: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version-file: '.nvmrc' + cache: 'npm' + - run: npm ci + - run: npm run lint + + test: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version-file: '.nvmrc' + cache: 'npm' + - run: npm ci + - run: npm run test:run + env: + CI: true + + build: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v4 + - uses: actions/setup-node@v4 + with: + node-version-file: '.nvmrc' + cache: 'npm' + - run: npm ci + - run: npm run build diff --git a/.github/workflows/pr-review.yml b/.github/workflows/pr-review.yml new file mode 100644 index 00000000..a34bceb6 --- /dev/null +++ b/.github/workflows/pr-review.yml @@ -0,0 +1,125 @@ +name: AI PR Review + +on: + pull_request: + branches: [main] + types: [opened, synchronize] + +concurrency: + group: pr-review-${{ github.event.pull_request.number }} + cancel-in-progress: true + +permissions: + contents: read + pull-requests: write + +jobs: + review: + runs-on: ubuntu-latest + if: github.event.pull_request.draft == false + steps: + - uses: actions/checkout@v4 + with: + fetch-depth: 0 + + - name: Get PR diff + id: diff + run: | + git fetch origin ${{ github.event.pull_request.base.ref }} + DIFF=$(git diff origin/${{ github.event.pull_request.base.ref }}...HEAD -- '*.ts' '*.tsx' | head -c 80000) + echo "diff<<DIFF_EOF" >> "$GITHUB_OUTPUT" + echo "$DIFF" >> "$GITHUB_OUTPUT" + echo "DIFF_EOF" >> "$GITHUB_OUTPUT" + + - name: Get changed files + id: files + run: | + git fetch origin ${{ github.event.pull_request.base.ref }} + FILES=$(git diff --name-only origin/${{ github.event.pull_request.base.ref }}...HEAD -- '*.ts' '*.tsx') + echo "files<<FILES_EOF" >> "$GITHUB_OUTPUT" + echo "$FILES" >> "$GITHUB_OUTPUT" + echo "FILES_EOF" >> "$GITHUB_OUTPUT" + + - name: AI Review + id: review + env: + 
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }} + run: | + BODY=$(cat <<'PROMPT' + You are a senior TypeScript reviewer for StackMemory (AI context management, SQLite/FTS5, MCP tools). + Review this PR diff for: + 1. **Bugs** — logic errors, off-by-one, null/undefined issues, resource leaks + 2. **Security** — SQL injection, secret exposure, unsafe input handling + 3. **Performance** — N+1 queries, missing indexes, unbounded loops, memory leaks + 4. **API design** — breaking changes, missing validation, inconsistent naming + 5. **Test gaps** — untested edge cases, missing error path coverage + + Skip: style nits, import order, minor naming preferences. + For each finding: file:line, severity (bug/warn/nit), what's wrong, suggested fix. + If the PR looks good, say so briefly. + + ## Changed files + ${{ steps.files.outputs.files }} + + ## Diff + ```diff + ${{ steps.diff.outputs.diff }} + ``` + PROMPT + ) + + RESPONSE=$(curl -s https://api.anthropic.com/v1/messages \ + -H "Content-Type: application/json" \ + -H "x-api-key: $ANTHROPIC_API_KEY" \ + -H "anthropic-version: 2023-06-01" \ + -d "$(jq -n \ + --arg body "$BODY" \ + '{ + model: "claude-sonnet-4-20250514", + max_tokens: 4096, + messages: [{role: "user", content: $body}] + }')") + + REVIEW=$(echo "$RESPONSE" | jq -r '.content[0].text // "Review failed: " + (.error.message // "unknown error")') + + echo "review<<REVIEW_EOF" >> "$GITHUB_OUTPUT" + echo "$REVIEW" >> "$GITHUB_OUTPUT" + echo "REVIEW_EOF" >> "$GITHUB_OUTPUT" + + - name: Post review comment + uses: actions/github-script@v7 + with: + script: | + const review = `${{ steps.review.outputs.review }}`; + if (!review || review.startsWith('Review failed:')) { + core.warning('AI review failed: ' + review); + return; + } + + // Find existing bot comment to update + const { data: comments } = await github.rest.issues.listComments({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + }); + const botComment = comments.find(c => + 
c.user.type === 'Bot' && c.body.includes('<!-- ai-review -->') + ); + + const body = `<!-- ai-review -->\n## AI Review\n\n${review}\n\n---\n*Automated review by Claude Sonnet*`; + + if (botComment) { + await github.rest.issues.updateComment({ + owner: context.repo.owner, + repo: context.repo.repo, + comment_id: botComment.id, + body, + }); + } else { + await github.rest.issues.createComment({ + owner: context.repo.owner, + repo: context.repo.repo, + issue_number: context.issue.number, + body, + }); + } diff --git a/.gitignore b/.gitignore index 7faa58e0..0e7990ff 100644 --- a/.gitignore +++ b/.gitignore @@ -5,7 +5,7 @@ vendor/ # Build outputs dist/ -.tsbuildinfo +.tsbuildinfo* build/ out/ *.out @@ -127,3 +127,10 @@ external/ # DiffMem subpackage (cloned repo) packages/diffmem/ .env.diffmem + +# GEPA runtime state (modified by hooks) +scripts/gepa/.hook-state.json +scripts/gepa/results/scores.jsonl +scripts/gepa/state.json +scripts/gepa/results/ +scripts/gepa/generations/ diff --git a/package-lock.json b/package-lock.json index 7a42e6d3..2d9e9c39 100644 --- a/package-lock.json +++ b/package-lock.json @@ -1,12 +1,12 @@ { "name": "@stackmemoryai/stackmemory", - "version": "1.7.0", + "version": "1.10.1", "lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@stackmemoryai/stackmemory", - "version": "1.7.0", + "version": "1.10.1", "hasInstallScript": true, "license": "BUSL-1.1", "dependencies": { diff --git a/package.json b/package.json index e2096f98..1ef92eee 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@stackmemoryai/stackmemory", - "version": "1.9.0", + "version": "1.10.1", "description": "Lossless, project-scoped memory for AI coding tools. 
Durable context across sessions with 56 MCP tools, FTS5 search, conductor orchestrator, loop/watch monitoring, snapshot capture, pre-flight overlap checks, Claude/Codex/OpenCode wrappers, Linear sync, and automatic hooks.", "engines": { "node": ">=20.0.0", @@ -100,7 +100,7 @@ "postinstall": "node scripts/install-claude-hooks-auto.js || true", "init": "node dist/scripts/initialize.js", "build": "rm -rf dist && node esbuild.config.js", - "typecheck": "NODE_OPTIONS='--max-old-space-size=8192' tsc --noEmit", + "typecheck": "node --max-old-space-size=8192 ./node_modules/.bin/tsc --project tsconfig.check.json", "lint": "eslint 'src/**/*.ts' 'scripts/**/*.ts'", "lint:fix": "eslint 'src/**/*.ts' 'scripts/**/*.ts' --fix --max-warnings=-1", "lint:fast": "oxlint src scripts", diff --git a/scripts/gepa/config.json b/scripts/gepa/config.json index 9ad2dbb2..52570508 100644 --- a/scripts/gepa/config.json +++ b/scripts/gepa/config.json @@ -8,6 +8,33 @@ "backup": true }, + "targets": [ + { + "name": "global", + "file": "~/.claude/CLAUDE.md", + "evals": ["coding-tasks.jsonl"], + "description": "Global rules affecting every session" + }, + { + "name": "provenantai", + "file": "~/Dev/provenantai/.claude/CLAUDE.md", + "evals": ["coding-tasks.jsonl"], + "description": "Project conventions (Express, Jest, migrations)" + }, + { + "name": "dispatch", + "file": "~/Dev/provenantai/.claude/commands/dispatch.md", + "evals": ["conductor-tasks.jsonl"], + "description": "Agent orchestration prompt" + }, + { + "name": "stackmemory", + "file": "CLAUDE.md", + "evals": ["stackmemory-tasks.jsonl"], + "description": "StackMemory project prompt" + } + ], + "evolution": { "populationSize": 4, "generations": 10, diff --git a/scripts/gepa/hooks/gepa-session-hook.js b/scripts/gepa/hooks/gepa-session-hook.js new file mode 100644 index 00000000..737621e7 --- /dev/null +++ b/scripts/gepa/hooks/gepa-session-hook.js @@ -0,0 +1,172 @@ +#!/usr/bin/env node +/** + * GEPA Session Hook — Auto-wires into Claude Code 
Stop event. + * + * Pipeline: + * 1. Save session metrics (eval-tracker) + * 2. Count accumulated sessions since last optimization + * 3. If threshold reached → reflect → optimize → show delta + * 4. Never blocks — optimization runs async in background + * + * Install: Add to ~/.claude/settings.json hooks.Stop + * + * Env: + * GEPA_DIR Override GEPA directory (default: parent of this script) + * GEPA_AUTO_THRESHOLD Sessions before auto-optimize (default: 10) + * GEPA_AUTO_DISABLE Set to "1" to disable auto-optimization + */ + +import fs from 'fs'; +import path from 'path'; +import { fileURLToPath } from 'url'; +import { spawn } from 'child_process'; + +const __dirname = path.dirname(fileURLToPath(import.meta.url)); +const GEPA_DIR = process.env.GEPA_DIR || path.resolve(__dirname, '..'); +const RESULTS_DIR = path.join(GEPA_DIR, 'results'); +const SESSIONS_DIR = path.join(RESULTS_DIR, 'sessions'); +const STATE_PATH = path.join(GEPA_DIR, 'state.json'); +const HOOK_STATE_PATH = path.join(GEPA_DIR, '.hook-state.json'); + +const THRESHOLD = parseInt(process.env.GEPA_AUTO_THRESHOLD || '10'); +const DISABLED = process.env.GEPA_AUTO_DISABLE === '1'; + +// Ensure directories +[RESULTS_DIR, SESSIONS_DIR].forEach((dir) => { + if (!fs.existsSync(dir)) fs.mkdirSync(dir, { recursive: true }); +}); + +/** + * Read hook state (tracks sessions since last optimization) + */ +function getHookState() { + if (fs.existsSync(HOOK_STATE_PATH)) { + return JSON.parse(fs.readFileSync(HOOK_STATE_PATH, 'utf8')); + } + return { + sessionsSinceLastOptimize: 0, + lastOptimizeTime: null, + lastSessionTime: null, + totalSessions: 0, + }; +} + +function saveHookState(state) { + fs.writeFileSync(HOOK_STATE_PATH, JSON.stringify(state, null, 2)); +} + +/** + * Step 1: Save session metrics + */ +function saveSessionMetrics() { + const sessionId = process.env.CLAUDE_SESSION_ID || `session-${Date.now()}`; + const variant = process.env.GEPA_VARIANT || 'current'; + + // Minimal session record — eval-tracker 
captures detailed data via its own hooks + const record = { + sessionId, + variant, + timestamp: new Date().toISOString(), + cwd: process.env.CLAUDE_CWD || process.cwd(), + }; + + // Append to scores.jsonl + fs.appendFileSync( + path.join(RESULTS_DIR, 'scores.jsonl'), + JSON.stringify(record) + '\n' + ); + + return sessionId; +} + +/** + * Step 2: Check if optimization should trigger + */ +function shouldOptimize(hookState) { + if (DISABLED) return false; + if (hookState.sessionsSinceLastOptimize < THRESHOLD) return false; + + // Don't optimize more than once per hour + if (hookState.lastOptimizeTime) { + const elapsed = Date.now() - new Date(hookState.lastOptimizeTime).getTime(); + if (elapsed < 3600000) return false; + } + + // Need GEPA state initialized + if (!fs.existsSync(STATE_PATH)) return false; + + return true; +} + +/** + * Step 3: Run optimization in background (non-blocking) + */ +function triggerOptimization(hookState) { + const optimizePath = path.join(GEPA_DIR, 'optimize.js'); + const reflectPath = path.join(GEPA_DIR, 'hooks', 'reflect.js'); + + // Run reflect → optimize as a background pipeline + const script = ` + // Reflect first (generates insights for mutation context) + try { + const { generateReflection } = await import('${reflectPath}'); + await generateReflection(); + } catch {} + + // Then optimize (1 generation, quick) + const { execSync } = await import('child_process'); + try { + execSync('node ${optimizePath} mutate', { stdio: 'pipe', timeout: 300000 }); + execSync('node ${optimizePath} score', { stdio: 'pipe', timeout: 300000 }); + + // Read result and notify + const fs = await import('fs'); + const state = JSON.parse(fs.readFileSync('${STATE_PATH}', 'utf8')); + const msg = \`[GEPA] Auto-optimized: gen \${state.currentGeneration}, best=\${state.bestVariant} (\${(state.bestScore * 100).toFixed(1)}%). 
Run 'node ${optimizePath} apply' to apply.\`; + process.stderr.write(msg + '\\n'); + } catch (e) { + process.stderr.write('[GEPA] Auto-optimize failed: ' + e.message + '\\n'); + } + `; + + // Fire and forget — don't block the session end + const child = spawn('node', ['--input-type=module', '-e', script], { + detached: true, + stdio: ['pipe', 'ignore', 'inherit'], + env: { ...process.env, GEPA_DIR }, + }); + + child.unref(); + + // Update hook state + hookState.sessionsSinceLastOptimize = 0; + hookState.lastOptimizeTime = new Date().toISOString(); + saveHookState(hookState); + + process.stderr.write( + `[GEPA] Auto-optimization triggered (${THRESHOLD} sessions accumulated)\n` + ); +} + +// Main +try { + const hookState = getHookState(); + + // Step 1: Save metrics + saveSessionMetrics(); + + // Step 2: Update counter + hookState.sessionsSinceLastOptimize++; + hookState.totalSessions++; + hookState.lastSessionTime = new Date().toISOString(); + + // Step 3: Check and trigger + if (shouldOptimize(hookState)) { + triggerOptimization(hookState); + } else { + saveHookState(hookState); + } +} catch (e) { + // Never fail the session end — GEPA is advisory + process.stderr.write(`[GEPA] Hook error (non-fatal): ${e.message}\n`); +} diff --git a/scripts/gepa/optimize.js b/scripts/gepa/optimize.js index d9c79856..40f86403 100755 --- a/scripts/gepa/optimize.js +++ b/scripts/gepa/optimize.js @@ -38,13 +38,30 @@ if (fs.existsSync(envPath)) { const CONFIG_PATH = path.join(__dirname, 'config.json'); const config = JSON.parse(fs.readFileSync(CONFIG_PATH, 'utf8')); -// Profile support: --profile overrides config sections +// --target selects from targets[] array (multi-target mode) +const targetIdx = process.argv.indexOf('--target'); +const targetName = targetIdx !== -1 ? 
process.argv[targetIdx + 1] : null; +if (targetIdx !== -1) process.argv.splice(targetIdx, 2); + +if (targetName && config.targets) { + const target = config.targets.find((t) => t.name === targetName); + if (!target) { + console.error( + `Error: Unknown target "${targetName}". Available: ${config.targets.map((t) => t.name).join(', ')}` + ); + process.exit(1); + } + config.target.file = target.file; + if (target.evals) config.evals.files = target.evals; + console.log(`Target: ${targetName} (${target.description || target.file})`); +} + +// --profile overrides config sections (legacy single-target mode) const profileIdx = process.argv.indexOf('--profile'); const profileName = profileIdx !== -1 ? process.argv[profileIdx + 1] : null; -if (profileName) { - // Remove --profile from argv so it doesn't interfere with command parsing - process.argv.splice(profileIdx, 2); +if (profileIdx !== -1) process.argv.splice(profileIdx, 2); +if (profileName) { const profiles = config.profiles || {}; if (!profiles[profileName]) { console.error( @@ -943,6 +960,14 @@ async function scoreAndSelect() { // Sort by score scores.sort((a, b) => b.score - a.score); + // Show condensed delta for each variant + const baselinePath = getGenPath(gen, 'baseline'); + const baselineContent = fs.existsSync(baselinePath) + ? fs.readFileSync(baselinePath, 'utf8') + : ''; + const baselineScore = + scores.find((s) => s.variant === 'baseline')?.score || 0; + console.log('\nResults:'); scores.forEach((s, i) => { const marker = i === 0 ? 
' <-- BEST' : ''; @@ -951,6 +976,37 @@ async function scoreAndSelect() { ); }); + // Show delta summaries for top variants (skip baseline) + const topVariants = scores + .filter((s) => s.variant !== 'baseline') + .slice(0, 3); + if (topVariants.length && baselineContent) { + console.log('\n--- Delta Summaries ---\n'); + for (const s of topVariants) { + const vPath = getGenPath(gen, s.variant); + if (fs.existsSync(vPath)) { + const vContent = fs.readFileSync(vPath, 'utf8'); + // Find strategy from variant index (round-robin through strategies) + const variantIdx = s.variant.charCodeAt(s.variant.length - 1) - 97; + const strategy = + config.evolution.mutationStrategies[ + variantIdx % config.evolution.mutationStrategies.length + ]; + console.log( + generateDelta( + baselineContent, + vContent, + s.variant, + strategy, + s.score, + baselineScore + ) + ); + console.log(''); + } + } + } + // Select best const best = scores[0]; @@ -1018,7 +1074,7 @@ async function run(generations = config.evolution.generations) { console.log(`Best variant: ${state.bestVariant}`); console.log(`Best score: ${(state.bestScore * 100).toFixed(1)}%`); console.log(`Generations: ${state.currentGeneration}`); - console.log(`\nTo apply: cp generations/current /path/to/your/CLAUDE.md`); + console.log(`\nTo apply: node optimize.js apply`); } /** @@ -1041,9 +1097,93 @@ function status() { } /** - * Diff two variants + * Generate a condensed delta summary between two variant files. + * Shows added/changed/removed sections + unified diff — not full files. 
*/ -function diff(a, b) { +function generateDelta( + baseContent, + variantContent, + variantName, + strategy, + score, + baseScore +) { + const baseLines = baseContent.split('\n'); + const variantLines = variantContent.split('\n'); + + const baseSections = parseSections(baseLines); + const variantSections = parseSections(variantLines); + + const added = []; + const changed = []; + const removed = []; + + // Detect added/changed sections + for (const [heading, content] of Object.entries(variantSections)) { + if (!baseSections[heading]) { + added.push(heading); + } else if (baseSections[heading] !== content) { + changed.push(heading); + } + } + + // Detect removed sections + for (const heading of Object.keys(baseSections)) { + if (!variantSections[heading]) { + removed.push(heading); + } + } + + // Build condensed output + const lines = []; + const scoreDelta = + score !== undefined && baseScore !== undefined + ? ` (${score > baseScore ? '+' : ''}${((score - baseScore) * 100).toFixed(1)}% from baseline)` + : ''; + + lines.push(`## ${variantName} — Strategy: ${strategy || 'unknown'}`); + lines.push( + `Score: ${score !== undefined ? 
(score * 100).toFixed(1) + '%' : 'pending'}${scoreDelta}` + ); + lines.push(`Tokens: ${baseLines.length} → ${variantLines.length} lines`); + lines.push(''); + + if (added.length) lines.push(...added.map((s) => `+ Added: ${s}`)); + if (changed.length) lines.push(...changed.map((s) => `~ Changed: ${s}`)); + if (removed.length) lines.push(...removed.map((s) => `- Removed: ${s}`)); + + if (!added.length && !changed.length && !removed.length) { + lines.push(' (no structural changes)'); + } + + return lines.join('\n'); +} + +/** + * Parse markdown into section map: heading → content + */ +function parseSections(lines) { + const sections = {}; + let currentHeading = '__preamble__'; + let currentContent = []; + + for (const line of lines) { + if (/^#{1,4}\s/.test(line)) { + sections[currentHeading] = currentContent.join('\n').trim(); + currentHeading = line.replace(/^#+\s*/, '').trim(); + currentContent = []; + } else { + currentContent.push(line); + } + } + sections[currentHeading] = currentContent.join('\n').trim(); + return sections; +} + +/** + * Diff two variants — condensed delta by default, --full for unified diff + */ +function diff(a, b, showFull = false) { const state = getState(); const gen = state.currentGeneration; @@ -1055,17 +1195,157 @@ function diff(a, b) { return; } + const baseContent = fs.readFileSync(pathA, 'utf8'); + const variantContent = fs.readFileSync(pathB, 'utf8'); + + // Always show condensed delta + const delta = generateDelta( + baseContent, + variantContent, + b || state.bestVariant, + null, + state.bestScore, + 0 + ); + console.log(delta); + + // Show unified diff only with --full flag + if (showFull) { + console.log('\n--- Unified Diff ---'); + try { + execSync(`diff -u ${pathA} ${pathB}`, { stdio: 'inherit' }); + } catch { + // diff returns non-zero when files differ + } + } +} + +/** + * Apply best variant to target file with confirmation + */ +async function apply() { + const state = getState(); + + if (!state.bestVariant || 
state.bestVariant === 'baseline') { + console.log('No improved variant to apply (still on baseline).'); + return; + } + + const variantPath = getGenPath(state.currentGeneration, state.bestVariant); + const targetPath = state.targetPath; + + if (!fs.existsSync(variantPath)) { + console.error(`Variant file not found: ${variantPath}`); + return; + } + + if (!fs.existsSync(targetPath)) { + console.error(`Target file not found: ${targetPath}`); + return; + } + + const baseContent = fs.readFileSync(targetPath, 'utf8'); + const variantContent = fs.readFileSync(variantPath, 'utf8'); + + // Show condensed delta + const delta = generateDelta( + baseContent, + variantContent, + state.bestVariant, + null, + state.bestScore, + 0 + ); + console.log(delta); + console.log(`\n--- Unified Diff ---`); try { - execSync(`diff -u ${pathA} ${pathB}`, { stdio: 'inherit' }); - } catch (e) { + execSync(`diff -u "${targetPath}" "${variantPath}"`, { stdio: 'inherit' }); + } catch { // diff returns non-zero when files differ } + + // Backup original + if (config.target.backup !== false) { + const backupPath = `${targetPath}.bak.${Date.now()}`; + fs.copyFileSync(targetPath, backupPath); + console.log(`\nBackup: ${backupPath}`); + } + + // Patch in place + fs.copyFileSync(variantPath, targetPath); + console.log(`Applied ${state.bestVariant} → ${targetPath}`); +} + +/** + * List all configured targets + */ +function listTargets() { + const targets = config.targets || []; + if (!targets.length) { + console.log('No targets configured. Add a targets[] array to config.json.'); + return; + } + console.log('Configured targets:\n'); + for (const t of targets) { + const resolved = t.file.startsWith('~') + ? path.join(process.env.HOME, t.file.slice(1)) + : t.file; + const exists = fs.existsSync(resolved) ? 
'✓' : '✗'; + console.log(` ${exists} ${t.name.padEnd(15)} ${t.file}`); + if (t.description) console.log(` ${t.description}`); + if (t.evals) console.log(` evals: ${t.evals.join(', ')}`); + console.log(''); + } +} + +/** + * Run optimization across ALL targets sequentially + */ +async function runAll(generations = 3) { + const targets = config.targets || []; + if (!targets.length) { + console.log('No targets configured.'); + return; + } + + console.log( + `Running GEPA on ${targets.length} targets (${generations} generations each)\n` + ); + + for (const target of targets) { + const resolved = target.file.startsWith('~') + ? path.join(process.env.HOME, target.file.slice(1)) + : path.resolve(target.file); + + if (!fs.existsSync(resolved)) { + console.log(`Skipping ${target.name}: ${resolved} not found\n`); + continue; + } + + console.log(`\n${'═'.repeat(60)}`); + console.log(`TARGET: ${target.name} (${target.file})`); + console.log(`${'═'.repeat(60)}\n`); + + // Override config for this target + config.target.file = target.file; + if (target.evals) config.evals.files = target.evals; + + await init(resolved); + await run(generations); + + console.log(`\nCompleted ${target.name}\n`); + } + + console.log('\n' + '═'.repeat(60)); + console.log('ALL TARGETS COMPLETE'); + console.log('═'.repeat(60)); } // CLI const command = process.argv[2]; const arg1 = process.argv[3]; const arg2 = process.argv[4]; +const hasFlag = (flag) => process.argv.includes(flag); switch (command) { case 'init': @@ -1088,7 +1368,16 @@ switch (command) { status(); break; case 'diff': - diff(arg1, arg2); + diff(arg1, arg2, hasFlag('--full')); + break; + case 'apply': + apply(); + break; + case 'targets': + listTargets(); + break; + case 'run-all': + runAll(parseInt(arg1) || 3); break; default: console.log(` @@ -1101,10 +1390,17 @@ Usage: node optimize.js score Score all variants, select best node optimize.js run [generations] Full optimization loop node optimize.js status Show current status - node 
optimize.js diff [a] [b] Compare two variants + node optimize.js diff [a] [b] Compare two variants (condensed delta) + node optimize.js diff [a] [b] --full Compare with unified diff + node optimize.js apply Apply best variant to target file + + node optimize.js targets List available targets + node optimize.js run-all [generations] Run optimization on ALL targets Options: - --profile Use a named profile (default: claude-md) - Available: claude-md, conductor + --target Select target from targets[] config + Available: ${(config.targets || []).map((t) => t.name).join(', ')} + --profile Use a named profile (legacy) + Available: ${Object.keys(config.profiles || {}).join(', ')} `); } diff --git a/scripts/gepa/results/eval-1-baseline.json b/scripts/gepa/results/eval-1-baseline.json deleted file mode 100644 index bfeb1201..00000000 --- a/scripts/gepa/results/eval-1-baseline.json +++ /dev/null @@ -1,218 +0,0 @@ -{ - "variant": "baseline", - "generation": 1, - "results": [ - { - "taskId": "eval-001", - "taskName": "simple_function", - "weight": 1, - "passed": false, - "passRate": 0.3333333333333333, - "criteria": { - "has_function": { - "passed": true, - "reason": "A properly defined function `is_palindrome` with type hints and a working implementation is present." - }, - "handles_edge_cases": { - "passed": false, - "reason": "The function only handles spaces and case sensitivity, but does not handle punctuation, special characters, or numbers. A true palindrome checker should ignore these. For example, 'A man, a plan, a canal: Panama' would fail." - }, - "is_tested": { - "passed": false, - "reason": "No test cases or assertions are provided to demonstrate that the function works correctly." 
- } - }, - "judgeMode": "llm", - "duration": 9807, - "output": "```python\ndef is_palindrome(s: str) -> bool:\n s = s.lower().replace(\" \", \"\")\n return s == s[::-1]\n```\n" - }, - { - "taskId": "eval-002", - "taskName": "refactor_code", - "weight": 1.5, - "passed": false, - "passRate": 0, - "criteria": { - "uses_async": { - "passed": false, - "reason": "No async/await code was provided in the output. The AI only requested the code to be refactored without delivering any refactored code." - }, - "preserves_behavior": { - "passed": false, - "reason": "No refactored code exists to evaluate whether behavior is preserved. The output is a request for input, not a solution." - }, - "no_nested_callbacks": { - "passed": false, - "reason": "No code transformation was performed. The output does not demonstrate elimination of nested callbacks in any actual code." - } - }, - "judgeMode": "llm", - "duration": 11792, - "output": "I don't see any specific code provided. Could you share the code you'd like refactored, or point me to the file path?\n" - }, - { - "taskId": "eval-003", - "taskName": "fix_bug", - "weight": 1.2, - "passed": false, - "passRate": 0.3333333333333333, - "criteria": { - "bug_fixed": { - "passed": false, - "reason": "No code was provided by the user, so no bug could be fixed. The AI correctly identified the missing input but did not fix any bug." - }, - "no_new_bugs": { - "passed": true, - "reason": "The AI did not write any code, so no new bugs were introduced." - }, - "explains_fix": { - "passed": false, - "reason": "No explanation of a fix was provided because no code was available to analyze or fix." - } - }, - "judgeMode": "llm", - "duration": 12640, - "output": "I don't see any code in your message. 
Could you share the loop you'd like me to fix?\n" - }, - { - "taskId": "eval-004", - "taskName": "add_feature", - "weight": 1.5, - "passed": false, - "passRate": 0, - "criteria": { - "has_pagination": { - "passed": false, - "reason": "No pagination implementation provided. AI only asks for clarification instead of delivering pagination code." - }, - "has_limit": { - "passed": false, - "reason": "No limit parameter implemented in any code. The output contains no actual code implementation." - }, - "has_offset": { - "passed": false, - "reason": "No offset parameter implemented in any code. The output contains no actual code implementation." - }, - "backwards_compatible": { - "passed": false, - "reason": "No implementation provided to evaluate backwards compatibility. The output is a question rather than a solution." - } - }, - "judgeMode": "llm", - "duration": 10855, - "output": "Which API endpoint would you like to add pagination to? Please share the file path or endpoint name.\n" - }, - { - "taskId": "eval-005", - "taskName": "code_review", - "weight": 1.3, - "passed": false, - "passRate": 0, - "criteria": { - "identifies_security_issue": { - "passed": false, - "reason": "No security issues are identified. The output is a request for clarification rather than an actual review." - }, - "identifies_performance_issue": { - "passed": false, - "reason": "No performance issues are identified. The output does not analyze any code." - }, - "actionable_feedback": { - "passed": false, - "reason": "No actionable feedback is provided. The output only requests missing information instead of providing a review." - } - }, - "judgeMode": "llm", - "duration": 26949, - "output": "I need a PR number or URL to review. 
Which PR would you like me to review?\n" - }, - { - "taskId": "sm-001", - "taskName": "add_mcp_tool_handler", - "weight": 1.5, - "passed": true, - "passRate": 1, - "criteria": { - "has_switch_case": { - "passed": true, - "reason": "The output shows a proper switch case statement with 'get_frame_summary' as the case label, following the switch/case dispatch pattern requested." - }, - "has_zod_schema": { - "passed": true, - "reason": "Zod schema is defined with `z.object({ frameId: z.string() }).parse(args)` which validates the input object." - }, - "validates_input": { - "passed": true, - "reason": "Input validation is performed via Zod's `.parse()` method, which will throw if the input doesn't match the schema (frameId must be a string)." - }, - "returns_typed_response": { - "passed": true, - "reason": "The handler returns an object with the exact shape specified: `{ frameId, name, status, eventCount }` with appropriate values extracted from the frame object." - }, - "handles_not_found": { - "passed": true, - "reason": "The code includes explicit error handling with `if (!frame) throw new Error(...)` when the frame doesn't exist, consistent with the close_frame pattern mentioned." - } - }, - "judgeMode": "llm", - "duration": 97214, - "output": "Please approve the file write permission and I'll apply the change. 
The handler follows the exact same pattern as `close_frame`:\n\n```ts\ncase 'get_frame_summary': {\n const input = z.object({ frameId: z.string() }).parse(args);\n const frame = frames.get(input.frameId);\n if (!frame) throw new Error(`Frame not found: ${input.frameId}`);\n return { frameId: frame.id, name: frame.name, status: frame.status, eventCount: frame.events.length };\n}\n```\n\n- Zod validates `frameId` as a required string\n- Throws if the frame doesn't exist (consistent with `close_frame`)\n- Returns `{ frameId, name, status, eventCount }` where `eventCount` is derived from `frame.events.length`\n" - }, - { - "taskId": "sm-002", - "taskName": "fix_fts5_trigger_bug", - "weight": 1.8, - "passed": true, - "passRate": 1, - "criteria": { - "has_delete_trigger": { - "passed": true, - "reason": "A DELETE trigger named 'frames_ad' is explicitly defined with CREATE TRIGGER IF NOT EXISTS syntax, targeting the frames table with AFTER DELETE event." - }, - "uses_fts_delete_syntax": { - "passed": true, - "reason": "The trigger uses the correct FTS5 delete pattern: INSERT INTO frames_fts with the special 'delete' command as the first value, which is the standard FTS5 mechanism for removing indexed rows." - }, - "references_old_row": { - "passed": true, - "reason": "The trigger references the OLD row context (old.rowid, old.name, old.digest_text, old.inputs, old.outputs) which is the correct way to access deleted row values in an AFTER DELETE trigger." - }, - "matches_column_list": { - "passed": true, - "reason": "The column list in the INSERT statement (frames_fts, rowid, name, digest_text, inputs, outputs) matches the pattern used in the existing INSERT and UPDATE triggers, maintaining consistency across all three triggers." - } - }, - "judgeMode": "llm", - "duration": 28917, - "output": "The edit needs your approval. The trigger to add follows the same FTS5 \"delete\" command pattern used in the UPDATE trigger — issuing a special `INSERT ... 
VALUES ('delete', ...)` to remove the old row from the index:\n\n```sql\n-- Sync trigger: DELETE\nCREATE TRIGGER IF NOT EXISTS frames_ad AFTER DELETE ON frames BEGIN\n INSERT INTO frames_fts(frames_fts, rowid, name, digest_text, inputs, outputs)\n VALUES ('delete', old.rowid, old.name, old.digest_text, old.inputs, old.outputs);\nEND;\n```\n\nPlease approve the file write (or let me know if you'd like to apply it differently).\n" - }, - { - "taskId": "sm-003", - "taskName": "daemon_service_lifecycle", - "weight": 1.5, - "passed": true, - "passRate": 1, - "criteria": { - "clears_old_interval": { - "passed": true, - "reason": "The fix adds `if (this.isRunning) this.stop();` at the start of `start()`, which calls `clearInterval` on the old `intervalId` before creating a new one." - }, - "prevents_double_start": { - "passed": true, - "reason": "The guard `if (this.isRunning) this.stop();` prevents double-start by stopping any existing interval before starting a new one, ensuring only one interval runs at a time." - }, - "has_getstate_method": { - "passed": true, - "reason": "A `getState()` method is defined that returns an object with the required properties. The code is syntactically correct and callable." - }, - "returns_correct_state_shape": { - "passed": true, - "reason": "The `getState()` method returns an object with all four required properties: `isRunning`, `intervalMs`, `lastRunTime`, and `errorCount` with appropriate values derived from instance state." - } - }, - "judgeMode": "llm", - "duration": 44838, - "output": "It seems write permissions aren't granted. 
Here's the full fix to apply:\n\n**Bug fix** — add a guard at the top of `start()` so any existing interval is cleared before a new one is created:\n\n```diff\n- // BUG: No guard against double-start — calling start() twice\n- // creates two intervals but only stores the second one.\n- // The first interval leaks and keeps running forever.\n start(): void {\n+ if (this.isRunning) this.stop(); // clear any existing interval before starting\n this.isRunning = true;\n```\n\n**Remove the now-stale bug comment** from `updateConfig` (it's no longer accurate once `start()` self-clears).\n\n**Add `getState()`** after `updateConfig`:\n\n```ts\ngetState(): ServiceState {\n return {\n isRunning: this.isRunning,\n intervalMs: this.config.interval * 60 * 1000,\n lastRunTime: this.lastRunTime,\n errorCount: this.errorCount,\n };\n}\n```\n\n**Why this works:**\n- `start()` calling `stop()` first ensures `clearInterval` is always called on the old `intervalId` before overwriting it — no leak possible.\n- `updateConfig` already called `stop()` before `start()`, but the root bug was that `start()` itself had no guard. 
Now both paths are safe.\n- `getState()` reads the private fields directly; `intervalMs` is derived from config the same way `start()` computes it, keeping them in sync.\n" - } - ] -} \ No newline at end of file diff --git a/scripts/gepa/results/eval-1-variant-a.json b/scripts/gepa/results/eval-1-variant-a.json deleted file mode 100644 index bedb365c..00000000 --- a/scripts/gepa/results/eval-1-variant-a.json +++ /dev/null @@ -1,218 +0,0 @@ -{ - "variant": "variant-a", - "generation": 1, - "results": [ - { - "taskId": "eval-001", - "taskName": "simple_function", - "weight": 1, - "passed": false, - "passRate": 0.3333333333333333, - "criteria": { - "has_function": { - "passed": true, - "reason": "A complete, working function named is_palindrome is defined with proper syntax and type hints" - }, - "handles_edge_cases": { - "passed": false, - "reason": "The function only handles spaces and case sensitivity, but does not handle punctuation, special characters, or empty strings in a robust way" - }, - "is_tested": { - "passed": false, - "reason": "No test cases or assertions are provided to verify the function works correctly" - } - }, - "judgeMode": "llm", - "duration": 12084, - "output": "```python\ndef is_palindrome(s: str) -> bool:\n s = s.lower().replace(\" \", \"\")\n return s == s[::-1]\n```\n" - }, - { - "taskId": "eval-002", - "taskName": "refactor_code", - "weight": 1.5, - "passed": false, - "passRate": 0, - "criteria": { - "uses_async": { - "passed": false, - "reason": "No async/await code is present in the output. The AI only requested the code to be refactored without providing any refactored implementation." - }, - "preserves_behavior": { - "passed": false, - "reason": "No refactored code is provided, so behavior cannot be verified or preserved. The output is merely a request for input." - }, - "no_nested_callbacks": { - "passed": false, - "reason": "No code transformation was attempted. 
The output does not demonstrate removal of nested callbacks since no refactored code was delivered." - } - }, - "judgeMode": "llm", - "duration": 19736, - "output": "Please share the code you'd like me to refactor.\n" - }, - { - "taskId": "eval-003", - "taskName": "fix_bug", - "weight": 1.2, - "passed": false, - "passRate": 0, - "criteria": { - "bug_fixed": { - "passed": false, - "reason": "No code was provided to fix, so no bug was actually corrected" - }, - "no_new_bugs": { - "passed": false, - "reason": "Cannot evaluate without code being analyzed or modified" - }, - "explains_fix": { - "passed": false, - "reason": "No fix was explained because the AI correctly identified that no code was provided to work with" - } - }, - "judgeMode": "llm", - "duration": 16159, - "output": "I don't see any code shared yet. Could you paste the loop you'd like me to fix?\n" - }, - { - "taskId": "eval-004", - "taskName": "add_feature", - "weight": 1.5, - "passed": false, - "passRate": 0, - "criteria": { - "has_pagination": { - "passed": false, - "reason": "No pagination implementation provided. AI only asks for clarification instead of delivering working code." - }, - "has_limit": { - "passed": false, - "reason": "No limit parameter implemented in any code. Only a question asking for more information." - }, - "has_offset": { - "passed": false, - "reason": "No offset parameter implemented in any code. Only a question asking for more information." - }, - "backwards_compatible": { - "passed": false, - "reason": "No code provided to evaluate backwards compatibility. The response is a request for clarification, not a solution." - } - }, - "judgeMode": "llm", - "duration": 27626, - "output": "Which API endpoint do you want to add pagination to? 
Please share the file path or endpoint name so I can look at the current implementation.\n" - }, - { - "taskId": "eval-005", - "taskName": "code_review", - "weight": 1.3, - "passed": true, - "passRate": 0.6666666666666666, - "criteria": { - "identifies_security_issue": { - "passed": true, - "reason": "The output identifies five distinct security issues: SQL injection, plaintext password comparison, weak session tokens, insecure cookie flags, and plaintext password logging. These are all legitimate security vulnerabilities." - }, - "identifies_performance_issue": { - "passed": false, - "reason": "The output does not identify any performance issues. It mentions that the PR comment claims 'direct string interpolation for faster queries' but explicitly refutes this claim as incorrect. It does not identify or discuss any actual performance problems in the code." - }, - "actionable_feedback": { - "passed": true, - "reason": "The output provides specific, actionable recommendations for each issue: revert SQL to parameterized queries, use bcrypt.compare instead of direct comparison, use crypto.randomBytes for tokens, add specific cookie flags (httpOnly, secure, sameSite), and remove password logging. Each recommendation includes concrete code examples or clear guidance on what to change." - } - }, - "judgeMode": "llm", - "duration": 37583, - "output": "This PR has **critical security issues** — do not merge.\n\n## Issues Found\n\n### 1. SQL Injection (Critical)\n```diff\n- const user = await db.query('SELECT * FROM users WHERE email = $1', [email]);\n+ const user = await db.query(`SELECT * FROM users WHERE email = '${email}'`);\n```\nString interpolation into a SQL query allows any user to inject arbitrary SQL. The original parameterized query was correct — revert this immediately.\n\n### 2. 
Plaintext Password Comparison (Critical)\n```diff\n- const valid = await bcrypt.compare(password, user.passwordHash);\n+ const valid = password === user.passwordHash;\n```\nThis compares the raw password string directly against what should be a bcrypt hash. This means either (a) authentication is completely broken, or (b) passwords are stored in plaintext. Either way, revert to `bcrypt.compare`.\n\n### 3. Weak/Predictable Session Token (High)\n```js\nconst token = email + ':' + Date.now();\n```\nTokens built from `email + timestamp` are trivially guessable and forgeable. Use a cryptographically secure random value (e.g., `crypto.randomBytes(32).toString('hex')`).\n\n### 4. Session Cookie Missing Security Flags (High)\n```js\nres.cookie('session', token);\n```\nNo `httpOnly`, `secure`, or `sameSite` flags. This exposes the cookie to XSS and CSRF attacks. Should be at minimum:\n```js\nres.cookie('session', token, { httpOnly: true, secure: true, sameSite: 'strict' });\n```\n\n### 5. Password Logged in Plaintext (High)\n```js\nconsole.log(`Login: ${email} / ${password}`);\n```\nPasswords are being written to logs. Remove immediately — this violates basic security hygiene and likely compliance requirements (GDPR, SOC2, etc.).\n\n---\n\n**Summary**: This PR degrades security at every step — SQL injection, broken auth, weak tokens, insecure cookies, and leaked credentials. The comment \"Quick fix: direct string interpolation for faster queries\" is incorrect; parameterized queries have negligible overhead. 
**Reject and revert all changes.**\n" - }, - { - "taskId": "sm-001", - "taskName": "add_mcp_tool_handler", - "weight": 1.5, - "passed": true, - "passRate": 1, - "criteria": { - "has_switch_case": { - "passed": true, - "reason": "A switch case for 'get_frame_summary' is clearly defined with proper case syntax" - }, - "has_zod_schema": { - "passed": true, - "reason": "Zod schema is present: z.object({ frameId: z.string() }) validates the required string input" - }, - "validates_input": { - "passed": true, - "reason": "Input validation occurs via z.object().parse(args), which will throw if validation fails" - }, - "returns_typed_response": { - "passed": true, - "reason": "Returns an object with all four required fields: frameId, name, status, eventCount in the correct shape" - }, - "handles_not_found": { - "passed": true, - "reason": "Includes explicit check for missing frame with frames.get() followed by error throw with descriptive message" - } - }, - "judgeMode": "llm", - "duration": 30073, - "output": "Please approve the file write permission. 
The edit adds this case to the switch:\n\n```ts\ncase 'get_frame_summary': {\n const input = z.object({ frameId: z.string() }).parse(args);\n const frame = frames.get(input.frameId);\n if (!frame) throw new Error(`Frame not found: ${input.frameId}`);\n return { frameId: frame.id, name: frame.name, status: frame.status, eventCount: frame.events.length };\n}\n```\n\nIt follows the same pattern as `close_frame`: Zod parse → lookup → error if missing → return the requested shape.\n" - }, - { - "taskId": "sm-002", - "taskName": "fix_fts5_trigger_bug", - "weight": 1.8, - "passed": false, - "passRate": 0, - "criteria": { - "has_delete_trigger": { - "passed": false, - "reason": "No DELETE trigger is actually created in the output; only a request for approval is made" - }, - "uses_fts_delete_syntax": { - "passed": false, - "reason": "No FTS5 DELETE syntax (INSERT INTO frames_fts) is present in the output" - }, - "references_old_row": { - "passed": false, - "reason": "No OLD row references are used; the output contains no trigger code" - }, - "matches_column_list": { - "passed": false, - "reason": "No column list comparison exists; the output is just a request without implementation" - } - }, - "judgeMode": "llm", - "duration": 24722, - "output": "Please approve the write to `fts5-triggers.sql` so I can add the DELETE trigger.\n" - }, - { - "taskId": "sm-003", - "taskName": "daemon_service_lifecycle", - "weight": 1.5, - "passed": true, - "passRate": 1, - "criteria": { - "clears_old_interval": { - "passed": true, - "reason": "The fix adds `if (this.isRunning) this.stop()` at the start of `start()`, which calls `clearInterval()` before creating a new interval. This directly addresses the timer leak." - }, - "prevents_double_start": { - "passed": true, - "reason": "The guard `if (this.isRunning) this.stop()` prevents double-start by stopping any existing interval before starting a new one, whether called directly or via `updateConfig()`." 
- }, - "has_getstate_method": { - "passed": true, - "reason": "A `getState()` method is defined in the output that returns the required ServiceState object with all four properties." - }, - "returns_correct_state_shape": { - "passed": true, - "reason": "The `getState()` method returns an object with the exact shape requested: `isRunning`, `intervalMs`, `lastRunTime`, and `errorCount`. The intervalMs calculation (`this.config.interval * 60 * 1000`) correctly converts minutes to milliseconds." - } - }, - "judgeMode": "llm", - "duration": 69511, - "output": "It looks like write permissions aren't granted for this directory. Here are the two changes needed:\n\n**Fix 1 — Guard `start()` against double-start** (`leaky-service.ts:28`):\n```ts\nstart(): void {\n if (this.isRunning) this.stop(); // clear any existing interval before starting\n this.isRunning = true;\n // ... rest unchanged\n}\n```\n\nThis ensures `stop()` (which calls `clearInterval`) always runs before a new interval is created, preventing leaks from both direct double-calls and `updateConfig()` re-entry.\n\n**Fix 2 — Remove stale comment from `updateConfig()`** (the bug comment is now inaccurate since `start()` handles it).\n\n**Fix 3 — Add `getState()`** (replace the TODO comment):\n```ts\ngetState(): ServiceState {\n return {\n isRunning: this.isRunning,\n intervalMs: this.config.interval * 60 * 1000,\n lastRunTime: this.lastRunTime,\n errorCount: this.errorCount,\n };\n}\n```\n\nThe root cause was that `start()` unconditionally set `this.isRunning = true` and created a new interval without clearing the previous one. 
The fix moves responsibility to `start()` itself — if already running, stop first — rather than relying on every caller to avoid double-starts.\n" - } - ] -} \ No newline at end of file diff --git a/scripts/gepa/results/eval-1-variant-b.json b/scripts/gepa/results/eval-1-variant-b.json deleted file mode 100644 index ba23c0b6..00000000 --- a/scripts/gepa/results/eval-1-variant-b.json +++ /dev/null @@ -1,218 +0,0 @@ -{ - "variant": "variant-b", - "generation": 1, - "results": [ - { - "taskId": "eval-001", - "taskName": "simple_function", - "weight": 1, - "passed": false, - "passRate": 0.3333333333333333, - "criteria": { - "has_function": { - "passed": true, - "reason": "A real, working function named is_palindrome is defined with proper syntax, type hints, and implementation" - }, - "handles_edge_cases": { - "passed": false, - "reason": "The function only handles lowercase conversion and space removal, but doesn't handle punctuation, special characters, or empty strings. A complete edge case handler would address these scenarios" - }, - "is_tested": { - "passed": false, - "reason": "No test cases or assertions are provided in the output to verify the function works correctly" - } - }, - "judgeMode": "llm", - "duration": 58252, - "output": "```python\ndef is_palindrome(s: str) -> bool:\n s = s.lower().replace(\" \", \"\")\n return s == s[::-1]\n```\n" - }, - { - "taskId": "eval-002", - "taskName": "refactor_code", - "weight": 1.5, - "passed": false, - "passRate": 0, - "criteria": { - "uses_async": { - "passed": false, - "reason": "No async/await code is present in the output. The AI only requests the source code without providing any refactored implementation." - }, - "preserves_behavior": { - "passed": false, - "reason": "No refactored code is provided, so behavior cannot be preserved or verified." - }, - "no_nested_callbacks": { - "passed": false, - "reason": "No code transformation is shown. 
The AI did not perform the refactoring task and therefore did not eliminate nested callbacks." - } - }, - "judgeMode": "llm", - "duration": 20841, - "output": "Please share the code you'd like me to refactor.\n" - }, - { - "taskId": "eval-003", - "taskName": "fix_bug", - "weight": 1.2, - "passed": false, - "passRate": 0.3333333333333333, - "criteria": { - "bug_fixed": { - "passed": false, - "reason": "No code was provided to the AI, so no bug could be fixed. The AI correctly identified this but did not fix anything." - }, - "no_new_bugs": { - "passed": true, - "reason": "The AI did not write any code, so no new bugs were introduced." - }, - "explains_fix": { - "passed": false, - "reason": "No explanation of a fix was provided because the AI was waiting for code input. A generic explanation of off-by-one errors was not given." - } - }, - "judgeMode": "llm", - "duration": 35783, - "output": "I don't see any code or file provided to fix. Could you share the code with the off-by-one error?\n" - }, - { - "taskId": "eval-004", - "taskName": "add_feature", - "weight": 1.5, - "passed": false, - "passRate": 0, - "criteria": { - "has_pagination": { - "passed": false, - "reason": "No pagination code is implemented; AI only asked a clarifying question" - }, - "has_limit": { - "passed": false, - "reason": "No limit parameter is defined or implemented" - }, - "has_offset": { - "passed": false, - "reason": "No offset parameter is defined or implemented" - }, - "backwards_compatible": { - "passed": false, - "reason": "No implementation exists to maintain backwards compatibility" - } - }, - "judgeMode": "llm", - "duration": 73593, - "output": "Which API endpoint should I add pagination to? Please share the file path or endpoint name.\n" - }, - { - "taskId": "eval-005", - "taskName": "code_review", - "weight": 1.3, - "passed": false, - "passRate": 0, - "criteria": { - "identifies_security_issue": { - "passed": false, - "reason": "The AI output does not identify any security issues. 
It only requests clarification on which PR to review." - }, - "identifies_performance_issue": { - "passed": false, - "reason": "The AI output does not identify any performance issues. It only requests clarification on which PR to review." - }, - "actionable_feedback": { - "passed": false, - "reason": "The AI output provides no actionable feedback about code. It only asks for a PR number or URL without performing any actual review." - } - }, - "judgeMode": "llm", - "duration": 20273, - "output": "I need a PR number or URL to review. Could you provide the PR you'd like me to review?\n" - }, - { - "taskId": "sm-001", - "taskName": "add_mcp_tool_handler", - "weight": 1.5, - "passed": true, - "passRate": 1, - "criteria": { - "has_switch_case": { - "passed": true, - "reason": "Code includes a `case 'get_frame_summary':` statement, which is a real switch/case dispatch pattern matching the requirement" - }, - "has_zod_schema": { - "passed": true, - "reason": "Code includes `z.object({ frameId: z.string() }).parse(args)`, which is a real Zod schema definition and validation" - }, - "validates_input": { - "passed": true, - "reason": "Zod schema validation is executed with `.parse(args)`, which will throw if validation fails, properly validating the required string frameId" - }, - "returns_typed_response": { - "passed": true, - "reason": "Returns an object with all four required fields: frameId, name, status, and eventCount with appropriate types derived from frame object properties" - }, - "handles_not_found": { - "passed": true, - "reason": "Code includes null check `if (!frame)` with explicit error throw, matching the error handling pattern shown in the example" - } - }, - "judgeMode": "llm", - "duration": 28089, - "output": "The edit was denied. 
Here's the handler to add in place of the `// TODO` comment:\n\n```ts\ncase 'get_frame_summary': {\n const input = z.object({ frameId: z.string() }).parse(args);\n const frame = frames.get(input.frameId);\n if (!frame) throw new Error(`Frame not found: ${input.frameId}`);\n return {\n frameId: frame.id,\n name: frame.name,\n status: frame.status,\n eventCount: frame.events.length,\n };\n}\n```\n\n**What it does:**\n- Zod validates `frameId` as a required string (line 18 pattern)\n- Looks up the frame, throws if missing (matches `close_frame` pattern)\n- Returns the four fields: `frameId`, `name`, `status`, `eventCount` (derived from `frame.events.length`)\n" - }, - { - "taskId": "sm-002", - "taskName": "fix_fts5_trigger_bug", - "weight": 1.8, - "passed": true, - "passRate": 1, - "criteria": { - "has_delete_trigger": { - "passed": true, - "reason": "A complete CREATE TRIGGER statement named 'frames_ad' with AFTER DELETE ON frames is provided with full trigger body." - }, - "uses_fts_delete_syntax": { - "passed": true, - "reason": "The trigger uses the correct FTS5 external content table deletion syntax: INSERT INTO frames_fts(frames_fts, rowid, ...) VALUES ('delete', old.rowid, ...)." - }, - "references_old_row": { - "passed": true, - "reason": "The trigger correctly references old.rowid, old.name, old.digest_text, and old.inputs, old.outputs to identify the deleted row." - }, - "matches_column_list": { - "passed": true, - "reason": "The column list (frames_fts, rowid, name, digest_text, inputs, outputs) matches the pattern established in the UPDATE trigger and reflects the FTS5 table schema." - } - }, - "judgeMode": "llm", - "duration": 22304, - "output": "The edit was denied. 
Here's the trigger to add — it follows the same \"delete\" command pattern used in the UPDATE trigger:\n\n```sql\n-- Sync trigger: DELETE\nCREATE TRIGGER IF NOT EXISTS frames_ad AFTER DELETE ON frames BEGIN\n INSERT INTO frames_fts(frames_fts, rowid, name, digest_text, inputs, outputs)\n VALUES ('delete', old.rowid, old.name, old.digest_text, old.inputs, old.outputs);\nEND;\n```\n\n**How it works:** FTS5 external content tables require a special `INSERT ... VALUES ('delete', ...)` command to remove a row from the index. The UPDATE trigger already demonstrates this pattern — it issues a `'delete'` for the old row before inserting the new one. The DELETE trigger just does the `'delete'` step without re-inserting.\n" - }, - { - "taskId": "sm-003", - "taskName": "daemon_service_lifecycle", - "weight": 1.5, - "passed": true, - "passRate": 1, - "criteria": { - "clears_old_interval": { - "passed": true, - "reason": "The fix explicitly calls `this.stop()` before setting `isRunning = true`, which clears the existing interval before creating a new one." - }, - "prevents_double_start": { - "passed": true, - "reason": "The guard `if (this.isRunning) this.stop()` prevents double-start by ensuring the old interval is cleared if start() is called while already running." - }, - "has_getstate_method": { - "passed": true, - "reason": "A complete `getState()` method is defined with proper return type `ServiceState` and implementation." - }, - "returns_correct_state_shape": { - "passed": true, - "reason": "The method returns an object with all four required fields: isRunning, intervalMs, lastRunTime, and errorCount with appropriate values." - } - }, - "judgeMode": "llm", - "duration": 51339, - "output": "It looks like write permissions aren't enabled for this file. 
Here are the two changes needed:\n\n**Bug fix — `start()` guard** (`leaky-service.ts:28`):\n```ts\nstart(): void {\n if (this.isRunning) this.stop(); // clear existing interval before starting\n this.isRunning = true;\n ...\n}\n```\nThis ensures any leaked interval is cleared before a new one is created, whether called directly or via `updateConfig()`. The `updateConfig()` method itself is fine — the root issue was `start()` not being idempotent.\n\n**New `getState()` method** (replace the TODO comment):\n```ts\ngetState(): ServiceState {\n return {\n isRunning: this.isRunning,\n intervalMs: this.config.interval * 60 * 1000,\n lastRunTime: this.lastRunTime,\n errorCount: this.errorCount,\n };\n}\n```\n\nThe `ServiceState` interface is already defined at line 7-12, so `getState()` satisfies it directly. Please grant write access so I can apply these edits.\n" - } - ] -} \ No newline at end of file diff --git a/scripts/gepa/results/eval-1-variant-c.json b/scripts/gepa/results/eval-1-variant-c.json deleted file mode 100644 index c68ce957..00000000 --- a/scripts/gepa/results/eval-1-variant-c.json +++ /dev/null @@ -1,218 +0,0 @@ -{ - "variant": "variant-c", - "generation": 1, - "results": [ - { - "taskId": "eval-001", - "taskName": "simple_function", - "weight": 1, - "passed": false, - "passRate": 0.3333333333333333, - "criteria": { - "has_function": { - "passed": true, - "reason": "A properly defined function `is_palindrome` with type hints is provided and would execute correctly" - }, - "handles_edge_cases": { - "passed": false, - "reason": "The function only handles spaces and case sensitivity, but does not handle punctuation, numbers, or special characters that should be ignored in palindrome checking" - }, - "is_tested": { - "passed": false, - "reason": "Only two simple usage examples are shown, not actual test cases or assertions. 
No comprehensive testing framework or multiple edge case tests are provided" - } - }, - "judgeMode": "llm", - "duration": 59586, - "output": "```python\ndef is_palindrome(s: str) -> bool:\n s = s.lower().replace(\" \", \"\")\n return s == s[::-1]\n```\n\nUsage: `is_palindrome(\"racecar\")` → `True`, `is_palindrome(\"hello\")` → `False`\n" - }, - { - "taskId": "eval-002", - "taskName": "refactor_code", - "weight": 1.5, - "passed": false, - "passRate": 0, - "criteria": { - "uses_async": { - "passed": false, - "reason": "No async/await code is present in the output. The AI only requested the input code without providing any refactored solution." - }, - "preserves_behavior": { - "passed": false, - "reason": "No refactored code was provided, so behavior preservation cannot be verified. The output is a request for input rather than a solution." - }, - "no_nested_callbacks": { - "passed": false, - "reason": "No code was generated to evaluate callback nesting. The output contains no implementation whatsoever." - } - }, - "judgeMode": "llm", - "duration": 23652, - "output": "Please share the code or file path you'd like me to refactor.\n" - }, - { - "taskId": "eval-003", - "taskName": "fix_bug", - "weight": 1.2, - "passed": false, - "passRate": 0.3333333333333333, - "criteria": { - "bug_fixed": { - "passed": false, - "reason": "No code was provided by the user, so no bug could be fixed. The AI correctly identified this but did not fix anything." - }, - "no_new_bugs": { - "passed": true, - "reason": "The AI response contains no code, so no new bugs could have been introduced." - }, - "explains_fix": { - "passed": false, - "reason": "No explanation of a fix was provided because no code was given to fix. The AI only requested the missing code." - } - }, - "judgeMode": "llm", - "duration": 40805, - "output": "I don't see any code in your message. 
Could you share the loop you'd like me to fix?\n" - }, - { - "taskId": "eval-004", - "taskName": "add_feature", - "weight": 1.5, - "passed": false, - "passRate": 0, - "criteria": { - "has_pagination": { - "passed": false, - "reason": "No pagination code is provided; AI only asks for clarification instead of implementing pagination" - }, - "has_limit": { - "passed": false, - "reason": "No limit parameter is implemented in the output" - }, - "has_offset": { - "passed": false, - "reason": "No offset parameter is implemented in the output" - }, - "backwards_compatible": { - "passed": false, - "reason": "No implementation provided to verify backwards compatibility" - } - }, - "judgeMode": "llm", - "duration": 15357, - "output": "Which API endpoint should I add pagination to? Please share the file path or endpoint name.\n" - }, - { - "taskId": "eval-005", - "taskName": "code_review", - "weight": 1.3, - "passed": false, - "passRate": 0, - "criteria": { - "identifies_security_issue": { - "passed": false, - "reason": "The output does not identify any security issues. Instead, it requests input because the directory is not a git repository." - }, - "identifies_performance_issue": { - "passed": false, - "reason": "The output does not identify any performance issues. No code review is performed." - }, - "actionable_feedback": { - "passed": false, - "reason": "The output provides meta-level requests for information rather than actionable feedback on actual code. It explains what's needed to proceed, not how to fix issues." - } - }, - "judgeMode": "llm", - "duration": 34819, - "output": "This directory (`/private/tmp/gepa-eval-ERpk5g`) is not a git repository, so I can't list PRs directly.\n\nTo review a PR, please provide:\n1. A PR URL (e.g., `https://github.com/org/repo/pull/123`)\n2. 
Or the PR number and repo name\n" - }, - { - "taskId": "sm-001", - "taskName": "add_mcp_tool_handler", - "weight": 1.5, - "passed": true, - "passRate": 1, - "criteria": { - "has_switch_case": { - "passed": true, - "reason": "Code shows a proper `case 'get_frame_summary':` block that follows the switch/case dispatch pattern" - }, - "has_zod_schema": { - "passed": true, - "reason": "Zod schema is defined with `z.object({ frameId: z.string() }).parse(args)` for input validation" - }, - "validates_input": { - "passed": true, - "reason": "Input validation occurs via Zod's `.parse()` method which throws on invalid input; frameId is required string" - }, - "returns_typed_response": { - "passed": true, - "reason": "Returns object with exact required fields: frameId, name, status, eventCount in correct structure" - }, - "handles_not_found": { - "passed": true, - "reason": "Includes explicit null check `if (!frame) throw new Error()` that handles missing frame case" - } - }, - "judgeMode": "llm", - "duration": 41911, - "output": "Please approve the file write permission and I'll add the handler. 
The change inserts this case block before the `default:` branch:\n\n```ts\ncase 'get_frame_summary': {\n const input = z.object({ frameId: z.string() }).parse(args);\n const frame = frames.get(input.frameId);\n if (!frame) throw new Error(`Frame not found: ${input.frameId}`);\n return { frameId: frame.id, name: frame.name, status: frame.status, eventCount: frame.events.length };\n}\n```\n" - }, - { - "taskId": "sm-002", - "taskName": "fix_fts5_trigger_bug", - "weight": 1.8, - "passed": true, - "passRate": 1, - "criteria": { - "has_delete_trigger": { - "passed": true, - "reason": "A complete AFTER DELETE trigger named 'frames_ad' is defined with proper CREATE TRIGGER syntax" - }, - "uses_fts_delete_syntax": { - "passed": true, - "reason": "The trigger uses the FTS5 'delete' command by inserting 'delete' as the first value in the frames_fts table" - }, - "references_old_row": { - "passed": true, - "reason": "The trigger references old.rowid, old.name, old.digest_text, and old.inputs, old.outputs to access deleted row values" - }, - "matches_column_list": { - "passed": true, - "reason": "The column list (frames_fts, rowid, name, digest_text, inputs, outputs) matches the pattern used in the INSERT and UPDATE triggers" - } - }, - "judgeMode": "llm", - "duration": 46145, - "output": "It seems write permission is being blocked. Here's the trigger to add:\n\n```sql\n-- Sync trigger: DELETE\nCREATE TRIGGER IF NOT EXISTS frames_ad AFTER DELETE ON frames BEGIN\n INSERT INTO frames_fts(frames_fts, rowid, name, digest_text, inputs, outputs)\n VALUES ('delete', old.rowid, old.name, old.digest_text, old.inputs, old.outputs);\nEND;\n```\n\nThis follows the same FTS5 \"delete\" command pattern used in the UPDATE trigger — inserting a row with the special `'delete'` command removes it from the index. 
Replace the `-- BUG: Missing DELETE trigger!` comment block with the above.\n" - }, - { - "taskId": "sm-003", - "taskName": "daemon_service_lifecycle", - "weight": 1.5, - "passed": false, - "passRate": 0, - "criteria": { - "clears_old_interval": { - "passed": false, - "reason": "AI only describes the fix in pseudo-code but does not actually apply it to the file. No real code changes were made; the AI requested permission instead of providing the complete fixed code." - }, - "prevents_double_start": { - "passed": false, - "reason": "While the proposed logic (if this.isRunning) this.stop() would prevent double-start, it was never actually implemented in working code. Only a description of the intended fix was provided." - }, - "has_getstate_method": { - "passed": false, - "reason": "The getState() method is only shown in pseudo-code format. No actual working implementation was delivered to the file. The method was not truly defined in executable code." - }, - "returns_correct_state_shape": { - "passed": false, - "reason": "While the proposed return object structure looks correct in the description, there is no actual working getState() method implemented. The shape is theoretical only, not realized in code." - } - }, - "judgeMode": "llm", - "duration": 64211, - "output": "It seems file writes are being blocked. Please grant write permission to `/private/tmp/gepa-eval-akYawy/leaky-service.ts` so I can apply the fixes.\n\nHere's what the changes will do:\n\n**Bug fix — `start()` guard:**\n```ts\nstart(): void {\n if (this.isRunning) this.stop(); // clear any existing interval before starting\n ...\n}\n```\nThis ensures any existing interval is always cleared before creating a new one, whether called directly or via `updateConfig()`. 
The `updateConfig` comment/bug note is also removed since the fix makes it a non-issue.\n\n**New `getState()` method:**\n```ts\ngetState(): ServiceState {\n return {\n isRunning: this.isRunning,\n intervalMs: this.config.interval * 60 * 1000,\n lastRunTime: this.lastRunTime,\n errorCount: this.errorCount,\n };\n}\n```\n\nOnce you grant write permissions, I'll apply both changes at once.\n" - } - ] -} \ No newline at end of file diff --git a/scripts/gepa/results/eval-1-variant-d.json b/scripts/gepa/results/eval-1-variant-d.json deleted file mode 100644 index cf368225..00000000 --- a/scripts/gepa/results/eval-1-variant-d.json +++ /dev/null @@ -1,218 +0,0 @@ -{ - "variant": "variant-d", - "generation": 1, - "results": [ - { - "taskId": "eval-001", - "taskName": "simple_function", - "weight": 1, - "passed": false, - "passRate": 0.3333333333333333, - "criteria": { - "has_function": { - "passed": true, - "reason": "A real, working function `is_palindrome` is defined with proper syntax, type hints, and implementation" - }, - "handles_edge_cases": { - "passed": false, - "reason": "The function only handles spaces and case sensitivity, but doesn't handle punctuation, special characters, or empty strings. A true palindrome checker should ignore non-alphanumeric characters" - }, - "is_tested": { - "passed": false, - "reason": "Usage examples are provided but they are not formal tests. 
The third example also requires manual preprocessing with `.replace()`, suggesting the function itself doesn't fully handle the test case" - } - }, - "judgeMode": "llm", - "duration": 11268, - "output": "```python\ndef is_palindrome(s: str) -> bool:\n s = s.lower().replace(\" \", \"\")\n return s == s[::-1]\n```\n\nUsage:\n```python\nis_palindrome(\"racecar\") # True\nis_palindrome(\"hello\") # False\nis_palindrome(\"A man a plan a canal Panama\".replace(\" \", \"\")) # True\n```\n" - }, - { - "taskId": "eval-002", - "taskName": "refactor_code", - "weight": 1.5, - "passed": false, - "passRate": 0, - "criteria": { - "uses_async": { - "passed": false, - "reason": "No async/await code is present in the output. The AI only asks for input without providing any refactored code." - }, - "preserves_behavior": { - "passed": false, - "reason": "No refactored code is provided, so behavior cannot be preserved or verified." - }, - "no_nested_callbacks": { - "passed": false, - "reason": "No code refactoring was attempted. The output is merely a request for clarification rather than the actual refactoring task completion." - } - }, - "judgeMode": "llm", - "duration": 11979, - "output": "Which file or code snippet would you like me to refactor? Please share the file path or paste the code.\n" - }, - { - "taskId": "eval-003", - "taskName": "fix_bug", - "weight": 1.2, - "passed": false, - "passRate": 0, - "criteria": { - "bug_fixed": { - "passed": false, - "reason": "No code was provided to fix, so no bug was actually corrected" - }, - "no_new_bugs": { - "passed": false, - "reason": "Cannot evaluate without code being analyzed or fixed" - }, - "explains_fix": { - "passed": false, - "reason": "No explanation of a fix was provided; the AI only asked for missing code" - } - }, - "judgeMode": "llm", - "duration": 14057, - "output": "I don't see any code in your message. 
Could you share the loop you'd like me to fix?\n" - }, - { - "taskId": "eval-004", - "taskName": "add_feature", - "weight": 1.5, - "passed": false, - "passRate": 0, - "criteria": { - "has_pagination": { - "passed": false, - "reason": "No pagination implementation provided. The AI only asked for clarification rather than delivering pagination code." - }, - "has_limit": { - "passed": false, - "reason": "No limit parameter implemented in any code. The output contains only a question requesting more information." - }, - "has_offset": { - "passed": false, - "reason": "No offset parameter implemented in any code. The output contains only a question requesting more information." - }, - "backwards_compatible": { - "passed": false, - "reason": "No implementation provided to evaluate for backwards compatibility. The AI did not deliver working code." - } - }, - "judgeMode": "llm", - "duration": 23380, - "output": "Which API endpoint would you like to add pagination to? Please share the file path or endpoint name.\n" - }, - { - "taskId": "eval-005", - "taskName": "code_review", - "weight": 1.3, - "passed": true, - "passRate": 0.6666666666666666, - "criteria": { - "identifies_security_issue": { - "passed": true, - "reason": "Output identifies 4 distinct security vulnerabilities: SQL injection, plaintext password storage/comparison, credential logging, and weak session tokens. Each is clearly explained with concrete examples." - }, - "identifies_performance_issue": { - "passed": false, - "reason": "Output does not identify or discuss any performance issues. The review focuses exclusively on security vulnerabilities with no mention of performance impacts, inefficiencies, or optimization concerns." - }, - "actionable_feedback": { - "passed": true, - "reason": "Output provides specific, actionable guidance: revert changes, explains what was wrong with each change, recommends use of parameterized queries and bcrypt, and advises adding secure cookie flags. 
The 'Do not merge' verdict with clear rationale is actionable." - } - }, - "judgeMode": "llm", - "duration": 57435, - "output": "This PR has **critical security vulnerabilities** that must be blocked:\n\n---\n\n## PR Review: `src/auth/login.ts`\n\n### Critical Issues\n\n**1. SQL Injection (line 10)**\n```diff\n- const user = await db.query('SELECT * FROM users WHERE email = $1', [email]);\n+ const user = await db.query(`SELECT * FROM users WHERE email = '${email}'`);\n```\nDirect string interpolation in SQL = SQL injection. An attacker can input `' OR '1'='1` to bypass auth entirely or dump the database. The original parameterized query was correct — this \"fix\" is a severe regression.\n\n**2. Plaintext password comparison (line 14)**\n```diff\n- const valid = await bcrypt.compare(password, user.passwordHash);\n+ const valid = password === user.passwordHash;\n```\nThis only works if passwords are stored in plaintext, which means passwords are stored in plaintext. Complete removal of bcrypt is a critical auth bypass — any user's password can be read directly from the DB.\n\n**3. Credentials logged to console (line 22)**\n```ts\nconsole.log(`Login: ${email} / ${password}`);\n```\nPlaintext passwords in logs. These end up in log aggregators, monitoring tools, and potentially cloud storage — exposing every user's password on every login attempt.\n\n**4. Weak, predictable session token (line 18)**\n```ts\nconst token = email + ':' + Date.now();\n```\n`email:timestamp` is trivially guessable/forgeable. No entropy, no signing. 
The cookie also has no `httpOnly`, `secure`, or `sameSite` flags set.\n\n---\n\n### Verdict: **Do not merge.** Revert all changes to this function.\n" - }, - { - "taskId": "sm-001", - "taskName": "add_mcp_tool_handler", - "weight": 1.5, - "passed": true, - "passRate": 1, - "criteria": { - "has_switch_case": { - "passed": true, - "reason": "The output shows a complete case statement with 'get_frame_summary' as the case label, following switch/case dispatch pattern" - }, - "has_zod_schema": { - "passed": true, - "reason": "Zod schema is defined inline: z.object({ frameId: z.string() })" - }, - "validates_input": { - "passed": true, - "reason": "Input validation occurs via .parse(args) which will throw if frameId is missing or not a string" - }, - "returns_typed_response": { - "passed": true, - "reason": "Returns an object with all four required fields: frameId, name, status, and eventCount (computed from frame.events.length)" - }, - "handles_not_found": { - "passed": true, - "reason": "Includes explicit check for frame existence with descriptive error message thrown when frame is not found" - } - }, - "judgeMode": "llm", - "duration": 28203, - "output": "Please approve the file write permission, then I can apply the change. 
The edit adds:\n\n```ts\ncase 'get_frame_summary': {\n const input = z.object({ frameId: z.string() }).parse(args);\n const frame = frames.get(input.frameId);\n if (!frame) throw new Error(`Frame not found: ${input.frameId}`);\n return { frameId: frame.id, name: frame.name, status: frame.status, eventCount: frame.events.length };\n}\n```\n\n- Zod validates `frameId` as a required string (throws on missing/wrong type)\n- Looks up the frame, throws a descriptive error if not found\n- Returns the four requested fields: `frameId`, `name`, `status`, `eventCount`\n" - }, - { - "taskId": "sm-002", - "taskName": "fix_fts5_trigger_bug", - "weight": 1.8, - "passed": true, - "passRate": 1, - "criteria": { - "has_delete_trigger": { - "passed": true, - "reason": "A DELETE trigger named 'frames_ad' is explicitly defined with AFTER DELETE ON frames syntax" - }, - "uses_fts_delete_syntax": { - "passed": true, - "reason": "The trigger uses the FTS5 special 'delete' command via INSERT INTO frames_fts(frames_fts, rowid, ...) VALUES ('delete', ...)" - }, - "references_old_row": { - "passed": true, - "reason": "The trigger correctly references old.rowid, old.name, old.digest_text, and old.inputs, old.outputs to capture deleted row data" - }, - "matches_column_list": { - "passed": true, - "reason": "The column list (frames_fts, rowid, name, digest_text, inputs, outputs) matches the INSERT and UPDATE trigger patterns shown in the context" - } - }, - "judgeMode": "llm", - "duration": 36035, - "output": "The edit requires your approval. 
The change replaces the bug comment with:\n\n```sql\n-- Sync trigger: DELETE\nCREATE TRIGGER IF NOT EXISTS frames_ad AFTER DELETE ON frames BEGIN\n INSERT INTO frames_fts(frames_fts, rowid, name, digest_text, inputs, outputs)\n VALUES ('delete', old.rowid, old.name, old.digest_text, old.inputs, old.outputs);\nEND;\n```\n\nThis follows the same pattern as the delete half of the UPDATE trigger — using the special `'delete'` command insert with `old.*` values to remove the stale entry from the external content FTS5 index.\n" - }, - { - "taskId": "sm-003", - "taskName": "daemon_service_lifecycle", - "weight": 1.5, - "passed": true, - "passRate": 1, - "criteria": { - "clears_old_interval": { - "passed": true, - "reason": "The code snippet includes `if (this.intervalId) { clearInterval(this.intervalId); this.intervalId = undefined; }` at the start of `start()`, which directly clears any existing interval before creating a new one." - }, - "prevents_double_start": { - "passed": true, - "reason": "By clearing the interval at the beginning of `start()`, calling `start()` multiple times will safely clear the previous interval before setting a new one, preventing the leak from double-starts." - }, - "has_getstate_method": { - "passed": true, - "reason": "A complete `getState()` method is defined with proper TypeScript syntax, returning an object of type `ServiceState`." - }, - "returns_correct_state_shape": { - "passed": true, - "reason": "The `getState()` method returns an object with all four required properties: `isRunning`, `intervalMs` (correctly converted from minutes to milliseconds), `lastRunTime`, and `errorCount`." - } - }, - "judgeMode": "llm", - "duration": 51160, - "output": "Permissions aren't being granted. 
Here are the two changes to make manually:\n\n**Bug fix — `start()` at line 28:** Add a guard at the top to clear any existing interval before creating a new one:\n\n```ts\nstart(): void {\n if (this.intervalId) { // <-- add these 3 lines\n clearInterval(this.intervalId);\n this.intervalId = undefined;\n }\n this.isRunning = true;\n // ... rest unchanged\n```\n\nThis fixes both the double-`start()` leak and the `updateConfig()` bug — `stop()` already clears correctly, but `start()` now also clears defensively, so even if `isRunning` state drifts, no interval leaks.\n\n**Remove stale comments** on `start()` and `updateConfig()` (the bug comments are no longer accurate).\n\n**Add `getState()` after `updateConfig()`:**\n\n```ts\ngetState(): ServiceState {\n return {\n isRunning: this.isRunning,\n intervalMs: this.config.interval * 60 * 1000,\n lastRunTime: this.lastRunTime,\n errorCount: this.errorCount,\n };\n}\n```\n\nAlso replace the `// TODO` comment on line 57 with this method.\n" - } - ] -} \ No newline at end of file diff --git a/scripts/gepa/state.json b/scripts/gepa/state.json deleted file mode 100644 index 603c261b..00000000 --- a/scripts/gepa/state.json +++ /dev/null @@ -1,49 +0,0 @@ -{ - "currentGeneration": 1, - "bestVariant": "baseline", - "bestScore": 0.48967551622418876, - "targetPath": "/Users/jwu/Dev/rize/sol/CLAUDE.md", - "history": [ - { - "generation": 0, - "variant": "baseline", - "action": "init", - "timestamp": "2026-03-09T15:20:49.782Z" - }, - { - "generation": 1, - "action": "mutate", - "variants": [ - "variant-a", - "variant-b", - "variant-c", - "variant-d" - ], - "timestamp": "2026-03-09T15:23:14.976Z" - }, - { - "generation": 1, - "action": "select", - "scores": [ - { - "variant": "baseline", - "score": 0.48967551622418876 - }, - { - "variant": "variant-b", - "score": 0.48967551622418876 - }, - { - "variant": "variant-a", - "score": 0.3716814159292035 - }, - { - "variant": "variant-c", - "score": 0.35693215339233036 - } - ], - "best": 
"baseline", - "timestamp": "2026-03-09T15:41:55.313Z" - } - ] -} \ No newline at end of file diff --git a/src/cli/commands/rules.ts b/src/cli/commands/rules.ts new file mode 100644 index 00000000..3958d4c3 --- /dev/null +++ b/src/cli/commands/rules.ts @@ -0,0 +1,339 @@ +/** + * CLI commands for rule management. + * Usage: stackmemory rule list|check|enable|disable|seed + */ + +import { Command } from 'commander'; +import chalk from 'chalk'; +import Database from 'better-sqlite3'; +import * as fs from 'fs'; +import * as path from 'path'; +import { RuleEngine } from '../../core/rules/rule-engine.js'; +import type { RuleContext, RuleTrigger } from '../../core/rules/types.js'; +import { filterByScope } from '../../core/rules/built-in-rules.js'; + +function getDb(): Database.Database { + const smDir = path.join(process.cwd(), '.stackmemory'); + if (!fs.existsSync(smDir)) { + fs.mkdirSync(smDir, { recursive: true }); + } + return new Database(path.join(smDir, 'context.db')); +} + +function severityColor(severity: string): (s: string) => string { + switch (severity) { + case 'error': + return chalk.red; + case 'warn': + return chalk.yellow; + case 'info': + return chalk.blue; + default: + return chalk.gray; + } +} + +function severityIcon(severity: string): string { + switch (severity) { + case 'error': + return 'x'; + case 'warn': + return '!'; + case 'info': + return 'i'; + default: + return '-'; + } +} + +export function createRulesCommand(): Command { + const cmd = new Command('rule').description( + 'Manage project rules (lint, commit, migration checks)' + ); + + // ---- list ---- + cmd + .command('list') + .description('List configured rules') + .option('-t, --trigger ', 'Filter by trigger type') + .option('-a, --all', 'Include disabled rules') + .option('--json', 'Output as JSON') + .action((options: { trigger?: string; all?: boolean; json?: boolean }) => { + const db = getDb(); + try { + const engine = new RuleEngine(db); + const rules = engine.listRules({ + 
trigger: options.trigger as RuleTrigger | undefined, + enabled: options.all ? false : undefined, + }); + + if (options.json) { + console.log(JSON.stringify(rules, null, 2)); + return; + } + + if (rules.length === 0) { + console.log(chalk.gray('No rules found.')); + return; + } + + console.log(chalk.cyan(`\n Rules (${rules.length})\n`)); + for (const rule of rules) { + const enabled = rule.enabled ? chalk.green('on') : chalk.gray('off'); + const sev = severityColor(rule.severity)(rule.severity.toUpperCase()); + const builtin = rule.builtin ? chalk.gray(' [built-in]') : ''; + console.log( + ` ${enabled} ${sev} ${chalk.white(rule.id)}${builtin}` + ); + console.log(` ${chalk.gray(rule.description)}`); + console.log( + ` trigger: ${rule.trigger_type} scope: ${rule.scope}` + ); + console.log(); + } + } finally { + db.close(); + } + }); + + // ---- check ---- + cmd + .command('check') + .description('Run rules against files or commit message') + .option('-t, --trigger ', 'Trigger type filter', 'on-demand') + .option('-f, --files ', 'File glob to check') + .option('-m, --commit-message ', 'Commit message to check') + .option('--all', 'Run all rules regardless of trigger') + .option('--json', 'Output as JSON') + .action( + (options: { + trigger?: string; + files?: string; + commitMessage?: string; + all?: boolean; + json?: boolean; + }) => { + const db = getDb(); + try { + const engine = new RuleEngine(db); + const projectRoot = process.cwd(); + + // Collect files + let files: string[] = []; + if (options.files) { + files = collectFiles(projectRoot, options.files); + } + + // Read file contents + const content = new Map(); + for (const file of files) { + const fullPath = path.isAbsolute(file) + ? file + : path.join(projectRoot, file); + try { + content.set(file, fs.readFileSync(fullPath, 'utf-8')); + } catch { + // skip unreadable files + } + } + + const ctx: RuleContext = { + trigger: (options.trigger ?? 
'on-demand') as RuleTrigger, + files, + content, + commitMessage: options.commitMessage ?? '', + projectRoot, + }; + + const result = options.all + ? engine.evaluateAll(ctx) + : engine.evaluate(ctx); + + if (options.json) { + console.log(JSON.stringify(result, null, 2)); + process.exitCode = result.passed ? 0 : 1; + return; + } + + if (result.passed) { + console.log(chalk.green('\n All rules passed.\n')); + return; + } + + console.log( + chalk.red(`\n ${result.violations.length} violation(s) found\n`) + ); + for (const v of result.violations) { + const icon = severityIcon(v.severity); + const color = severityColor(v.severity); + const loc = v.file ? `${v.file}${v.line ? `:${v.line}` : ''}` : ''; + console.log(` ${color(`[${icon}]`)} ${chalk.white(v.ruleName)}`); + console.log(` ${v.message}`); + if (loc) console.log(` ${chalk.gray(loc)}`); + if (v.suggestion) console.log(` ${chalk.cyan(v.suggestion)}`); + console.log(); + } + + const errors = result.violations.filter( + (v) => v.severity === 'error' + ); + if (errors.length > 0) { + process.exitCode = 1; + } + } finally { + db.close(); + } + } + ); + + // ---- enable ---- + cmd + .command('enable ') + .description('Enable a rule') + .action((id: string) => { + const db = getDb(); + try { + const engine = new RuleEngine(db); + if (engine.enableRule(id)) { + console.log(chalk.green(`Rule '${id}' enabled.`)); + } else { + console.log(chalk.red(`Rule '${id}' not found.`)); + process.exitCode = 1; + } + } finally { + db.close(); + } + }); + + // ---- disable ---- + cmd + .command('disable ') + .description('Disable a rule') + .action((id: string) => { + const db = getDb(); + try { + const engine = new RuleEngine(db); + if (engine.disableRule(id)) { + console.log(chalk.yellow(`Rule '${id}' disabled.`)); + } else { + console.log(chalk.red(`Rule '${id}' not found.`)); + process.exitCode = 1; + } + } finally { + db.close(); + } + }); + + // ---- seed ---- + cmd + .command('seed') + .description('Re-seed built-in rules 
(useful after upgrades)') + .action(() => { + const db = getDb(); + try { + const engine = new RuleEngine(db); + const rules = engine.listRules(); + const builtins = rules.filter((r) => r.builtin); + console.log(chalk.green(`Seeded ${builtins.length} built-in rules.`)); + } finally { + db.close(); + } + }); + + // ---- add ---- + cmd + .command('add ') + .description('Add a custom rule (metadata only)') + .requiredOption('-n, --name ', 'Rule display name') + .option('-d, --description ', 'Rule description', '') + .option('-t, --trigger ', 'Trigger type', 'on-demand') + .option('-s, --severity ', 'Severity level', 'warn') + .option('--scope ', 'File scope glob', '**/*') + .action( + ( + id: string, + options: { + name: string; + description: string; + trigger: string; + severity: string; + scope: string; + } + ) => { + const db = getDb(); + try { + const engine = new RuleEngine(db); + engine.getStore().upsert({ + id, + name: options.name, + description: options.description, + trigger_type: options.trigger, + severity: options.severity, + scope: options.scope, + enabled: 1, + builtin: 0, + }); + console.log(chalk.green(`Rule '${id}' added.`)); + } finally { + db.close(); + } + } + ); + + return cmd; +} + +/** + * Collect files matching a simple glob pattern relative to root. 
/**
 * Collect files matching a simple glob pattern relative to root.
 *
 * Two modes:
 *  - pattern contains '*': recursively walk `root`, keeping files whose
 *    root-relative path matches the pattern (delegated to filterByScope);
 *  - otherwise: treat pattern as a literal file (returned as-is) or a
 *    directory (walked with a match-everything glob).
 *
 * Returned paths are relative to `root`.
 */
function collectFiles(root: string, pattern: string): string[] {
  const results: string[] = [];
  // For simple patterns, walk directory
  if (pattern.includes('*')) {
    walkDir(root, root, pattern, results);
  } else {
    // Treat as a single file or directory
    const fullPath = path.join(root, pattern);
    if (fs.existsSync(fullPath)) {
      const stat = fs.statSync(fullPath);
      if (stat.isFile()) {
        results.push(pattern);
      } else if (stat.isDirectory()) {
        walkDir(fullPath, root, '**/*', results);
      }
    }
  }
  return results;
}

/**
 * Recursively walk `dir`, appending files (relative to `root`) that match
 * `pattern` to `results`. Skips common build/VCS directories and silently
 * ignores unreadable directories.
 *
 * NOTE(review): matching is delegated to filterByScope (imported elsewhere
 * in this file) — presumably a glob matcher; confirm its semantics before
 * relying on complex patterns.
 */
function walkDir(
  dir: string,
  root: string,
  pattern: string,
  results: string[]
): void {
  // Directories never worth scanning for rule checks.
  const SKIP = new Set([
    'node_modules',
    '.git',
    'dist',
    'coverage',
    '.stackmemory',
  ]);
  try {
    const entries = fs.readdirSync(dir, { withFileTypes: true });
    for (const entry of entries) {
      if (SKIP.has(entry.name)) continue;
      const fullPath = path.join(dir, entry.name);
      const relPath = path.relative(root, fullPath);
      if (entry.isDirectory()) {
        walkDir(fullPath, root, pattern, results);
      } else if (entry.isFile()) {
        // Match the file's root-relative path against the scope pattern.
        if (filterByScope([relPath], pattern).length > 0) {
          results.push(relPath);
        }
      }
    }
  } catch {
    // skip unreadable dirs
  }
}
path: string; + scope: 'global' | 'project'; + frontmatter: SkillFrontmatter; + body: string; + stale: boolean; +} + +// ── Frontmatter Parser ────────────────────────────────────────────────────── + +function parseFrontmatter(content: string): { + frontmatter: SkillFrontmatter; + body: string; +} { + const match = content.match(/^---\n([\s\S]*?)\n---\n([\s\S]*)$/); + if (!match) { + throw new Error('No YAML frontmatter found (expected --- delimiters)'); + } + + const yamlBlock = match[1]!; + const body = match[2]!; + + // Minimal YAML parser for flat + array fields + const fm: Record = {}; + let currentKey: string | null = null; + let currentArray: string[] | null = null; + + for (const line of yamlBlock.split('\n')) { + const trimmed = line.trim(); + if (!trimmed || trimmed.startsWith('#')) continue; + + // Array item under current key + if (trimmed.startsWith('- ') && currentKey && currentArray !== null) { + currentArray.push(trimmed.slice(2).trim()); + continue; + } + + // Flush previous array + if (currentKey && currentArray !== null) { + fm[currentKey] = currentArray; + currentArray = null; + currentKey = null; + } + + const kvMatch = trimmed.match(/^(\w[\w-]*):\s*(.*)$/); + if (!kvMatch) continue; + + const key = kvMatch[1]!; + const val = kvMatch[2]!.trim(); + + if (val === '' || val === '[]') { + // Next lines may be array items + currentKey = key; + currentArray = []; + } else if (val.startsWith('[') && val.endsWith(']')) { + // Inline array: [a, b, c] + fm[key] = val + .slice(1, -1) + .split(',') + .map((s) => s.trim().replace(/^['"]|['"]$/g, '')); + } else { + fm[key] = val.replace(/^['"]|['"]$/g, ''); + } + } + + // Flush trailing array + if (currentKey && currentArray !== null) { + fm[currentKey] = currentArray; + } + + return { + frontmatter: { + name: (fm['name'] as string) || 'unknown', + version: (fm['version'] as string) || '0.0.0', + domain: (fm['domain'] as string) || 'general', + expires: (fm['expires'] as string) || '', + activates_on: 
(fm['activates_on'] as string[]) || [], + sources: (fm['sources'] as string[]) || [], + context7: fm['context7'] as string | undefined, + }, + body, + }; +} + +// ── Skill Discovery ───────────────────────────────────────────────────────── + +function getSkillDirs(): { dir: string; scope: 'global' | 'project' }[] { + const dirs: { dir: string; scope: 'global' | 'project' }[] = []; + + // Global: ~/.claude/skills/knowledge/ + const globalDir = path.join(os.homedir(), '.claude', 'skills', 'knowledge'); + if (fs.existsSync(globalDir)) { + dirs.push({ dir: globalDir, scope: 'global' }); + } + + // Project: .claude/skills/knowledge/ (from cwd) + const projectDir = path.join(process.cwd(), '.claude', 'skills', 'knowledge'); + if (fs.existsSync(projectDir)) { + dirs.push({ dir: projectDir, scope: 'project' }); + } + + return dirs; +} + +function discoverSkills(): SkillFile[] { + const skills: SkillFile[] = []; + const now = new Date(); + + for (const { dir, scope } of getSkillDirs()) { + const files = fs.readdirSync(dir).filter((f) => f.endsWith('.skill.md')); + for (const file of files) { + const filePath = path.join(dir, file); + try { + const content = fs.readFileSync(filePath, 'utf-8'); + const { frontmatter, body } = parseFrontmatter(content); + const stale = frontmatter.expires + ? 
new Date(frontmatter.expires) < now + : false; + skills.push({ path: filePath, scope, frontmatter, body, stale }); + } catch { + // Skip unparseable files + } + } + } + + return skills; +} + +// ── Keyword Matching ──────────────────────────────────────────────────────── + +function matchSkills( + prompt: string, + skills: SkillFile[] +): { skill: SkillFile; score: number; matchedKeywords: string[] }[] { + const words = new Set(prompt.toLowerCase().split(/\s+/)); + const results: { + skill: SkillFile; + score: number; + matchedKeywords: string[]; + }[] = []; + + for (const skill of skills) { + const matched: string[] = []; + for (const keyword of skill.frontmatter.activates_on) { + const kw = keyword.toLowerCase(); + // Check exact word match or substring in multi-word tokens + if (words.has(kw) || prompt.toLowerCase().includes(kw)) { + matched.push(keyword); + } + } + + if (matched.length > 0) { + // Score: ratio of matched keywords, boosted by project scope + const ratio = matched.length / skill.frontmatter.activates_on.length; + const scopeBoost = skill.scope === 'project' ? 0.1 : 0; + const stalepenalty = skill.stale ? 
-0.2 : 0; + results.push({ + skill, + score: Math.min(1, ratio + scopeBoost + stalepenalty), + matchedKeywords: matched, + }); + } + } + + return results.sort((a, b) => b.score - a.score); +} + +// ── Command Factory ───────────────────────────────────────────────────────── + +export function createSkillCommand(): Command { + const cmd = new Command('skill').description( + 'Manage knowledge skills (.skill.md) — domain expertise for AI agents' + ); + + // ── skill list ────────────────────────────────────────────────────────── + + cmd + .command('list') + .description('List all discovered knowledge skills') + .option('--stale', 'Show only stale (expired) skills') + .option('--json', 'Output as JSON') + .action((options: { stale?: boolean; json?: boolean }) => { + let skills = discoverSkills(); + if (options.stale) { + skills = skills.filter((s) => s.stale); + } + + if (options.json) { + const out = skills.map((s) => ({ + name: s.frontmatter.name, + domain: s.frontmatter.domain, + version: s.frontmatter.version, + scope: s.scope, + expires: s.frontmatter.expires, + stale: s.stale, + keywords: s.frontmatter.activates_on.length, + path: s.path, + })); + console.log(JSON.stringify(out, null, 2)); + return; + } + + if (skills.length === 0) { + console.log(chalk.yellow('No knowledge skills found.')); + console.log(` Global: ~/.claude/skills/knowledge/*.skill.md`); + console.log(` Project: .claude/skills/knowledge/*.skill.md`); + return; + } + + console.log(chalk.bold(`\n Knowledge Skills (${skills.length})\n`)); + + for (const s of skills) { + const status = s.stale ? chalk.red('expired') : chalk.green('active'); + const scope = + s.scope === 'global' ? chalk.dim('global') : chalk.cyan('project'); + const keywords = chalk.dim( + `[${s.frontmatter.activates_on.slice(0, 5).join(', ')}${s.frontmatter.activates_on.length > 5 ? '...' 
: ''}]` + ); + + console.log( + ` ${chalk.bold(s.frontmatter.name.padEnd(20))} ${s.frontmatter.domain.padEnd(12)} ${s.frontmatter.version.padEnd(14)} ${scope.padEnd(18)} ${status} ${keywords}` + ); + } + + const staleCount = skills.filter((s) => s.stale).length; + if (staleCount > 0) { + console.log( + chalk.yellow( + `\n ${staleCount} skill(s) expired — run ${chalk.bold('stackmemory skill refresh')} to update` + ) + ); + } + console.log(); + }); + + // ── skill match ───────────────────────────────────────────────────────── + + cmd + .command('match ') + .description('Find skills matching a prompt (keyword match)') + .option('-n, --limit ', 'Max results', '5') + .option('--json', 'Output as JSON') + .action((prompt: string, options: { limit: string; json?: boolean }) => { + const skills = discoverSkills(); + const matches = matchSkills(prompt, skills).slice( + 0, + parseInt(options.limit, 10) + ); + + if (options.json) { + const out = matches.map((m) => ({ + name: m.skill.frontmatter.name, + domain: m.skill.frontmatter.domain, + score: Math.round(m.score * 100), + stale: m.skill.stale, + matchedKeywords: m.matchedKeywords, + path: m.skill.path, + })); + console.log(JSON.stringify(out, null, 2)); + return; + } + + if (matches.length === 0) { + console.log(chalk.yellow(`No skills match: "${prompt}"`)); + return; + } + + console.log(chalk.bold(`\n Matches for: "${prompt}"\n`)); + for (const m of matches) { + const pct = Math.round(m.score * 100); + const bar = + pct >= 50 ? chalk.green(`${pct}%`) : chalk.yellow(`${pct}%`); + const staleTag = m.skill.stale ? 
chalk.red(' [stale]') : ''; + console.log( + ` ${bar.padEnd(16)} ${chalk.bold(m.skill.frontmatter.name.padEnd(20))} ${chalk.dim(m.matchedKeywords.join(', '))}${staleTag}` + ); + } + console.log(); + }); + + // ── skill show ────────────────────────────────────────────────────────── + + cmd + .command('show ') + .description("Display a skill's full content") + .action((name: string) => { + const skills = discoverSkills(); + const skill = skills.find((s) => s.frontmatter.name === name); + + if (!skill) { + console.error(chalk.red(`Skill not found: ${name}`)); + console.log( + `Available: ${skills.map((s) => s.frontmatter.name).join(', ')}` + ); + process.exit(1); + } + + const status = skill.stale ? chalk.red('EXPIRED') : chalk.green('active'); + console.log( + chalk.bold(`\n ${skill.frontmatter.name}`) + + ` (${skill.frontmatter.domain}) ${status}` + ); + console.log( + chalk.dim( + ` v${skill.frontmatter.version} | expires ${skill.frontmatter.expires} | ${skill.scope}` + ) + ); + console.log(chalk.dim(` ${skill.path}`)); + if (skill.frontmatter.context7) { + console.log(chalk.dim(` context7: ${skill.frontmatter.context7}`)); + } + console.log( + chalk.dim(` keywords: ${skill.frontmatter.activates_on.join(', ')}`) + ); + console.log(chalk.dim(` sources:`)); + for (const src of skill.frontmatter.sources) { + console.log(chalk.dim(` - ${src}`)); + } + console.log(); + console.log(skill.body); + }); + + // ── skill add ─────────────────────────────────────────────────────────── + + cmd + .command('add ') + .description('Add a knowledge skill from a local file or URL') + .option( + '--global', + 'Install to ~/.claude/skills/knowledge/ (default: project)' + ) + .option('--name ', 'Override skill name') + .action( + async (source: string, options: { global?: boolean; name?: string }) => { + let content = ''; + + if (source.startsWith('http://') || source.startsWith('https://')) { + // Fetch from URL + try { + const res = await fetch(source); + if (!res.ok) throw new 
Error(`HTTP ${res.status}`); + content = await res.text(); + } catch (err) { + console.error(chalk.red(`Failed to fetch: ${source}`)); + console.error((err as Error).message); + process.exit(1); + return; + } + } else { + // Read local file + const resolved = path.resolve(source); + if (!fs.existsSync(resolved)) { + console.error(chalk.red(`File not found: ${resolved}`)); + process.exit(1); + return; + } + content = fs.readFileSync(resolved, 'utf-8'); + } + + // Validate frontmatter + let parsed: ReturnType | undefined; + try { + parsed = parseFrontmatter(content); + } catch (err) { + console.error( + chalk.red(`Invalid skill file: ${(err as Error).message}`) + ); + process.exit(1); + return; + } + if (!parsed) return; + + const skillName = options.name ?? parsed.frontmatter.name; + const targetDir = options.global + ? path.join(os.homedir(), '.claude', 'skills', 'knowledge') + : path.join(process.cwd(), '.claude', 'skills', 'knowledge'); + + fs.mkdirSync(targetDir, { recursive: true }); + const targetPath = path.join(targetDir, `${skillName}.skill.md`); + + const exists = fs.existsSync(targetPath); + fs.writeFileSync(targetPath, content, 'utf-8'); + + const action = exists ? 'Updated' : 'Added'; + const scope = options.global ? 
'global' : 'project'; + console.log( + chalk.green(`${action} ${chalk.bold(skillName)} (${scope})`) + ); + console.log(chalk.dim(` ${targetPath}`)); + console.log( + chalk.dim( + ` domain: ${parsed.frontmatter.domain} | keywords: ${parsed.frontmatter.activates_on.length} | expires: ${parsed.frontmatter.expires}` + ) + ); + } + ); + + // ── skill refresh ─────────────────────────────────────────────────────── + + cmd + .command('refresh [name]') + .description('Check freshness and show stale skills that need updating') + .option('--fetch', 'Attempt to fetch source URLs and show diff summary') + .action(async (name: string | undefined, options: { fetch?: boolean }) => { + let skills = discoverSkills(); + + if (name) { + skills = skills.filter((s) => s.frontmatter.name === name); + if (skills.length === 0) { + console.error(chalk.red(`Skill not found: ${name}`)); + process.exit(1); + } + } + + const stale = skills.filter((s) => s.stale); + const fresh = skills.filter((s) => !s.stale); + + console.log(chalk.bold(`\n Skill Freshness Report\n`)); + console.log( + ` ${chalk.green(String(fresh.length))} active ${chalk.red(String(stale.length))} expired ${skills.length} total\n` + ); + + if (stale.length === 0) { + console.log(chalk.green(' All skills are fresh.')); + console.log(); + return; + } + + for (const s of stale) { + const daysPast = Math.floor( + (Date.now() - new Date(s.frontmatter.expires).getTime()) / 86400000 + ); + console.log( + ` ${chalk.red('expired')} ${chalk.bold(s.frontmatter.name.padEnd(20))} ${chalk.dim(`${daysPast}d ago`)} ${s.scope}` + ); + if (s.frontmatter.context7) { + console.log( + chalk.dim(` context7: ${s.frontmatter.context7}`) + ); + } + for (const src of s.frontmatter.sources) { + console.log(chalk.dim(` source: ${src}`)); + } + + if (options.fetch && s.frontmatter.sources.length > 0) { + const url = s.frontmatter.sources[0]!; + try { + const res = await fetch(url, { method: 'HEAD' }); + console.log( + res.ok + ? 
chalk.green(` ${url} — reachable (${res.status})`) + : chalk.yellow(` ${url} — ${res.status}`) + ); + } catch { + console.log(chalk.red(` ${url} — unreachable`)); + } + } + } + + console.log( + chalk.yellow( + `\n To update: edit the .skill.md file and bump the version + expires date.` + ) + ); + if (!options.fetch) { + console.log( + chalk.dim(` Run with --fetch to check source URL reachability.`) + ); + } + console.log(); + }); + + // ── skill init ────────────────────────────────────────────────────────── + + cmd + .command('init ') + .description('Scaffold a new .skill.md file') + .option('--global', 'Create in ~/.claude/skills/knowledge/') + .option('--domain ', 'Skill domain', 'general') + .action((name: string, options: { global?: boolean; domain: string }) => { + const targetDir = options.global + ? path.join(os.homedir(), '.claude', 'skills', 'knowledge') + : path.join(process.cwd(), '.claude', 'skills', 'knowledge'); + + fs.mkdirSync(targetDir, { recursive: true }); + const targetPath = path.join(targetDir, `${name}.skill.md`); + + if (fs.existsSync(targetPath)) { + console.error(chalk.red(`Already exists: ${targetPath}`)); + process.exit(1); + } + + const sixMonths = new Date(); + sixMonths.setMonth(sixMonths.getMonth() + 6); + const expiresDate = sixMonths.toISOString().slice(0, 10); + + const template = `--- +name: ${name} +version: ${new Date().toISOString().slice(0, 10).replace(/-/g, '.')} +domain: ${options.domain} +expires: ${expiresDate} +activates_on: [${name}] +sources: + - https://docs.example.com +context7: +--- + +# ${name.charAt(0).toUpperCase() + name.slice(1).replace(/-/g, ' ')} + +## Current SDK & Versions +- TODO: add current versions + +## Preferred Patterns +- TODO: document the right way to do things + +## Gotchas +- TODO: things LLMs consistently get wrong + +## Entry Points +- TODO: canonical doc URLs +`; + + fs.writeFileSync(targetPath, template, 'utf-8'); + console.log(chalk.green(`Created ${chalk.bold(name)} skill`)); + 
console.log(chalk.dim(` ${targetPath}`)); + console.log( + chalk.dim( + ` Edit the file to add domain knowledge, then update activates_on keywords.` + ) + ); + }); + + return cmd; +} diff --git a/src/cli/index.ts b/src/cli/index.ts index 8f34641f..924d9cec 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -66,8 +66,10 @@ import { createTeamCommands } from './commands/team.js'; import { createDesiresCommands } from './commands/desires.js'; import { createConductorCommands } from './commands/orchestrate.js'; import { createPreflightCommand } from './commands/preflight.js'; +import { createRulesCommand } from './commands/rules.js'; import { createSnapshotCommand } from './commands/snapshot.js'; import { createLoopCommand } from './commands/loop.js'; +import { createSkillCommand } from './commands/skill.js'; import chalk from 'chalk'; import * as fs from 'fs'; import * as path from 'path'; @@ -696,6 +698,8 @@ program.addCommand(createConductorCommands()); program.addCommand(createPreflightCommand()); program.addCommand(createSnapshotCommand()); program.addCommand(createLoopCommand()); +program.addCommand(createRulesCommand()); +program.addCommand(createSkillCommand()); // Register setup and diagnostic commands registerSetupCommands(program); diff --git a/src/core/context/__tests__/frame-manager-lifecycle.test.ts b/src/core/context/__tests__/frame-manager-lifecycle.test.ts new file mode 100644 index 00000000..30c71d3c --- /dev/null +++ b/src/core/context/__tests__/frame-manager-lifecycle.test.ts @@ -0,0 +1,440 @@ +/** + * Tests for FrameManager — lifecycle: create, close, error conditions + * Focuses on: input validation, stack depth limits, event/anchor errors, + * initialize flow, and edge cases not covered by existing test suites. 
+ */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import Database from 'better-sqlite3'; +import { FrameManager } from '../index.js'; +import { FrameError, ErrorCode } from '../../errors/index.js'; +import { join } from 'path'; +import { mkdtempSync, rmSync } from 'fs'; +import { tmpdir } from 'os'; + +describe('FrameManager - Lifecycle', () => { + let db: Database.Database; + let frameManager: FrameManager; + let tempDir: string; + const projectId = 'test-lifecycle'; + + beforeEach(() => { + tempDir = mkdtempSync(join(tmpdir(), 'stackmem-lifecycle-')); + const dbPath = join(tempDir, 'test.db'); + db = new Database(dbPath); + frameManager = new FrameManager(db, projectId, { maxStackDepth: 5 }); + }); + + afterEach(() => { + if (db) db.close(); + if (tempDir) rmSync(tempDir, { recursive: true, force: true }); + }); + + // ── createFrame validation ─────────────────────────────────────────── + + describe('createFrame input validation', () => { + it('should reject empty name', () => { + expect(() => + frameManager.createFrame({ type: 'task', name: '' }) + ).toThrow(FrameError); + }); + + it('should reject whitespace-only name', () => { + expect(() => + frameManager.createFrame({ type: 'task', name: ' ' }) + ).toThrow(FrameError); + }); + + it('should accept valid name', () => { + const id = frameManager.createFrame({ type: 'task', name: 'Valid' }); + expect(id).toBeDefined(); + expect(typeof id).toBe('string'); + }); + + it('should accept legacy (positional) function signature', () => { + const id = frameManager.createFrame('task', 'Legacy Name'); + expect(id).toBeDefined(); + const frame = frameManager.getFrame(id); + expect(frame?.name).toBe('Legacy Name'); + }); + + it('should use default empty inputs when not provided', () => { + const id = frameManager.createFrame({ type: 'task', name: 'No Inputs' }); + const frame = frameManager.getFrame(id); + expect(frame?.inputs).toEqual({}); + }); + + it('should store custom inputs', () => { + 
const id = frameManager.createFrame({ + type: 'task', + name: 'With Inputs', + inputs: { key: 'value' }, + }); + const frame = frameManager.getFrame(id); + expect(frame?.inputs).toEqual({ key: 'value' }); + }); + }); + + // ── Stack depth limit ──────────────────────────────────────────────── + + describe('stack depth limit', () => { + it('should enforce maxStackDepth', () => { + // maxStackDepth is 5 + for (let i = 0; i < 5; i++) { + frameManager.createFrame({ type: 'task', name: `Frame ${i}` }); + } + expect(frameManager.getStackDepth()).toBe(5); + + expect(() => + frameManager.createFrame({ type: 'task', name: 'Overflow' }) + ).toThrow(FrameError); + }); + + it('should allow creating after closing frames', () => { + for (let i = 0; i < 5; i++) { + frameManager.createFrame({ type: 'task', name: `Frame ${i}` }); + } + frameManager.closeFrame(); // pop one + expect(() => + frameManager.createFrame({ type: 'task', name: 'After close' }) + ).not.toThrow(); + }); + }); + + // ── closeFrame edge cases ──────────────────────────────────────────── + + describe('closeFrame edge cases', () => { + it('should throw when no active frame to close', () => { + expect(() => frameManager.closeFrame()).toThrow(FrameError); + }); + + it('should throw for nonexistent frame ID', () => { + frameManager.createFrame({ type: 'task', name: 'Active' }); + expect(() => frameManager.closeFrame('nonexistent-id')).toThrow( + FrameError + ); + }); + + it('should not throw when closing already-closed frame', () => { + const id = frameManager.createFrame({ type: 'task', name: 'F1' }); + frameManager.closeFrame(id); + // Closing again should not throw (silently ignored) + expect(() => frameManager.closeFrame(id)).not.toThrow(); + }); + + it('should generate digest on close', () => { + const id = frameManager.createFrame({ type: 'task', name: 'F1' }); + frameManager.addEvent('user_message', { text: 'hello' }); + frameManager.closeFrame(id); + + const frame = frameManager.getFrame(id); + 
expect(frame?.state).toBe('closed'); + expect(frame?.closed_at).toBeDefined(); + expect(frame?.digest_json).toBeDefined(); + }); + + it('should close parent and remove from stack', () => { + const parent = frameManager.createFrame({ + type: 'task', + name: 'Parent', + }); + expect(frameManager.getStackDepth()).toBe(1); + + frameManager.closeFrame(parent); + + expect(frameManager.getFrame(parent)?.state).toBe('closed'); + expect(frameManager.getStackDepth()).toBe(0); + }); + + it('should close current frame when no ID given', () => { + frameManager.createFrame({ type: 'task', name: 'F1' }); + const childId = frameManager.createFrame({ + type: 'subtask', + name: 'F2', + }); + expect(frameManager.getStackDepth()).toBe(2); + + frameManager.closeFrame(); // closes top = child + expect(frameManager.getFrame(childId)?.state).toBe('closed'); + expect(frameManager.getStackDepth()).toBe(1); + }); + }); + + // ── addEvent error conditions ──────────────────────────────────────── + + describe('addEvent error conditions', () => { + it('should throw when no active frame', () => { + expect(() => + frameManager.addEvent('user_message', { text: 'orphan' }) + ).toThrow(FrameError); + }); + + it('should add event to current frame', () => { + const id = frameManager.createFrame({ type: 'task', name: 'F1' }); + const eventId = frameManager.addEvent('user_message', { text: 'hi' }); + expect(eventId).toBeDefined(); + + const events = frameManager.getFrameEvents(id); + expect(events).toHaveLength(1); + expect(events[0].event_type).toBe('user_message'); + }); + + it('should add event to specific frame by ID', () => { + const id1 = frameManager.createFrame({ type: 'task', name: 'F1' }); + frameManager.createFrame({ type: 'subtask', name: 'F2' }); + + frameManager.addEvent('decision', { choice: 'A' }, id1); + expect(frameManager.getFrameEvents(id1)).toHaveLength(1); + }); + + it('should increment sequence numbers', () => { + frameManager.createFrame({ type: 'task', name: 'F1' }); + 
frameManager.addEvent('user_message', { text: '1' }); + frameManager.addEvent('user_message', { text: '2' }); + frameManager.addEvent('user_message', { text: '3' }); + + const events = frameManager.getFrameEvents( + frameManager.getCurrentFrameId()! + ); + // Events returned in DESC order (newest first) + expect(events).toHaveLength(3); + expect(events[0].seq).toBeGreaterThan(events[1].seq); + expect(events[1].seq).toBeGreaterThan(events[2].seq); + }); + }); + + // ── addAnchor error conditions ─────────────────────────────────────── + + describe('addAnchor error conditions', () => { + it('should throw when no active frame', () => { + expect(() => frameManager.addAnchor('FACT', 'orphan fact')).toThrow( + FrameError + ); + }); + + it('should add anchor to current frame', () => { + const id = frameManager.createFrame({ type: 'task', name: 'F1' }); + const anchorId = frameManager.addAnchor('DECISION', 'Chose X', 8); + expect(anchorId).toBeDefined(); + + const anchors = frameManager.getFrameAnchors(id); + expect(anchors).toHaveLength(1); + expect(anchors[0].type).toBe('DECISION'); + expect(anchors[0].text).toBe('Chose X'); + expect(anchors[0].priority).toBe(8); + }); + + it('should use default priority', () => { + frameManager.createFrame({ type: 'task', name: 'F1' }); + frameManager.addAnchor('FACT', 'Some fact'); + const anchors = frameManager.getFrameAnchors( + frameManager.getCurrentFrameId()! 
+ ); + expect(anchors[0].priority).toBe(5); // default + }); + + it('should add anchor to specific frame', () => { + const id1 = frameManager.createFrame({ type: 'task', name: 'F1' }); + frameManager.createFrame({ type: 'subtask', name: 'F2' }); + + frameManager.addAnchor('CONSTRAINT', 'Must be fast', 9, {}, id1); + expect(frameManager.getFrameAnchors(id1)).toHaveLength(1); + }); + }); + + // ── initialize ─────────────────────────────────────────────────────── + + describe('initialize', () => { + it('should run recovery and rebuild stack', async () => { + frameManager.createFrame({ type: 'task', name: 'Before init' }); + await frameManager.initialize(); + // Should have a recovery report after initialization + const report = frameManager.getRecoveryReport(); + expect(report).not.toBeNull(); + }); + + it('should update recovery report', async () => { + await frameManager.initialize(); + const report = frameManager.getRecoveryReport(); + expect(report).toBeDefined(); + expect(report).toHaveProperty('recovered'); + expect(report).toHaveProperty('orphanedFrames'); + expect(report).toHaveProperty('integrityCheck'); + }); + }); + + // ── getCurrentFrameId / getStackDepth ──────────────────────────────── + + describe('frame stack accessors', () => { + it('should return undefined when no frames exist', () => { + expect(frameManager.getCurrentFrameId()).toBeUndefined(); + }); + + it('should return 0 depth when empty', () => { + expect(frameManager.getStackDepth()).toBe(0); + }); + + it('should track stack depth accurately', () => { + frameManager.createFrame({ type: 'task', name: 'F1' }); + expect(frameManager.getStackDepth()).toBe(1); + frameManager.createFrame({ type: 'subtask', name: 'F2' }); + expect(frameManager.getStackDepth()).toBe(2); + frameManager.closeFrame(); + expect(frameManager.getStackDepth()).toBe(1); + }); + }); + + // ── getActiveFramePath ─────────────────────────────────────────────── + + describe('getActiveFramePath', () => { + it('should return empty 
array when no frames', () => { + expect(frameManager.getActiveFramePath()).toEqual([]); + }); + + it('should return ordered path from root to current', () => { + frameManager.createFrame({ type: 'task', name: 'Root' }); + frameManager.createFrame({ type: 'subtask', name: 'Child' }); + + const path = frameManager.getActiveFramePath(); + expect(path).toHaveLength(2); + expect(path[0].name).toBe('Root'); + expect(path[1].name).toBe('Child'); + }); + }); + + // ── extractConstraints ─────────────────────────────────────────────── + + describe('extractConstraints', () => { + it('should extract constraints from inputs', () => { + const constraints = frameManager.extractConstraints({ + constraints: ['must be fast', 'no side effects'], + }); + expect(constraints).toEqual(['must be fast', 'no side effects']); + }); + + it('should extract requirements', () => { + const constraints = frameManager.extractConstraints({ + requirements: ['Node 20+'], + }); + expect(constraints).toEqual(['Node 20+']); + }); + + it('should extract limitations', () => { + const constraints = frameManager.extractConstraints({ + limitations: ['512MB max'], + }); + expect(constraints).toEqual(['512MB max']); + }); + + it('should combine all constraint sources', () => { + const constraints = frameManager.extractConstraints({ + constraints: ['a'], + requirements: ['b'], + limitations: ['c'], + }); + expect(constraints).toEqual(['a', 'b', 'c']); + }); + + it('should return empty array for no constraints', () => { + expect(frameManager.extractConstraints({})).toEqual([]); + }); + + it('should ignore non-array constraint values', () => { + expect( + frameManager.extractConstraints({ constraints: 'not an array' }) + ).toEqual([]); + }); + }); + + // ── validateStack ──────────────────────────────────────────────────── + + describe('validateStack', () => { + it('should validate empty stack as valid', () => { + const result = frameManager.validateStack(); + expect(result.isValid).toBe(true); + }); + + 
it('should validate stack with active frames', () => { + frameManager.createFrame({ type: 'task', name: 'F1' }); + frameManager.createFrame({ type: 'subtask', name: 'F2' }); + const result = frameManager.validateStack(); + expect(result.isValid).toBe(true); + }); + }); + + // ── parent frame resolution ────────────────────────────────────────── + + describe('parent frame resolution', () => { + it('should auto-assign current frame as parent', () => { + const parentId = frameManager.createFrame({ + type: 'task', + name: 'Parent', + }); + const childId = frameManager.createFrame({ + type: 'subtask', + name: 'Child', + }); + + const child = frameManager.getFrame(childId); + expect(child?.parent_frame_id).toBe(parentId); + expect(child?.depth).toBe(1); + }); + + it('should allow explicit parent override', () => { + const f1 = frameManager.createFrame({ type: 'task', name: 'F1' }); + frameManager.createFrame({ type: 'task', name: 'F2' }); + + const f3 = frameManager.createFrame({ + type: 'subtask', + name: 'F3', + parentFrameId: f1, + }); + + const frame = frameManager.getFrame(f3); + expect(frame?.parent_frame_id).toBe(f1); + }); + + it('should create root frame with no parent when stack is empty', () => { + const id = frameManager.createFrame({ type: 'task', name: 'Root' }); + const frame = frameManager.getFrame(id); + expect(frame?.parent_frame_id).toBeUndefined(); + expect(frame?.depth).toBe(0); + }); + }); + + // ── getActiveArtifacts ─────────────────────────────────────────────── + + describe('getActiveArtifacts', () => { + it('should return artifact paths from events', () => { + const id = frameManager.createFrame({ type: 'task', name: 'F1' }); + frameManager.addEvent('artifact', { path: '/src/index.ts' }); + frameManager.addEvent('artifact', { path: '/src/utils.ts' }); + + const artifacts = frameManager.getActiveArtifacts(id); + expect(artifacts).toContain('/src/index.ts'); + expect(artifacts).toContain('/src/utils.ts'); + }); + + it('should deduplicate artifact 
paths', () => { + const id = frameManager.createFrame({ type: 'task', name: 'F1' }); + frameManager.addEvent('artifact', { path: '/src/index.ts' }); + frameManager.addEvent('artifact', { path: '/src/index.ts' }); + + const artifacts = frameManager.getActiveArtifacts(id); + expect(artifacts).toHaveLength(1); + }); + + it('should ignore non-artifact events', () => { + const id = frameManager.createFrame({ type: 'task', name: 'F1' }); + frameManager.addEvent('user_message', { text: 'hello' }); + + const artifacts = frameManager.getActiveArtifacts(id); + expect(artifacts).toHaveLength(0); + }); + + it('should return empty for frame with no events', () => { + const id = frameManager.createFrame({ type: 'task', name: 'F1' }); + expect(frameManager.getActiveArtifacts(id)).toEqual([]); + }); + }); +}); diff --git a/src/core/context/__tests__/frame-stack.test.ts b/src/core/context/__tests__/frame-stack.test.ts new file mode 100644 index 00000000..dd37433b --- /dev/null +++ b/src/core/context/__tests__/frame-stack.test.ts @@ -0,0 +1,403 @@ +/** + * Tests for FrameStack — stack CRUD and frame ordering + * Covers: push/pop, getCurrentFrameId, getDepth, removeFrame, validateStack, edge cases + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import Database from 'better-sqlite3'; +import { FrameStack } from '../frame-stack.js'; +import { FrameDatabase } from '../frame-database.js'; +import { join } from 'path'; +import { mkdtempSync, rmSync } from 'fs'; +import { tmpdir } from 'os'; + +// ── Helpers ────────────────────────────────────────────────────────────── + +const PROJECT_ID = 'test-stack-project'; +const RUN_ID = 'test-run-1'; + +function insertTestFrame( + frameDb: FrameDatabase, + overrides: Partial<{ + frame_id: string; + parent_frame_id: string; + state: string; + project_id: string; + depth: number; + name: string; + }> = {} +) { + const id = + overrides.frame_id || `frame-${Math.random().toString(36).slice(2, 8)}`; + 
frameDb.insertFrame({ + frame_id: id, + run_id: RUN_ID, + project_id: overrides.project_id || PROJECT_ID, + parent_frame_id: overrides.parent_frame_id, + depth: overrides.depth ?? 0, + type: 'task', + name: overrides.name || `Frame ${id}`, + state: (overrides.state as any) || 'active', + inputs: {}, + outputs: {}, + digest_json: {}, + }); + return id; +} + +// ── Tests ──────────────────────────────────────────────────────────────── + +describe('FrameStack', () => { + let db: Database.Database; + let frameDb: FrameDatabase; + let stack: FrameStack; + let tempDir: string; + + beforeEach(() => { + tempDir = mkdtempSync(join(tmpdir(), 'stackmem-fs-test-')); + const dbPath = join(tempDir, 'test.db'); + db = new Database(dbPath); + frameDb = new FrameDatabase(db); + frameDb.initSchema(); + stack = new FrameStack(frameDb, PROJECT_ID, RUN_ID); + }); + + afterEach(() => { + if (db) db.close(); + if (tempDir) rmSync(tempDir, { recursive: true, force: true }); + }); + + // ── pushFrame ──────────────────────────────────────────────────────── + + describe('pushFrame', () => { + it('should add frame to stack', () => { + stack.pushFrame('frame-1'); + expect(stack.getDepth()).toBe(1); + expect(stack.getStack()).toEqual(['frame-1']); + }); + + it('should push multiple frames in order', () => { + stack.pushFrame('frame-1'); + stack.pushFrame('frame-2'); + stack.pushFrame('frame-3'); + expect(stack.getStack()).toEqual(['frame-1', 'frame-2', 'frame-3']); + }); + + it('should not push duplicate frames', () => { + stack.pushFrame('frame-1'); + stack.pushFrame('frame-1'); + expect(stack.getDepth()).toBe(1); + }); + }); + + // ── popFrame ───────────────────────────────────────────────────────── + + describe('popFrame', () => { + it('should pop top frame when no frameId given', () => { + stack.pushFrame('frame-1'); + stack.pushFrame('frame-2'); + const popped = stack.popFrame(); + expect(popped).toBe('frame-2'); + expect(stack.getDepth()).toBe(1); + }); + + it('should return undefined 
on empty stack', () => { + expect(stack.popFrame()).toBeUndefined(); + }); + + it('should pop specific frame and all frames above it', () => { + stack.pushFrame('frame-1'); + stack.pushFrame('frame-2'); + stack.pushFrame('frame-3'); + const popped = stack.popFrame('frame-2'); + expect(popped).toBe('frame-2'); + expect(stack.getStack()).toEqual(['frame-1']); + }); + + it('should return undefined for frame not on stack', () => { + stack.pushFrame('frame-1'); + expect(stack.popFrame('nonexistent')).toBeUndefined(); + expect(stack.getDepth()).toBe(1); // unchanged + }); + + it('should handle popping the only frame', () => { + stack.pushFrame('frame-1'); + expect(stack.popFrame('frame-1')).toBe('frame-1'); + expect(stack.getDepth()).toBe(0); + }); + }); + + // ── getCurrentFrameId ──────────────────────────────────────────────── + + describe('getCurrentFrameId', () => { + it('should return undefined on empty stack', () => { + expect(stack.getCurrentFrameId()).toBeUndefined(); + }); + + it('should return top frame', () => { + stack.pushFrame('frame-1'); + stack.pushFrame('frame-2'); + expect(stack.getCurrentFrameId()).toBe('frame-2'); + }); + + it('should update after pop', () => { + stack.pushFrame('frame-1'); + stack.pushFrame('frame-2'); + stack.popFrame(); + expect(stack.getCurrentFrameId()).toBe('frame-1'); + }); + }); + + // ── getDepth ───────────────────────────────────────────────────────── + + describe('getDepth', () => { + it('should return 0 for empty stack', () => { + expect(stack.getDepth()).toBe(0); + }); + + it('should track depth correctly', () => { + stack.pushFrame('a'); + stack.pushFrame('b'); + expect(stack.getDepth()).toBe(2); + stack.popFrame(); + expect(stack.getDepth()).toBe(1); + }); + }); + + // ── getStack ───────────────────────────────────────────────────────── + + describe('getStack', () => { + it('should return a copy (not the internal array)', () => { + stack.pushFrame('frame-1'); + const copy = stack.getStack(); + copy.push('mutated'); + 
expect(stack.getStack()).toEqual(['frame-1']); + }); + }); + + // ── getStackFrames ─────────────────────────────────────────────────── + + describe('getStackFrames', () => { + it('should return frame objects for IDs on stack', () => { + const id1 = insertTestFrame(frameDb, { frame_id: 'f1', name: 'First' }); + const id2 = insertTestFrame(frameDb, { + frame_id: 'f2', + name: 'Second', + parent_frame_id: id1, + depth: 1, + }); + stack.pushFrame(id1); + stack.pushFrame(id2); + + const frames = stack.getStackFrames(); + expect(frames).toHaveLength(2); + expect(frames[0].name).toBe('First'); + expect(frames[1].name).toBe('Second'); + }); + + it('should filter out frames not found in DB', () => { + stack.pushFrame('nonexistent'); + const frames = stack.getStackFrames(); + expect(frames).toHaveLength(0); + }); + }); + + // ── isFrameActive ──────────────────────────────────────────────────── + + describe('isFrameActive', () => { + it('should return true for frame on stack', () => { + stack.pushFrame('frame-1'); + expect(stack.isFrameActive('frame-1')).toBe(true); + }); + + it('should return false for frame not on stack', () => { + expect(stack.isFrameActive('frame-1')).toBe(false); + }); + }); + + // ── getParentFrameId ───────────────────────────────────────────────── + + describe('getParentFrameId', () => { + it('should return undefined when stack has fewer than 2 frames', () => { + expect(stack.getParentFrameId()).toBeUndefined(); + stack.pushFrame('frame-1'); + expect(stack.getParentFrameId()).toBeUndefined(); + }); + + it('should return second-to-top frame', () => { + stack.pushFrame('frame-1'); + stack.pushFrame('frame-2'); + expect(stack.getParentFrameId()).toBe('frame-1'); + }); + + it('should return correct parent with deep stack', () => { + stack.pushFrame('a'); + stack.pushFrame('b'); + stack.pushFrame('c'); + expect(stack.getParentFrameId()).toBe('b'); + }); + }); + + // ── getFrameStackDepth ─────────────────────────────────────────────── + + 
describe('getFrameStackDepth', () => { + it('should return 0-based index', () => { + stack.pushFrame('a'); + stack.pushFrame('b'); + stack.pushFrame('c'); + expect(stack.getFrameStackDepth('a')).toBe(0); + expect(stack.getFrameStackDepth('b')).toBe(1); + expect(stack.getFrameStackDepth('c')).toBe(2); + }); + + it('should return -1 for frame not on stack', () => { + expect(stack.getFrameStackDepth('nonexistent')).toBe(-1); + }); + }); + + // ── clear ──────────────────────────────────────────────────────────── + + describe('clear', () => { + it('should empty the stack', () => { + stack.pushFrame('a'); + stack.pushFrame('b'); + stack.clear(); + expect(stack.getDepth()).toBe(0); + expect(stack.getStack()).toEqual([]); + }); + + it('should be safe to call on empty stack', () => { + expect(() => stack.clear()).not.toThrow(); + }); + }); + + // ── removeFrame ────────────────────────────────────────────────────── + + describe('removeFrame', () => { + it('should remove a specific frame without affecting frames above', () => { + stack.pushFrame('a'); + stack.pushFrame('b'); + stack.pushFrame('c'); + const removed = stack.removeFrame('b'); + expect(removed).toBe(true); + expect(stack.getStack()).toEqual(['a', 'c']); + }); + + it('should return false for frame not on stack', () => { + expect(stack.removeFrame('nonexistent')).toBe(false); + }); + + it('should handle removing the only frame', () => { + stack.pushFrame('a'); + expect(stack.removeFrame('a')).toBe(true); + expect(stack.getDepth()).toBe(0); + }); + }); + + // ── validateStack ──────────────────────────────────────────────────── + + describe('validateStack', () => { + it('should pass for empty stack', () => { + const result = stack.validateStack(); + expect(result.isValid).toBe(true); + expect(result.errors).toHaveLength(0); + }); + + it('should pass for valid stack with proper parent chain', () => { + const id1 = insertTestFrame(frameDb, { frame_id: 'root', depth: 0 }); + const id2 = insertTestFrame(frameDb, { + 
frame_id: 'child', + parent_frame_id: id1, + depth: 1, + }); + stack.pushFrame(id1); + stack.pushFrame(id2); + + const result = stack.validateStack(); + expect(result.isValid).toBe(true); + }); + + it('should detect frame not in database', () => { + stack.pushFrame('ghost-frame'); + const result = stack.validateStack(); + expect(result.isValid).toBe(false); + expect(result.errors.some((e) => e.includes('not found'))).toBe(true); + }); + + it('should detect non-active frame on stack', () => { + insertTestFrame(frameDb, { frame_id: 'closed-f', state: 'closed' }); + stack.pushFrame('closed-f'); + const result = stack.validateStack(); + expect(result.isValid).toBe(false); + expect(result.errors.some((e) => e.includes('not active'))).toBe(true); + }); + + it('should detect wrong project frame on stack', () => { + insertTestFrame(frameDb, { + frame_id: 'other-proj', + project_id: 'different-project', + }); + stack.pushFrame('other-proj'); + const result = stack.validateStack(); + expect(result.isValid).toBe(false); + expect(result.errors.some((e) => e.includes('different project'))).toBe( + true + ); + }); + + it('should detect parent-child mismatch in stack order', () => { + // Create both frames, then modify parent relationship to create mismatch + const idA = insertTestFrame(frameDb, { frame_id: 'frame-a', depth: 0 }); + const idOther = insertTestFrame(frameDb, { + frame_id: 'other-parent', + depth: 0, + }); + // frame-b has other-parent as parent, not frame-a + insertTestFrame(frameDb, { + frame_id: 'frame-b', + depth: 1, + parent_frame_id: idOther, + }); + stack.pushFrame(idA); + stack.pushFrame('frame-b'); + + const result = stack.validateStack(); + expect(result.isValid).toBe(false); + expect(result.errors.some((e) => e.includes('parent mismatch'))).toBe( + true + ); + }); + }); + + // ── initialize ─────────────────────────────────────────────────────── + + describe('initialize', () => { + it('should rebuild stack from active frames in DB', async () => { + const 
id1 = insertTestFrame(frameDb, { frame_id: 'root', depth: 0 }); + insertTestFrame(frameDb, { + frame_id: 'child', + parent_frame_id: id1, + depth: 1, + }); + + await stack.initialize(); + + expect(stack.getDepth()).toBe(2); + expect(stack.getStack()[0]).toBe('root'); + expect(stack.getStack()[1]).toBe('child'); + }); + + it('should have empty stack when no active frames exist', async () => { + await stack.initialize(); + expect(stack.getDepth()).toBe(0); + }); + + it('should ignore closed frames', async () => { + insertTestFrame(frameDb, { frame_id: 'closed-1', state: 'closed' }); + insertTestFrame(frameDb, { frame_id: 'active-1', state: 'active' }); + + await stack.initialize(); + expect(stack.getDepth()).toBe(1); + }); + }); +}); diff --git a/src/core/context/__tests__/permission-manager.test.ts b/src/core/context/__tests__/permission-manager.test.ts new file mode 100644 index 00000000..43c3694a --- /dev/null +++ b/src/core/context/__tests__/permission-manager.test.ts @@ -0,0 +1,349 @@ +/** + * Tests for PermissionManager — access control for frames/stacks + * Covers: checkPermission, enforcePermission, admin access, bulk ops, edge cases + */ + +import { describe, it, expect, beforeEach } from 'vitest'; +import { PermissionManager } from '../permission-manager.js'; +import type { Operation, PermissionContext } from '../permission-manager.js'; +import { ValidationError, ErrorCode } from '../../errors/index.js'; + +// ── Helpers ────────────────────────────────────────────────────────────── + +function fullPermissions() { + return { + canRead: true, + canWrite: true, + canHandoff: true, + canMerge: true, + canAdminister: true, + }; +} + +function readOnlyPermissions() { + return { + canRead: true, + canWrite: false, + canHandoff: false, + canMerge: false, + canAdminister: false, + }; +} + +function noPermissions() { + return { + canRead: false, + canWrite: false, + canHandoff: false, + canMerge: false, + canAdminister: false, + }; +} + +function makeCtx( + userId: 
string, + operation: Operation, + resourceId: string = 'stack-1' +): PermissionContext { + return { + userId, + operation, + resourceType: 'stack', + resourceId, + }; +} + +// ── Tests ──────────────────────────────────────────────────────────────── + +describe('PermissionManager', () => { + let pm: PermissionManager; + + beforeEach(() => { + // Clear env to avoid default admin side effects + delete process.env['STACKMEMORY_DEFAULT_ADMIN']; + pm = new PermissionManager(); + }); + + // ── setStackPermissions / getStackPermissions ──────────────────────── + + describe('setStackPermissions / getStackPermissions', () => { + it('should store and retrieve permissions', () => { + pm.setStackPermissions('user-1', 'stack-a', fullPermissions()); + const perms = pm.getStackPermissions('user-1', 'stack-a'); + expect(perms).toEqual(fullPermissions()); + }); + + it('should return null for unknown user', () => { + expect(pm.getStackPermissions('unknown', 'stack-a')).toBeNull(); + }); + + it('should return null for unknown stack', () => { + pm.setStackPermissions('user-1', 'stack-a', fullPermissions()); + expect(pm.getStackPermissions('user-1', 'stack-b')).toBeNull(); + }); + + it('should overwrite existing permissions', () => { + pm.setStackPermissions('user-1', 'stack-a', fullPermissions()); + pm.setStackPermissions('user-1', 'stack-a', readOnlyPermissions()); + expect(pm.getStackPermissions('user-1', 'stack-a')).toEqual( + readOnlyPermissions() + ); + }); + + it('should support multiple stacks per user', () => { + pm.setStackPermissions('user-1', 'stack-a', fullPermissions()); + pm.setStackPermissions('user-1', 'stack-b', readOnlyPermissions()); + expect(pm.getStackPermissions('user-1', 'stack-a')).toEqual( + fullPermissions() + ); + expect(pm.getStackPermissions('user-1', 'stack-b')).toEqual( + readOnlyPermissions() + ); + }); + }); + + // ── checkPermission ────────────────────────────────────────────────── + + describe('checkPermission', () => { + it('should return true for 
permitted operation', async () => { + pm.setStackPermissions('user-1', 'stack-1', fullPermissions()); + expect(await pm.checkPermission(makeCtx('user-1', 'read'))).toBe(true); + expect(await pm.checkPermission(makeCtx('user-1', 'write'))).toBe(true); + expect(await pm.checkPermission(makeCtx('user-1', 'handoff'))).toBe(true); + expect(await pm.checkPermission(makeCtx('user-1', 'merge'))).toBe(true); + expect(await pm.checkPermission(makeCtx('user-1', 'administer'))).toBe( + true + ); + }); + + it('should return false for denied operation', async () => { + pm.setStackPermissions('user-1', 'stack-1', readOnlyPermissions()); + expect(await pm.checkPermission(makeCtx('user-1', 'write'))).toBe(false); + expect(await pm.checkPermission(makeCtx('user-1', 'handoff'))).toBe( + false + ); + expect(await pm.checkPermission(makeCtx('user-1', 'merge'))).toBe(false); + }); + + it('should return false for user with no permissions', async () => { + expect(await pm.checkPermission(makeCtx('nobody', 'read'))).toBe(false); + }); + + it('should return false when all permissions are false', async () => { + pm.setStackPermissions('user-1', 'stack-1', noPermissions()); + expect(await pm.checkPermission(makeCtx('user-1', 'read'))).toBe(false); + }); + + it('should use stackContext.stackId when available', async () => { + pm.setStackPermissions('user-1', 'stack-ctx', fullPermissions()); + const ctx: PermissionContext = { + userId: 'user-1', + operation: 'read', + resourceType: 'stack', + resourceId: 'fallback-id', + stackContext: { + stackId: 'stack-ctx', + type: 'individual', + projectId: 'proj-1', + }, + }; + expect(await pm.checkPermission(ctx)).toBe(true); + }); + + it('should fall back to resourceId when no stackContext', async () => { + pm.setStackPermissions('user-1', 'res-id', fullPermissions()); + expect( + await pm.checkPermission(makeCtx('user-1', 'read', 'res-id')) + ).toBe(true); + }); + }); + + // ── Admin access ───────────────────────────────────────────────────── + + 
describe('admin access', () => { + it('should grant admin full access to any operation', async () => { + pm.grantAdminAccess('admin-1'); + // No stack permissions set, but admin bypasses + expect(await pm.checkPermission(makeCtx('admin-1', 'read'))).toBe(true); + expect(await pm.checkPermission(makeCtx('admin-1', 'administer'))).toBe( + true + ); + }); + + it('should identify admin users', () => { + pm.grantAdminAccess('admin-1'); + expect(pm.isAdmin('admin-1')).toBe(true); + expect(pm.isAdmin('user-1')).toBe(false); + }); + + it('should revoke admin access', async () => { + pm.grantAdminAccess('admin-1'); + pm.revokeAdminAccess('admin-1'); + expect(pm.isAdmin('admin-1')).toBe(false); + expect(await pm.checkPermission(makeCtx('admin-1', 'read'))).toBe(false); + }); + + it('should initialize default admin from env', () => { + process.env['STACKMEMORY_DEFAULT_ADMIN'] = 'env-admin'; + const pm2 = new PermissionManager(); + expect(pm2.isAdmin('env-admin')).toBe(true); + delete process.env['STACKMEMORY_DEFAULT_ADMIN']; + }); + }); + + // ── enforcePermission ──────────────────────────────────────────────── + + describe('enforcePermission', () => { + it('should not throw for permitted operation', async () => { + pm.setStackPermissions('user-1', 'stack-1', fullPermissions()); + await expect( + pm.enforcePermission(makeCtx('user-1', 'read')) + ).resolves.not.toThrow(); + }); + + it('should throw ValidationError for denied operation', async () => { + pm.setStackPermissions('user-1', 'stack-1', readOnlyPermissions()); + await expect( + pm.enforcePermission(makeCtx('user-1', 'write')) + ).rejects.toThrow(ValidationError); + }); + + it('should include context in thrown error', async () => { + try { + await pm.enforcePermission(makeCtx('user-1', 'write', 'stack-99')); + expect.fail('Should have thrown'); + } catch (error) { + expect(error).toBeInstanceOf(ValidationError); + const ve = error as ValidationError; + expect(ve.code).toBe(ErrorCode.PERMISSION_VIOLATION); + 
expect(ve.message).toContain('user-1'); + expect(ve.message).toContain('write'); + expect(ve.message).toContain('stack-99'); + } + }); + + it('should not throw for admin user', async () => { + pm.grantAdminAccess('admin-1'); + await expect( + pm.enforcePermission(makeCtx('admin-1', 'administer')) + ).resolves.not.toThrow(); + }); + }); + + // ── getUserPermissions ─────────────────────────────────────────────── + + describe('getUserPermissions', () => { + it('should return empty map for unknown user', () => { + const perms = pm.getUserPermissions('unknown'); + expect(perms.size).toBe(0); + }); + + it('should return all stack permissions for user', () => { + pm.setStackPermissions('user-1', 'stack-a', fullPermissions()); + pm.setStackPermissions('user-1', 'stack-b', readOnlyPermissions()); + const perms = pm.getUserPermissions('user-1'); + expect(perms.size).toBe(2); + expect(perms.get('stack-a')).toEqual(fullPermissions()); + }); + }); + + // ── removeUserPermissions ──────────────────────────────────────────── + + describe('removeUserPermissions', () => { + it('should remove all permissions and admin status', async () => { + pm.setStackPermissions('user-1', 'stack-a', fullPermissions()); + pm.grantAdminAccess('user-1'); + pm.removeUserPermissions('user-1'); + expect(pm.isAdmin('user-1')).toBe(false); + expect(pm.getStackPermissions('user-1', 'stack-a')).toBeNull(); + expect(await pm.checkPermission(makeCtx('user-1', 'read'))).toBe(false); + }); + + it('should be safe to call for unknown user', () => { + expect(() => pm.removeUserPermissions('unknown')).not.toThrow(); + }); + }); + + // ── createContext ──────────────────────────────────────────────────── + + describe('createContext', () => { + it('should create a valid PermissionContext', () => { + const ctx = pm.createContext('user-1', 'write', 'frame', 'frame-1'); + expect(ctx.userId).toBe('user-1'); + expect(ctx.operation).toBe('write'); + expect(ctx.resourceType).toBe('frame'); + 
expect(ctx.resourceId).toBe('frame-1'); + expect(ctx.stackContext).toBeUndefined(); + }); + + it('should include optional stackContext', () => { + const stackCtx = { + stackId: 'stack-1', + type: 'shared' as const, + projectId: 'proj-1', + }; + const ctx = pm.createContext( + 'user-1', + 'read', + 'stack', + 'stack-1', + stackCtx + ); + expect(ctx.stackContext).toEqual(stackCtx); + }); + }); + + // ── setBulkStackPermissions ────────────────────────────────────────── + + describe('setBulkStackPermissions', () => { + it('should set permissions for multiple stacks at once', () => { + pm.setBulkStackPermissions('user-1', { + 'stack-a': fullPermissions(), + 'stack-b': readOnlyPermissions(), + }); + expect(pm.getStackPermissions('user-1', 'stack-a')).toEqual( + fullPermissions() + ); + expect(pm.getStackPermissions('user-1', 'stack-b')).toEqual( + readOnlyPermissions() + ); + }); + + it('should merge with existing permissions', () => { + pm.setStackPermissions('user-1', 'stack-a', readOnlyPermissions()); + pm.setBulkStackPermissions('user-1', { + 'stack-b': fullPermissions(), + }); + // Original should still be there + expect(pm.getStackPermissions('user-1', 'stack-a')).toEqual( + readOnlyPermissions() + ); + expect(pm.getStackPermissions('user-1', 'stack-b')).toEqual( + fullPermissions() + ); + }); + + it('should handle empty object', () => { + pm.setBulkStackPermissions('user-1', {}); + expect(pm.getUserPermissions('user-1').size).toBe(0); + }); + }); + + // ── getPermissionSummary ───────────────────────────────────────────── + + describe('getPermissionSummary', () => { + it('should return summary with admin status and permissions', () => { + pm.grantAdminAccess('user-1'); + pm.setStackPermissions('user-1', 'stack-a', fullPermissions()); + const summary = pm.getPermissionSummary('user-1'); + expect(summary.isAdmin).toBe(true); + expect(summary.stackPermissions['stack-a']).toEqual(fullPermissions()); + }); + + it('should return empty summary for unknown user', () => { 
+ const summary = pm.getPermissionSummary('unknown'); + expect(summary.isAdmin).toBe(false); + expect(Object.keys(summary.stackPermissions)).toHaveLength(0); + }); + }); +}); diff --git a/src/core/context/__tests__/validation.test.ts b/src/core/context/__tests__/validation.test.ts new file mode 100644 index 00000000..5041f89b --- /dev/null +++ b/src/core/context/__tests__/validation.test.ts @@ -0,0 +1,596 @@ +/** + * Tests for validation.ts — Zod schemas and validation helpers + * Covers: all schemas, validateInput, validateInputSafe, edge cases + */ + +import { describe, it, expect } from 'vitest'; +import { + StackPermissionsSchema, + CreateSharedStackSchema, + SwitchStackSchema, + FrameContextSchema, + BusinessContextSchema, + HandoffRequestSchema, + InitiateHandoffSchema, + HandoffApprovalSchema, + ConflictResolutionSchema, + MergePolicyRuleSchema, + CreateMergePolicySchema, + StartMergeSessionSchema, + validateInput, + validateInputSafe, +} from '../validation.js'; +import { ValidationError, ErrorCode } from '../../errors/index.js'; + +// ── Helpers ────────────────────────────────────────────────────────────── + +function validPermissions() { + return { + canRead: true, + canWrite: true, + canHandoff: false, + canMerge: false, + canAdminister: false, + }; +} + +function validFrameContext() { + return { + totalFrames: 5, + frameTypes: ['task'], + estimatedSize: 1024, + dependencies: [], + }; +} + +function validHandoffRequest() { + return { + initiatedAt: new Date(), + initiatorId: 'user-1', + frameContext: validFrameContext(), + }; +} + +// ── StackPermissionsSchema ─────────────────────────────────────────────── + +describe('StackPermissionsSchema', () => { + it('should accept valid permissions', () => { + const result = StackPermissionsSchema.parse(validPermissions()); + expect(result.canRead).toBe(true); + expect(result.canAdminister).toBe(false); + }); + + it('should reject missing fields', () => { + expect(() => StackPermissionsSchema.parse({ canRead: 
true })).toThrow(); + }); + + it('should reject non-boolean values', () => { + expect(() => + StackPermissionsSchema.parse({ + ...validPermissions(), + canRead: 'yes', + }) + ).toThrow(); + }); +}); + +// ── CreateSharedStackSchema ────────────────────────────────────────────── + +describe('CreateSharedStackSchema', () => { + it('should accept valid input', () => { + const result = CreateSharedStackSchema.parse({ + teamId: 'team-1', + name: 'My Stack', + ownerId: 'user-1', + }); + expect(result.name).toBe('My Stack'); + }); + + it('should accept optional permissions', () => { + const result = CreateSharedStackSchema.parse({ + teamId: 'team-1', + name: 'Stack', + ownerId: 'user-1', + permissions: validPermissions(), + }); + expect(result.permissions?.canRead).toBe(true); + }); + + it('should reject empty name', () => { + expect(() => + CreateSharedStackSchema.parse({ + teamId: 'team-1', + name: '', + ownerId: 'user-1', + }) + ).toThrow(); + }); + + it('should reject name exceeding 200 chars', () => { + expect(() => + CreateSharedStackSchema.parse({ + teamId: 'team-1', + name: 'x'.repeat(201), + ownerId: 'user-1', + }) + ).toThrow(); + }); + + it('should reject empty teamId', () => { + expect(() => + CreateSharedStackSchema.parse({ + teamId: '', + name: 'Stack', + ownerId: 'user-1', + }) + ).toThrow(); + }); +}); + +// ── SwitchStackSchema ──────────────────────────────────────────────────── + +describe('SwitchStackSchema', () => { + it('should accept valid stackId', () => { + const result = SwitchStackSchema.parse({ stackId: 'stack-123' }); + expect(result.stackId).toBe('stack-123'); + }); + + it('should reject empty stackId', () => { + expect(() => SwitchStackSchema.parse({ stackId: '' })).toThrow(); + }); + + it('should reject stackId exceeding 200 chars', () => { + expect(() => + SwitchStackSchema.parse({ stackId: 'x'.repeat(201) }) + ).toThrow(); + }); +}); + +// ── FrameContextSchema ─────────────────────────────────────────────────── + 
+describe('FrameContextSchema', () => { + it('should accept valid frame context', () => { + const result = FrameContextSchema.parse(validFrameContext()); + expect(result.totalFrames).toBe(5); + }); + + it('should reject totalFrames < 1', () => { + expect(() => + FrameContextSchema.parse({ ...validFrameContext(), totalFrames: 0 }) + ).toThrow(); + }); + + it('should reject totalFrames > 10000', () => { + expect(() => + FrameContextSchema.parse({ ...validFrameContext(), totalFrames: 10001 }) + ).toThrow(); + }); + + it('should reject empty frameTypes', () => { + expect(() => + FrameContextSchema.parse({ ...validFrameContext(), frameTypes: [] }) + ).toThrow(); + }); + + it('should reject estimatedSize > 1MB', () => { + expect(() => + FrameContextSchema.parse({ + ...validFrameContext(), + estimatedSize: 1000001, + }) + ).toThrow(); + }); + + it('should reject negative estimatedSize', () => { + expect(() => + FrameContextSchema.parse({ ...validFrameContext(), estimatedSize: -1 }) + ).toThrow(); + }); + + it('should reject more than 100 dependencies', () => { + expect(() => + FrameContextSchema.parse({ + ...validFrameContext(), + dependencies: Array.from({ length: 101 }, (_, i) => `dep-${i}`), + }) + ).toThrow(); + }); +}); + +// ── BusinessContextSchema ──────────────────────────────────────────────── + +describe('BusinessContextSchema', () => { + it('should accept empty object (all optional)', () => { + const result = BusinessContextSchema.parse({}); + expect(result).toBeDefined(); + }); + + it('should accept valid priority enum', () => { + const result = BusinessContextSchema.parse({ priority: 'critical' }); + expect(result.priority).toBe('critical'); + }); + + it('should reject invalid priority', () => { + expect(() => BusinessContextSchema.parse({ priority: 'urgent' })).toThrow(); + }); + + it('should reject milestone exceeding 100 chars', () => { + expect(() => + BusinessContextSchema.parse({ milestone: 'x'.repeat(101) }) + ).toThrow(); + }); + + it('should reject 
more than 20 stakeholders', () => { + expect(() => + BusinessContextSchema.parse({ + stakeholders: Array.from({ length: 21 }, (_, i) => `user-${i}`), + }) + ).toThrow(); + }); +}); + +// ── HandoffRequestSchema ───────────────────────────────────────────────── + +describe('HandoffRequestSchema', () => { + it('should accept valid handoff request', () => { + const result = HandoffRequestSchema.parse(validHandoffRequest()); + expect(result.initiatorId).toBe('user-1'); + }); + + it('should accept optional businessContext', () => { + const result = HandoffRequestSchema.parse({ + ...validHandoffRequest(), + businessContext: { priority: 'high' }, + }); + expect(result.businessContext?.priority).toBe('high'); + }); + + it('should reject missing initiatorId', () => { + const { initiatorId: _, ...rest } = validHandoffRequest(); + expect(() => HandoffRequestSchema.parse(rest)).toThrow(); + }); +}); + +// ── InitiateHandoffSchema ──────────────────────────────────────────────── + +describe('InitiateHandoffSchema', () => { + it('should accept valid initiate handoff input', () => { + const result = InitiateHandoffSchema.parse({ + targetStackId: 'stack-1', + frameIds: ['frame-1'], + handoffRequest: validHandoffRequest(), + }); + expect(result.targetStackId).toBe('stack-1'); + }); + + it('should reject empty frameIds', () => { + expect(() => + InitiateHandoffSchema.parse({ + targetStackId: 'stack-1', + frameIds: [], + handoffRequest: validHandoffRequest(), + }) + ).toThrow(); + }); + + it('should reject more than 1000 frameIds', () => { + expect(() => + InitiateHandoffSchema.parse({ + targetStackId: 'stack-1', + frameIds: Array.from({ length: 1001 }, (_, i) => `frame-${i}`), + handoffRequest: validHandoffRequest(), + }) + ).toThrow(); + }); + + it('should accept optional reviewerId and description', () => { + const result = InitiateHandoffSchema.parse({ + targetStackId: 'stack-1', + frameIds: ['frame-1'], + handoffRequest: validHandoffRequest(), + reviewerId: 'reviewer-1', + 
      description: 'Handoff description',
    });
    expect(result.reviewerId).toBe('reviewer-1');
  });

  it('should reject description exceeding 1000 chars', () => {
    expect(() =>
      InitiateHandoffSchema.parse({
        targetStackId: 'stack-1',
        frameIds: ['frame-1'],
        handoffRequest: validHandoffRequest(),
        description: 'x'.repeat(1001),
      })
    ).toThrow();
  });
});

// ── HandoffApprovalSchema ────────────────────────────────────────────────

describe('HandoffApprovalSchema', () => {
  it('should accept valid approval', () => {
    const result = HandoffApprovalSchema.parse({
      reviewerId: 'reviewer-1',
      decision: 'approved',
    });
    expect(result.decision).toBe('approved');
  });

  // Exhaustively covers the decision enum.
  it('should accept all decision values', () => {
    for (const decision of ['approved', 'rejected', 'needs_changes']) {
      const result = HandoffApprovalSchema.parse({
        reviewerId: 'r-1',
        decision,
      });
      expect(result.decision).toBe(decision);
    }
  });

  it('should reject invalid decision', () => {
    expect(() =>
      HandoffApprovalSchema.parse({
        reviewerId: 'r-1',
        decision: 'maybe',
      })
    ).toThrow();
  });

  it('should accept optional suggestedChanges', () => {
    const result = HandoffApprovalSchema.parse({
      reviewerId: 'r-1',
      decision: 'needs_changes',
      suggestedChanges: [{ frameId: 'frame-1', suggestion: 'Fix naming' }],
    });
    expect(result.suggestedChanges).toHaveLength(1);
  });

  it('should reject feedback exceeding 2000 chars', () => {
    expect(() =>
      HandoffApprovalSchema.parse({
        reviewerId: 'r-1',
        decision: 'rejected',
        feedback: 'x'.repeat(2001),
      })
    ).toThrow();
  });
});

// ── ConflictResolutionSchema ─────────────────────────────────────────────

describe('ConflictResolutionSchema', () => {
  it('should accept valid resolution', () => {
    const result = ConflictResolutionSchema.parse({
      strategy: 'source_wins',
      resolvedBy: 'user-1',
    });
    expect(result.strategy).toBe('source_wins');
  });

  it('should accept all strategy values', () => {
    for (const strategy of [
      'source_wins',
      'target_wins',
      'merge_both',
      'manual',
    ]) {
      expect(() =>
        ConflictResolutionSchema.parse({ strategy, resolvedBy: 'u-1' })
      ).not.toThrow();
    }
  });

  it('should reject invalid strategy', () => {
    expect(() =>
      ConflictResolutionSchema.parse({
        strategy: 'auto',
        resolvedBy: 'u-1',
      })
    ).toThrow();
  });
});

// ── MergePolicyRuleSchema ────────────────────────────────────────────────

describe('MergePolicyRuleSchema', () => {
  it('should accept valid rule', () => {
    const result = MergePolicyRuleSchema.parse({
      condition: 'type == task',
      action: 'source_wins',
      priority: 1,
    });
    expect(result.priority).toBe(1);
  });

  // Checks both boundaries of the 1-10 priority range.
  it('should reject priority outside 1-10', () => {
    expect(() =>
      MergePolicyRuleSchema.parse({
        condition: 'x',
        action: 'source_wins',
        priority: 0,
      })
    ).toThrow();
    expect(() =>
      MergePolicyRuleSchema.parse({
        condition: 'x',
        action: 'source_wins',
        priority: 11,
      })
    ).toThrow();
  });

  it('should reject empty condition', () => {
    expect(() =>
      MergePolicyRuleSchema.parse({
        condition: '',
        action: 'source_wins',
        priority: 1,
      })
    ).toThrow();
  });
});

// ── CreateMergePolicySchema ──────────────────────────────────────────────

describe('CreateMergePolicySchema', () => {
  it('should accept valid policy', () => {
    const result = CreateMergePolicySchema.parse({
      name: 'Default Policy',
      rules: [{ condition: 'always', action: 'source_wins', priority: 1 }],
      autoApplyThreshold: 'medium',
    });
    expect(result.name).toBe('Default Policy');
  });

  it('should reject empty rules', () => {
    expect(() =>
      CreateMergePolicySchema.parse({
        name: 'P',
        rules: [],
        autoApplyThreshold: 'low',
      })
    ).toThrow();
  });

  // 21 rules — one past the documented 20-rule maximum.
  it('should reject more than 20 rules', () => {
    const rules = Array.from({ length: 21 }, (_, i) => ({
      condition: `cond-${i}`,
      action: 'source_wins' as const,
      priority: 1,
    }));
    expect(() =>
      CreateMergePolicySchema.parse({
        name: 'P',
        rules,
        autoApplyThreshold: 'low',
      })
    ).toThrow();
  });

  it('should reject invalid autoApplyThreshold', () => {
    expect(() =>
      CreateMergePolicySchema.parse({
        name: 'P',
        rules: [{ condition: 'x', action: 'source_wins', priority: 1 }],
        autoApplyThreshold: 'extreme',
      })
    ).toThrow();
  });
});

// ── StartMergeSessionSchema ──────────────────────────────────────────────

describe('StartMergeSessionSchema', () => {
  it('should accept valid merge session input', () => {
    const result = StartMergeSessionSchema.parse({
      sourceStackId: 'stack-a',
      targetStackId: 'stack-b',
    });
    expect(result.sourceStackId).toBe('stack-a');
  });

  it('should accept optional frameIds and policyName', () => {
    const result = StartMergeSessionSchema.parse({
      sourceStackId: 'a',
      targetStackId: 'b',
      frameIds: ['f1', 'f2'],
      policyName: 'default',
    });
    expect(result.frameIds).toHaveLength(2);
  });

  it('should reject empty sourceStackId', () => {
    expect(() =>
      StartMergeSessionSchema.parse({
        sourceStackId: '',
        targetStackId: 'b',
      })
    ).toThrow();
  });
});

// ── validateInput ────────────────────────────────────────────────────────

describe('validateInput', () => {
  it('should return parsed data for valid input', () => {
    const result = validateInput(SwitchStackSchema, { stackId: 'stack-1' });
    expect(result.stackId).toBe('stack-1');
  });

  it('should throw ValidationError for invalid input', () => {
    expect(() => validateInput(SwitchStackSchema, { stackId: '' })).toThrow(
      ValidationError
    );
  });

  // NOTE(review): if validateInput stops throwing, this test passes silently
  // because the assertions live inside the catch — consider expect.assertions(3).
  it('should include field path in error message', () => {
    try {
      validateInput(SwitchStackSchema, { stackId: '' });
    } catch (error) {
      expect(error).toBeInstanceOf(ValidationError);
      expect((error as ValidationError).message).toContain('Validation failed');
      expect((error as ValidationError).code).toBe(ErrorCode.VALIDATION_FAILED);
    }
  });

  // Non-Zod exceptions must propagate unchanged (not be wrapped).
  it('should re-throw non-Zod errors', () => {
    const badSchema = {
      parse: () => {
        throw new Error('Not a Zod error');
      },
    };
    expect(() => validateInput(badSchema as any, {})).toThrow(
      'Not a Zod error'
    );
  });

  it('should handle null input', () => {
    expect(() => validateInput(SwitchStackSchema, null)).toThrow(
      ValidationError
    );
  });

  it('should handle undefined input', () => {
    expect(() => validateInput(SwitchStackSchema, undefined)).toThrow(
      ValidationError
    );
  });
});

// ── validateInputSafe ────────────────────────────────────────────────────

describe('validateInputSafe', () => {
  it('should return success for valid input', () => {
    const result = validateInputSafe(SwitchStackSchema, { stackId: 's-1' });
    expect(result.success).toBe(true);
    if (result.success) {
      expect(result.data.stackId).toBe('s-1');
    }
  });

  it('should return error string for invalid input', () => {
    const result = validateInputSafe(SwitchStackSchema, { stackId: '' });
    expect(result.success).toBe(false);
    if (!result.success) {
      expect(result.error).toContain('Validation failed');
    }
  });

  // Safe variant converts non-Zod throws into a generic error result.
  it('should return unknown error for non-Zod errors', () => {
    const badSchema = {
      parse: () => {
        throw new Error('Boom');
      },
    };
    const result = validateInputSafe(badSchema as any, {});
    expect(result.success).toBe(false);
    if (!result.success) {
      expect(result.error).toBe('Unknown validation error');
    }
  });

  it('should handle null input gracefully', () => {
    const result = validateInputSafe(SwitchStackSchema, null);
    expect(result.success).toBe(false);
  });
});
diff --git a/src/core/rules/__tests__/rule-engine.test.ts b/src/core/rules/__tests__/rule-engine.test.ts
new file mode 100644
index 00000000..527675c2
--- /dev/null
+++ b/src/core/rules/__tests__/rule-engine.test.ts
@@ -0,0 +1,474 @@
/**
 * Tests for the rules engine.
+ */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import Database from 'better-sqlite3'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; +import { RuleEngine } from '../rule-engine.js'; +import { RuleStore } from '../rule-store.js'; +import { BUILT_IN_RULES, matchesScope } from '../built-in-rules.js'; +import type { RuleContext, RuleDefinition } from '../types.js'; + +function makeContext(overrides: Partial = {}): RuleContext { + return { + trigger: 'on-demand', + files: [], + content: new Map(), + commitMessage: '', + projectRoot: '/tmp/test-project', + ...overrides, + }; +} + +describe('RuleStore', () => { + let db: Database.Database; + let store: RuleStore; + + beforeEach(() => { + db = new Database(':memory:'); + store = new RuleStore(db); + }); + + afterEach(() => { + db.close(); + }); + + it('creates table on construction', () => { + const tables = db + .prepare( + "SELECT name FROM sqlite_master WHERE type='table' AND name='rules'" + ) + .all(); + expect(tables).toHaveLength(1); + }); + + it('upserts and retrieves rules', () => { + store.upsert({ + id: 'test-rule', + name: 'Test', + description: 'A test rule', + trigger_type: 'lint', + severity: 'warn', + scope: '**/*', + enabled: 1, + builtin: 0, + }); + const rule = store.getById('test-rule'); + expect(rule).toBeDefined(); + expect(rule!.name).toBe('Test'); + expect(rule!.enabled).toBe(1); + }); + + it('seeds builtins with INSERT OR IGNORE', () => { + store.seedBuiltins([ + { + id: 'builtin-1', + name: 'B1', + description: '', + trigger_type: 'lint', + severity: 'warn', + scope: '**/*', + enabled: 1, + builtin: 1, + }, + ]); + // Seed again — should not duplicate + store.seedBuiltins([ + { + id: 'builtin-1', + name: 'B1 updated', + description: '', + trigger_type: 'lint', + severity: 'warn', + scope: '**/*', + enabled: 1, + builtin: 1, + }, + ]); + const all = store.getAll(); + expect(all).toHaveLength(1); + expect(all[0]!.name).toBe('B1'); 
// not updated + }); + + it('enables and disables rules', () => { + store.upsert({ + id: 'toggle', + name: 'Toggle', + description: '', + trigger_type: 'lint', + severity: 'warn', + scope: '**/*', + enabled: 1, + builtin: 0, + }); + store.setEnabled('toggle', false); + expect(store.getById('toggle')!.enabled).toBe(0); + expect(store.getEnabled()).toHaveLength(0); + + store.setEnabled('toggle', true); + expect(store.getById('toggle')!.enabled).toBe(1); + }); + + it('filters by trigger', () => { + store.upsert({ + id: 'r1', + name: 'R1', + description: '', + trigger_type: 'lint', + severity: 'warn', + scope: '**/*', + enabled: 1, + builtin: 0, + }); + store.upsert({ + id: 'r2', + name: 'R2', + description: '', + trigger_type: 'commit', + severity: 'warn', + scope: '**/*', + enabled: 1, + builtin: 0, + }); + expect(store.getByTrigger('lint')).toHaveLength(1); + expect(store.getByTrigger('commit')).toHaveLength(1); + }); + + it('deletes rules', () => { + store.upsert({ + id: 'del', + name: 'Del', + description: '', + trigger_type: 'lint', + severity: 'warn', + scope: '**/*', + enabled: 1, + builtin: 0, + }); + expect(store.delete('del')).toBe(true); + expect(store.getById('del')).toBeUndefined(); + expect(store.delete('nonexistent')).toBe(false); + }); +}); + +describe('RuleEngine', () => { + let db: Database.Database; + let engine: RuleEngine; + + beforeEach(() => { + db = new Database(':memory:'); + engine = new RuleEngine(db); + }); + + afterEach(() => { + db.close(); + }); + + it('seeds built-in rules on construction', () => { + const rules = engine.listRules(); + expect(rules.length).toBe(BUILT_IN_RULES.length); + for (const builtin of BUILT_IN_RULES) { + expect(rules.find((r) => r.id === builtin.id)).toBeDefined(); + } + }); + + it('registers custom rules', () => { + const custom: RuleDefinition = { + id: 'custom-1', + name: 'Custom', + description: 'test', + trigger: 'on-demand', + severity: 'warn', + scope: '**/*', + enabled: true, + builtin: false, + check: () 
=> ({ passed: true, violations: [] }), + }; + engine.registerRule(custom); + const rules = engine.listRules(); + expect(rules.find((r) => r.id === 'custom-1')).toBeDefined(); + }); + + it('skips disabled rules', () => { + engine.disableRule('no-coauthor'); + const ctx = makeContext({ + trigger: 'commit', + commitMessage: 'test\nCo-Authored-By: someone', + }); + const result = engine.evaluate(ctx); + expect(result.passed).toBe(true); + }); +}); + +describe('Built-in rules', () => { + let db: Database.Database; + let engine: RuleEngine; + + beforeEach(() => { + db = new Database(':memory:'); + engine = new RuleEngine(db); + }); + + afterEach(() => { + db.close(); + }); + + describe('no-coauthor', () => { + it('passes on clean commit message', () => { + const result = engine.evaluate( + makeContext({ + trigger: 'commit', + commitMessage: 'feat: add new feature', + }) + ); + expect(result.passed).toBe(true); + }); + + it('fails on Co-Authored-By', () => { + const result = engine.evaluate( + makeContext({ + trigger: 'commit', + commitMessage: 'feat: thing\n\nCo-Authored-By: bot ', + }) + ); + expect(result.passed).toBe(false); + expect(result.violations[0]!.ruleId).toBe('no-coauthor'); + }); + }); + + describe('no-jest-globals', () => { + it('detects @jest/globals import', () => { + const content = new Map(); + content.set( + 'src/utils/__tests__/helper.test.ts', + "import { describe, it } from '@jest/globals';\n" + ); + const result = engine.evaluate( + makeContext({ + trigger: 'lint', + files: ['src/utils/__tests__/helper.test.ts'], + content, + }) + ); + expect(result.passed).toBe(false); + expect(result.violations[0]!.ruleId).toBe('no-jest-globals'); + }); + + it('passes on clean test file', () => { + const content = new Map(); + content.set( + 'src/utils/__tests__/helper.test.ts', + "describe('helper', () => { it('works', () => {}) });\n" + ); + const result = engine.evaluate( + makeContext({ + trigger: 'lint', + files: ['src/utils/__tests__/helper.test.ts'], + 
content, + }) + ); + const jestViolations = result.violations.filter( + (v) => v.ruleId === 'no-jest-globals' + ); + expect(jestViolations).toHaveLength(0); + }); + }); + + describe('catch-no-underscore', () => { + it('catches underscore-prefixed catch variable', () => { + const content = new Map(); + content.set( + 'src/utils/helper.ts', + 'try { foo() } catch (_err) { log() }\n' + ); + const result = engine.evaluate( + makeContext({ + trigger: 'lint', + files: ['src/utils/helper.ts'], + content, + }) + ); + const violations = result.violations.filter( + (v) => v.ruleId === 'catch-no-underscore' + ); + expect(violations).toHaveLength(1); + }); + + it('passes on empty catch', () => { + const content = new Map(); + content.set('src/utils/helper.ts', 'try { foo() } catch { log() }\n'); + const result = engine.evaluate( + makeContext({ + trigger: 'lint', + files: ['src/utils/helper.ts'], + content, + }) + ); + const violations = result.violations.filter( + (v) => v.ruleId === 'catch-no-underscore' + ); + expect(violations).toHaveLength(0); + }); + }); + + describe('return-dont-throw', () => { + it('warns on throw in service code', () => { + const content = new Map(); + content.set( + 'src/services/user.ts', + 'function getUser() { throw new Error("not found"); }\n' + ); + const result = engine.evaluate( + makeContext({ + trigger: 'lint', + files: ['src/services/user.ts'], + content, + }) + ); + const violations = result.violations.filter( + (v) => v.ruleId === 'return-dont-throw' + ); + expect(violations).toHaveLength(1); + }); + + it('skips middleware and error files', () => { + const content = new Map(); + content.set( + 'src/middleware/auth.ts', + 'throw new Error("unauthorized");\n' + ); + content.set('src/errors/custom.ts', 'throw new Error("custom");\n'); + const result = engine.evaluate( + makeContext({ + trigger: 'lint', + files: ['src/middleware/auth.ts', 'src/errors/custom.ts'], + content, + }) + ); + const violations = result.violations.filter( + (v) => 
v.ruleId === 'return-dont-throw' + ); + expect(violations).toHaveLength(0); + }); + }); + + describe('migration-sequential', () => { + it('detects gaps in migration numbering', () => { + const files = [ + 'src/db/migrations/001_init.sql', + 'src/db/migrations/002_users.sql', + 'src/db/migrations/005_recipes.sql', + ]; + const result = engine.evaluateAll( + makeContext({ + trigger: 'on-demand', + files, + }) + ); + const violations = result.violations.filter( + (v) => v.ruleId === 'migration-sequential' + ); + expect(violations).toHaveLength(1); + expect(violations[0]!.message).toContain('002'); + expect(violations[0]!.message).toContain('005'); + }); + + it('passes on sequential migrations', () => { + const files = [ + 'src/db/migrations/001_init.sql', + 'src/db/migrations/002_users.sql', + 'src/db/migrations/003_recipes.sql', + ]; + const result = engine.evaluateAll( + makeContext({ + trigger: 'on-demand', + files, + }) + ); + const violations = result.violations.filter( + (v) => v.ruleId === 'migration-sequential' + ); + expect(violations).toHaveLength(0); + }); + }); + + describe('mock-lifecycle', () => { + it('warns when clearAllMocks without re-setup', () => { + const content = new Map(); + content.set( + 'src/services/__tests__/user.test.ts', + ` +beforeEach(() => { + jest.clearAllMocks(); +}); + +describe('getUser', () => { + it('returns user', () => {}); +}); +` + ); + const result = engine.evaluate( + makeContext({ + trigger: 'lint', + files: ['src/services/__tests__/user.test.ts'], + content, + }) + ); + const violations = result.violations.filter( + (v) => v.ruleId === 'mock-lifecycle' + ); + expect(violations).toHaveLength(1); + }); + + it('passes when mocks are re-set after clear', () => { + const content = new Map(); + content.set( + 'src/services/__tests__/user.test.ts', + ` +beforeEach(() => { + jest.clearAllMocks(); + mockDb.query.mockReturnValue([]); +}); +` + ); + const result = engine.evaluate( + makeContext({ + trigger: 'lint', + files: 
['src/services/__tests__/user.test.ts'], + content, + }) + ); + const violations = result.violations.filter( + (v) => v.ruleId === 'mock-lifecycle' + ); + expect(violations).toHaveLength(0); + }); + }); +}); + +describe('matchesScope', () => { + it('matches wildcard', () => { + expect(matchesScope('anything.ts', '*')).toBe(true); + expect(matchesScope('deep/path/file.ts', '**/*')).toBe(true); + }); + + it('matches extension patterns', () => { + expect(matchesScope('src/foo.test.ts', 'src/**/*.test.{ts,js}')).toBe(true); + expect(matchesScope('src/deep/bar.test.js', 'src/**/*.test.{ts,js}')).toBe( + true + ); + expect(matchesScope('src/foo.ts', 'src/**/*.test.{ts,js}')).toBe(false); + }); + + it('matches migration patterns', () => { + expect( + matchesScope('db/migrations/001_init.sql', '**/migrations/*.sql') + ).toBe(true); + expect( + matchesScope('src/db/migrations/002.sql', '**/migrations/*.sql') + ).toBe(true); + }); +}); diff --git a/src/core/rules/built-in-rules.ts b/src/core/rules/built-in-rules.ts new file mode 100644 index 00000000..f0b591a5 --- /dev/null +++ b/src/core/rules/built-in-rules.ts @@ -0,0 +1,355 @@ +/** + * Built-in rules derived from ProvenantAI development patterns. + */ + +import type { + RuleDefinition, + RuleContext, + RuleResult, + RuleViolation, + RuleRow, +} from './types.js'; +import { prReviewRule } from './pr-review-rule.js'; + +function violation( + ruleId: string, + ruleName: string, + severity: RuleDefinition['severity'], + message: string, + file?: string, + line?: number, + suggestion?: string +): RuleViolation { + return { ruleId, ruleName, severity, message, file, line, suggestion }; +} + +function pass(): RuleResult { + return { passed: true, violations: [] }; +} + +function fail(violations: RuleViolation[]): RuleResult { + return { passed: false, violations }; +} + +/** + * Simple glob matching (avoids external dependency). + * Supports: *, **, ?, {a,b} patterns. 
+ */ +function escapeGlobPart(part: string): string { + return part + .replace(/\./g, '\\.') + .replace( + /\{([^}]+)\}/g, + (_m, choices: string) => `(${choices.split(',').join('|')})` + ) + .replace(/\*/g, '[^/]*') + .replace(/\?/g, '[^/]'); +} + +function globToRegex(pattern: string): RegExp { + // Handle leading ** (match any prefix) + if (pattern.startsWith('**/')) { + const rest = escapeGlobPart(pattern.slice(3)); + return new RegExp(`^(?:.+/)?${rest}$`); + } + // Split on /**/ to handle mid-pattern globstar + const parts = pattern.split('/**/'); + if (parts.length === 1) { + return new RegExp(`^${escapeGlobPart(pattern)}$`); + } + const regexParts = parts.map(escapeGlobPart); + return new RegExp(`^${regexParts.join('(?:/.*?/|/)')}$`); +} + +export function matchesScope(filePath: string, scope: string): boolean { + if (scope === '**/*' || scope === '*') return true; + const re = globToRegex(scope); + return re.test(filePath); +} + +export function filterByScope(files: string[], scope: string): string[] { + return files.filter((f) => matchesScope(f, scope)); +} + +// --------------------------------------------------------------------------- +// Rule: no-coauthor +// --------------------------------------------------------------------------- +const noCoauthor: RuleDefinition = { + id: 'no-coauthor', + name: 'No Co-Authored-By', + description: 'Block Co-Authored-By lines in commit messages', + trigger: 'commit', + severity: 'error', + scope: '*', + enabled: true, + builtin: true, + check(ctx: RuleContext): RuleResult { + if (!ctx.commitMessage) return pass(); + if (/co-authored-by/i.test(ctx.commitMessage)) { + return fail([ + violation( + this.id, + this.name, + this.severity, + 'Commit message contains Co-Authored-By line', + undefined, + undefined, + 'Remove the Co-Authored-By trailer' + ), + ]); + } + return pass(); + }, +}; + +// --------------------------------------------------------------------------- +// Rule: no-jest-globals +// 
---------------------------------------------------------------------------
// Flags any line mentioning @jest/globals in src/ test files; the project
// relies on globally-injected jest APIs, so the import causes redeclarations.
const noJestGlobals: RuleDefinition = {
  id: 'no-jest-globals',
  name: 'No @jest/globals imports',
  description:
    'Flag @jest/globals imports in src/ tests (causes redeclaration errors)',
  trigger: 'lint',
  severity: 'error',
  scope: 'src/**/*.test.{ts,js}',
  enabled: true,
  builtin: true,
  check(ctx: RuleContext): RuleResult {
    const violations: RuleViolation[] = [];
    const files = filterByScope(ctx.files, this.scope);
    for (const file of files) {
      const content = ctx.content.get(file);
      // File not present in the context's content map — skip, don't fail.
      if (!content) continue;
      const lines = content.split('\n');
      for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        if (line && /@jest\/globals/.test(line)) {
          violations.push(
            violation(
              this.id,
              this.name,
              this.severity,
              `Import from @jest/globals found — use global jest instead`,
              file,
              i + 1, // violations report 1-based line numbers
              'Remove the import; jest/describe/it/expect are globally available'
            )
          );
        }
      }
    }
    return violations.length > 0 ? fail(violations) : pass();
  },
};

// ---------------------------------------------------------------------------
// Rule: catch-no-underscore
// ---------------------------------------------------------------------------
// Line-based regex scan for `catch (_x)`; bare `catch {}` and `catch (err)`
// are both acceptable.
const catchNoUnderscore: RuleDefinition = {
  id: 'catch-no-underscore',
  name: 'Catch without underscore prefix',
  description:
    'Enforce catch {} not catch (_err) {} — underscore prefix not in allowed ESLint pattern',
  trigger: 'lint',
  severity: 'warn',
  scope: 'src/**/*.{ts,js}',
  enabled: true,
  builtin: true,
  check(ctx: RuleContext): RuleResult {
    const violations: RuleViolation[] = [];
    const files = filterByScope(ctx.files, this.scope);
    for (const file of files) {
      const content = ctx.content.get(file);
      if (!content) continue;
      const lines = content.split('\n');
      for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        if (line && /catch\s*\(\s*_\w*\s*\)/.test(line)) {
          violations.push(
            violation(
              this.id,
              this.name,
              this.severity,
              'catch with underscore-prefixed variable',
              file,
              i + 1,
              'Use catch {} (empty) or catch (err) {} (without underscore)'
            )
          );
        }
      }
    }
    return violations.length > 0 ? fail(violations) : pass();
  },
};

// ---------------------------------------------------------------------------
// Rule: return-dont-throw
// ---------------------------------------------------------------------------
// Boundary code where `throw` is expected and therefore exempt: middleware,
// error modules, index re-exports, and test files.
const THROW_EXCLUDE_PATTERNS = [
  /middleware/i,
  /errors?\//i,
  /errors?\.(ts|js)$/i,
  /index\.(ts|js)$/,
  /\.test\.(ts|js)$/,
  /__tests__/,
];

// Informational nudge: flags `throw new …` in ordinary src/ code.
const returnDontThrow: RuleDefinition = {
  id: 'return-dont-throw',
  name: 'Return undefined over throw',
  description:
    'Warn on throw in non-boundary code — prefer return undefined + log',
  trigger: 'lint',
  severity: 'info',
  scope: 'src/**/*.{ts,js}',
  enabled: true,
  builtin: true,
  check(ctx: RuleContext): RuleResult {
    const violations: RuleViolation[] = [];
    const files = filterByScope(ctx.files, this.scope);
    for (const file of files) {
      // Skip files whose whole purpose is to throw (see exclude list above).
      if (THROW_EXCLUDE_PATTERNS.some((p) => p.test(file))) continue;
      const content = ctx.content.get(file);
      if (!content) continue;
      const lines = content.split('\n');
      for (let i = 0; i < lines.length; i++) {
        const line = lines[i];
        if (line && /throw\s+new\s+/.test(line)) {
          violations.push(
            violation(
              this.id,
              this.name,
              this.severity,
              'throw statement in non-boundary code',
              file,
              i + 1,
              'Consider returning undefined and logging the error instead'
            )
          );
        }
      }
    }
    return violations.length > 0 ? fail(violations) : pass();
  },
};

// ---------------------------------------------------------------------------
// Rule: migration-sequential
// ---------------------------------------------------------------------------
// Checks that numeric migration filename prefixes form a gapless sequence.
// Operates on file NAMES only — file contents are never read.
const migrationSequential: RuleDefinition = {
  id: 'migration-sequential',
  name: 'Sequential migration numbering',
  description: 'Validate migration files have no numbering gaps',
  trigger: 'on-demand',
  severity: 'error',
  scope: '**/migrations/*.sql',
  enabled: true,
  builtin: true,
  check(ctx: RuleContext): RuleResult {
    const files = filterByScope(ctx.files, this.scope);
    const numbers: number[] = [];
    for (const file of files) {
      const basename = file.split('/').pop() ?? '';
      // Leading digits of the basename, e.g. "001" from "001_init.sql".
      const match = /^(\d+)/.exec(basename);
      if (match?.[1]) {
        numbers.push(parseInt(match[1], 10));
      }
    }
    // Fewer than two numbered migrations — no sequence to validate.
    if (numbers.length < 2) return pass();

    numbers.sort((a, b) => a - b);
    const violations: RuleViolation[] = [];
    for (let i = 1; i < numbers.length; i++) {
      const prev = numbers[i - 1]!;
      const curr = numbers[i]!;
      if (curr - prev > 1) {
        violations.push(
          violation(
            this.id,
            this.name,
            this.severity,
            `Migration gap: ${String(prev).padStart(3, '0')} → ${String(curr).padStart(3, '0')} (missing ${curr - prev - 1} file(s))`,
            undefined,
            undefined,
            `Add migration(s) for numbers ${prev + 1}–${curr - 1}`
          )
        );
      }
    }
    return violations.length > 0 ? fail(violations) : pass();
  },
};

// ---------------------------------------------------------------------------
// Rule: mock-lifecycle
// ---------------------------------------------------------------------------
// Whole-file heuristic (no line numbers): clearAllMocks() inside a file that
// has a beforeEach but never re-arms a mock is usually a broken test setup.
const mockLifecycle: RuleDefinition = {
  id: 'mock-lifecycle',
  name: 'Mock lifecycle in tests',
  description:
    'Warn if clearAllMocks() is called without re-setting mocks in beforeEach',
  trigger: 'lint',
  severity: 'warn',
  scope: 'src/**/*.test.{ts,js}',
  enabled: true,
  builtin: true,
  check(ctx: RuleContext): RuleResult {
    const violations: RuleViolation[] = [];
    const files = filterByScope(ctx.files, this.scope);
    for (const file of files) {
      const content = ctx.content.get(file);
      if (!content) continue;
      const hasClearAll = /clearAllMocks\(\)/.test(content);
      if (!hasClearAll) continue;
      // Check if there's a beforeEach that re-sets mocks after clearing
      const hasBeforeEach = /beforeEach/.test(content);
      const hasMockSetup =
        /mock(ReturnValue|ResolvedValue|Implementation)\s*\(/.test(content);
      if (hasClearAll && hasBeforeEach && !hasMockSetup) {
        violations.push(
          violation(
            this.id,
            this.name,
            this.severity,
            'clearAllMocks() used but no mock re-setup found (mockReturnValue/mockResolvedValue/mockImplementation)',
            file,
            undefined,
            'Re-set mock return values in beforeEach after clearAllMocks resets them'
          )
        );
      }
    }
    return violations.length > 0 ?
fail(violations) : pass(); + }, +}; + +// --------------------------------------------------------------------------- +// Exports +// --------------------------------------------------------------------------- + +export const BUILT_IN_RULES: RuleDefinition[] = [ + noCoauthor, + noJestGlobals, + catchNoUnderscore, + returnDontThrow, + migrationSequential, + mockLifecycle, + prReviewRule, +]; + +export function getBuiltinRows(): Array< + Omit +> { + return BUILT_IN_RULES.map((r) => ({ + id: r.id, + name: r.name, + description: r.description, + trigger_type: r.trigger, + severity: r.severity, + scope: r.scope, + enabled: r.enabled ? 1 : 0, + builtin: r.builtin ? 1 : 0, + })); +} diff --git a/src/core/rules/pr-review-rule.ts b/src/core/rules/pr-review-rule.ts new file mode 100644 index 00000000..c0d5b9c6 --- /dev/null +++ b/src/core/rules/pr-review-rule.ts @@ -0,0 +1,114 @@ +/** + * PR review comment rule. + * Checks for common patterns that PR reviewers flag. + * Runs the same checks locally before push to catch issues early. 
+ */ + +import type { + RuleDefinition, + RuleContext, + RuleResult, + RuleViolation, +} from './types.js'; +import { filterByScope } from './built-in-rules.js'; + +function violation( + ruleId: string, + ruleName: string, + severity: RuleDefinition['severity'], + message: string, + file?: string, + line?: number, + suggestion?: string +): RuleViolation { + return { ruleId, ruleName, severity, message, file, line, suggestion }; +} + +// Common patterns that PR reviewers flag +const PR_REVIEW_PATTERNS: Array<{ + id: string; + pattern: RegExp; + message: string; + suggestion: string; + severity: RuleDefinition['severity']; +}> = [ + { + id: 'console-log', + pattern: /console\.(log|debug|info)\(/, + message: 'console.log left in code', + suggestion: 'Remove or replace with structured logger', + severity: 'warn', + }, + { + id: 'todo-fixme', + pattern: /\/\/\s*(TODO|FIXME|HACK|XXX)(?!\s*\()/, + message: 'TODO/FIXME without attribution', + suggestion: 'Add author and ticket: // TODO(user): STA-XXX description', + severity: 'info', + }, + { + id: 'hardcoded-secret', + pattern: /(api[_-]?key|secret|password|token)\s*[:=]\s*['"][^'"]{8,}['"]/i, + message: 'Possible hardcoded secret', + suggestion: 'Move to environment variable', + severity: 'error', + }, + { + id: 'any-type', + pattern: /:\s*any\b/, + message: 'Explicit any type', + suggestion: 'Use a specific type or unknown', + severity: 'warn', + }, + { + id: 'empty-catch', + pattern: /catch\s*\{[\s]*\}/, + message: 'Empty catch block (swallows errors silently)', + suggestion: + 'Log the error or add a comment explaining why it is safe to ignore', + severity: 'warn', + }, +]; + +export const prReviewRule: RuleDefinition = { + id: 'pr-review-patterns', + name: 'PR Review Patterns', + description: + 'Catch common issues that PR reviewers flag (console.log, TODO, secrets, any type)', + trigger: 'pre-commit', + severity: 'warn', + scope: 'src/**/*.{ts,js,tsx,jsx}', + enabled: true, + builtin: true, + check(ctx: 
RuleContext): RuleResult { + const violations: RuleViolation[] = []; + const files = filterByScope(ctx.files, this.scope); + for (const file of files) { + const content = ctx.content.get(file); + if (!content) continue; + const lines = content.split('\n'); + for (let i = 0; i < lines.length; i++) { + const line = lines[i]; + if (!line) continue; + for (const check of PR_REVIEW_PATTERNS) { + if (check.pattern.test(line)) { + violations.push( + violation( + this.id, + `${this.name}: ${check.id}`, + check.severity, + check.message, + file, + i + 1, + check.suggestion + ) + ); + } + } + } + } + return violations.length > 0 + ? { passed: false, violations } + : { passed: true, violations: [] }; + }, +}; diff --git a/src/core/rules/rule-engine.ts b/src/core/rules/rule-engine.ts new file mode 100644 index 00000000..ae469569 --- /dev/null +++ b/src/core/rules/rule-engine.ts @@ -0,0 +1,109 @@ +/** + * Rule evaluation engine. + * Orchestrates rule checking against project context. + */ + +import type Database from 'better-sqlite3'; +import type { + RuleDefinition, + RuleContext, + RuleResult, + RuleViolation, + RuleTrigger, + RuleRow, +} from './types.js'; +import { RuleStore } from './rule-store.js'; +import { + BUILT_IN_RULES, + getBuiltinRows, + filterByScope, +} from './built-in-rules.js'; + +export class RuleEngine { + private store: RuleStore; + private checkFns = new Map(); + + constructor(db: Database.Database) { + this.store = new RuleStore(db); + this.store.seedBuiltins(getBuiltinRows()); + for (const rule of BUILT_IN_RULES) { + this.checkFns.set(rule.id, rule.check.bind(rule)); + } + } + + registerRule(rule: RuleDefinition): void { + this.store.upsert({ + id: rule.id, + name: rule.name, + description: rule.description, + trigger_type: rule.trigger, + severity: rule.severity, + scope: rule.scope, + enabled: rule.enabled ? 1 : 0, + builtin: rule.builtin ? 
1 : 0, + }); + this.checkFns.set(rule.id, rule.check.bind(rule)); + } + + evaluate(context: RuleContext): RuleResult { + const rows = this.store.getByTrigger(context.trigger); + return this.runRules(rows, context); + } + + evaluateAll(context: RuleContext): RuleResult { + const rows = this.store.getEnabled(); + return this.runRules(rows, context); + } + + listRules(filter?: { + trigger?: RuleTrigger | undefined; + enabled?: boolean | undefined; + }): RuleRow[] { + if (filter?.trigger) { + return this.store.getByTrigger(filter.trigger); + } + if (filter?.enabled === false) { + return this.store.getAll(); + } + return this.store.getEnabled(); + } + + enableRule(id: string): boolean { + return this.store.setEnabled(id, true); + } + + disableRule(id: string): boolean { + return this.store.setEnabled(id, false); + } + + getStore(): RuleStore { + return this.store; + } + + private runRules(rows: RuleRow[], context: RuleContext): RuleResult { + const allViolations: RuleViolation[] = []; + for (const row of rows) { + const checkFn = this.checkFns.get(row.id); + if (!checkFn) continue; + const scopedFiles = filterByScope(context.files, row.scope); + if ( + scopedFiles.length === 0 && + row.trigger_type !== 'commit' && + row.trigger_type !== 'pre-commit' + ) + continue; + const scopedCtx: RuleContext = { + ...context, + files: scopedFiles, + }; + const result = checkFn(scopedCtx); + if (!result.passed) { + allViolations.push(...result.violations); + } + } + return { + passed: allViolations.length === 0, + violations: allViolations, + }; + } +} diff --git a/src/core/rules/rule-store.ts b/src/core/rules/rule-store.ts new file mode 100644 index 00000000..4105cc42 --- /dev/null +++ b/src/core/rules/rule-store.ts @@ -0,0 +1,119 @@ +/** + * SQLite-backed rule storage. 
+ */ + +import type Database from 'better-sqlite3'; +import type { RuleRow, RuleTrigger } from './types.js'; + +const SCHEMA = ` +CREATE TABLE IF NOT EXISTS rules ( + id TEXT PRIMARY KEY, + name TEXT NOT NULL, + description TEXT NOT NULL DEFAULT '', + trigger_type TEXT NOT NULL DEFAULT 'on-demand', + severity TEXT NOT NULL DEFAULT 'warn', + scope TEXT NOT NULL DEFAULT '**/*', + enabled INTEGER NOT NULL DEFAULT 1, + builtin INTEGER NOT NULL DEFAULT 0, + created_at INTEGER NOT NULL, + updated_at INTEGER NOT NULL +)`; + +export class RuleStore { + constructor(private db: Database.Database) { + this.db.exec(SCHEMA); + } + + upsert(rule: Omit): void { + const now = Date.now(); + this.db + .prepare( + `INSERT INTO rules (id, name, description, trigger_type, severity, scope, enabled, builtin, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(id) DO UPDATE SET + name = excluded.name, + description = excluded.description, + trigger_type = excluded.trigger_type, + severity = excluded.severity, + scope = excluded.scope, + enabled = excluded.enabled, + builtin = excluded.builtin, + updated_at = excluded.updated_at` + ) + .run( + rule.id, + rule.name, + rule.description, + rule.trigger_type, + rule.severity, + rule.scope, + rule.enabled, + rule.builtin, + now, + now + ); + } + + getAll(): RuleRow[] { + return this.db + .prepare('SELECT * FROM rules ORDER BY id') + .all() as RuleRow[]; + } + + getEnabled(): RuleRow[] { + return this.db + .prepare('SELECT * FROM rules WHERE enabled = 1 ORDER BY id') + .all() as RuleRow[]; + } + + getByTrigger(trigger: RuleTrigger): RuleRow[] { + return this.db + .prepare( + 'SELECT * FROM rules WHERE enabled = 1 AND trigger_type = ? 
ORDER BY id' + ) + .all(trigger) as RuleRow[]; + } + + getById(id: string): RuleRow | undefined { + return this.db.prepare('SELECT * FROM rules WHERE id = ?').get(id) as + | RuleRow + | undefined; + } + + setEnabled(id: string, enabled: boolean): boolean { + const result = this.db + .prepare('UPDATE rules SET enabled = ?, updated_at = ? WHERE id = ?') + .run(enabled ? 1 : 0, Date.now(), id); + return result.changes > 0; + } + + delete(id: string): boolean { + const result = this.db.prepare('DELETE FROM rules WHERE id = ?').run(id); + return result.changes > 0; + } + + seedBuiltins(rules: Array>): void { + const now = Date.now(); + const stmt = this.db.prepare( + `INSERT OR IGNORE INTO rules (id, name, description, trigger_type, severity, scope, enabled, builtin, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?)` + ); + const tx = this.db.transaction(() => { + for (const rule of rules) { + stmt.run( + rule.id, + rule.name, + rule.description, + rule.trigger_type, + rule.severity, + rule.scope, + rule.enabled, + rule.builtin, + now, + now + ); + } + }); + tx(); + } +} diff --git a/src/core/rules/types.ts b/src/core/rules/types.ts new file mode 100644 index 00000000..6235a092 --- /dev/null +++ b/src/core/rules/types.ts @@ -0,0 +1,59 @@ +/** + * Rule engine types. 
+ */ + +export type RuleTrigger = + | 'commit' + | 'file-save' + | 'pre-commit' + | 'lint' + | 'on-demand'; +export type RuleSeverity = 'error' | 'warn' | 'info'; + +export interface RuleContext { + trigger: RuleTrigger; + files: string[]; + content: Map; + commitMessage: string; + projectRoot: string; +} + +export interface RuleViolation { + ruleId: string; + ruleName: string; + severity: RuleSeverity; + message: string; + file: string | undefined; + line: number | undefined; + suggestion: string | undefined; +} + +export interface RuleResult { + passed: boolean; + violations: RuleViolation[]; +} + +export interface RuleDefinition { + id: string; + name: string; + description: string; + trigger: RuleTrigger; + severity: RuleSeverity; + scope: string; + check: (context: RuleContext) => RuleResult; + enabled: boolean; + builtin: boolean; +} + +export interface RuleRow { + id: string; + name: string; + description: string; + trigger_type: string; + severity: string; + scope: string; + enabled: number; + builtin: number; + created_at: number; + updated_at: number; +} diff --git a/src/core/skills/__tests__/skill-integration.test.ts b/src/core/skills/__tests__/skill-integration.test.ts new file mode 100644 index 00000000..1d46bbb8 --- /dev/null +++ b/src/core/skills/__tests__/skill-integration.test.ts @@ -0,0 +1,199 @@ +/** + * Skill Integration Test — Golden test: seed rules, match prompts, verify parity with CJS + * + * Tests that the TypeScript matcher produces the same results as skill-eval.cjs + * when given the same rules and prompts. 
+ */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as os from 'os'; +import * as path from 'path'; +import { SkillRegistry } from '../skill-registry.js'; +import { matchPrompt } from '../skill-matcher.js'; +import type { SkillRulesFile } from '../types.js'; + +let tmpDir: string; +let registry: SkillRegistry; +let rulesFile: SkillRulesFile; + +// Load the actual skill-rules.json used by the CJS hook +const RULES_PATH = path.resolve( + __dirname, + '../../../../.claude/hooks/skill-rules.json' +); + +beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-integration-')); + registry = new SkillRegistry(path.join(tmpDir, 'skills.db')); + + if (fs.existsSync(RULES_PATH)) { + rulesFile = JSON.parse(fs.readFileSync(RULES_PATH, 'utf-8')); + registry.seedFromRulesJson(rulesFile); + } else { + // Fallback minimal rules for CI where .claude/ may not exist + rulesFile = { + version: '2.0', + config: { + minConfidenceScore: 3, + showMatchReasons: true, + maxSkillsToShow: 5, + }, + scoring: { + keyword: 2, + keywordPattern: 3, + pathPattern: 4, + directoryMatch: 5, + intentPattern: 4, + contentPattern: 3, + contextPattern: 2, + }, + directoryMappings: { 'src/core': 'frame-management' }, + skills: { + 'frame-management': { + description: 'Frame management', + priority: 9, + triggers: { + keywords: ['frame', 'stack', 'context'], + keywordPatterns: ['\\bframe\\b'], + pathPatterns: ['**/frame-*.ts'], + contentPatterns: ['FrameManager'], + }, + relatedSkills: ['context-bridge'], + }, + 'linear-integration': { + description: 'Linear integration', + priority: 8, + triggers: { + keywords: ['linear', 'issue', 'sync'], + keywordPatterns: ['\\blinear\\b'], + }, + }, + 'testing-patterns': { + description: 'Testing patterns', + priority: 7, + triggers: { + keywords: ['test', 'jest', 'spec'], + keywordPatterns: ['\\btest\\b'], + pathPatterns: ['**/*.test.ts'], + }, + }, + }, + }; + 
registry.seedFromRulesJson(rulesFile); + } +}); + +afterEach(() => { + registry.close(); + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +describe('Golden tests: TS matcher parity with CJS', () => { + function matchWithRegistry(prompt: string) { + const rules = registry.getAllRules(); + const { config, scoring } = registry.getMatcherConfig(); + const mappings = registry.getDirectoryMappings(); + return matchPrompt(prompt, rules, config, scoring, mappings); + } + + it('frame management prompt matches frame-management skill', () => { + const result = matchWithRegistry( + 'Fix the frame manager to handle nested contexts' + ); + const names = result.matches.map((m) => m.name); + expect(names).toContain('frame-management'); + }); + + it('linear sync prompt matches linear-integration skill', () => { + const result = matchWithRegistry( + 'Sync the linear issues and update tracking' + ); + const names = result.matches.map((m) => m.name); + expect(names).toContain('linear-integration'); + }); + + it('test file path triggers testing-patterns', () => { + const result = matchWithRegistry( + 'Fix src/core/skills/__tests__/skill-matcher.test.ts' + ); + const names = result.matches.map((m) => m.name); + expect(names).toContain('testing-patterns'); + }); + + it('directory mapping triggers frame-management from src/core path', () => { + const result = matchWithRegistry('Edit src/core/context/frame-manager.ts'); + const frameMatch = result.matches.find( + (m) => m.name === 'frame-management' + ); + expect(frameMatch).toBeDefined(); + }); + + it('unrelated prompt returns no matches', () => { + const result = matchWithRegistry('How is the weather today?'); + expect(result.matches).toEqual([]); + }); + + it('results are sorted by score descending', () => { + const result = matchWithRegistry('frame linear test sync context issue'); + for (let i = 1; i < result.matches.length; i++) { + const prev = result.matches[i - 1]; + const curr = result.matches[i]; + 
expect(prev.score).toBeGreaterThanOrEqual(curr.score); + } + }); + + it('related skills are resolved', () => { + const result = matchWithRegistry('Push frame onto the stack'); + if (result.matches.some((m) => m.name === 'frame-management')) { + expect(result.relatedSkills).toContain('context-bridge'); + } + }); + + it('file paths are extracted and reported', () => { + const result = matchWithRegistry( + 'Check src/core/frame-manager.ts for bugs' + ); + expect(result.filePaths).toContain('src/core/frame-manager.ts'); + }); +}); + +describe('End-to-end: seed → store skill → match → query', () => { + it('full workflow', () => { + // 1. Registry is already seeded from beforeEach + + // 2. Create a skill + const skill = registry.createSkill({ + content: 'Always validate frame depth before push', + category: 'pitfall', + priority: 'high', + tags: ['frames'], + tool: 'frame-manager', + source: 'correction', + }); + expect(skill.id).toBeDefined(); + + // 3. Match a prompt + const rules = registry.getAllRules(); + const { config, scoring } = registry.getMatcherConfig(); + const mappings = registry.getDirectoryMappings(); + const result = matchPrompt( + 'Fix the frame manager depth validation', + rules, + config, + scoring, + mappings + ); + expect(result.matches.length).toBeGreaterThan(0); + + // 4. Query skills related to the match + const relevant = registry.getRelevantSkills({ tool: 'frame-manager' }); + expect( + relevant.some((s) => s.content.includes('validate frame depth')) + ).toBe(true); + + // 5. 
Validate the skill + const validated = registry.validateSkill(skill.id); + expect(validated!.validatedCount).toBe(1); + }); +}); diff --git a/src/core/skills/__tests__/skill-matcher.test.ts b/src/core/skills/__tests__/skill-matcher.test.ts new file mode 100644 index 00000000..a37325ac --- /dev/null +++ b/src/core/skills/__tests__/skill-matcher.test.ts @@ -0,0 +1,258 @@ +/** + * Skill Matcher — Pure scoring engine tests (no DB) + */ + +import { describe, it, expect } from 'vitest'; +import { + extractFilePaths, + matchesGlob, + matchPrompt, + formatConfidence, +} from '../skill-matcher.js'; +import type { SkillRule, MatcherConfig, ScoringWeights } from '../types.js'; + +const defaultConfig: MatcherConfig = { + minConfidenceScore: 3, + showMatchReasons: true, + maxSkillsToShow: 5, +}; + +const defaultScoring: ScoringWeights = { + keyword: 2, + keywordPattern: 3, + pathPattern: 4, + directoryMatch: 5, + intentPattern: 4, + contentPattern: 3, + contextPattern: 2, +}; + +describe('extractFilePaths', () => { + it('extracts paths with extensions', () => { + const paths = extractFilePaths( + 'Fix the bug in src/core/skills/types.ts please' + ); + expect(paths).toContain('src/core/skills/types.ts'); + }); + + it('extracts paths from common directories', () => { + const paths = extractFilePaths('Look at src/integrations/mcp/server'); + expect(paths).toContain('src/integrations/mcp/server'); + }); + + it('extracts quoted paths', () => { + const paths = extractFilePaths('Open "hooks/skill-eval"'); + expect(paths).toContain('hooks/skill-eval'); + }); + + it('returns empty for no paths', () => { + expect(extractFilePaths('Hello world')).toEqual([]); + }); + + it('deduplicates paths', () => { + const paths = extractFilePaths('src/core/types.ts and src/core/types.ts'); + const typesCount = paths.filter((p) => p === 'src/core/types.ts').length; + expect(typesCount).toBe(1); + }); +}); + +describe('matchesGlob', () => { + it('matches ** wildcards', () => { + 
expect(matchesGlob('src/core/frame-manager.ts', '**/frame-*.ts')).toBe( + true + ); + }); + + it('matches * wildcard', () => { + expect(matchesGlob('src/core/context/index.ts', '**/context/*.ts')).toBe( + true + ); + }); + + it('rejects non-matching paths', () => { + expect(matchesGlob('src/cli/index.ts', '**/mcp/**')).toBe(false); + }); + + it('is case-insensitive', () => { + expect(matchesGlob('SRC/Core/Frame.ts', '**/frame.ts')).toBe(true); + }); +}); + +describe('formatConfidence', () => { + it('returns HIGH for 3x min score', () => { + expect(formatConfidence(9, 3)).toBe('HIGH'); + }); + + it('returns MEDIUM for 2x min score', () => { + expect(formatConfidence(6, 3)).toBe('MEDIUM'); + }); + + it('returns LOW below 2x', () => { + expect(formatConfidence(4, 3)).toBe('LOW'); + }); +}); + +describe('matchPrompt', () => { + const rules: Record = { + 'frame-management': { + description: 'Frame stack management', + priority: 9, + triggers: { + keywords: ['frame', 'stack', 'context'], + keywordPatterns: ['\\bframe\\b'], + pathPatterns: ['**/frame-*.ts'], + intentPatterns: ['(?:manage|handle).*(?:frame|context)'], + contentPatterns: ['FrameManager'], + }, + relatedSkills: ['context-bridge'], + }, + 'linear-integration': { + description: 'Linear API integration', + priority: 8, + triggers: { + keywords: ['linear', 'issue', 'sync'], + keywordPatterns: ['\\blinear\\b'], + }, + }, + }; + + const mappings = { 'src/core': 'frame-management' }; + + it('matches on keyword', () => { + const result = matchPrompt( + 'I need to push a frame onto the stack', + rules, + defaultConfig, + defaultScoring, + mappings + ); + expect(result.matches.length).toBeGreaterThan(0); + expect(result.matches[0].name).toBe('frame-management'); + }); + + it('includes related skills', () => { + const result = matchPrompt( + 'manage the frame context please', + rules, + defaultConfig, + defaultScoring, + mappings + ); + expect(result.relatedSkills).toContain('context-bridge'); + }); + + it('matches on 
file path', () => { + const result = matchPrompt( + 'Look at src/core/frame-manager.ts', + rules, + defaultConfig, + defaultScoring, + mappings + ); + const frameMatch = result.matches.find( + (m) => m.name === 'frame-management' + ); + expect(frameMatch).toBeDefined(); + expect(frameMatch!.reasons.some((r) => r.includes('path'))).toBe(true); + }); + + it('matches on directory mapping', () => { + const result = matchPrompt( + 'Look at src/core/index.ts', + rules, + defaultConfig, + defaultScoring, + mappings + ); + const frameMatch = result.matches.find( + (m) => m.name === 'frame-management' + ); + expect(frameMatch).toBeDefined(); + expect(frameMatch!.reasons).toContain('directory mapping'); + }); + + it('returns empty for unrelated prompt', () => { + const result = matchPrompt( + 'What is the weather today?', + rules, + defaultConfig, + defaultScoring, + mappings + ); + expect(result.matches).toEqual([]); + }); + + it('filters below minConfidenceScore', () => { + const strictConfig = { ...defaultConfig, minConfidenceScore: 100 }; + const result = matchPrompt( + 'frame stack context', + rules, + strictConfig, + defaultScoring, + mappings + ); + expect(result.matches).toEqual([]); + }); + + it('respects maxSkillsToShow', () => { + const limitConfig = { ...defaultConfig, maxSkillsToShow: 1 }; + const result = matchPrompt( + 'frame linear sync issue', + rules, + limitConfig, + defaultScoring, + mappings + ); + expect(result.matches.length).toBeLessThanOrEqual(1); + }); + + it('respects excludePatterns', () => { + const rulesWithExclude: Record = { + docs: { + description: 'Docs', + priority: 5, + triggers: { keywords: ['document'] }, + excludePatterns: ['no docs'], + }, + }; + const result = matchPrompt( + 'no docs needed, just document something', + rulesWithExclude, + { ...defaultConfig, minConfidenceScore: 1 }, + defaultScoring + ); + expect(result.matches).toEqual([]); + }); + + it('sorts by score descending', () => { + const result = matchPrompt( + 'frame stack 
context FrameManager linear', + rules, + defaultConfig, + defaultScoring, + mappings + ); + if (result.matches.length >= 2) { + expect(result.matches[0].score).toBeGreaterThanOrEqual( + result.matches[1].score + ); + } + }); + + it('matches content patterns (case-sensitive code)', () => { + const result = matchPrompt( + 'Use the FrameManager to push frames', + rules, + defaultConfig, + defaultScoring, + mappings + ); + const frameMatch = result.matches.find( + (m) => m.name === 'frame-management' + ); + expect(frameMatch).toBeDefined(); + expect(frameMatch!.reasons.some((r) => r.includes('code pattern'))).toBe( + true + ); + }); +}); diff --git a/src/core/skills/__tests__/skill-registry.test.ts b/src/core/skills/__tests__/skill-registry.test.ts new file mode 100644 index 00000000..d06bded5 --- /dev/null +++ b/src/core/skills/__tests__/skill-registry.test.ts @@ -0,0 +1,367 @@ +/** + * Skill Registry — SQLite CRUD tests with temp dirs + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import * as fs from 'fs'; +import * as os from 'os'; +import * as path from 'path'; +import { SkillRegistry } from '../skill-registry.js'; +import type { SkillRulesFile } from '../types.js'; + +let tmpDir: string; +let registry: SkillRegistry; + +beforeEach(() => { + tmpDir = fs.mkdtempSync(path.join(os.tmpdir(), 'skill-registry-')); + registry = new SkillRegistry(path.join(tmpDir, 'skills.db')); +}); + +afterEach(() => { + registry.close(); + fs.rmSync(tmpDir, { recursive: true, force: true }); +}); + +describe('Skill CRUD', () => { + it('creates and retrieves a skill', () => { + const skill = registry.createSkill({ + content: 'Always use .js extensions in ESM imports', + category: 'pitfall', + priority: 'high', + tags: ['esm', 'imports'], + source: 'correction', + }); + + expect(skill.id).toBeDefined(); + expect(skill.content).toBe('Always use .js extensions in ESM imports'); + expect(skill.category).toBe('pitfall'); + expect(skill.priority).toBe('high'); + 
expect(skill.tags).toEqual(['esm', 'imports']); + + const retrieved = registry.getSkill(skill.id); + expect(retrieved).toBeDefined(); + expect(retrieved!.id).toBe(skill.id); + }); + + it('updates a skill', () => { + const skill = registry.createSkill({ + content: 'Original content', + category: 'pattern', + source: 'observation', + }); + + const updated = registry.updateSkill({ + id: skill.id, + content: 'Updated content', + priority: 'critical', + }); + + expect(updated).toBeDefined(); + expect(updated!.content).toBe('Updated content'); + expect(updated!.priority).toBe('critical'); + }); + + it('validates a skill (increments count)', () => { + const skill = registry.createSkill({ + content: 'Test skill', + category: 'tool', + source: 'explicit', + }); + + expect(skill.validatedCount).toBe(0); + + const v1 = registry.validateSkill(skill.id); + expect(v1!.validatedCount).toBe(1); + expect(v1!.lastValidated).toBeDefined(); + + const v2 = registry.validateSkill(skill.id); + expect(v2!.validatedCount).toBe(2); + }); + + it('deletes a skill', () => { + const skill = registry.createSkill({ + content: 'To delete', + category: 'tool', + source: 'observation', + }); + + expect(registry.deleteSkill(skill.id)).toBe(true); + expect(registry.getSkill(skill.id)).toBeUndefined(); + expect(registry.deleteSkill(skill.id)).toBe(false); + }); + + it('returns undefined for non-existent skill', () => { + expect(registry.getSkill('non-existent-id')).toBeUndefined(); + expect(registry.updateSkill({ id: 'non-existent' })).toBeUndefined(); + expect(registry.validateSkill('non-existent')).toBeUndefined(); + }); +}); + +describe('Skill Queries', () => { + beforeEach(() => { + registry.createSkill({ + content: 'Critical correction', + category: 'correction', + priority: 'critical', + tool: 'eslint', + tags: ['lint'], + source: 'correction', + }); + registry.createSkill({ + content: 'Low pattern', + category: 'pattern', + priority: 'low', + source: 'observation', + }); + registry.createSkill({ 
+ content: 'Medium workflow', + category: 'workflow', + priority: 'medium', + tool: 'eslint', + source: 'observation', + }); + }); + + it('queries by category', () => { + const results = registry.querySkills({ + categories: ['correction'], + limit: 50, + offset: 0, + sortBy: 'priority', + sortOrder: 'desc', + }); + expect(results).toHaveLength(1); + expect(results[0].category).toBe('correction'); + }); + + it('queries by tool', () => { + const results = registry.querySkills({ + tool: 'eslint', + limit: 50, + offset: 0, + sortBy: 'priority', + sortOrder: 'desc', + }); + expect(results).toHaveLength(2); + }); + + it('queries with limit', () => { + const results = registry.querySkills({ + limit: 1, + offset: 0, + sortBy: 'priority', + sortOrder: 'desc', + }); + expect(results).toHaveLength(1); + }); + + it('getRelevantSkills returns critical and tool-specific', () => { + const results = registry.getRelevantSkills({ tool: 'eslint' }); + expect(results.length).toBeGreaterThanOrEqual(2); + const priorities = results.map((s) => s.priority); + expect(priorities).toContain('critical'); + }); +}); + +describe('Skill Rules CRUD', () => { + it('upserts and retrieves a rule', () => { + registry.upsertRule('test-rule', { + description: 'A test rule', + priority: 7, + triggers: { keywords: ['test'] }, + }); + + const rule = registry.getRule('test-rule'); + expect(rule).toBeDefined(); + expect(rule!.description).toBe('A test rule'); + expect(rule!.triggers.keywords).toEqual(['test']); + }); + + it('getAllRules returns all', () => { + registry.upsertRule('r1', { + description: 'Rule 1', + priority: 5, + triggers: {}, + }); + registry.upsertRule('r2', { + description: 'Rule 2', + priority: 3, + triggers: {}, + }); + + const all = registry.getAllRules(); + expect(Object.keys(all)).toHaveLength(2); + }); + + it('deletes a rule', () => { + registry.upsertRule('to-delete', { + description: 'Delete me', + priority: 1, + triggers: {}, + }); + 
expect(registry.deleteRule('to-delete')).toBe(true); + expect(registry.getRule('to-delete')).toBeUndefined(); + }); +}); + +describe('Directory Mappings', () => { + it('sets and retrieves mappings', () => { + registry.setDirectoryMapping('src/core', 'frame-management'); + registry.setDirectoryMapping('src/cli', 'cli-commands'); + + const mappings = registry.getDirectoryMappings(); + expect(mappings['src/core']).toBe('frame-management'); + expect(mappings['src/cli']).toBe('cli-commands'); + }); +}); + +describe('Matcher Config', () => { + it('returns defaults when not set', () => { + const { config, scoring } = registry.getMatcherConfig(); + expect(config.minConfidenceScore).toBe(3); + expect(scoring.keyword).toBe(2); + }); + + it('persists config', () => { + registry.setMatcherConfig( + { minConfidenceScore: 5, showMatchReasons: false, maxSkillsToShow: 3 }, + { + keyword: 10, + keywordPattern: 10, + pathPattern: 10, + directoryMatch: 10, + intentPattern: 10, + contentPattern: 10, + contextPattern: 10, + } + ); + const { config, scoring } = registry.getMatcherConfig(); + expect(config.minConfidenceScore).toBe(5); + expect(config.showMatchReasons).toBe(false); + expect(scoring.keyword).toBe(10); + }); +}); + +describe('Journal', () => { + it('creates and retrieves journal entries', () => { + const entry = registry.createJournalEntry( + 'session-1', + 'decision', + 'Use SQLite', + 'Decided to use SQLite over Redis', + { file: 'skill-registry.ts' } + ); + + expect(entry.id).toBeDefined(); + expect(entry.type).toBe('decision'); + + const entries = registry.getSessionJournal('session-1'); + expect(entries).toHaveLength(1); + expect(entries[0].title).toBe('Use SQLite'); + }); + + it('promotes journal entry to skill', () => { + const entry = registry.createJournalEntry( + 'session-2', + 'correction', + 'Add .js extensions', + 'Always add .js to ESM imports' + ); + + const skill = registry.promoteToSkill(entry.id, 'pitfall', 'high'); + expect(skill).toBeDefined(); + 
expect(skill!.content).toBe('Always add .js to ESM imports'); + expect(skill!.category).toBe('pitfall'); + }); +}); + +describe('Session Management', () => { + it('starts and ends a session', () => { + registry.startSession('s1'); + + registry.createJournalEntry('s1', 'correction', 'Fix A', 'Fixed A'); + registry.createJournalEntry('s1', 'decision', 'Choose B', 'Chose B'); + + const summary = registry.endSession('s1'); + expect(summary).toBeDefined(); + expect(summary!.entriesCount).toBe(2); + expect(summary!.correctionsCount).toBe(1); + expect(summary!.decisionsCount).toBe(1); + expect(summary!.endedAt).toBeDefined(); + }); + + it('returns undefined for non-existent session', () => { + expect(registry.endSession('nope')).toBeUndefined(); + expect(registry.getSessionSummary('nope')).toBeUndefined(); + }); +}); + +describe('Metrics', () => { + it('returns metrics', () => { + registry.createSkill({ + content: 'S1', + category: 'tool', + source: 'observation', + }); + registry.createSkill({ + content: 'S2', + category: 'tool', + source: 'observation', + }); + registry.upsertRule('r1', { + description: 'R1', + priority: 5, + triggers: {}, + }); + + const metrics = registry.getMetrics(); + expect(metrics.skillsTotal).toBe(2); + expect(metrics.skillsByCategory['tool']).toBe(2); + expect(metrics.rulesTotal).toBe(1); + }); +}); + +describe('seedFromRulesJson', () => { + it('seeds rules, config, and mappings', () => { + const rulesFile: SkillRulesFile = { + version: '2.0', + config: { + minConfidenceScore: 5, + showMatchReasons: true, + maxSkillsToShow: 3, + }, + scoring: { + keyword: 2, + keywordPattern: 3, + pathPattern: 4, + directoryMatch: 5, + intentPattern: 4, + contentPattern: 3, + contextPattern: 2, + }, + directoryMappings: { + 'src/core': 'frame-management', + }, + skills: { + 'frame-management': { + description: 'Frame management', + priority: 9, + triggers: { keywords: ['frame'] }, + relatedSkills: ['context-bridge'], + }, + }, + }; + + 
registry.seedFromRulesJson(rulesFile); + + const rules = registry.getAllRules(); + expect(Object.keys(rules)).toHaveLength(1); + expect(rules['frame-management'].priority).toBe(9); + + const mappings = registry.getDirectoryMappings(); + expect(mappings['src/core']).toBe('frame-management'); + + const { config } = registry.getMatcherConfig(); + expect(config.minConfidenceScore).toBe(5); + }); +}); diff --git a/src/core/skills/index.ts b/src/core/skills/index.ts index 3387190f..1f6d765a 100644 --- a/src/core/skills/index.ts +++ b/src/core/skills/index.ts @@ -1,7 +1,29 @@ /** * Skills Module - * Persistent agent learning and skill memory + * Persistent agent learning, skill memory, and prompt matching */ export * from './types.js'; export * from './skill-storage.js'; +export * from './skill-matcher.js'; +export { + SkillRegistry, + getSkillRegistry, + resetSkillRegistry, +} from './skill-registry.js'; + +import { getSkillRegistry } from './skill-registry.js'; +import { matchPrompt as matchPromptFn } from './skill-matcher.js'; +import type { MatchResult } from './types.js'; + +/** + * Convenience: match a prompt using the singleton registry's rules, config, and mappings. + * Returns empty result if no rules are loaded. + */ +export function matchPromptFromRegistry(prompt: string): MatchResult { + const registry = getSkillRegistry(); + const rules = registry.getAllRules(); + const { config, scoring } = registry.getMatcherConfig(); + const mappings = registry.getDirectoryMappings(); + return matchPromptFn(prompt, rules, config, scoring, mappings); +} diff --git a/src/core/skills/skill-matcher.ts b/src/core/skills/skill-matcher.ts new file mode 100644 index 00000000..bac5b308 --- /dev/null +++ b/src/core/skills/skill-matcher.ts @@ -0,0 +1,289 @@ +/** + * Skill Matcher — Stateless scoring engine + * Port of skill-eval.cjs to TypeScript with zero DB dependencies. + * Pure functions, unit-testable. 
+ */ + +import type { + SkillRule, + SkillMatch, + MatchResult, + MatcherConfig, + ScoringWeights, + DirectoryMapping, + ConfidenceLevel, +} from './types.js'; + +/** + * Extract file paths mentioned in a prompt + */ +export function extractFilePaths(prompt: string): string[] { + const paths = new Set(); + + // Match explicit paths with extensions + const extensionPattern = + /(?:^|\s|["'`])([\w\-./]+\.(?:[tj]sx?|json|gql|ya?ml|md|sh))\b/gi; + let match: RegExpExecArray | null; + while ((match = extensionPattern.exec(prompt)) !== null) { + paths.add(match[1]); + } + + // Match paths starting with common directories + const dirPattern = + /(?:^|\s|["'`])((?:src|app|components|screens|hooks|utils|services|navigation|graphql|localization|\.claude|\.github|\.maestro)\/[\w\-./]+)/gi; + while ((match = dirPattern.exec(prompt)) !== null) { + paths.add(match[1]); + } + + // Match quoted paths + const quotedPattern = /["'`]([\w\-./]+\/[\w\-./]+)["'`]/g; + while ((match = quotedPattern.exec(prompt)) !== null) { + paths.add(match[1]); + } + + return Array.from(paths); +} + +/** + * Test a regex pattern against text + */ +function matchesPattern(text: string, pattern: string, flags = 'i'): boolean { + try { + return new RegExp(pattern, flags).test(text); + } catch { + return false; + } +} + +/** + * Convert a simplified glob pattern to a regex and test against a file path + */ +export function matchesGlob(filePath: string, globPattern: string): boolean { + const regexPattern = globPattern + .replace(/\./g, '\\.') + .replace(/\?/g, '<<>>') + .replace(/\*\*\//g, '<<>>') + .replace(/\*\*/g, '<<>>') + .replace(/\*/g, '[^/]*') + .replace(/<<>>/g, '(?:.*\\/)?') + .replace(/<<>>/g, '.*') + .replace(/<<>>/g, '.'); + + try { + return new RegExp(`^${regexPattern}$`, 'i').test(filePath); + } catch { + return false; + } +} + +/** + * Check if a file path matches a directory mapping and return the mapped skill name + */ +function matchDirectoryMapping( + filePath: string, + mappings: 
DirectoryMapping +): string | undefined { + for (const [dir, skillName] of Object.entries(mappings)) { + if (filePath === dir || filePath.startsWith(dir + '/')) { + return skillName; + } + } + return undefined; +} + +/** + * Evaluate a single skill rule against a prompt + */ +function evaluateSkill( + skillName: string, + skill: SkillRule, + prompt: string, + promptLower: string, + filePaths: string[], + scoring: ScoringWeights, + directoryMappings: DirectoryMapping +): SkillMatch | undefined { + const { triggers, excludePatterns = [], priority = 5 } = skill; + + let score = 0; + const reasons: string[] = []; + + // Check exclude patterns first + for (const excludePattern of excludePatterns) { + if (matchesPattern(promptLower, excludePattern)) { + return undefined; + } + } + + // 1. Keywords + if (triggers.keywords) { + for (const keyword of triggers.keywords) { + if (promptLower.includes(keyword.toLowerCase())) { + score += scoring.keyword; + reasons.push(`keyword "${keyword}"`); + } + } + } + + // 2. Keyword patterns (regex) + if (triggers.keywordPatterns) { + for (const pattern of triggers.keywordPatterns) { + if (matchesPattern(promptLower, pattern)) { + score += scoring.keywordPattern; + reasons.push(`pattern /${pattern}/`); + } + } + } + + // 3. Intent patterns (first match only) + if (triggers.intentPatterns) { + for (const pattern of triggers.intentPatterns) { + if (matchesPattern(promptLower, pattern)) { + score += scoring.intentPattern; + reasons.push('intent detected'); + break; + } + } + } + + // 4. Context patterns + if (triggers.contextPatterns) { + for (const pattern of triggers.contextPatterns) { + if (promptLower.includes(pattern.toLowerCase())) { + score += scoring.contextPattern; + reasons.push(`context "${pattern}"`); + } + } + } + + // 5. 
Path patterns + if (triggers.pathPatterns && filePaths.length > 0) { + for (const filePath of filePaths) { + for (const pattern of triggers.pathPatterns) { + if (matchesGlob(filePath, pattern)) { + score += scoring.pathPattern; + reasons.push(`path "${filePath}"`); + break; + } + } + } + } + + // 6. Directory mappings + if (filePaths.length > 0) { + for (const filePath of filePaths) { + const mappedSkill = matchDirectoryMapping(filePath, directoryMappings); + if (mappedSkill === skillName) { + score += scoring.directoryMatch; + reasons.push('directory mapping'); + break; + } + } + } + + // 7. Content patterns (code snippets — case-sensitive) + if (triggers.contentPatterns) { + for (const pattern of triggers.contentPatterns) { + if (matchesPattern(prompt, pattern)) { + score += scoring.contentPattern; + reasons.push('code pattern detected'); + break; + } + } + } + + if (score > 0) { + return { + name: skillName, + score, + reasons: [...new Set(reasons)], + priority, + }; + } + + return undefined; +} + +/** + * Resolve related skills that aren't already matched + */ +function getRelatedSkills( + matches: SkillMatch[], + rules: Record +): string[] { + const matchedNames = new Set(matches.map((m) => m.name)); + const related = new Set(); + + for (const m of matches) { + const skill = rules[m.name]; + if (skill?.relatedSkills) { + for (const relatedName of skill.relatedSkills) { + if (!matchedNames.has(relatedName)) { + related.add(relatedName); + } + } + } + } + + return Array.from(related); +} + +/** + * Format confidence level from a numeric score + */ +export function formatConfidence( + score: number, + minScore: number +): ConfidenceLevel { + if (score >= minScore * 3) return 'HIGH'; + if (score >= minScore * 2) return 'MEDIUM'; + return 'LOW'; +} + +/** + * Main matching function — pure, stateless, no side effects + * + * @param prompt - User's prompt text + * @param rules - Skill rule definitions (keyed by skill name) + * @param config - Matcher config 
(minConfidenceScore, maxSkillsToShow) + * @param scoring - Per-match-type scoring weights + * @param directoryMappings - Directory → skill name map + * @returns MatchResult with sorted matches, file paths, and related skills + */ +export function matchPrompt( + prompt: string, + rules: Record, + config: MatcherConfig, + scoring: ScoringWeights, + directoryMappings: DirectoryMapping = {} +): MatchResult { + const promptLower = prompt.toLowerCase(); + const filePaths = extractFilePaths(prompt); + + const matches: SkillMatch[] = []; + for (const [name, skill] of Object.entries(rules)) { + const m = evaluateSkill( + name, + skill, + prompt, + promptLower, + filePaths, + scoring, + directoryMappings + ); + if (m && m.score >= config.minConfidenceScore) { + matches.push(m); + } + } + + // Sort by score desc, then priority desc + matches.sort((a, b) => { + if (b.score !== a.score) return b.score - a.score; + return b.priority - a.priority; + }); + + const topMatches = matches.slice(0, config.maxSkillsToShow); + const relatedSkills = getRelatedSkills(topMatches, rules); + + return { matches: topMatches, filePaths, relatedSkills }; +} diff --git a/src/core/skills/skill-registry.ts b/src/core/skills/skill-registry.ts new file mode 100644 index 00000000..21c5d389 --- /dev/null +++ b/src/core/skills/skill-registry.ts @@ -0,0 +1,881 @@ +/** + * Skill Registry — SQLite backend (replaces Redis-based skill-storage) + * + * Standalone ~/.stackmemory/skills.db (not shared with frame DB). + * Follows traces.db precedent for separate-concern databases. 
+ */ + +import Database from 'better-sqlite3'; +import { v4 as uuidv4 } from 'uuid'; +import * as path from 'path'; +import * as fs from 'fs'; +import { logger } from '../monitoring/logger.js'; +import type { + Skill, + CreateSkillInput, + UpdateSkillInput, + SkillQuery, + SkillCategory, + SkillPriority, + JournalEntry, + JournalEntryType, + SessionSummary, + SkillRule, + SkillRulesFile, + MatcherConfig, + ScoringWeights, + DirectoryMapping, +} from './types.js'; + +// ============================================================ +// SCHEMA +// ============================================================ + +const SCHEMA_VERSION = 1; + +const SCHEMA_SQL = ` + CREATE TABLE IF NOT EXISTS schema_version ( + version INTEGER PRIMARY KEY + ); + + CREATE TABLE IF NOT EXISTS skills ( + id TEXT PRIMARY KEY, + content TEXT NOT NULL, + summary TEXT, + category TEXT NOT NULL, + priority TEXT NOT NULL DEFAULT 'medium', + tags TEXT NOT NULL DEFAULT '[]', + tool TEXT, + project TEXT, + language TEXT, + framework TEXT, + validated_count INTEGER NOT NULL DEFAULT 0, + last_validated TEXT, + source TEXT NOT NULL, + session_id TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL, + expires_at TEXT + ); + + CREATE INDEX IF NOT EXISTS idx_skills_category ON skills(category); + CREATE INDEX IF NOT EXISTS idx_skills_priority ON skills(priority); + CREATE INDEX IF NOT EXISTS idx_skills_tool ON skills(tool); + CREATE INDEX IF NOT EXISTS idx_skills_created ON skills(created_at); + + CREATE TABLE IF NOT EXISTS skill_rules ( + name TEXT PRIMARY KEY, + description TEXT NOT NULL, + priority INTEGER NOT NULL DEFAULT 5, + triggers TEXT NOT NULL DEFAULT '{}', + exclude_patterns TEXT NOT NULL DEFAULT '[]', + related_skills TEXT NOT NULL DEFAULT '[]', + suggestion TEXT, + created_at TEXT NOT NULL, + updated_at TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS directory_mappings ( + directory TEXT PRIMARY KEY, + skill_name TEXT NOT NULL + ); + + CREATE TABLE IF NOT EXISTS matcher_config ( + id 
INTEGER PRIMARY KEY CHECK (id = 1), + min_confidence_score INTEGER NOT NULL DEFAULT 3, + show_match_reasons INTEGER NOT NULL DEFAULT 1, + max_skills_to_show INTEGER NOT NULL DEFAULT 5, + scoring TEXT NOT NULL DEFAULT '{}' + ); + + CREATE TABLE IF NOT EXISTS journal_entries ( + id TEXT PRIMARY KEY, + session_id TEXT NOT NULL, + type TEXT NOT NULL, + title TEXT NOT NULL, + content TEXT NOT NULL, + context_file TEXT, + context_tool TEXT, + context_command TEXT, + outcome TEXT, + promoted_to_skill_id TEXT, + created_at TEXT NOT NULL + ); + + CREATE INDEX IF NOT EXISTS idx_journal_session ON journal_entries(session_id); + CREATE INDEX IF NOT EXISTS idx_journal_type ON journal_entries(type); + + CREATE TABLE IF NOT EXISTS session_summaries ( + session_id TEXT PRIMARY KEY, + started_at TEXT NOT NULL, + ended_at TEXT, + entries_count INTEGER NOT NULL DEFAULT 0, + corrections_count INTEGER NOT NULL DEFAULT 0, + decisions_count INTEGER NOT NULL DEFAULT 0, + key_learnings TEXT NOT NULL DEFAULT '[]', + promoted_skill_ids TEXT NOT NULL DEFAULT '[]' + ); +`; + +// ============================================================ +// HELPERS +// ============================================================ + +function getDefaultDbPath(): string { + const home = process.env['HOME'] || process.env['USERPROFILE'] || '/tmp'; + return path.join(home, '.stackmemory', 'skills.db'); +} + +function priorityScore(priority: SkillPriority): number { + const scores: Record = { + critical: 1000, + high: 100, + medium: 10, + low: 1, + }; + return scores[priority] ?? 
10; +} + +function rowToSkill(row: Record): Skill { + return { + id: row['id'] as string, + content: row['content'] as string, + summary: (row['summary'] as string) || undefined, + category: row['category'] as SkillCategory, + priority: row['priority'] as SkillPriority, + tags: JSON.parse((row['tags'] as string) || '[]') as string[], + tool: (row['tool'] as string) || undefined, + project: (row['project'] as string) || undefined, + language: (row['language'] as string) || undefined, + framework: (row['framework'] as string) || undefined, + validatedCount: (row['validated_count'] as number) || 0, + lastValidated: (row['last_validated'] as string) || undefined, + source: row['source'] as Skill['source'], + sessionId: (row['session_id'] as string) || undefined, + createdAt: row['created_at'] as string, + updatedAt: row['updated_at'] as string, + expiresAt: (row['expires_at'] as string) || undefined, + }; +} + +function rowToJournalEntry(row: Record): JournalEntry { + const context: JournalEntry['context'] = + row['context_file'] || row['context_tool'] || row['context_command'] + ? 
{ + file: (row['context_file'] as string) || undefined, + tool: (row['context_tool'] as string) || undefined, + command: (row['context_command'] as string) || undefined, + } + : undefined; + + return { + id: row['id'] as string, + sessionId: row['session_id'] as string, + type: row['type'] as JournalEntryType, + title: row['title'] as string, + content: row['content'] as string, + context, + outcome: (row['outcome'] as JournalEntry['outcome']) || undefined, + createdAt: row['created_at'] as string, + promotedToSkillId: (row['promoted_to_skill_id'] as string) || undefined, + }; +} + +// ============================================================ +// SKILL REGISTRY +// ============================================================ + +export class SkillRegistry { + private db: Database.Database; + private readonly dbPath: string; + + constructor(dbPath?: string) { + this.dbPath = dbPath || getDefaultDbPath(); + + // Ensure directory exists + const dir = path.dirname(this.dbPath); + if (!fs.existsSync(dir)) { + fs.mkdirSync(dir, { recursive: true }); + } + + this.db = new Database(this.dbPath); + this.db.pragma('journal_mode = WAL'); + this.db.pragma('busy_timeout = 5000'); + this.db.pragma('foreign_keys = ON'); + + this.initSchema(); + } + + private initSchema(): void { + const versionRow = (() => { + try { + return this.db + .prepare( + "SELECT name FROM sqlite_master WHERE type='table' AND name='schema_version'" + ) + .get() as Record | undefined; + } catch { + return undefined; + } + })(); + + if (!versionRow) { + this.db.exec(SCHEMA_SQL); + this.db + .prepare('INSERT OR REPLACE INTO schema_version (version) VALUES (?)') + .run(SCHEMA_VERSION); + logger.debug('SkillRegistry: created schema v' + SCHEMA_VERSION); + } + } + + // ============================================================ + // SKILL CRUD + // ============================================================ + + createSkill(input: CreateSkillInput): Skill { + const now = new Date().toISOString(); + const id 
= uuidv4(); + + this.db + .prepare( + `INSERT INTO skills (id, content, summary, category, priority, tags, tool, project, language, framework, + validated_count, source, session_id, created_at, updated_at, expires_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?, ?, 0, ?, ?, ?, ?, ?)` + ) + .run( + id, + input.content, + input.summary ?? null, + input.category, + input.priority ?? 'medium', + JSON.stringify(input.tags ?? []), + input.tool ?? null, + input.project ?? null, + input.language ?? null, + input.framework ?? null, + input.source, + input.sessionId ?? null, + now, + now, + input.expiresAt ?? null + ); + + const skill = this.getSkill(id); + if (!skill) throw new Error(`Skill not found after creation: ${id}`); + return skill; + } + + getSkill(id: string): Skill | undefined { + const row = this.db.prepare('SELECT * FROM skills WHERE id = ?').get(id) as + | Record + | undefined; + return row ? rowToSkill(row) : undefined; + } + + updateSkill(input: UpdateSkillInput): Skill | undefined { + const existing = this.getSkill(input.id); + if (!existing) return undefined; + + const now = new Date().toISOString(); + const updates: string[] = ['updated_at = ?']; + const params: unknown[] = [now]; + + if (input.content !== undefined) { + updates.push('content = ?'); + params.push(input.content); + } + if (input.summary !== undefined) { + updates.push('summary = ?'); + params.push(input.summary); + } + if (input.category !== undefined) { + updates.push('category = ?'); + params.push(input.category); + } + if (input.priority !== undefined) { + updates.push('priority = ?'); + params.push(input.priority); + } + if (input.tags !== undefined) { + updates.push('tags = ?'); + params.push(JSON.stringify(input.tags)); + } + if (input.tool !== undefined) { + updates.push('tool = ?'); + params.push(input.tool); + } + + params.push(input.id); + this.db + .prepare(`UPDATE skills SET ${updates.join(', ')} WHERE id = ?`) + .run(...params); + + return this.getSkill(input.id); + } + + 
validateSkill(id: string): Skill | undefined { + const skill = this.getSkill(id); + if (!skill) return undefined; + + const now = new Date().toISOString(); + this.db + .prepare( + 'UPDATE skills SET validated_count = validated_count + 1, last_validated = ?, updated_at = ? WHERE id = ?' + ) + .run(now, now, id); + + return this.getSkill(id); + } + + deleteSkill(id: string): boolean { + const result = this.db.prepare('DELETE FROM skills WHERE id = ?').run(id); + return result.changes > 0; + } + + querySkills(query: SkillQuery): Skill[] { + const conditions: string[] = []; + const params: unknown[] = []; + + if (query.categories?.length) { + conditions.push( + `category IN (${query.categories.map(() => '?').join(',')})` + ); + params.push(...query.categories); + } + if (query.priorities?.length) { + conditions.push( + `priority IN (${query.priorities.map(() => '?').join(',')})` + ); + params.push(...query.priorities); + } + if (query.tool) { + conditions.push('tool = ?'); + params.push(query.tool); + } + if (query.language) { + conditions.push('language = ?'); + params.push(query.language); + } + if (query.framework) { + conditions.push('framework = ?'); + params.push(query.framework); + } + if (query.minValidatedCount !== undefined) { + conditions.push('validated_count >= ?'); + params.push(query.minValidatedCount); + } + + const where = conditions.length ? 'WHERE ' + conditions.join(' AND ') : ''; + + // Map sortBy to column names + const sortColMap: Record = { + priority: 'priority', + validatedCount: 'validated_count', + createdAt: 'created_at', + updatedAt: 'updated_at', + }; + const sortCol = sortColMap[query.sortBy ?? 'priority'] ?? 'priority'; + const sortDir = query.sortOrder === 'asc' ? 'ASC' : 'DESC'; + + const sql = `SELECT * FROM skills ${where} ORDER BY ${sortCol} ${sortDir} LIMIT ? OFFSET ?`; + params.push(query.limit ?? 50, query.offset ?? 
0); + + const rows = this.db.prepare(sql).all(...params) as Record< + string, + unknown + >[]; + + let skills = rows.map(rowToSkill); + + // In-memory sort for priority (text field needs numeric comparison) + if (sortCol === 'priority') { + skills.sort((a, b) => { + const diff = priorityScore(b.priority) - priorityScore(a.priority); + return sortDir === 'DESC' ? diff : -diff; + }); + } + + // Tag filtering (in-memory since tags are JSON) + if (query.tags?.length) { + const tags = query.tags; + skills = skills.filter((s) => tags.some((t) => s.tags.includes(t))); + } + + return skills; + } + + getRelevantSkills(context: { + tool?: string; + language?: string; + framework?: string; + tags?: string[]; + }): Skill[] { + const skills: Skill[] = []; + const seenIds = new Set(); + + // Critical skills always included + const critical = this.db + .prepare("SELECT * FROM skills WHERE priority = 'critical'") + .all() as Record[]; + for (const row of critical) { + const skill = rowToSkill(row); + if (!seenIds.has(skill.id)) { + skills.push(skill); + seenIds.add(skill.id); + } + } + + // Tool-specific + if (context.tool) { + const toolRows = this.db + .prepare( + 'SELECT * FROM skills WHERE tool = ? 
ORDER BY validated_count DESC LIMIT 20' + ) + .all(context.tool) as Record[]; + for (const row of toolRows) { + const skill = rowToSkill(row); + if (!seenIds.has(skill.id)) { + skills.push(skill); + seenIds.add(skill.id); + } + } + } + + // Highly validated + const validated = this.db + .prepare('SELECT * FROM skills ORDER BY validated_count DESC LIMIT 10') + .all() as Record[]; + for (const row of validated) { + const skill = rowToSkill(row); + if (!seenIds.has(skill.id)) { + skills.push(skill); + seenIds.add(skill.id); + } + } + + return skills.slice(0, 50); + } + + // ============================================================ + // SKILL RULES CRUD + // ============================================================ + + upsertRule(name: string, rule: SkillRule): void { + const now = new Date().toISOString(); + this.db + .prepare( + `INSERT INTO skill_rules (name, description, priority, triggers, exclude_patterns, related_skills, suggestion, created_at, updated_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?) + ON CONFLICT(name) DO UPDATE SET + description = excluded.description, + priority = excluded.priority, + triggers = excluded.triggers, + exclude_patterns = excluded.exclude_patterns, + related_skills = excluded.related_skills, + suggestion = excluded.suggestion, + updated_at = excluded.updated_at` + ) + .run( + name, + rule.description, + rule.priority, + JSON.stringify(rule.triggers), + JSON.stringify(rule.excludePatterns ?? []), + JSON.stringify(rule.relatedSkills ?? []), + rule.suggestion ?? 
null, + now, + now + ); + } + + getRule(name: string): SkillRule | undefined { + const row = this.db + .prepare('SELECT * FROM skill_rules WHERE name = ?') + .get(name) as Record | undefined; + if (!row) return undefined; + return { + description: row['description'] as string, + priority: row['priority'] as number, + triggers: JSON.parse(row['triggers'] as string), + excludePatterns: JSON.parse(row['exclude_patterns'] as string), + relatedSkills: JSON.parse(row['related_skills'] as string), + suggestion: (row['suggestion'] as string) || undefined, + }; + } + + getAllRules(): Record { + const rows = this.db.prepare('SELECT * FROM skill_rules').all() as Record< + string, + unknown + >[]; + const result: Record = {}; + for (const row of rows) { + result[row['name'] as string] = { + description: row['description'] as string, + priority: row['priority'] as number, + triggers: JSON.parse(row['triggers'] as string), + excludePatterns: JSON.parse(row['exclude_patterns'] as string), + relatedSkills: JSON.parse(row['related_skills'] as string), + suggestion: (row['suggestion'] as string) || undefined, + }; + } + return result; + } + + deleteRule(name: string): boolean { + return ( + this.db.prepare('DELETE FROM skill_rules WHERE name = ?').run(name) + .changes > 0 + ); + } + + // ============================================================ + // DIRECTORY MAPPINGS + // ============================================================ + + setDirectoryMapping(directory: string, skillName: string): void { + this.db + .prepare( + 'INSERT OR REPLACE INTO directory_mappings (directory, skill_name) VALUES (?, ?)' + ) + .run(directory, skillName); + } + + getDirectoryMappings(): DirectoryMapping { + const rows = this.db + .prepare('SELECT * FROM directory_mappings') + .all() as Record[]; + const result: DirectoryMapping = {}; + for (const row of rows) { + result[row['directory'] as string] = row['skill_name'] as string; + } + return result; + } + + // 
============================================================ + // MATCHER CONFIG + // ============================================================ + + getMatcherConfig(): { config: MatcherConfig; scoring: ScoringWeights } { + const row = this.db + .prepare('SELECT * FROM matcher_config WHERE id = 1') + .get() as Record | undefined; + + if (!row) { + // Return defaults + return { + config: { + minConfidenceScore: 3, + showMatchReasons: true, + maxSkillsToShow: 5, + }, + scoring: { + keyword: 2, + keywordPattern: 3, + pathPattern: 4, + directoryMatch: 5, + intentPattern: 4, + contentPattern: 3, + contextPattern: 2, + }, + }; + } + + return { + config: { + minConfidenceScore: row['min_confidence_score'] as number, + showMatchReasons: !!(row['show_match_reasons'] as number), + maxSkillsToShow: row['max_skills_to_show'] as number, + }, + scoring: JSON.parse(row['scoring'] as string), + }; + } + + setMatcherConfig(config: MatcherConfig, scoring: ScoringWeights): void { + this.db + .prepare( + `INSERT OR REPLACE INTO matcher_config (id, min_confidence_score, show_match_reasons, max_skills_to_show, scoring) + VALUES (1, ?, ?, ?, ?)` + ) + .run( + config.minConfidenceScore, + config.showMatchReasons ? 1 : 0, + config.maxSkillsToShow, + JSON.stringify(scoring) + ); + } + + // ============================================================ + // JOURNAL + // ============================================================ + + createJournalEntry( + sessionId: string, + type: JournalEntryType, + title: string, + content: string, + context?: JournalEntry['context'] + ): JournalEntry { + const id = uuidv4(); + const now = new Date().toISOString(); + + this.db + .prepare( + `INSERT INTO journal_entries (id, session_id, type, title, content, context_file, context_tool, context_command, created_at) + VALUES (?, ?, ?, ?, ?, ?, ?, ?, ?)` + ) + .run( + id, + sessionId, + type, + title, + content, + context?.file ?? null, + context?.tool ?? null, + context?.command ?? 
null, + now + ); + + return { + id, + sessionId, + type, + title, + content, + context, + createdAt: now, + }; + } + + getSessionJournal(sessionId: string): JournalEntry[] { + const rows = this.db + .prepare( + 'SELECT * FROM journal_entries WHERE session_id = ? ORDER BY created_at DESC' + ) + .all(sessionId) as Record[]; + return rows.map(rowToJournalEntry); + } + + promoteToSkill( + entryId: string, + category: SkillCategory, + priority: SkillPriority = 'medium' + ): Skill | undefined { + const row = this.db + .prepare('SELECT * FROM journal_entries WHERE id = ?') + .get(entryId) as Record | undefined; + if (!row) return undefined; + + const entry = rowToJournalEntry(row); + const skill = this.createSkill({ + content: entry.content, + summary: entry.title, + category, + priority, + tags: [], + tool: entry.context?.tool, + source: 'observation', + sessionId: entry.sessionId, + }); + + // Link entry to promoted skill + this.db + .prepare( + 'UPDATE journal_entries SET promoted_to_skill_id = ? WHERE id = ?' 
+ ) + .run(skill.id, entryId); + + return skill; + } + + // ============================================================ + // SESSION MANAGEMENT + // ============================================================ + + startSession(sessionId: string): void { + const now = new Date().toISOString(); + this.db + .prepare( + `INSERT OR REPLACE INTO session_summaries (session_id, started_at, entries_count, corrections_count, decisions_count) + VALUES (?, ?, 0, 0, 0)` + ) + .run(sessionId, now); + } + + endSession(sessionId: string): SessionSummary | undefined { + const row = this.db + .prepare('SELECT * FROM session_summaries WHERE session_id = ?') + .get(sessionId) as Record | undefined; + if (!row) return undefined; + + const now = new Date().toISOString(); + const entries = this.getSessionJournal(sessionId); + + const corrections = entries.filter((e) => e.type === 'correction').length; + const decisions = entries.filter((e) => e.type === 'decision').length; + const keyLearnings = entries + .filter((e) => e.type === 'correction' || e.type === 'resolution') + .slice(0, 5) + .map((e) => e.title); + const promotedSkillIds = entries + .filter((e) => e.promotedToSkillId != null) + .map((e) => e.promotedToSkillId as string); + + this.db + .prepare( + `UPDATE session_summaries SET + ended_at = ?, entries_count = ?, corrections_count = ?, + decisions_count = ?, key_learnings = ?, promoted_skill_ids = ? 
+ WHERE session_id = ?` + ) + .run( + now, + entries.length, + corrections, + decisions, + JSON.stringify(keyLearnings), + JSON.stringify(promotedSkillIds), + sessionId + ); + + return { + sessionId, + startedAt: row['started_at'] as string, + endedAt: now, + entriesCount: entries.length, + correctionsCount: corrections, + decisionsCount: decisions, + keyLearnings, + promotedSkillIds, + }; + } + + getSessionSummary(sessionId: string): SessionSummary | undefined { + const row = this.db + .prepare('SELECT * FROM session_summaries WHERE session_id = ?') + .get(sessionId) as Record | undefined; + if (!row) return undefined; + + return { + sessionId: row['session_id'] as string, + startedAt: row['started_at'] as string, + endedAt: (row['ended_at'] as string) || undefined, + entriesCount: (row['entries_count'] as number) || 0, + correctionsCount: (row['corrections_count'] as number) || 0, + decisionsCount: (row['decisions_count'] as number) || 0, + keyLearnings: JSON.parse((row['key_learnings'] as string) || '[]'), + promotedSkillIds: JSON.parse( + (row['promoted_skill_ids'] as string) || '[]' + ), + }; + } + + // ============================================================ + // METRICS + // ============================================================ + + getMetrics(): { + skillsTotal: number; + skillsByCategory: Record; + rulesTotal: number; + journalEntriesTotal: number; + sessionsTotal: number; + } { + const skillsTotal = ( + this.db.prepare('SELECT COUNT(*) as c FROM skills').get() as { + c: number; + } + ).c; + + const catRows = this.db + .prepare('SELECT category, COUNT(*) as c FROM skills GROUP BY category') + .all() as { category: string; c: number }[]; + const skillsByCategory: Record = {}; + for (const row of catRows) { + skillsByCategory[row.category] = row.c; + } + + const rulesTotal = ( + this.db.prepare('SELECT COUNT(*) as c FROM skill_rules').get() as { + c: number; + } + ).c; + + const journalEntriesTotal = ( + this.db.prepare('SELECT COUNT(*) as c FROM 
journal_entries').get() as { + c: number; + } + ).c; + + const sessionsTotal = ( + this.db.prepare('SELECT COUNT(*) as c FROM session_summaries').get() as { + c: number; + } + ).c; + + return { + skillsTotal, + skillsByCategory, + rulesTotal, + journalEntriesTotal, + sessionsTotal, + }; + } + + // ============================================================ + // SEED FROM RULES JSON + // ============================================================ + + seedFromRulesJson(rulesFile: SkillRulesFile): void { + const tx = this.db.transaction(() => { + // Seed config + this.setMatcherConfig(rulesFile.config, rulesFile.scoring); + + // Seed directory mappings + for (const [dir, skill] of Object.entries( + rulesFile.directoryMappings || {} + )) { + this.setDirectoryMapping(dir, skill); + } + + // Seed rules + for (const [name, rule] of Object.entries(rulesFile.skills)) { + this.upsertRule(name, rule); + } + }); + tx(); + + logger.info('SkillRegistry: seeded from rules JSON', { + rules: Object.keys(rulesFile.skills).length, + mappings: Object.keys(rulesFile.directoryMappings || {}).length, + }); + } + + // ============================================================ + // LIFECYCLE + // ============================================================ + + close(): void { + this.db.close(); + } +} + +// ============================================================ +// SINGLETON +// ============================================================ + +let registryInstance: SkillRegistry | undefined; + +export function getSkillRegistry(dbPath?: string): SkillRegistry { + if (!registryInstance) { + registryInstance = new SkillRegistry(dbPath); + } + return registryInstance; +} + +export function resetSkillRegistry(): void { + if (registryInstance) { + registryInstance.close(); + registryInstance = undefined; + } +} diff --git a/src/core/skills/types.ts b/src/core/skills/types.ts index 0ca54884..74e14d02 100644 --- a/src/core/skills/types.ts +++ b/src/core/skills/types.ts @@ -203,68 +203,91 
@@ export const SkillQuerySchema = z.object({ export type SkillQuery = z.infer; // ============================================================ -// REDIS KEY PATTERNS +// SKILL RULE / MATCHER TYPES // ============================================================ /** - * Redis key generators - all keys are namespaced by userId - * Use 'global' as userId for shared/team skills + * Trigger conditions for a skill rule */ -export const REDIS_KEYS = { - // Skills (namespaced by user) - skill: (userId: string, id: string) => `user:${userId}:skill:${id}`, - skillsByTool: (userId: string, tool: string) => - `user:${userId}:skills:tool:${tool}`, - skillsByCategory: (userId: string, category: SkillCategory) => - `user:${userId}:skills:category:${category}`, - skillsByTag: (userId: string, tag: string) => - `user:${userId}:skills:tag:${tag}`, - skillsRecent: (userId: string) => `user:${userId}:skills:recent`, - skillsValidated: (userId: string) => `user:${userId}:skills:validated`, - - // Session journal (namespaced by user) - journalEntry: (userId: string, id: string) => - `user:${userId}:journal:entry:${id}`, - journalSession: (userId: string, sessionId: string) => - `user:${userId}:journal:session:${sessionId}`, - journalRecent: (userId: string) => `user:${userId}:journal:recent`, - - // Session tracking (namespaced by user) - sessionSummary: (userId: string, sessionId: string) => - `user:${userId}:session:summary:${sessionId}`, - sessionsActive: (userId: string) => `user:${userId}:sessions:active`, - - // Promotion tracking (namespaced by user) - promotionCandidates: (userId: string) => - `user:${userId}:skills:promotion:candidates`, - - // Locks (global) - syncLock: (resource: string) => `lock:skill:${resource}`, -} as const; +export interface SkillTriggers { + keywords?: string[]; + keywordPatterns?: string[]; + pathPatterns?: string[]; + intentPatterns?: string[]; + contentPatterns?: string[]; + contextPatterns?: string[]; +} -// 
============================================================ -// CACHE TTL CONSTANTS (in seconds) -// ============================================================ +/** + * A single skill rule definition (from skill-rules.json) + */ +export interface SkillRule { + description: string; + priority: number; + triggers: SkillTriggers; + excludePatterns?: string[]; + relatedSkills?: string[]; + suggestion?: string; +} + +/** + * Result of matching a single skill against a prompt + */ +export interface SkillMatch { + name: string; + score: number; + reasons: string[]; + priority: number; +} + +/** + * Full result of matching all rules against a prompt + */ +export interface MatchResult { + matches: SkillMatch[]; + filePaths: string[]; + relatedSkills: string[]; +} -export const CACHE_TTL = { - // Base skill TTL: 7 days minimum - skillBase: 604800, // 7 days - // Max skill TTL: 90 days for frequently used skills - skillMax: 7776000, // 90 days - // TTL increment per validation/use: +7 days - skillIncrement: 604800, // 7 days +/** + * Global matcher configuration + */ +export interface MatcherConfig { + minConfidenceScore: number; + showMatchReasons: boolean; + maxSkillsToShow: number; +} - skillIndex: 86400, // 1 day (was 1 hour) - session: 604800, // 7 days - journal: 2592000, // 30 days - lock: 30, // 30 seconds -} as const; +/** + * Scoring weights per match type + */ +export interface ScoringWeights { + keyword: number; + keywordPattern: number; + pathPattern: number; + directoryMatch: number; + intentPattern: number; + contentPattern: number; + contextPattern: number; +} + +/** + * Directory path → skill name mapping + */ +export type DirectoryMapping = Record; + +/** + * Confidence level label + */ +export type ConfidenceLevel = 'HIGH' | 'MEDIUM' | 'LOW'; /** - * Calculate TTL for a skill based on usage - * Base: 7 days, +7 days per validation, max 90 days + * Complete rules file structure (mirrors skill-rules.json) */ -export function 
calculateSkillTTL(validatedCount: number): number { - const ttl = CACHE_TTL.skillBase + validatedCount * CACHE_TTL.skillIncrement; - return Math.min(ttl, CACHE_TTL.skillMax); +export interface SkillRulesFile { + version: string; + config: MatcherConfig; + scoring: ScoringWeights; + directoryMappings: DirectoryMapping; + skills: Record; } diff --git a/src/integrations/mcp/handlers/skill-handlers.ts b/src/integrations/mcp/handlers/skill-handlers.ts index 4ee396f1..60b317f6 100644 --- a/src/integrations/mcp/handlers/skill-handlers.ts +++ b/src/integrations/mcp/handlers/skill-handlers.ts @@ -1,33 +1,22 @@ /** * MCP Skill Handlers * Handlers for persistent agent learning and skill operations + * Backed by SQLite via SkillRegistry (replaces Redis) */ import { logger } from '../../../core/monitoring/logger.js'; - -// Type-safe environment variable access -function _getEnv(key: string, defaultValue?: string): string { - const value = process.env[key]; - if (value === undefined) { - if (defaultValue !== undefined) return defaultValue; - throw new Error(`Environment variable ${key} is required`); - } - return value; -} - -function _getOptionalEnv(key: string): string | undefined { - return process.env[key]; -} - import { - SkillStorageService, - getSkillStorage, - getDefaultUserId, + SkillRegistry, + getSkillRegistry, + matchPromptFromRegistry, +} from '../../../core/skills/index.js'; +import type { Skill, SkillCategory, SkillPriority, JournalEntryType, SkillQuery, + MatchResult, } from '../../../core/skills/index.js'; export interface SkillHandlerContext { @@ -37,54 +26,25 @@ export interface SkillHandlerContext { } export class SkillHandlers { - private skillStorage: SkillStorageService | null = null; - private userId: string; + private registry: SkillRegistry | null = null; - constructor( - private redisUrl?: string, - userId?: string - ) { - this.userId = userId || getDefaultUserId(); - } + constructor(private dbPath?: string) {} - /** - * Lazy initialization of skill 
storage - */ - private getStorage(): SkillStorageService { - if (!this.skillStorage) { - const url = this.redisUrl || process.env['REDIS_URL']; - if (!url) { - throw new Error('REDIS_URL not configured for skill storage'); - } - this.skillStorage = getSkillStorage({ - redisUrl: url, - userId: this.userId, - }); + private getRegistry(): SkillRegistry { + if (!this.registry) { + this.registry = getSkillRegistry(this.dbPath); } - return this.skillStorage; + return this.registry; } - /** - * Get current user ID - */ - getUserId(): string { - return this.userId; - } - - /** - * Check if skill storage is available - */ isAvailable(): boolean { - return !!(this.redisUrl || process.env['REDIS_URL']); + return !!(process.env['STACKMEMORY_SKILLS'] || process.env['SM_SKILLS']); } // ============================================================ // SKILL OPERATIONS // ============================================================ - /** - * Record a new skill/learning - */ async recordSkill( args: { content: string; @@ -97,9 +57,8 @@ export class SkillHandlers { context: SkillHandlerContext ): Promise<{ success: boolean; skill?: Skill; error?: string }> { try { - const storage = this.getStorage(); - - const skill = await storage.createSkill({ + const registry = this.getRegistry(); + const skill = registry.createSkill({ content: args.content, category: args.category as SkillCategory, priority: (args.priority || 'medium') as SkillPriority, @@ -108,12 +67,6 @@ export class SkillHandlers { source: (args.source || 'observation') as Skill['source'], sessionId: context.sessionId, }); - - logger.info('Recorded skill via MCP', { - skillId: skill.id, - category: skill.category, - }); - return { success: true, skill }; } catch (error: unknown) { const message = error instanceof Error ? 
error.message : 'Unknown error'; @@ -122,9 +75,6 @@ export class SkillHandlers { } } - /** - * Get relevant skills for current context - */ async getRelevantSkills(args: { tool?: string; language?: string; @@ -133,17 +83,14 @@ export class SkillHandlers { limit?: number; }): Promise<{ success: boolean; skills?: Skill[]; error?: string }> { try { - const storage = this.getStorage(); - - const skills = await storage.getRelevantSkills({ + const registry = this.getRegistry(); + const skills = registry.getRelevantSkills({ tool: args.tool, language: args.language, framework: args.framework, tags: args.tags, }); - const limited = args.limit ? skills.slice(0, args.limit) : skills; - return { success: true, skills: limited }; } catch (error: unknown) { const message = error instanceof Error ? error.message : 'Unknown error'; @@ -152,9 +99,6 @@ export class SkillHandlers { } } - /** - * Query skills with filters - */ async querySkills(args: { categories?: string[]; priorities?: string[]; @@ -170,8 +114,7 @@ export class SkillHandlers { error?: string; }> { try { - const storage = this.getStorage(); - + const registry = this.getRegistry(); const query: SkillQuery = { categories: args.categories as SkillCategory[], priorities: args.priorities as SkillPriority[], @@ -183,9 +126,7 @@ export class SkillHandlers { sortBy: (args.sortBy || 'priority') as SkillQuery['sortBy'], sortOrder: 'desc', }; - - const skills = await storage.querySkills(query); - + const skills = registry.querySkills(query); return { success: true, skills, total: skills.length }; } catch (error: unknown) { const message = error instanceof Error ? 
error.message : 'Unknown error'; @@ -194,20 +135,13 @@ export class SkillHandlers { } } - /** - * Validate/reinforce a skill - */ async validateSkill(args: { skill_id: string; }): Promise<{ success: boolean; skill?: Skill; error?: string }> { try { - const storage = this.getStorage(); - - const skill = await storage.validateSkill(args.skill_id); - if (!skill) { - return { success: false, error: 'Skill not found' }; - } - + const registry = this.getRegistry(); + const skill = registry.validateSkill(args.skill_id); + if (!skill) return { success: false, error: 'Skill not found' }; return { success: true, skill }; } catch (error: unknown) { const message = error instanceof Error ? error.message : 'Unknown error'; @@ -216,9 +150,6 @@ export class SkillHandlers { } } - /** - * Update a skill - */ async updateSkill(args: { skill_id: string; content?: string; @@ -226,19 +157,14 @@ export class SkillHandlers { tags?: string[]; }): Promise<{ success: boolean; skill?: Skill; error?: string }> { try { - const storage = this.getStorage(); - - const skill = await storage.updateSkill({ + const registry = this.getRegistry(); + const skill = registry.updateSkill({ id: args.skill_id, content: args.content, priority: args.priority as SkillPriority, tags: args.tags, }); - - if (!skill) { - return { success: false, error: 'Skill not found' }; - } - + if (!skill) return { success: false, error: 'Skill not found' }; return { success: true, skill }; } catch (error: unknown) { const message = error instanceof Error ? 
error.message : 'Unknown error'; @@ -247,20 +173,13 @@ export class SkillHandlers { } } - /** - * Delete a skill - */ async deleteSkill(args: { skill_id: string; }): Promise<{ success: boolean; error?: string }> { try { - const storage = this.getStorage(); - - const deleted = await storage.deleteSkill(args.skill_id); - if (!deleted) { - return { success: false, error: 'Skill not found' }; - } - + const registry = this.getRegistry(); + const deleted = registry.deleteSkill(args.skill_id); + if (!deleted) return { success: false, error: 'Skill not found' }; return { success: true }; } catch (error: unknown) { const message = error instanceof Error ? error.message : 'Unknown error'; @@ -269,13 +188,27 @@ export class SkillHandlers { } } + // ============================================================ + // MATCH PROMPT (NEW) + // ============================================================ + + async matchPrompt(args: { + prompt: string; + }): Promise<{ success: boolean; result?: MatchResult; error?: string }> { + try { + const result = matchPromptFromRegistry(args.prompt); + return { success: true, result }; + } catch (error: unknown) { + const message = error instanceof Error ? 
error.message : 'Unknown error'; + logger.error('Failed to match prompt', { error: message }); + return { success: false, error: message }; + } + } + // ============================================================ // SESSION JOURNAL OPERATIONS // ============================================================ - /** - * Record a journal entry - */ async recordJournalEntry( args: { type: string; @@ -287,17 +220,15 @@ export class SkillHandlers { context: SkillHandlerContext ): Promise<{ success: boolean; entryId?: string; error?: string }> { try { - const storage = this.getStorage(); + const registry = this.getRegistry(); const sessionId = context.sessionId || 'default'; - - const entry = await storage.createJournalEntry( + const entry = registry.createJournalEntry( sessionId, args.type as JournalEntryType, args.title, args.content, { tool: args.tool, file: args.file } ); - return { success: true, entryId: entry.id }; } catch (error: unknown) { const message = error instanceof Error ? error.message : 'Unknown error'; @@ -306,21 +237,14 @@ export class SkillHandlers { } } - /** - * Get session journal - */ async getSessionJournal( - args: { - session_id?: string; - }, + args: { session_id?: string }, context: SkillHandlerContext - ): Promise<{ success: boolean; entries?: any[]; error?: string }> { + ): Promise<{ success: boolean; entries?: unknown[]; error?: string }> { try { - const storage = this.getStorage(); + const registry = this.getRegistry(); const sessionId = args.session_id || context.sessionId || 'default'; - - const entries = await storage.getSessionJournal(sessionId); - + const entries = registry.getSessionJournal(sessionId); return { success: true, entries }; } catch (error: unknown) { const message = error instanceof Error ? 
error.message : 'Unknown error'; @@ -329,27 +253,19 @@ export class SkillHandlers { } } - /** - * Promote a journal entry to a skill - */ async promoteToSkill(args: { entry_id: string; category: string; priority?: string; }): Promise<{ success: boolean; skill?: Skill; error?: string }> { try { - const storage = this.getStorage(); - - const skill = await storage.promoteToSkill( + const registry = this.getRegistry(); + const skill = registry.promoteToSkill( args.entry_id, args.category as SkillCategory, (args.priority || 'medium') as SkillPriority ); - - if (!skill) { - return { success: false, error: 'Journal entry not found' }; - } - + if (!skill) return { success: false, error: 'Journal entry not found' }; return { success: true, skill }; } catch (error: unknown) { const message = error instanceof Error ? error.message : 'Unknown error'; @@ -362,15 +278,12 @@ export class SkillHandlers { // SESSION MANAGEMENT // ============================================================ - /** - * Start session tracking - */ async startSession(args: { session_id: string; }): Promise<{ success: boolean; error?: string }> { try { - const storage = this.getStorage(); - await storage.startSession(args.session_id); + const registry = this.getRegistry(); + registry.startSession(args.session_id); return { success: true }; } catch (error: unknown) { const message = error instanceof Error ? 
error.message : 'Unknown error'; @@ -379,20 +292,13 @@ export class SkillHandlers { } } - /** - * End session and get summary - */ async endSession(args: { session_id: string; - }): Promise<{ success: boolean; summary?: any; error?: string }> { + }): Promise<{ success: boolean; summary?: unknown; error?: string }> { try { - const storage = this.getStorage(); - const summary = await storage.endSession(args.session_id); - - if (!summary) { - return { success: false, error: 'Session not found' }; - } - + const registry = this.getRegistry(); + const summary = registry.endSession(args.session_id); + if (!summary) return { success: false, error: 'Session not found' }; return { success: true, summary }; } catch (error: unknown) { const message = error instanceof Error ? error.message : 'Unknown error'; @@ -402,78 +308,17 @@ export class SkillHandlers { } // ============================================================ - // KNOWLEDGE MANAGEMENT + // METRICS // ============================================================ - /** - * Get promotion candidates - */ - async getPromotionCandidates(): Promise<{ - success: boolean; - skills?: Skill[]; - error?: string; - }> { - try { - const storage = this.getStorage(); - const skills = await storage.getPromotionCandidates(); - return { success: true, skills }; - } catch (error: unknown) { - const message = error instanceof Error ? 
error.message : 'Unknown error'; - logger.error('Failed to get promotion candidates', { error: message }); - return { success: false, error: message }; - } - } - - /** - * Promote skill priority - */ - async promoteSkillPriority(args: { - skill_id: string; - }): Promise<{ success: boolean; skill?: Skill; error?: string }> { - try { - const storage = this.getStorage(); - const skill = await storage.promoteSkill(args.skill_id); - - if (!skill) { - return { success: false, error: 'Skill not found' }; - } - - return { success: true, skill }; - } catch (error: unknown) { - const message = error instanceof Error ? error.message : 'Unknown error'; - logger.error('Failed to promote skill priority', { error: message }); - return { success: false, error: message }; - } - } - - /** - * Archive stale skills - */ - async archiveStaleSkills(args: { - days_threshold?: number; - }): Promise<{ success: boolean; archivedCount?: number; error?: string }> { - try { - const storage = this.getStorage(); - const count = await storage.archiveStaleSkills(args.days_threshold || 90); - return { success: true, archivedCount: count }; - } catch (error: unknown) { - const message = error instanceof Error ? error.message : 'Unknown error'; - logger.error('Failed to archive stale skills', { error: message }); - return { success: false, error: message }; - } - } - - /** - * Get skill storage metrics - */ async getSkillMetrics(): Promise<{ success: boolean; - metrics?: any; + metrics?: unknown; error?: string; }> { try { - const storage = this.getStorage(); - const metrics = await storage.getMetrics(); + const registry = this.getRegistry(); + const metrics = registry.getMetrics(); return { success: true, metrics }; } catch (error: unknown) { const message = error instanceof Error ? 
error.message : 'Unknown error'; @@ -673,4 +518,19 @@ export const SKILL_TOOL_DEFINITIONS = [ properties: {}, }, }, + { + name: 'match_prompt', + description: + 'Match a prompt against skill rules and return scored skill suggestions', + inputSchema: { + type: 'object' as const, + properties: { + prompt: { + type: 'string', + description: 'The prompt text to match against skill rules', + }, + }, + required: ['prompt'], + }, + }, ]; diff --git a/tsconfig.check.json b/tsconfig.check.json index 0608e100..6f35c5d8 100644 --- a/tsconfig.check.json +++ b/tsconfig.check.json @@ -6,9 +6,9 @@ "declarationMap": false, "sourceMap": false, "incremental": true, - "tsBuildInfoFile": ".tsbuildinfo" + "tsBuildInfoFile": ".tsbuildinfo-check" }, - "include": ["src/**/*"], + "include": ["src/**/*", "scripts/**/*"], "exclude": [ "node_modules", "dist",