From 2bba6569ffd5fc6d94945effd5b57fe3015d3ef3 Mon Sep 17 00:00:00 2001 From: "StackMemory Bot (CLI)" Date: Wed, 8 Apr 2026 12:25:33 -0400 Subject: [PATCH 01/11] feat(conductor): GitButler virtual branch mode for workspace management --- .husky/post-checkout | 72 +++++++++ .husky/pre-commit | 52 ++++-- .husky/pre-commit-user | 13 ++ scripts/dspy/optimized_state.json | 4 +- scripts/gepa/generations/gen-000/baseline.md | 2 +- scripts/gepa/generations/gen-001/baseline.md | 2 +- src/cli/commands/orchestrate.ts | 6 + src/cli/commands/orchestrator.ts | 158 ++++++++++++++++--- 8 files changed, 276 insertions(+), 33 deletions(-) create mode 100755 .husky/post-checkout create mode 100755 .husky/pre-commit-user diff --git a/.husky/post-checkout b/.husky/post-checkout new file mode 100755 index 00000000..fd875bc5 --- /dev/null +++ b/.husky/post-checkout @@ -0,0 +1,72 @@ +#!/bin/sh +# GITBUTLER_MANAGED_HOOK_V1 +# This hook auto-cleans GitButler hooks when you checkout away from gitbutler/workspace. + +PREV_HEAD=$1 +NEW_HEAD=$2 +BRANCH_CHECKOUT=$3 + +# Only act on branch checkouts (not file checkouts) +if [ "$BRANCH_CHECKOUT" != "1" ]; then + # Run user's hook if it exists + if [ -x "$(dirname "$0")/post-checkout-user" ]; then + exec "$(dirname "$0")/post-checkout-user" "$@" + fi + exit 0 +fi + +# Get the new branch name +NEW_BRANCH=$(git symbolic-ref --short HEAD 2>/dev/null) + +# If we just left gitbutler/workspace (and aren't coming back to it) +PREV_BRANCH=$(git name-rev --name-only "$PREV_HEAD" 2>/dev/null | sed 's|^remotes/||') +if echo "$PREV_BRANCH" | grep -q "gitbutler/workspace"; then + if [ "$NEW_BRANCH" != "gitbutler/workspace" ]; then + echo "" + echo "NOTE: You have left GitButler's managed workspace branch." + echo "Cleaning up GitButler hooks..." 
+ + HOOKS_DIR=$(dirname "$0") + + # Restore pre-commit - but only if it's GitButler-managed + if [ -f "$HOOKS_DIR/pre-commit-user" ]; then + mv "$HOOKS_DIR/pre-commit-user" "$HOOKS_DIR/pre-commit" + echo " Restored: pre-commit" + elif [ -f "$HOOKS_DIR/pre-commit" ]; then + # Only remove if it's GitButler-managed (has our signature) + if grep -q "GITBUTLER_MANAGED_HOOK_V1" "$HOOKS_DIR/pre-commit"; then + rm "$HOOKS_DIR/pre-commit" + echo " Removed: pre-commit (GitButler managed)" + else + echo " Warning: pre-commit hook is not GitButler-managed, leaving it untouched" + fi + fi + + # Run user's post-checkout if it exists, then clean up + if [ -x "$HOOKS_DIR/post-checkout-user" ]; then + "$HOOKS_DIR/post-checkout-user" "$@" + mv "$HOOKS_DIR/post-checkout-user" "$HOOKS_DIR/post-checkout" + echo " Restored: post-checkout" + else + # Only remove self if we're GitButler-managed (we should be, but check anyway) + if grep -q "GITBUTLER_MANAGED_HOOK_V1" "$HOOKS_DIR/post-checkout"; then + rm "$HOOKS_DIR/post-checkout" + echo " Removed: post-checkout (GitButler managed)" + else + echo " Warning: post-checkout hook is not GitButler-managed, leaving it untouched" + fi + fi + + echo "" + echo "To return to GitButler mode, run: but setup" + echo "" + exit 0 + fi +fi + +# Run user's hook if it exists +if [ -x "$(dirname "$0")/post-checkout-user" ]; then + exec "$(dirname "$0")/post-checkout-user" "$@" +fi + +exit 0 diff --git a/.husky/pre-commit b/.husky/pre-commit index d8089be9..869a3294 100755 --- a/.husky/pre-commit +++ b/.husky/pre-commit @@ -1,13 +1,43 @@ -# Use Node version from .nvmrc -export NVM_DIR="$HOME/.nvm" -if [ -s "$NVM_DIR/nvm.sh" ]; then - . 
"$NVM_DIR/nvm.sh" - nvm use 2>/dev/null -elif [ -d "$HOME/.nvm/versions/node" ]; then - NODE_VER=$(cat "$(git rev-parse --show-toplevel)/.nvmrc" 2>/dev/null || echo "20") - NODE_PATH=$(ls -d "$HOME/.nvm/versions/node/v${NODE_VER}"* 2>/dev/null | head -1) - [ -n "$NODE_PATH" ] && export PATH="$NODE_PATH/bin:$PATH" +#!/bin/sh +# GITBUTLER_MANAGED_HOOK_V1 +# This hook is managed by GitButler to prevent accidental commits on the workspace branch. +# Your original pre-commit hook has been preserved as 'pre-commit-user'. + +HOOKS_DIR=$(dirname "$0") + +# Run user's hook first if it exists - if it fails, stop here +if [ -x "$HOOKS_DIR/pre-commit-user" ]; then + "$HOOKS_DIR/pre-commit-user" "$@" || exit $? +fi + +# Get the current branch name +BRANCH=$(git symbolic-ref --short HEAD 2>/dev/null) + +if [ "$BRANCH" = "gitbutler/workspace" ]; then + echo "" + echo "GITBUTLER_ERROR: Cannot commit directly to gitbutler/workspace branch." + echo "" + echo "GitButler manages commits on this branch. Please use GitButler to commit your changes:" + echo " - Use the GitButler app to create commits" + echo " - Or run 'but commit' from the command line" + echo "" + echo "If you want to exit GitButler mode and use normal git:" + echo " - Run 'but teardown' to switch to a regular branch" + echo " - Or directly checkout another branch: git checkout " + echo "" + echo "If you no longer have the GitButler CLI installed, you can simply remove this hook and checkout another branch:" + printf ' rm "%s/pre-commit"\n' "$HOOKS_DIR" + echo "" + exit 1 +fi + +# Not on workspace branch - run user's original hook if it exists +if [ -x "$HOOKS_DIR/pre-commit-user" ]; then + echo "" + echo "WARNING: GitButler's pre-commit hook is still installed but you're not on gitbutler/workspace." 
+ echo "If you're no longer using GitButler, you can restore your original hook:" + printf ' mv "%s/pre-commit-user" "%s/pre-commit"\n' "$HOOKS_DIR" "$HOOKS_DIR" + echo "" fi -npx lint-staged -npm run build +exit 0 diff --git a/.husky/pre-commit-user b/.husky/pre-commit-user new file mode 100755 index 00000000..d8089be9 --- /dev/null +++ b/.husky/pre-commit-user @@ -0,0 +1,13 @@ +# Use Node version from .nvmrc +export NVM_DIR="$HOME/.nvm" +if [ -s "$NVM_DIR/nvm.sh" ]; then + . "$NVM_DIR/nvm.sh" + nvm use 2>/dev/null +elif [ -d "$HOME/.nvm/versions/node" ]; then + NODE_VER=$(cat "$(git rev-parse --show-toplevel)/.nvmrc" 2>/dev/null || echo "20") + NODE_PATH=$(ls -d "$HOME/.nvm/versions/node/v${NODE_VER}"* 2>/dev/null | head -1) + [ -n "$NODE_PATH" ] && export PATH="$NODE_PATH/bin:$PATH" +fi + +npx lint-staged +npm run build diff --git a/scripts/dspy/optimized_state.json b/scripts/dspy/optimized_state.json index 40481fa0..4f13fb2a 100644 --- a/scripts/dspy/optimized_state.json +++ b/scripts/dspy/optimized_state.json @@ -10,7 +10,7 @@ "token_budget": 4096, "session_summary": "Frames: 50, recent activity in project", "available_frames": "- c05e94b2-3400-438b-bda1-d040c41a4d06: \"StackMemory v0.3.0 Development\" (task, score: 0.50, events: 0)\n- 722d8b90-29d2-462d-8077-a1ce0920db58: \"Test tool call storage\" (task, score: 0.50, events: 0)\n- 5fc00ed8-96a9-430f-a05e-ab83e2a411ac: \"working-on-cli\" (task, score: 0.50, events: 0)\n- 67cb9f1b-a458-4d57-82c0-c29308e00e87: \"cli-session\" (task, score: 0.50, events: 0)\n- 278f2eab-6bd7-4693-b9dc-7d583acadb8c: \"test-frame\" (task, score: 0.50, events: 0)\n- 86001f96-262a-4dd0-9320-f1545bf07b37: \"frame-1\" (task, score: 0.50, events: 0)\n- 88129690-84d0-48f5-825d-29e1f1deae86: \"frame-2\" (task, score: 0.50, events: 0)\n- 4ec15099-8217-453c-97bb-78f9806295e0: \"frame-3\" (task, score: 0.50, events: 0)\n- 67268788-f362-45f9-a68b-31801196e56b: \"test-stack-trace-capture\" (task, score: 0.50, events: 0)\n- 
0f48dc4c-3a4b-4a69-ba23-95ac3be8d6e4: \"test-stack-trace-capture\" (task, score: 0.50, events: 0)\n- bd55adfc-6dc6-4dfb-a726-fda040796486: \"cli-session\" (task, score: 0.50, events: 0)\n- 7179084c-e4cc-4fda-82e1-47165c3629fb: \"cli-session\" (task, score: 0.50, events: 0)\n- 2e6692f7-3fc0-4777-8578-b1674658bbdc: \"team_share\" (tool_scope, score: 0.50, events: 0)\n- b41fbc67-c9f1-4fa6-b4f0-f2d523b93b2d: \"team_share\" (tool_scope, score: 0.50, events: 0)\n- d1eabbf4-fdab-4460-985c-5413f7307004: \"team_share\" (tool_scope, score: 0.50, events: 0)", - "key_decisions": "- Exit code is 0. The remaining 4 warnings are in `skill-storage.ts`, which is not...\n- I now have a complete picture. Here is the implementation plan.\n\n---\n\n## Impleme...\n- Perfect! I have all the key files. Let me create a comprehensive report of the s...\n- Perfect. Now I have all the information you need. Let me compile a comprehensive...\n- Now I have all the information I need. Let me provide a comprehensive analysis:\n...", + "key_decisions": "- The lint script only runs on `.ts` files, not `.js` files. The next.config.js er...\n- Task completed by agent: Add web clipper ingest pipeline. Watch a raw/ directory...\n- Task completed by agent: Build Obsidian vault adapter for frame serialization. W...\n- Task completed by agent: Wire Obsidian adapter into config + CLI. Add obsidianVa...\n- Task completed by agent: Test board end-to-end. 
Launch board, create session, se...", "reasoning": "Frame 'StackMemory v0.3.0 Development' directly matches the query topic.", "frames_to_retrieve": "[{\"frameId\": \"c05e94b2-3400-438b-bda1-d040c41a4d06\", \"priority\": 9, \"reason\": \"Direct match\", \"includeEvents\": true, \"includeAnchors\": true}]", "confidence_score": 0.9 @@ -20,7 +20,7 @@ "token_budget": 4096, "session_summary": "Frames: 50, recent activity in project", "available_frames": "- c05e94b2-3400-438b-bda1-d040c41a4d06: \"StackMemory v0.3.0 Development\" (task, score: 0.50, events: 0)\n- 722d8b90-29d2-462d-8077-a1ce0920db58: \"Test tool call storage\" (task, score: 0.50, events: 0)\n- 5fc00ed8-96a9-430f-a05e-ab83e2a411ac: \"working-on-cli\" (task, score: 0.50, events: 0)\n- 67cb9f1b-a458-4d57-82c0-c29308e00e87: \"cli-session\" (task, score: 0.50, events: 0)\n- 278f2eab-6bd7-4693-b9dc-7d583acadb8c: \"test-frame\" (task, score: 0.50, events: 0)\n- 86001f96-262a-4dd0-9320-f1545bf07b37: \"frame-1\" (task, score: 0.50, events: 0)\n- 88129690-84d0-48f5-825d-29e1f1deae86: \"frame-2\" (task, score: 0.50, events: 0)\n- 4ec15099-8217-453c-97bb-78f9806295e0: \"frame-3\" (task, score: 0.50, events: 0)\n- 67268788-f362-45f9-a68b-31801196e56b: \"test-stack-trace-capture\" (task, score: 0.50, events: 0)\n- 0f48dc4c-3a4b-4a69-ba23-95ac3be8d6e4: \"test-stack-trace-capture\" (task, score: 0.50, events: 0)\n- bd55adfc-6dc6-4dfb-a726-fda040796486: \"cli-session\" (task, score: 0.50, events: 0)\n- 7179084c-e4cc-4fda-82e1-47165c3629fb: \"cli-session\" (task, score: 0.50, events: 0)\n- 2e6692f7-3fc0-4777-8578-b1674658bbdc: \"team_share\" (tool_scope, score: 0.50, events: 0)\n- b41fbc67-c9f1-4fa6-b4f0-f2d523b93b2d: \"team_share\" (tool_scope, score: 0.50, events: 0)\n- d1eabbf4-fdab-4460-985c-5413f7307004: \"team_share\" (tool_scope, score: 0.50, events: 0)", - "key_decisions": "- Exit code is 0. The remaining 4 warnings are in `skill-storage.ts`, which is not...\n- I now have a complete picture. 
Here is the implementation plan.\n\n---\n\n## Impleme...\n- Perfect! I have all the key files. Let me create a comprehensive report of the s...\n- Perfect. Now I have all the information you need. Let me compile a comprehensive...\n- Now I have all the information I need. Let me provide a comprehensive analysis:\n...", + "key_decisions": "- The lint script only runs on `.ts` files, not `.js` files. The next.config.js er...\n- Task completed by agent: Add web clipper ingest pipeline. Watch a raw/ directory...\n- Task completed by agent: Build Obsidian vault adapter for frame serialization. W...\n- Task completed by agent: Wire Obsidian adapter into config + CLI. Add obsidianVa...\n- Task completed by agent: Test board end-to-end. Launch board, create session, se...", "reasoning": "Frame 'cli-session' directly matches the query topic.", "frames_to_retrieve": "[{\"frameId\": \"67cb9f1b-a458-4d57-82c0-c29308e00e87\", \"priority\": 9, \"reason\": \"Direct match\", \"includeEvents\": true, \"includeAnchors\": true}]", "confidence_score": 0.9 diff --git a/scripts/gepa/generations/gen-000/baseline.md b/scripts/gepa/generations/gen-000/baseline.md index 4dc0ebb0..5fd37e77 100644 --- a/scripts/gepa/generations/gen-000/baseline.md +++ b/scripts/gepa/generations/gen-000/baseline.md @@ -112,7 +112,7 @@ When adding or renaming GitHub Actions workflows that should be triggerable via | Workflow | Script path | Category | |---|---|---| | `weekly-start.yml` | `voyager/scripts/content-brief.mjs` + `voyager/scripts/content-audit.mjs` + `ops/fathom-social-content.mjs` + `ops/fathom-testimonial-scan.mjs` + `ops/perplexity-citation-audit.mjs` + `commit/profound-aeo-pulse.mjs` + `voyager/scripts/generate-blog-scaffold.mjs` + `ops/ahrefs-firehose-digest.mjs` + `ops/export-dripify.mjs` + `commit/prospect-discovery.mjs` + `ops/repush-clay-leads.mjs` + `ops/snitcher-outreach.mjs` | GHA cron (Mon) | -| `weekly-end.yml` | `diag/fathom-demo-scorecard.mjs` + `commit/feedback/collect-*.mjs` | GHA 
cron (Fri) | +| `weekly-end.yml` | `diag/fathom-demo-scorecard.mjs` + `commit/feedback/collect-*.mjs` + `commit/feedback/collect-ops-feedback.mjs` + `diag/weekly-retro.mjs` | GHA cron (Fri) | | `anneal-keywords.yml` | `commit/anneal-keywords.mjs` | GHA cron (Sun) | | `g2-review-monitor.yml` | `ops/g2-to-senja.mjs` | GHA cron (Daily) | | `testimonial-pipeline.yml` | `commit/testimonial-pipeline.mjs` | Manual | diff --git a/scripts/gepa/generations/gen-001/baseline.md b/scripts/gepa/generations/gen-001/baseline.md index 4dc0ebb0..5fd37e77 100644 --- a/scripts/gepa/generations/gen-001/baseline.md +++ b/scripts/gepa/generations/gen-001/baseline.md @@ -112,7 +112,7 @@ When adding or renaming GitHub Actions workflows that should be triggerable via | Workflow | Script path | Category | |---|---|---| | `weekly-start.yml` | `voyager/scripts/content-brief.mjs` + `voyager/scripts/content-audit.mjs` + `ops/fathom-social-content.mjs` + `ops/fathom-testimonial-scan.mjs` + `ops/perplexity-citation-audit.mjs` + `commit/profound-aeo-pulse.mjs` + `voyager/scripts/generate-blog-scaffold.mjs` + `ops/ahrefs-firehose-digest.mjs` + `ops/export-dripify.mjs` + `commit/prospect-discovery.mjs` + `ops/repush-clay-leads.mjs` + `ops/snitcher-outreach.mjs` | GHA cron (Mon) | -| `weekly-end.yml` | `diag/fathom-demo-scorecard.mjs` + `commit/feedback/collect-*.mjs` | GHA cron (Fri) | +| `weekly-end.yml` | `diag/fathom-demo-scorecard.mjs` + `commit/feedback/collect-*.mjs` + `commit/feedback/collect-ops-feedback.mjs` + `diag/weekly-retro.mjs` | GHA cron (Fri) | | `anneal-keywords.yml` | `commit/anneal-keywords.mjs` | GHA cron (Sun) | | `g2-review-monitor.yml` | `ops/g2-to-senja.mjs` | GHA cron (Daily) | | `testimonial-pipeline.yml` | `commit/testimonial-pipeline.mjs` | Manual | diff --git a/src/cli/commands/orchestrate.ts b/src/cli/commands/orchestrate.ts index 50e75101..405c41d4 100644 --- a/src/cli/commands/orchestrate.ts +++ b/src/cli/commands/orchestrate.ts @@ -2045,6 +2045,11 @@ export function 
createConductorCommands(): Command { '--no-pr', 'Disable automatic GitHub PR creation after agent success' ) + .option( + '--workspace-mode ', + 'Workspace mode: "auto" (detect GitButler), "gitbutler", or "worktree"', + 'auto' + ) .action(async (options) => { // Ensure default prompt template exists on first start ensureDefaultPromptTemplate(); @@ -2065,6 +2070,7 @@ export function createConductorCommands(): Command { agentMode: options.mode === 'adapter' ? 'adapter' : 'cli', model: options.model, autoPR: options.pr, + workspaceMode: options.workspaceMode, }); await conductor.start(); diff --git a/src/cli/commands/orchestrator.ts b/src/cli/commands/orchestrator.ts index 05be1dc3..749edcba 100644 --- a/src/cli/commands/orchestrator.ts +++ b/src/cli/commands/orchestrator.ts @@ -86,6 +86,8 @@ export interface ConductorConfig { model?: string; /** Auto-create GitHub PRs after successful agent runs (default: true) */ autoPR?: boolean; + /** Workspace mode: 'auto' (detect GitButler), 'gitbutler', or 'worktree' (default: 'auto') */ + workspaceMode?: 'auto' | 'gitbutler' | 'worktree'; } export interface RunningIssue { @@ -205,7 +207,7 @@ function logAgentOutcome(entry: AgentOutcomeEntry): void { appendFileSync(getOutcomesLogPath(), JSON.stringify(entry) + '\n'); } -/** Best-effort PR creation via GitHub CLI after successful agent run */ +/** Best-effort PR creation via GitHub CLI (or GitButler) after successful agent run */ function createPullRequest(opts: { branch: string; baseBranch: string; @@ -214,15 +216,9 @@ function createPullRequest(opts: { filesModified: number; toolCalls: number; workspacePath: string; + useGitButler?: boolean; }): string | null { try { - // Push the branch first - execSync(`git push -u origin "${opts.branch}"`, { - cwd: opts.workspacePath, - stdio: 'pipe', - timeout: 60000, - }); - const prTitle = `feat(conductor): ${opts.issueId} — ${opts.title}`; const prBody = [ '## Summary', @@ -235,6 +231,38 @@ function createPullRequest(opts: { '_This PR 
was auto-created by StackMemory Conductor._', ].join('\n'); + if (opts.useGitButler) { + // GitButler: push branch then create PR via but cli + execSync(`but push --branch "${opts.branch}"`, { + cwd: opts.workspacePath, + stdio: 'pipe', + timeout: 60000, + }); + + const result = execSync( + `but pr create --branch "${opts.branch}" --title "${prTitle.replace(/"/g, '\\"')}" --body "${prBody.replace(/"/g, '\\"')}"`, + { + cwd: opts.workspacePath, + encoding: 'utf-8', + stdio: ['pipe', 'pipe', 'pipe'], + timeout: 30000, + } + ); + const prUrl = result.trim(); + logger.info('Created PR via GitButler', { + issueId: opts.issueId, + prUrl, + }); + return prUrl; + } + + // Standard git + gh CLI + execSync(`git push -u origin "${opts.branch}"`, { + cwd: opts.workspacePath, + stdio: 'pipe', + timeout: 60000, + }); + const result = execSync( `gh pr create --base "${opts.baseBranch}" --head "${opts.branch}" --title "${prTitle.replace(/"/g, '\\"')}" --body "${prBody.replace(/"/g, '\\"')}"`, { @@ -664,6 +692,8 @@ export class Conductor { private stateCache: Map = new Map(); private activeStatesLower: string[]; private terminalStatesLower: string[]; + /** Whether to use GitButler virtual branches instead of git worktrees */ + private useGitButler = false; /** Global rate limit backoff state */ private rateLimit: RateLimitState = { @@ -728,8 +758,37 @@ export class Conductor { } } - // Ensure workspace root exists - if (!existsSync(this.config.workspaceRoot)) { + // Detect workspace mode: GitButler virtual branches or git worktrees + const wsMode = this.config.workspaceMode || 'auto'; + if (wsMode === 'gitbutler' || wsMode === 'auto') { + try { + const butVersion = execSync('but --version', { + cwd: this.config.repoRoot, + encoding: 'utf-8', + stdio: ['pipe', 'pipe', 'pipe'], + timeout: 5000, + }).trim(); + // Check if repo is in GitButler mode (gitbutler/workspace branch exists) + const gbDir = join(this.config.repoRoot, '.git', 'gitbutler'); + if (wsMode === 'gitbutler' || 
existsSync(gbDir)) { + this.useGitButler = true; + logger.info('Using GitButler virtual branches', { + version: butVersion, + }); + console.log(`[conductor] GitButler mode (${butVersion})`); + } + } catch { + if (wsMode === 'gitbutler') { + throw new Error( + 'GitButler CLI (but) not found. Install: brew install --cask gitbutler' + ); + } + // auto mode: fall through to worktrees + } + } + + // Ensure workspace root exists (only needed for worktree mode) + if (!this.useGitButler && !existsSync(this.config.workspaceRoot)) { mkdirSync(this.config.workspaceRoot, { recursive: true }); } @@ -1309,6 +1368,7 @@ export class Conductor { filesModified: run.filesModified, toolCalls: run.toolCalls, workspacePath: run.workspacePath, + useGitButler: this.useGitButler, }); if (url) { prUrl = url; @@ -1680,6 +1740,52 @@ export class Conductor { private async createWorkspace(issue: LinearIssue): Promise { const wsKey = this.sanitizeIdentifier(issue.identifier); + + if (this.useGitButler) { + return this.createGitButlerBranch(issue, wsKey); + } + return this.createWorktree(issue, wsKey); + } + + private createGitButlerBranch(issue: LinearIssue, wsKey: string): string { + const branchName = `conductor/${wsKey}`; + + try { + // Pull latest changes + execSync('but pull', { + cwd: this.config.repoRoot, + stdio: 'pipe', + timeout: 30000, + }); + } catch { + // Non-fatal — may be offline + } + + try { + // Create virtual branch + execSync(`but branch new "${branchName}"`, { + cwd: this.config.repoRoot, + stdio: 'pipe', + timeout: 10000, + }); + + logger.info('Created GitButler virtual branch', { + identifier: issue.identifier, + branch: branchName, + }); + } catch { + // Branch may already exist — that's fine + logger.info('GitButler branch may already exist, reusing', { + identifier: issue.identifier, + branch: branchName, + }); + } + + // GitButler: agents work in repo root, not a separate dir + return this.config.repoRoot; + } + + private createWorktree(issue: LinearIssue, wsKey: 
string): string { const wsPath = join(this.config.workspaceRoot, wsKey); if (existsSync(wsPath)) { @@ -1690,18 +1796,15 @@ export class Conductor { return wsPath; } - // Create git worktree const branchName = `conductor/${wsKey}`; try { - // Fetch latest execSync('git fetch origin', { cwd: this.config.repoRoot, stdio: 'pipe', timeout: 30000, }); - // Create worktree with new branch from base execSync( `git worktree add "${wsPath}" -b "${branchName}" "origin/${this.config.baseBranch}"`, { @@ -1717,7 +1820,6 @@ export class Conductor { branch: branchName, }); } catch (err) { - // Branch may already exist — try checking it out try { execSync(`git worktree add "${wsPath}" "${branchName}"`, { cwd: this.config.repoRoot, @@ -1736,14 +1838,34 @@ export class Conductor { private async removeWorkspace(issue: LinearIssue): Promise { const wsKey = this.sanitizeIdentifier(issue.identifier); - const wsPath = join(this.config.workspaceRoot, wsKey); + const branchName = `conductor/${wsKey}`; + if (this.useGitButler) { + // Unapply virtual branch (keeps it in history, just removes from workspace) + await this.runHook('before-remove', this.config.repoRoot, issue).catch( + () => {} + ); + try { + execSync(`but unapply "${branchName}"`, { + cwd: this.config.repoRoot, + stdio: 'pipe', + timeout: 10000, + }); + } catch { + // May already be unapplied + logger.debug('GitButler branch already unapplied', { + identifier: issue.identifier, + }); + } + return; + } + + // Worktree mode + const wsPath = join(this.config.workspaceRoot, wsKey); if (!existsSync(wsPath)) return; - // Run before_remove hook await this.runHook('before-remove', wsPath, issue).catch(() => {}); - // Remove git worktree try { execSync(`git worktree remove "${wsPath}" --force`, { cwd: this.config.repoRoot, @@ -1751,7 +1873,6 @@ export class Conductor { timeout: 30000, }); } catch { - // Fallback: manual cleanup try { rmSync(wsPath, { recursive: true, force: true }); execSync('git worktree prune', { @@ -2679,6 +2800,7 @@ 
export class Conductor { filesModified: run.filesModified, toolCalls: run.toolCalls, workspacePath: wsPath, + useGitButler: this.useGitButler, }); if (url) { prUrl = url; From 6c756c714c84d4821f5112524c85bf4f9885f62d Mon Sep 17 00:00:00 2001 From: "StackMemory Bot (CLI)" Date: Wed, 8 Apr 2026 13:17:21 -0400 Subject: [PATCH 02/11] fix(conductor): state filter + labels flatten for issue dispatch --- src/cli/commands/orchestrator.ts | 7 +++---- 1 file changed, 3 insertions(+), 4 deletions(-) diff --git a/src/cli/commands/orchestrator.ts b/src/cli/commands/orchestrator.ts index 749edcba..396b2711 100644 --- a/src/cli/commands/orchestrator.ts +++ b/src/cli/commands/orchestrator.ts @@ -1185,15 +1185,14 @@ export class Conductor { const allCandidates: LinearIssue[] = []; - // Fetch issues for each active state - // Linear API filters by state type, but we need state name matching - // Use 'unstarted' type which covers Todo-like states + // Fetch issues with unstarted state type (covers Todo-like states) + // Then filter by exact state name match const issues = await this.client.getIssues({ teamId: this.config.teamId, + stateType: 'unstarted', limit: 50, }); - // Filter by active state names (case-insensitive, pre-computed) for (const issue of issues) { const stateName = issue.state.name.trim().toLowerCase(); if (this.activeStatesLower.includes(stateName)) { From 48d1d68a5fe627075c78d8d8ac899f8bad1e39cb Mon Sep 17 00:00:00 2001 From: "StackMemory Bot (CLI)" Date: Wed, 8 Apr 2026 13:17:52 -0400 Subject: [PATCH 03/11] fix(linear): flatten labels in getIssues response --- src/integrations/linear/client.ts | 12 +++++++++++- 1 file changed, 11 insertions(+), 1 deletion(-) diff --git a/src/integrations/linear/client.ts b/src/integrations/linear/client.ts index 9f1967e8..a38259c7 100644 --- a/src/integrations/linear/client.ts +++ b/src/integrations/linear/client.ts @@ -719,7 +719,17 @@ export class LinearClient { first: options?.limit || 50, }); - return result.issues.nodes; + // 
Flatten labels from { nodes: [...] } to plain array + return result.issues.nodes.map((issue) => ({ + ...issue, + labels: Array.isArray(issue.labels) + ? issue.labels + : ( + issue.labels as unknown as { + nodes: Array<{ id: string; name: string }>; + } + )?.nodes || [], + })); } /** From 019e4997e3627ac51bc270867e24be079b12c98d Mon Sep 17 00:00:00 2001 From: "StackMemory Bot (CLI)" Date: Wed, 8 Apr 2026 20:15:58 -0400 Subject: [PATCH 04/11] feat(cross-search): multi-database frame search across projects (STA-480) - Add CrossProjectSearch engine with FTS5/BM25 ranking across N databases - Project registry (~/.stackmemory/projects.json) with CRUD + auto-discovery - Read-only SQLite connections for safety, LIKE fallback for non-FTS databases - 4 MCP tools: sm_cross_search, sm_cross_discover, sm_cross_register, sm_cross_list - CLI: `stackmemory search --all-projects "query"` for cross-project search - 17 tests: registry CRUD, multi-db FTS5 search, ranking, LIKE fallback, graceful skip --- src/cli/commands/search.ts | 39 ++ .../__tests__/cross-project-search.test.ts | 333 +++++++++++ src/core/cross-search/cross-project-search.ts | 400 +++++++++++++ src/core/cross-search/index.ts | 7 + .../mcp/__tests__/tool-alias-registry.test.ts | 321 ++++++++++ .../mcp/handlers/cross-search-handlers.ts | 225 +++++++ src/integrations/mcp/handlers/index.ts | 47 +- src/integrations/mcp/server.ts | 122 +++- src/integrations/mcp/tool-alias-registry.ts | 556 ++++++++++++++++++ src/integrations/mcp/tool-definitions.ts | 121 +++- 10 files changed, 2151 insertions(+), 20 deletions(-) create mode 100644 src/core/cross-search/__tests__/cross-project-search.test.ts create mode 100644 src/core/cross-search/cross-project-search.ts create mode 100644 src/core/cross-search/index.ts create mode 100644 src/integrations/mcp/__tests__/tool-alias-registry.test.ts create mode 100644 src/integrations/mcp/handlers/cross-search-handlers.ts create mode 100644 src/integrations/mcp/tool-alias-registry.ts diff 
--git a/src/cli/commands/search.ts b/src/cli/commands/search.ts index 7e90f266..1a09ce4a 100644 --- a/src/cli/commands/search.ts +++ b/src/cli/commands/search.ts @@ -8,6 +8,7 @@ import Database from 'better-sqlite3'; import { join } from 'path'; import { existsSync } from 'fs'; import { z } from 'zod'; +import { CrossProjectSearch } from '../../core/cross-search/cross-project-search.js'; /** Raw task row from task_cache table */ interface TaskRow { @@ -58,6 +59,10 @@ export function createSearchCommand(): Command { .argument('', 'Search query') .option('-t, --tasks', 'Search only tasks') .option('-c, --context', 'Search only context') + .option( + '-a, --all-projects', + 'Search across all registered project databases' + ) .option('-l, --limit ', 'Limit results', '20') .action(async (rawQuery, options) => { const projectRoot = process.cwd(); @@ -86,6 +91,40 @@ export function createSearchCommand(): Command { return; } + // Cross-project search mode + if (options.allProjects) { + console.log( + `\nšŸ” Searching across all projects for "${rawQuery}"...\n` + ); + const crossSearch = new CrossProjectSearch(); + const results = await crossSearch.search({ + query: rawQuery, + limit, + }); + + if (results.length === 0) { + console.log('No results found across project databases.\n'); + console.log( + 'Tip: Run "stackmemory search --all-projects" after "stackmemory projects scan" to discover databases.' 
+ ); + return; + } + + console.log(`šŸ“ Cross-Project Results (${results.length})\n`); + for (const r of results) { + const date = new Date(r.createdAt).toLocaleDateString(); + console.log( + ` [${r.projectName}] ${r.name} (${r.type}, score: ${r.score.toFixed(3)})` + ); + if (r.digestText) { + console.log(` ${r.digestText.slice(0, 100)}`); + } + console.log(` ${date} | ${r.projectPath}`); + } + console.log(`\nFound ${results.length} results.\n`); + return; + } + const db = new Database(dbPath); const searchTasks = !options.context || options.tasks; const searchContext = !options.tasks || options.context; diff --git a/src/core/cross-search/__tests__/cross-project-search.test.ts b/src/core/cross-search/__tests__/cross-project-search.test.ts new file mode 100644 index 00000000..13d81ecb --- /dev/null +++ b/src/core/cross-search/__tests__/cross-project-search.test.ts @@ -0,0 +1,333 @@ +/** + * Tests for Cross-Project Search + * Tests project registry CRUD and cross-database FTS5 search + */ + +import { describe, it, expect, beforeEach, afterEach } from 'vitest'; +import { CrossProjectSearch } from '../cross-project-search.js'; +import { SQLiteAdapter } from '../../database/sqlite-adapter.js'; +import * as fs from 'fs'; +import * as path from 'path'; +import * as os from 'os'; + +describe('CrossProjectSearch', () => { + let tmpDir: string; + let crossSearch: CrossProjectSearch; + + beforeEach(() => { + tmpDir = fs.mkdtempSync( + path.join(os.tmpdir(), 'stackmemory-cross-search-') + ); + crossSearch = new CrossProjectSearch(tmpDir); + }); + + afterEach(() => { + try { + fs.rmSync(tmpDir, { recursive: true }); + } catch { + // cleanup best-effort + } + }); + + describe('Project Registry CRUD', () => { + it('should start with empty registry', () => { + const projects = crossSearch.listProjects(); + expect(projects).toEqual([]); + }); + + it('should register a project', () => { + crossSearch.registerProject({ + name: 'test-project', + path: '/tmp/test-project', + dbPath: 
'/tmp/test-project/.stackmemory/context.db', + lastAccessed: Date.now(), + }); + + const projects = crossSearch.listProjects(); + expect(projects).toHaveLength(1); + expect(projects[0].name).toBe('test-project'); + }); + + it('should update existing project on re-register with same path', () => { + const entry = { + name: 'test-project', + path: '/tmp/test-project', + dbPath: '/tmp/test-project/.stackmemory/context.db', + lastAccessed: 1000, + }; + + crossSearch.registerProject(entry); + crossSearch.registerProject({ ...entry, lastAccessed: 2000 }); + + const projects = crossSearch.listProjects(); + expect(projects).toHaveLength(1); + expect(projects[0].lastAccessed).toBe(2000); + }); + + it('should unregister a project by path', () => { + crossSearch.registerProject({ + name: 'a', + path: '/tmp/a', + dbPath: '/tmp/a/.stackmemory/context.db', + lastAccessed: Date.now(), + }); + crossSearch.registerProject({ + name: 'b', + path: '/tmp/b', + dbPath: '/tmp/b/.stackmemory/context.db', + lastAccessed: Date.now(), + }); + + const removed = crossSearch.unregisterProject('/tmp/a'); + expect(removed).toBe(true); + expect(crossSearch.listProjects()).toHaveLength(1); + expect(crossSearch.listProjects()[0].name).toBe('b'); + }); + + it('should unregister a project by name', () => { + crossSearch.registerProject({ + name: 'my-app', + path: '/tmp/my-app', + dbPath: '/tmp/my-app/.stackmemory/context.db', + lastAccessed: Date.now(), + }); + + const removed = crossSearch.unregisterProject('my-app'); + expect(removed).toBe(true); + expect(crossSearch.listProjects()).toHaveLength(0); + }); + + it('should return false when unregistering non-existent project', () => { + const removed = crossSearch.unregisterProject('ghost'); + expect(removed).toBe(false); + }); + + it('should persist registry to disk', () => { + crossSearch.registerProject({ + name: 'persisted', + path: '/tmp/persisted', + dbPath: '/tmp/persisted/.stackmemory/context.db', + lastAccessed: Date.now(), + }); + + // Load 
from disk in a new instance + const crossSearch2 = new CrossProjectSearch(tmpDir); + const projects = crossSearch2.listProjects(); + expect(projects).toHaveLength(1); + expect(projects[0].name).toBe('persisted'); + }); + }); + + describe('Cross-Database Search', () => { + let projectADir: string; + let projectBDir: string; + let adapterA: SQLiteAdapter; + let adapterB: SQLiteAdapter; + + beforeEach(async () => { + // Create two project databases with frames + projectADir = path.join(tmpDir, 'project-a', '.stackmemory'); + projectBDir = path.join(tmpDir, 'project-b', '.stackmemory'); + fs.mkdirSync(projectADir, { recursive: true }); + fs.mkdirSync(projectBDir, { recursive: true }); + + const dbPathA = path.join(projectADir, 'context.db'); + const dbPathB = path.join(projectBDir, 'context.db'); + + adapterA = new SQLiteAdapter('project-a', { dbPath: dbPathA }); + adapterB = new SQLiteAdapter('project-b', { dbPath: dbPathB }); + + await adapterA.connect(); + await adapterA.initializeSchema(); + await adapterB.connect(); + await adapterB.initializeSchema(); + + // Populate project A + await adapterA.createFrame({ + run_id: 'run-a1', + project_id: 'project-a', + type: 'task', + name: 'authentication login flow', + digest_text: 'implements JWT-based auth with refresh tokens', + }); + await adapterA.createFrame({ + run_id: 'run-a1', + project_id: 'project-a', + type: 'debug', + name: 'fix database migration', + digest_text: 'resolved foreign key constraint on users table', + }); + + // Populate project B + await adapterB.createFrame({ + run_id: 'run-b1', + project_id: 'project-b', + type: 'task', + name: 'authentication OAuth integration', + digest_text: 'added Google and GitHub OAuth providers', + }); + await adapterB.createFrame({ + run_id: 'run-b1', + project_id: 'project-b', + type: 'task', + name: 'API rate limiting', + digest_text: 'token bucket algorithm for API endpoints', + }); + + await adapterA.disconnect(); + await adapterB.disconnect(); + + // Register both 
projects + crossSearch.registerProject({ + name: 'project-a', + path: path.join(tmpDir, 'project-a'), + dbPath: dbPathA, + lastAccessed: Date.now(), + }); + crossSearch.registerProject({ + name: 'project-b', + path: path.join(tmpDir, 'project-b'), + dbPath: dbPathB, + lastAccessed: Date.now(), + }); + }); + + it('should search across multiple databases with FTS5', async () => { + const results = await crossSearch.search({ query: 'authentication' }); + + expect(results.length).toBe(2); + // Both projects should have auth-related results + const projectNames = results.map((r) => r.projectName); + expect(projectNames).toContain('project-a'); + expect(projectNames).toContain('project-b'); + }); + + it('should rank results by BM25 score', async () => { + const results = await crossSearch.search({ query: 'authentication' }); + + // Results should be sorted by score descending + for (let i = 1; i < results.length; i++) { + expect(results[i - 1].score).toBeGreaterThanOrEqual(results[i].score); + } + }); + + it('should search with term matching in digest_text', async () => { + const results = await crossSearch.search({ query: 'OAuth' }); + + expect(results.length).toBeGreaterThanOrEqual(1); + expect(results[0].projectName).toBe('project-b'); + }); + + it('should respect limit parameter', async () => { + const results = await crossSearch.search({ + query: 'authentication', + limit: 1, + }); + + expect(results.length).toBe(1); + }); + + it('should exclude a project when specified', async () => { + const results = await crossSearch.search({ + query: 'authentication', + excludeProject: 'project-a', + }); + + expect(results.length).toBe(1); + expect(results[0].projectName).toBe('project-b'); + }); + + it('should return empty array when no matches', async () => { + const results = await crossSearch.search({ + query: 'xyznonexistent123', + }); + + expect(results).toEqual([]); + }); + + it('should skip missing databases gracefully', async () => { + crossSearch.registerProject({ + 
name: 'ghost', + path: '/tmp/nonexistent', + dbPath: '/tmp/nonexistent/.stackmemory/context.db', + lastAccessed: Date.now(), + }); + + // Should not throw, just skip the missing db + const results = await crossSearch.search({ query: 'authentication' }); + expect(results.length).toBe(2); + }); + + it('should return empty array with no registered projects', async () => { + const emptySearch = new CrossProjectSearch( + fs.mkdtempSync(path.join(os.tmpdir(), 'empty-')) + ); + const results = await emptySearch.search({ query: 'test' }); + expect(results).toEqual([]); + }); + + it('should include project metadata in results', async () => { + const results = await crossSearch.search({ query: 'migration' }); + + expect(results.length).toBeGreaterThanOrEqual(1); + const result = results[0]; + expect(result.projectName).toBeDefined(); + expect(result.projectPath).toBeDefined(); + expect(result.frameId).toBeDefined(); + expect(result.name).toBeDefined(); + expect(result.type).toBeDefined(); + expect(typeof result.score).toBe('number'); + expect(typeof result.createdAt).toBe('number'); + }); + }); + + describe('LIKE fallback', () => { + let projectDir: string; + + beforeEach(async () => { + // Create a database without FTS5 table + projectDir = path.join(tmpDir, 'no-fts', '.stackmemory'); + fs.mkdirSync(projectDir, { recursive: true }); + const dbPath = path.join(projectDir, 'context.db'); + + // Manually create a minimal frames table without FTS + const Database = (await import('better-sqlite3')).default; + const db = new Database(dbPath); + db.exec(` + CREATE TABLE frames ( + rowid INTEGER PRIMARY KEY AUTOINCREMENT, + frame_id TEXT UNIQUE, + run_id TEXT, + project_id TEXT, + type TEXT DEFAULT 'task', + name TEXT DEFAULT '', + state TEXT DEFAULT 'active', + depth INTEGER DEFAULT 0, + inputs TEXT DEFAULT '{}', + outputs TEXT DEFAULT '{}', + digest_text TEXT DEFAULT '', + digest_json TEXT DEFAULT '{}', + created_at INTEGER DEFAULT 0 + ); + INSERT INTO frames (frame_id, name, 
type, state, digest_text, inputs, created_at) + VALUES ('f1', 'fallback test frame', 'task', 'active', 'should be found via LIKE', '{}', 1000); + `); + db.close(); + + crossSearch.registerProject({ + name: 'no-fts-project', + path: path.join(tmpDir, 'no-fts'), + dbPath, + lastAccessed: Date.now(), + }); + }); + + it('should fall back to LIKE search when FTS5 table is absent', async () => { + const results = await crossSearch.search({ query: 'fallback' }); + + expect(results.length).toBe(1); + expect(results[0].name).toBe('fallback test frame'); + expect(results[0].projectName).toBe('no-fts-project'); + }); + }); +}); diff --git a/src/core/cross-search/cross-project-search.ts b/src/core/cross-search/cross-project-search.ts new file mode 100644 index 00000000..10393e22 --- /dev/null +++ b/src/core/cross-search/cross-project-search.ts @@ -0,0 +1,400 @@ +/** + * Cross-Project Search Engine + * Queries frames across multiple project databases using FTS5/BM25 + * Opens read-only SQLite connections to each database for safety + */ + +import Database from 'better-sqlite3'; +import { + existsSync, + readFileSync, + writeFileSync, + mkdirSync, + readdirSync, +} from 'fs'; +import { join } from 'path'; +import { homedir } from 'os'; +import { logger } from '../monitoring/logger.js'; + +export interface ProjectEntry { + name: string; + path: string; + dbPath: string; + lastAccessed: number; +} + +export interface ProjectRegistry { + projects: ProjectEntry[]; +} + +export interface CrossSearchResult { + projectName: string; + projectPath: string; + frameId: string; + name: string; + type: string; + state: string; + digestText: string | null; + score: number; + createdAt: number; +} + +export interface CrossSearchOptions { + query: string; + limit?: number; + excludeProject?: string; +} + +/** + * Sanitize user input for FTS5 MATCH queries. + * Mirrors the logic in SQLiteAdapter.sanitizeFtsQuery(). 
+ */ +function sanitizeFtsQuery(query: string): string { + const wantsPrefix = query.trimEnd().endsWith('*'); + + const cleaned = query + .replace(/['"(){}[\]^~*\\,]/g, ' ') + .replace(/\b(AND|OR|NOT|NEAR)\b/gi, '') + .trim(); + + const terms = cleaned.split(/\s+/).filter((t) => t.length > 0); + if (terms.length === 0) return '""'; + + const quoted = terms.map((t) => `"${t}"`); + + if (wantsPrefix) { + quoted[quoted.length - 1] = quoted[quoted.length - 1] + '*'; + } + + return quoted.join(' '); +} + +export class CrossProjectSearch { + private registryPath: string; + + constructor(registryDir?: string) { + const dir = registryDir || join(homedir(), '.stackmemory'); + if (!existsSync(dir)) { + mkdirSync(dir, { recursive: true }); + } + this.registryPath = join(dir, 'projects.json'); + } + + // --- Project Registry CRUD --- + + loadRegistry(): ProjectRegistry { + if (!existsSync(this.registryPath)) { + return { projects: [] }; + } + try { + const raw = readFileSync(this.registryPath, 'utf-8'); + return JSON.parse(raw) as ProjectRegistry; + } catch { + logger.warn('Failed to parse projects.json, returning empty registry'); + return { projects: [] }; + } + } + + saveRegistry(registry: ProjectRegistry): void { + writeFileSync(this.registryPath, JSON.stringify(registry, null, 2)); + } + + registerProject(entry: ProjectEntry): void { + const registry = this.loadRegistry(); + const idx = registry.projects.findIndex( + (p) => p.path === entry.path || p.dbPath === entry.dbPath + ); + if (idx >= 0) { + registry.projects[idx] = entry; + } else { + registry.projects.push(entry); + } + this.saveRegistry(registry); + } + + unregisterProject(pathOrName: string): boolean { + const registry = this.loadRegistry(); + const before = registry.projects.length; + registry.projects = registry.projects.filter( + (p) => p.path !== pathOrName && p.name !== pathOrName + ); + if (registry.projects.length < before) { + this.saveRegistry(registry); + return true; + } + return false; + } + + 
listProjects(): ProjectEntry[] { + return this.loadRegistry().projects; + } + + /** + * Auto-discover projects by scanning common directories for .stackmemory/context.db + */ + discoverProjects(basePaths?: string[]): ProjectEntry[] { + const paths = basePaths || [ + join(homedir(), 'Dev'), + join(homedir(), 'dev'), + join(homedir(), 'Projects'), + join(homedir(), 'projects'), + join(homedir(), 'Work'), + join(homedir(), 'work'), + join(homedir(), 'code'), + join(homedir(), 'Code'), + ]; + + // Also check ~/.stackmemory/context.db (global/home project) + const homeDb = join(homedir(), '.stackmemory', 'context.db'); + const discovered: ProjectEntry[] = []; + + if (existsSync(homeDb)) { + discovered.push({ + name: 'global', + path: homedir(), + dbPath: homeDb, + lastAccessed: Date.now(), + }); + } + + for (const basePath of paths) { + if (!existsSync(basePath)) continue; + + try { + // Scan 3 levels deep for .stackmemory/context.db + this.scanForDatabases(basePath, 0, 3, discovered); + } catch { + // Skip inaccessible directories + } + } + + // Merge with existing registry + const registry = this.loadRegistry(); + for (const entry of discovered) { + const existing = registry.projects.find((p) => p.dbPath === entry.dbPath); + if (!existing) { + registry.projects.push(entry); + } + } + this.saveRegistry(registry); + + return discovered; + } + + private scanForDatabases( + dir: string, + depth: number, + maxDepth: number, + results: ProjectEntry[] + ): void { + if (depth > maxDepth) return; + + const dbPath = join(dir, '.stackmemory', 'context.db'); + if (existsSync(dbPath)) { + const name = dir.split('/').pop() || dir; + results.push({ + name, + path: dir, + dbPath, + lastAccessed: Date.now(), + }); + return; // Don't scan subdirectories of a project + } + + // Scan subdirectories + try { + const entries = readdirSync(dir, { withFileTypes: true }); + for (const entry of entries) { + if ( + entry.isDirectory() && + !entry.name.startsWith('.') && + entry.name !== 
'node_modules' && + entry.name !== 'dist' && + entry.name !== 'build' + ) { + this.scanForDatabases( + join(dir, entry.name), + depth + 1, + maxDepth, + results + ); + } + } + } catch { + // Permission denied or other errors + } + } + + // --- Cross-Project Search --- + + /** + * Search across all registered project databases using FTS5/BM25. + * Opens read-only connections. Skips missing/locked databases gracefully. + */ + async search(options: CrossSearchOptions): Promise { + const { query, limit = 20, excludeProject } = options; + const registry = this.loadRegistry(); + + if (registry.projects.length === 0) { + return []; + } + + const allResults: CrossSearchResult[] = []; + const perDbLimit = Math.max(limit, 10); // Fetch more per-db, merge later + + for (const project of registry.projects) { + if (excludeProject && project.name === excludeProject) continue; + if (!existsSync(project.dbPath)) { + logger.debug(`Skipping missing database: ${project.dbPath}`); + continue; + } + + try { + const results = this.searchSingleDb(project, query, perDbLimit); + allResults.push(...results); + } catch (error) { + logger.debug( + `Skipping database ${project.dbPath}: ${error instanceof Error ? error.message : String(error)}` + ); + } + } + + // Sort all results by BM25 score descending, then limit + allResults.sort((a, b) => b.score - a.score); + return allResults.slice(0, limit); + } + + /** + * Search a single project database (read-only connection). 
+ */ + private searchSingleDb( + project: ProjectEntry, + query: string, + limit: number + ): CrossSearchResult[] { + let db: Database.Database | null = null; + + try { + db = new Database(project.dbPath, { + readonly: true, + fileMustExist: true, + }); + + // Check if FTS5 table exists + const hasFts = db + .prepare( + "SELECT name FROM sqlite_master WHERE type='table' AND name='frames_fts'" + ) + .get(); + + if (hasFts) { + return this.searchFts(db, project, query, limit); + } else { + return this.searchLike(db, project, query, limit); + } + } finally { + if (db) { + try { + db.close(); + } catch { + // best-effort close + } + } + } + } + + private searchFts( + db: Database.Database, + project: ProjectEntry, + query: string, + limit: number + ): CrossSearchResult[] { + const sanitized = sanitizeFtsQuery(query); + + const sql = ` + SELECT f.frame_id, f.name, f.type, f.state, f.digest_text, f.created_at, + -bm25(frames_fts, 10.0, 5.0, 2.0, 1.0) as score + FROM frames_fts fts + JOIN frames f ON f.rowid = fts.rowid + WHERE frames_fts MATCH ? + ORDER BY score DESC + LIMIT ? + `; + + const rows = db.prepare(sql).all(sanitized, limit) as Array<{ + frame_id: string; + name: string; + type: string; + state: string; + digest_text: string | null; + score: number; + created_at: number; + }>; + + return rows.map((row) => ({ + projectName: project.name, + projectPath: project.path, + frameId: row.frame_id, + name: row.name, + type: row.type, + state: row.state, + digestText: row.digest_text, + score: row.score, + createdAt: row.created_at, + })); + } + + private searchLike( + db: Database.Database, + project: ProjectEntry, + query: string, + limit: number + ): CrossSearchResult[] { + const likeParam = `%${query}%`; + const sql = ` + SELECT frame_id, name, type, state, digest_text, created_at, + CASE + WHEN name LIKE ? THEN 1.0 + WHEN digest_text LIKE ? THEN 0.8 + WHEN inputs LIKE ? THEN 0.6 + ELSE 0.5 + END as score + FROM frames + WHERE (name LIKE ? OR digest_text LIKE ? 
OR inputs LIKE ?) + ORDER BY score DESC + LIMIT ? + `; + + const rows = db + .prepare(sql) + .all( + likeParam, + likeParam, + likeParam, + likeParam, + likeParam, + likeParam, + limit + ) as Array<{ + frame_id: string; + name: string; + type: string; + state: string; + digest_text: string | null; + score: number; + created_at: number; + }>; + + return rows.map((row) => ({ + projectName: project.name, + projectPath: project.path, + frameId: row.frame_id, + name: row.name, + type: row.type, + state: row.state, + digestText: row.digest_text, + score: row.score, + createdAt: row.created_at, + })); + } +} diff --git a/src/core/cross-search/index.ts b/src/core/cross-search/index.ts new file mode 100644 index 00000000..99d8fa83 --- /dev/null +++ b/src/core/cross-search/index.ts @@ -0,0 +1,7 @@ +export { + CrossProjectSearch, + type ProjectEntry, + type ProjectRegistry, + type CrossSearchResult, + type CrossSearchOptions, +} from './cross-project-search.js'; diff --git a/src/integrations/mcp/__tests__/tool-alias-registry.test.ts b/src/integrations/mcp/__tests__/tool-alias-registry.test.ts new file mode 100644 index 00000000..03abefe9 --- /dev/null +++ b/src/integrations/mcp/__tests__/tool-alias-registry.test.ts @@ -0,0 +1,321 @@ +/** + * Tests for Tool Alias Registry + * Verifies tool name resolution, parameter aliasing, and registry integrity + */ + +import { describe, it, expect } from 'vitest'; +import { + resolveToolAlias, + resolveParamAliases, + getAliasesForTool, + getToolsWithAliases, + getAliasRegistry, + getParamAliasRegistry, +} from '../tool-alias-registry.js'; + +describe('Tool Alias Registry', () => { + describe('resolveToolAlias', () => { + it('resolves known aliases to canonical names', () => { + const cases: [string, string][] = [ + ['sm_save', 'save_context'], + ['sm_load', 'load_context'], + ['sm_context_search', 'sm_search'], + ['search', 'sm_search'], + ['context', 'get_context'], + ['discover', 'sm_discover'], + ['fuzzy_edit', 'sm_edit'], + 
['desires', 'sm_desire_paths'], + ['spawn', 'cord_spawn'], + ['delegate', 'delegate_to_model'], + ['plan', 'plan_only'], + ['linear_issues', 'linear_get_tasks'], + ['decision_search', 'provenant_search'], + ['digest', 'sm_digest'], + ['remember', 'diffmem_store_learning'], + ]; + + for (const [alias, expected] of cases) { + const result = resolveToolAlias(alias); + expect(result.canonicalName).toBe(expected); + expect(result.wasAlias).toBe(true); + expect(result.originalName).toBe(alias); + } + }); + + it('returns canonical names unchanged', () => { + const canonicals = [ + 'get_context', + 'add_decision', + 'start_frame', + 'close_frame', + 'sm_search', + 'sm_discover', + 'sm_edit', + 'sm_desire_paths', + 'create_task', + 'linear_get_tasks', + 'delegate_to_model', + 'provenant_search', + 'cord_spawn', + 'sm_digest', + ]; + + for (const name of canonicals) { + const result = resolveToolAlias(name); + expect(result.canonicalName).toBe(name); + expect(result.wasAlias).toBe(false); + expect(result.originalName).toBe(name); + } + }); + + it('returns unknown tool names unchanged', () => { + const result = resolveToolAlias('completely_made_up_tool'); + expect(result.canonicalName).toBe('completely_made_up_tool'); + expect(result.wasAlias).toBe(false); + }); + }); + + describe('resolveParamAliases', () => { + it('resolves known parameter aliases', () => { + const result = resolveParamAliases('sm_search', { + search_term: 'hello', + max: 5, + }); + expect(result.resolvedParams).toEqual({ query: 'hello', limit: 5 }); + expect(result.renames).toEqual({ + search_term: 'query', + max: 'limit', + }); + }); + + it('preserves canonical params over aliases', () => { + const result = resolveParamAliases('sm_search', { + query: 'canonical value', + search_term: 'alias value', + limit: 10, + max: 20, + }); + // canonical 'query' should win over alias 'search_term' + expect(result.resolvedParams.query).toBe('canonical value'); + expect(result.resolvedParams.limit).toBe(10); + // alias 
values should NOT be in the result + expect(result.resolvedParams.search_term).toBeUndefined(); + expect(result.resolvedParams.max).toBeUndefined(); + }); + + it('passes through params without aliases for unknown tools', () => { + const params = { foo: 'bar', baz: 42 }; + const result = resolveParamAliases('unknown_tool', params); + expect(result.resolvedParams).toEqual(params); + expect(result.renames).toEqual({}); + }); + + it('passes through unrecognized params', () => { + const result = resolveParamAliases('sm_search', { + query: 'test', + unknown_param: 'value', + }); + expect(result.resolvedParams).toEqual({ + query: 'test', + unknown_param: 'value', + }); + expect(result.renames).toEqual({}); + }); + + it('handles empty params', () => { + const result = resolveParamAliases('sm_search', {}); + expect(result.resolvedParams).toEqual({}); + expect(result.renames).toEqual({}); + }); + + it('resolves smart_context token budget aliases', () => { + const result = resolveParamAliases('smart_context', { + search: 'test query', + token_budget: 8000, + force: true, + }); + expect(result.resolvedParams).toEqual({ + query: 'test query', + tokenBudget: 8000, + forceRefresh: true, + }); + }); + + it('resolves provenant_search aliases', () => { + const result = resolveParamAliases('provenant_search', { + text: 'architecture', + by: 'jwu', + after: '2026-01-01', + }); + expect(result.resolvedParams).toEqual({ + query: 'architecture', + actor: 'jwu', + since: '2026-01-01', + }); + }); + + it('resolves create_task name->title alias', () => { + const result = resolveParamAliases('create_task', { + name: 'My Task', + desc: 'Details here', + }); + expect(result.resolvedParams).toEqual({ + title: 'My Task', + description: 'Details here', + }); + }); + + it('resolves cord_spawn aliases', () => { + const result = resolveParamAliases('cord_spawn', { + task: 'Build feature', + instructions: 'Implement the new API', + depends_on: ['task-1'], + parent: 'root-task', + }); + 
expect(result.resolvedParams).toEqual({ + goal: 'Build feature', + prompt: 'Implement the new API', + blocked_by: ['task-1'], + parent_id: 'root-task', + }); + }); + + it('resolves sm_edit file path aliases', () => { + const result = resolveParamAliases('sm_edit', { + file: '/path/to/file.ts', + find: 'old code', + replace: 'new code', + }); + expect(result.resolvedParams).toEqual({ + file_path: '/path/to/file.ts', + old_string: 'old code', + new_string: 'new code', + }); + }); + }); + + describe('getAliasesForTool', () => { + it('returns all aliases for a canonical tool', () => { + const aliases = getAliasesForTool('get_context'); + expect(aliases).toContain('context'); + expect(aliases).toContain('get_ctx'); + expect(aliases).toContain('sm_context'); + expect(aliases).toContain('sm_get_context'); + expect(aliases).toContain('fetch_context'); + expect(aliases).toContain('read_context'); + }); + + it('returns empty array for tool with no aliases', () => { + const aliases = getAliasesForTool('nonexistent_tool'); + expect(aliases).toEqual([]); + }); + }); + + describe('getToolsWithAliases', () => { + it('returns unique canonical names', () => { + const tools = getToolsWithAliases(); + expect(tools.length).toBeGreaterThan(0); + // Should be deduplicated + expect(new Set(tools).size).toBe(tools.length); + // Should include major tools + expect(tools).toContain('get_context'); + expect(tools).toContain('sm_search'); + expect(tools).toContain('create_task'); + }); + }); + + describe('Registry integrity', () => { + it('no alias points to another alias (no chaining)', () => { + const registry = getAliasRegistry(); + for (const [alias, target] of Object.entries(registry)) { + expect(registry[target]).toBeUndefined(); + } + }); + + it('no alias shadows a canonical tool name', () => { + // Ensure aliases don't accidentally override canonical tool names + // that are used in the switch statement + const canonicalTools = [ + 'get_context', + 'add_decision', + 'start_frame', + 
'close_frame', + 'add_anchor', + 'get_hot_stack', + 'create_task', + 'update_task_status', + 'get_active_tasks', + 'get_task_metrics', + 'sm_search', + 'sm_discover', + 'sm_related_files', + 'sm_session_summary', + 'sm_edit', + 'sm_digest', + 'sm_desire_paths', + 'smart_context', + 'get_summary', + 'linear_sync', + 'linear_update_task', + 'linear_get_tasks', + 'linear_status', + 'get_traces', + 'plan_only', + 'call_codex', + 'call_claude', + 'plan_gate', + 'approve_plan', + 'pending_list', + 'pending_clear', + 'pending_show', + 'delegate_to_model', + 'batch_submit', + 'batch_check', + 'cord_spawn', + 'cord_fork', + 'cord_complete', + 'cord_ask', + 'cord_tree', + 'team_context_get', + 'team_context_share', + 'team_search', + 'provenant_search', + 'provenant_log', + 'provenant_status', + 'provenant_contradictions', + 'provenant_resolve', + 'diffmem_get_user_context', + 'diffmem_store_learning', + 'diffmem_search', + 'diffmem_status', + ]; + + const registry = getAliasRegistry(); + for (const canonical of canonicalTools) { + // No alias key should equal a canonical name (it would shadow it) + if (registry[canonical]) { + // This is a problem: an alias key matches a canonical tool name + throw new Error( + `Alias "${canonical}" shadows canonical tool "${canonical}" -> "${registry[canonical]}"` + ); + } + } + }); + + it('param alias targets exist in tool schemas', () => { + const paramRegistry = getParamAliasRegistry(); + // Just verify structure - each tool has a non-empty mapping + for (const [tool, aliases] of Object.entries(paramRegistry)) { + expect(typeof tool).toBe('string'); + expect(Object.keys(aliases).length).toBeGreaterThan(0); + // Each alias should map to a string canonical param name + for (const [alias, canonical] of Object.entries(aliases)) { + expect(typeof alias).toBe('string'); + expect(typeof canonical).toBe('string'); + // Alias and canonical should differ + expect(alias).not.toBe(canonical); + } + } + }); + }); +}); diff --git 
a/src/integrations/mcp/handlers/cross-search-handlers.ts b/src/integrations/mcp/handlers/cross-search-handlers.ts new file mode 100644 index 00000000..dc558585 --- /dev/null +++ b/src/integrations/mcp/handlers/cross-search-handlers.ts @@ -0,0 +1,225 @@ +/** + * Cross-Project Search MCP Tool Handlers + * Enables querying frames across multiple project databases + */ + +import { + CrossProjectSearch, + type CrossSearchResult, +} from '../../../core/cross-search/cross-project-search.js'; +import { logger } from '../../../core/monitoring/logger.js'; + +export interface CrossSearchHandlerDependencies { + crossSearch?: CrossProjectSearch; +} + +export class CrossSearchHandlers { + private crossSearch: CrossProjectSearch; + + constructor(deps: CrossSearchHandlerDependencies) { + this.crossSearch = deps.crossSearch || new CrossProjectSearch(); + } + + /** + * sm_cross_search: Search across all registered project databases. + */ + async handleCrossSearch(args: any): Promise { + try { + const { query, limit = 20, exclude_current = false } = args; + + if (!query) { + throw new Error('query is required'); + } + + const projects = this.crossSearch.listProjects(); + if (projects.length === 0) { + return { + content: [ + { + type: 'text', + text: 'No projects registered. Use sm_cross_discover to scan for project databases, or sm_cross_register to add one manually.', + }, + ], + }; + } + + const excludeProject = exclude_current + ? 
this.getCurrentProjectName() + : undefined; + + const start = Date.now(); + const results = await this.crossSearch.search({ + query, + limit, + excludeProject, + }); + const elapsed = Date.now() - start; + + if (results.length === 0) { + return { + content: [ + { + type: 'text', + text: `No results found for "${query}" across ${projects.length} project databases (${elapsed}ms).`, + }, + ], + }; + } + + const text = this.formatResults(results, query, projects.length, elapsed); + + return { + content: [{ type: 'text', text }], + metadata: { + results: results.map((r) => ({ + project: r.projectName, + frameId: r.frameId, + name: r.name, + score: r.score, + })), + total: results.length, + projectsSearched: projects.length, + elapsedMs: elapsed, + }, + }; + } catch (error: unknown) { + logger.error( + 'Cross-project search failed', + error instanceof Error ? error : new Error(String(error)) + ); + throw error; + } + } + + /** + * sm_cross_discover: Auto-discover project databases. + */ + async handleCrossDiscover(args: any): Promise { + try { + const paths = args.paths as string[] | undefined; + const discovered = this.crossSearch.discoverProjects(paths); + const all = this.crossSearch.listProjects(); + + return { + content: [ + { + type: 'text', + text: + `Discovered ${discovered.length} project database(s).\n` + + `Total registered: ${all.length}\n\n` + + all + .map((p) => ` ${p.name}: ${p.path}\n db: ${p.dbPath}`) + .join('\n'), + }, + ], + metadata: { discovered: discovered.length, total: all.length }, + }; + } catch (error: unknown) { + logger.error( + 'Cross-project discover failed', + error instanceof Error ? error : new Error(String(error)) + ); + throw error; + } + } + + /** + * sm_cross_register: Register a project database manually. 
+ */ + async handleCrossRegister(args: any): Promise { + try { + const { name, path, db_path } = args; + + if (!name || !path || !db_path) { + throw new Error('name, path, and db_path are required'); + } + + this.crossSearch.registerProject({ + name, + path, + dbPath: db_path, + lastAccessed: Date.now(), + }); + + return { + content: [ + { + type: 'text', + text: `Registered project "${name}" at ${path} (db: ${db_path})`, + }, + ], + }; + } catch (error: unknown) { + logger.error( + 'Cross-project register failed', + error instanceof Error ? error : new Error(String(error)) + ); + throw error; + } + } + + /** + * sm_cross_list: List all registered project databases. + */ + async handleCrossList(): Promise { + try { + const projects = this.crossSearch.listProjects(); + + if (projects.length === 0) { + return { + content: [ + { + type: 'text', + text: 'No projects registered. Use sm_cross_discover to scan for project databases.', + }, + ], + }; + } + + const text = + `Registered projects (${projects.length}):\n\n` + + projects + .map( + (p) => + ` ${p.name}\n path: ${p.path}\n db: ${p.dbPath}\n last: ${new Date(p.lastAccessed).toLocaleDateString()}` + ) + .join('\n'); + + return { + content: [{ type: 'text', text }], + metadata: { projects }, + }; + } catch (error: unknown) { + logger.error( + 'Cross-project list failed', + error instanceof Error ? error : new Error(String(error)) + ); + throw error; + } + } + + private formatResults( + results: CrossSearchResult[], + query: string, + projectCount: number, + elapsed: number + ): string { + const header = `Cross-project search: ${results.length} results for "${query}" across ${projectCount} databases (${elapsed}ms):\n\n`; + + const body = results + .map( + (r) => + `[${r.projectName}] ${r.name} (${r.type}, score: ${r.score.toFixed(3)})` + + (r.digestText ? 
`\n ${r.digestText.slice(0, 120)}` : '') + ) + .join('\n'); + + return header + body; + } + + private getCurrentProjectName(): string | undefined { + // Best-effort: derive from cwd + const cwd = process.cwd(); + return cwd.split('/').pop(); + } +} diff --git a/src/integrations/mcp/handlers/index.ts b/src/integrations/mcp/handlers/index.ts index a19ce7a1..b6548ab1 100644 --- a/src/integrations/mcp/handlers/index.ts +++ b/src/integrations/mcp/handlers/index.ts @@ -30,6 +30,10 @@ export { ProvenantHandlers, type ProvenantHandlerDependencies, } from './provenant-handlers.js'; +export { + CrossSearchHandlers, + type CrossSearchHandlerDependencies, +} from './cross-search-handlers.js'; import { ContextHandlers, @@ -45,6 +49,11 @@ import { ProviderHandlers } from './provider-handlers.js'; import { TeamHandlers, TeamHandlerDependencies } from './team-handlers.js'; import { CordHandlers } from './cord-handlers.js'; import { ProvenantHandlers } from './provenant-handlers.js'; +import { CrossSearchHandlers } from './cross-search-handlers.js'; +import { + resolveToolAlias, + resolveParamAliases, +} from '../tool-alias-registry.js'; // Combined dependencies interface export interface MCPHandlerDependencies @@ -69,6 +78,7 @@ export class MCPHandlerFactory { private teamHandlers?: TeamHandlers; private cordHandlers?: CordHandlers; private provenantHandlers?: ProvenantHandlers; + private crossSearchHandlers: CrossSearchHandlers; constructor(deps: MCPHandlerDependencies) { this.contextHandlers = new ContextHandlers({ @@ -109,13 +119,17 @@ export class MCPHandlerFactory { projectDir: deps.projectDir, }); } + + this.crossSearchHandlers = new CrossSearchHandlers({}); } /** - * Get handler for a specific tool + * Get handler for a specific tool. + * Resolves tool name aliases before lookup. 
*/ getHandler(toolName: string): (args: any) => Promise { - switch (toolName) { + const { canonicalName } = resolveToolAlias(toolName); + switch (canonicalName) { // Context handlers case 'get_context': return this.contextHandlers.handleGetContext.bind(this.contextHandlers); @@ -241,6 +255,24 @@ export class MCPHandlerFactory { this.provenantHandlers ); + // Cross-project search handlers + case 'sm_cross_search': + return this.crossSearchHandlers.handleCrossSearch.bind( + this.crossSearchHandlers + ); + case 'sm_cross_discover': + return this.crossSearchHandlers.handleCrossDiscover.bind( + this.crossSearchHandlers + ); + case 'sm_cross_register': + return this.crossSearchHandlers.handleCrossRegister.bind( + this.crossSearchHandlers + ); + case 'sm_cross_list': + return this.crossSearchHandlers.handleCrossList.bind( + this.crossSearchHandlers + ); + default: throw new Error(`Unknown tool: ${toolName}`); } @@ -301,13 +333,20 @@ export class MCPHandlerFactory { 'provenant_status', 'provenant_contradictions', 'provenant_resolve', + + // Cross-project search tools + 'sm_cross_search', + 'sm_cross_discover', + 'sm_cross_register', + 'sm_cross_list', ]; } /** - * Check if a tool exists + * Check if a tool exists (resolves aliases) */ hasHandler(toolName: string): boolean { - return this.getAvailableTools().includes(toolName); + const { canonicalName } = resolveToolAlias(toolName); + return this.getAvailableTools().includes(canonicalName); } } diff --git a/src/integrations/mcp/server.ts b/src/integrations/mcp/server.ts index 5b0c0bf8..8b2cfccc 100644 --- a/src/integrations/mcp/server.ts +++ b/src/integrations/mcp/server.ts @@ -52,6 +52,7 @@ import { DiffMemHandlers } from './handlers/diffmem-handlers.js'; import { GreptileHandlers } from './handlers/greptile-handlers.js'; import { CordHandlers } from './handlers/cord-handlers.js'; import { TeamHandlers } from './handlers/team-handlers.js'; +import { CrossSearchHandlers } from './handlers/cross-search-handlers.js'; import { 
SQLiteAdapter } from '../../core/database/sqlite-adapter.js'; import { generateChronologicalDigest, @@ -59,6 +60,10 @@ import { } from '../../core/digest/chronological-digest.js'; import { fuzzyEdit } from '../../utils/fuzzy-edit.js'; import { v4 as uuidv4 } from 'uuid'; +import { + resolveToolAlias, + resolveParamAliases, +} from './tool-alias-registry.js'; import { DEFAULT_PLANNER_MODEL, DEFAULT_IMPLEMENTER, @@ -103,6 +108,7 @@ class LocalStackMemoryMCP { | null = null; private cordHandlers: CordHandlers | null = null; private teamHandlers: TeamHandlers | null = null; + private crossSearchHandlers: CrossSearchHandlers; private pendingPlans: Map = new Map(); constructor() { @@ -203,6 +209,9 @@ class LocalStackMemoryMCP { // Initialize Greptile Handlers this.greptileHandlers = new GreptileHandlers(); + // Initialize Cross-Project Search Handlers + this.crossSearchHandlers = new CrossSearchHandlers({}); + // Initialize Cord and Team Handlers (async - best effort) this.initCordTeamHandlers(); @@ -1427,6 +1436,79 @@ class LocalStackMemoryMCP { required: ['period'], }, }, + // Cross-project search tools + { + name: 'sm_cross_search', + description: + 'Search frames across all registered project databases using FTS5/BM25. 
Returns results ranked by relevance with source project attribution.', + inputSchema: { + type: 'object', + properties: { + query: { + type: 'string', + description: 'Search query (natural language or keywords)', + }, + limit: { + type: 'number', + default: 20, + description: 'Maximum results to return', + }, + exclude_current: { + type: 'boolean', + default: false, + description: 'Exclude the current project from results', + }, + }, + required: ['query'], + }, + }, + { + name: 'sm_cross_discover', + description: + 'Auto-discover project databases by scanning common directories for .stackmemory/context.db files.', + inputSchema: { + type: 'object', + properties: { + paths: { + type: 'array', + items: { type: 'string' }, + description: 'Custom directory paths to scan', + }, + }, + }, + }, + { + name: 'sm_cross_register', + description: + 'Manually register a project database for cross-project search.', + inputSchema: { + type: 'object', + properties: { + name: { + type: 'string', + description: 'Project display name', + }, + path: { + type: 'string', + description: 'Project root directory path', + }, + db_path: { + type: 'string', + description: 'Path to the SQLite context.db file', + }, + }, + required: ['name', 'path', 'db_path'], + }, + }, + { + name: 'sm_cross_list', + description: + 'List all project databases registered for cross-project search.', + inputSchema: { + type: 'object', + properties: {}, + }, + }, ], }; } @@ -1442,7 +1524,28 @@ class LocalStackMemoryMCP { }), }), async (request) => { - const { name, arguments: args } = request.params; + const { name: rawName, arguments: rawArgs } = request.params; + + // Resolve tool name aliases (e.g., "sm_save" -> "save_context") + const aliasResolution = resolveToolAlias(rawName); + const name = aliasResolution.canonicalName; + + // Resolve parameter aliases for the canonical tool + const paramResolution = resolveParamAliases(name, rawArgs); + const args = paramResolution.resolvedParams; + + // Log alias 
resolution for observability + if ( + aliasResolution.wasAlias || + Object.keys(paramResolution.renames).length > 0 + ) { + logger.debug('Tool alias resolved', { + originalTool: rawName, + canonicalTool: name, + paramRenames: paramResolution.renames, + }); + } + const callId = uuidv4(); const startTime = Date.now(); @@ -1453,6 +1556,7 @@ class LocalStackMemoryMCP { tool_name: name, arguments: args, timestamp: startTime, + ...(aliasResolution.wasAlias ? { alias_from: rawName } : {}), }); } @@ -1830,6 +1934,22 @@ class LocalStackMemoryMCP { result = this.handleDesirePaths(args); break; + case 'sm_cross_search': + result = await this.crossSearchHandlers.handleCrossSearch(args); + break; + + case 'sm_cross_discover': + result = await this.crossSearchHandlers.handleCrossDiscover(args); + break; + + case 'sm_cross_register': + result = await this.crossSearchHandlers.handleCrossRegister(args); + break; + + case 'sm_cross_list': + result = await this.crossSearchHandlers.handleCrossList(); + break; + default: throw new Error(`Unknown tool: ${name}`); } diff --git a/src/integrations/mcp/tool-alias-registry.ts b/src/integrations/mcp/tool-alias-registry.ts new file mode 100644 index 00000000..48297c27 --- /dev/null +++ b/src/integrations/mcp/tool-alias-registry.ts @@ -0,0 +1,556 @@ +/** + * Tool Alias Registry + * + * Maps common misspellings, abbreviations, and variant names to canonical + * MCP tool names. Built from desire paths analysis — what agents try to call + * vs what actually exists. + * + * Also handles parameter name aliases so agents can use common variants + * (e.g., `query` vs `search_term`) and have them resolved transparently. 
+ */ + +export interface AliasResolution { + /** The canonical tool name */ + canonicalName: string; + /** Whether the name was an alias (false = already canonical) */ + wasAlias: boolean; + /** The original name that was looked up */ + originalName: string; +} + +export interface ParamResolution { + /** Resolved parameters with canonical names */ + resolvedParams: Record; + /** Map of param renames that were applied: original -> canonical */ + renames: Record; +} + +/** + * Static registry of tool name aliases. + * + * Key: alias name (what agents try to call) + * Value: canonical tool name (what actually exists) + * + * Organized by category for readability. + */ +const TOOL_ALIASES: Record = { + // --- Context tools --- + context: 'get_context', + get_ctx: 'get_context', + sm_context: 'get_context', + sm_get_context: 'get_context', + fetch_context: 'get_context', + read_context: 'get_context', + + record_decision: 'add_decision', + log_decision: 'add_decision', + save_decision: 'add_decision', + sm_decision: 'add_decision', + + push_frame: 'start_frame', + open_frame: 'start_frame', + begin_frame: 'start_frame', + new_frame: 'start_frame', + + pop_frame: 'close_frame', + end_frame: 'close_frame', + finish_frame: 'close_frame', + + anchor: 'add_anchor', + sm_anchor: 'add_anchor', + save_anchor: 'add_anchor', + + hot_stack: 'get_hot_stack', + stack: 'get_hot_stack', + sm_stack: 'get_hot_stack', + + // --- Task tools --- + new_task: 'create_task', + add_task: 'create_task', + sm_task: 'create_task', + sm_create_task: 'create_task', + + update_task: 'update_task_status', + set_task_status: 'update_task_status', + task_update: 'update_task_status', + + list_tasks: 'get_active_tasks', + tasks: 'get_active_tasks', + sm_tasks: 'get_active_tasks', + active_tasks: 'get_active_tasks', + + task_metrics: 'get_task_metrics', + metrics: 'get_task_metrics', + + // --- Search & Discovery --- + sm_context_search: 'sm_search', + search: 'sm_search', + context_search: 'sm_search', 
+ sm_find: 'sm_search', + find: 'sm_search', + + discover: 'sm_discover', + sm_explore: 'sm_discover', + explore: 'sm_discover', + + related: 'sm_related_files', + find_related: 'sm_related_files', + + session_summary: 'sm_session_summary', + summary: 'sm_session_summary', + + // --- Save/Load context (old MCP server) --- + sm_save: 'save_context', + sm_context_save: 'save_context', + store_context: 'save_context', + + sm_load: 'load_context', + sm_context_load: 'load_context', + retrieve_context: 'load_context', + + // --- Linear tools --- + linear_issues: 'linear_get_tasks', + linear_list: 'linear_get_tasks', + get_linear_tasks: 'linear_get_tasks', + + linear_update: 'linear_update_task', + update_linear: 'linear_update_task', + + linear_comment: 'linear_create_comment', + comment_on_issue: 'linear_create_comment', + + linear_comments: 'linear_list_comments', + + // --- Trace tools --- + traces: 'get_traces', + sm_traces: 'get_traces', + list_traces: 'get_traces', + + trace_stats: 'get_trace_statistics', + trace_statistics: 'get_trace_statistics', + + // --- Smart context --- + smart: 'smart_context', + sm_smart: 'smart_context', + intelligent_context: 'smart_context', + + sm_summary: 'get_summary', + project_summary: 'get_summary', + + // --- Planning tools --- + plan: 'plan_only', + generate_plan: 'plan_only', + sm_plan: 'plan_only', + + codex: 'call_codex', + run_codex: 'call_codex', + + claude: 'call_claude', + ask_claude: 'call_claude', + + gate: 'plan_gate', + plan_and_gate: 'plan_gate', + + approve: 'approve_plan', + execute_plan: 'approve_plan', + + // --- Pending tools --- + pending: 'pending_list', + list_pending: 'pending_list', + + clear_pending: 'pending_clear', + + show_pending: 'pending_show', + + // --- Edit tools --- + fuzzy_edit: 'sm_edit', + edit: 'sm_edit', + sm_fuzzy_edit: 'sm_edit', + + // --- DiffMem tools --- + user_context: 'diffmem_get_user_context', + get_user_context: 'diffmem_get_user_context', + user_memory: 
'diffmem_get_user_context', + + store_learning: 'diffmem_store_learning', + learn: 'diffmem_store_learning', + remember: 'diffmem_store_learning', + + memory_search: 'diffmem_search', + search_memory: 'diffmem_search', + + diffmem: 'diffmem_status', + memory_status: 'diffmem_status', + + // --- Digest tools --- + digest: 'sm_digest', + activity_digest: 'sm_digest', + daily_digest: 'sm_digest', + + // --- Desire paths --- + desire_paths: 'sm_desire_paths', + desires: 'sm_desire_paths', + failed_tools: 'sm_desire_paths', + + // --- Provider tools --- + delegate: 'delegate_to_model', + route: 'delegate_to_model', + send_to_model: 'delegate_to_model', + + batch: 'batch_submit', + submit_batch: 'batch_submit', + + check_batch: 'batch_check', + batch_status: 'batch_check', + + // --- Team tools --- + team_get: 'team_context_get', + team_context: 'team_context_get', + get_team_context: 'team_context_get', + + team_share: 'team_context_share', + share_context: 'team_context_share', + share: 'team_context_share', + + // --- Cord tools --- + spawn: 'cord_spawn', + subtask: 'cord_spawn', + + fork: 'cord_fork', + fork_task: 'cord_fork', + + complete: 'cord_complete', + done: 'cord_complete', + finish: 'cord_complete', + + ask: 'cord_ask', + question: 'cord_ask', + + tree: 'cord_tree', + task_tree: 'cord_tree', + + // --- Greptile tools --- + pr_comments: 'greptile_pr_comments', + review_comments: 'greptile_pr_comments', + + pr_details: 'greptile_pr_details', + pr_info: 'greptile_pr_details', + + list_prs: 'greptile_list_prs', + prs: 'greptile_list_prs', + + trigger_review: 'greptile_trigger_review', + review_pr: 'greptile_trigger_review', + + search_patterns: 'greptile_search_patterns', + patterns: 'greptile_search_patterns', + + create_pattern: 'greptile_create_pattern', + add_pattern: 'greptile_create_pattern', + + greptile: 'greptile_status', + + // --- Provenant tools --- + decision_search: 'provenant_search', + search_decisions: 'provenant_search', + + log_decision_graph: 
'provenant_log', + decision_log: 'provenant_log', + + decision_status: 'provenant_status', + graph_status: 'provenant_status', + + contradictions: 'provenant_contradictions', + conflicts: 'provenant_contradictions', + + resolve: 'provenant_resolve', + resolve_contradiction: 'provenant_resolve', +}; + +/** + * Parameter alias mappings per tool. + * + * Key: canonical tool name + * Value: Record mapping alias param name -> canonical param name + * + * Only tools where agents commonly send wrong param names are listed. + */ +const PARAM_ALIASES: Record> = { + // Agents often send `query` for search-like tools + sm_search: { + search_term: 'query', + search: 'query', + text: 'query', + q: 'query', + max: 'limit', + max_results: 'limit', + count: 'limit', + }, + sm_discover: { + search: 'query', + q: 'query', + search_query: 'query', + max: 'maxFiles', + max_files: 'maxFiles', + limit: 'maxFiles', + include: 'includePatterns', + exclude: 'excludePatterns', + }, + get_context: { + search: 'query', + q: 'query', + text: 'query', + max: 'limit', + max_results: 'limit', + count: 'limit', + }, + smart_context: { + search: 'query', + q: 'query', + tokens: 'tokenBudget', + token_budget: 'tokenBudget', + max_tokens: 'tokenBudget', + budget: 'tokenBudget', + refresh: 'forceRefresh', + force: 'forceRefresh', + }, + get_active_tasks: { + state: 'status', + max: 'limit', + max_results: 'limit', + count: 'limit', + query: 'search', + q: 'search', + }, + linear_get_tasks: { + status: 'state', + max: 'limit', + max_results: 'limit', + count: 'limit', + q: 'search', + query: 'search', + team: 'team_id', + assignee: 'assignee_id', + }, + add_decision: { + text: 'content', + decision: 'content', + value: 'content', + kind: 'type', + category: 'type', + }, + add_anchor: { + content: 'text', + value: 'text', + anchor: 'text', + kind: 'type', + category: 'type', + importance: 'priority', + weight: 'priority', + }, + start_frame: { + title: 'name', + goal: 'name', + label: 'name', + kind: 
'type', + frame_type: 'type', + }, + create_task: { + name: 'title', + goal: 'title', + label: 'title', + desc: 'description', + detail: 'description', + details: 'description', + }, + sm_desire_paths: { + type: 'category', + kind: 'category', + max: 'limit', + max_results: 'limit', + lookback: 'days', + period: 'days', + }, + delegate_to_model: { + text: 'prompt', + message: 'prompt', + input: 'prompt', + tokens: 'maxTokens', + max_tokens: 'maxTokens', + temp: 'temperature', + task: 'taskType', + task_type: 'taskType', + }, + provenant_search: { + text: 'query', + search: 'query', + q: 'query', + max: 'limit', + max_results: 'limit', + from: 'since', + after: 'since', + by: 'actor', + who: 'actor', + }, + provenant_log: { + decision: 'content', + text: 'content', + value: 'content', + by: 'actor', + who: 'actor', + why: 'reasoning', + reason: 'reasoning', + rationale: 'reasoning', + }, + diffmem_store_learning: { + insight: 'content', + text: 'content', + value: 'content', + type: 'category', + kind: 'category', + }, + diffmem_search: { + text: 'query', + search: 'query', + q: 'query', + max: 'limit', + max_results: 'limit', + time: 'timeRange', + range: 'timeRange', + time_range: 'timeRange', + min_confidence: 'minConfidence', + threshold: 'minConfidence', + }, + sm_edit: { + path: 'file_path', + file: 'file_path', + find: 'old_string', + search: 'old_string', + replace: 'new_string', + replacement: 'new_string', + }, + cord_spawn: { + task: 'goal', + title: 'goal', + name: 'goal', + instructions: 'prompt', + description: 'prompt', + depends_on: 'blocked_by', + blockers: 'blocked_by', + parent: 'parent_id', + }, + cord_fork: { + task: 'goal', + title: 'goal', + name: 'goal', + instructions: 'prompt', + description: 'prompt', + depends_on: 'blocked_by', + blockers: 'blocked_by', + parent: 'parent_id', + }, + cord_complete: { + id: 'task_id', + output: 'result', + response: 'result', + answer: 'result', + }, + team_search: { + text: 'query', + search: 'query', + q: 
'query', + max: 'limit', + max_results: 'limit', + }, + sm_digest: { + time: 'period', + range: 'period', + timeframe: 'period', + }, + get_summary: { + refresh: 'forceRefresh', + force: 'forceRefresh', + }, +}; + +/** + * Resolve a tool name to its canonical form. + * Returns the canonical name and whether an alias was used. + */ +export function resolveToolAlias(name: string): AliasResolution { + const alias = TOOL_ALIASES[name]; + if (alias) { + return { canonicalName: alias, wasAlias: true, originalName: name }; + } + return { canonicalName: name, wasAlias: false, originalName: name }; +} + +/** + * Resolve parameter aliases for a given tool. + * Remaps aliased param names to canonical names. + * Original params take precedence over aliases (don't overwrite). + */ +export function resolveParamAliases( + toolName: string, + params: Record +): ParamResolution { + const aliases = PARAM_ALIASES[toolName]; + if (!aliases) { + return { resolvedParams: { ...params }, renames: {} }; + } + + const resolved: Record = {}; + const renames: Record = {}; + + // First pass: copy all canonical params + for (const [key, value] of Object.entries(params)) { + if (!aliases[key]) { + // Not an alias, keep as-is + resolved[key] = value; + } + } + + // Second pass: apply aliases (only if canonical name not already set) + for (const [key, value] of Object.entries(params)) { + const canonicalKey = aliases[key]; + if (canonicalKey && !(canonicalKey in resolved)) { + resolved[canonicalKey] = value; + renames[key] = canonicalKey; + } + } + + return { resolvedParams: resolved, renames }; +} + +/** + * Get all registered aliases for a canonical tool name. + * Useful for enriching tool descriptions. + */ +export function getAliasesForTool(canonicalName: string): string[] { + return Object.entries(TOOL_ALIASES) + .filter(([, target]) => target === canonicalName) + .map(([alias]) => alias); +} + +/** + * Get all canonical tool names that have aliases. 
+ */ +export function getToolsWithAliases(): string[] { + return Array.from(new Set(Object.values(TOOL_ALIASES))); +} + +/** + * Get the full alias registry (for debugging/analysis). + */ +export function getAliasRegistry(): Readonly> { + return TOOL_ALIASES; +} + +/** + * Get the full param alias registry (for debugging/analysis). + */ +export function getParamAliasRegistry(): Readonly< + Record> +> { + return PARAM_ALIASES; +} diff --git a/src/integrations/mcp/tool-definitions.ts b/src/integrations/mcp/tool-definitions.ts index d1096730..9001763c 100644 --- a/src/integrations/mcp/tool-definitions.ts +++ b/src/integrations/mcp/tool-definitions.ts @@ -37,6 +37,7 @@ export class MCPToolDefinitions { ...this.getDigestTools(), ...this.getDesirePathTools(), ...this.getProvenantTools(), + ...this.getCrossSearchTools(), ]; } @@ -47,7 +48,8 @@ export class MCPToolDefinitions { return [ { name: 'get_context', - description: 'Get current project context and active frame information', + description: + 'Get current project context and active frame information. Aliases: context, get_ctx, sm_context, fetch_context', inputSchema: { type: 'object', properties: { @@ -65,7 +67,8 @@ export class MCPToolDefinitions { }, { name: 'add_decision', - description: 'Record a decision, constraint, or important information', + description: + 'Record a decision, constraint, or important information. Aliases: record_decision, log_decision, save_decision', inputSchema: { type: 'object', properties: { @@ -84,7 +87,8 @@ export class MCPToolDefinitions { }, { name: 'start_frame', - description: 'Start a new frame (task/subtask) on the call stack', + description: + 'Start a new frame (task/subtask) on the call stack. Aliases: push_frame, open_frame, begin_frame', inputSchema: { type: 'object', properties: { @@ -193,7 +197,7 @@ export class MCPToolDefinitions { return [ { name: 'create_task', - description: 'Create a new task', + description: 'Create a new task. 
Aliases: new_task, add_task, sm_task', inputSchema: { type: 'object', properties: { @@ -257,7 +261,8 @@ export class MCPToolDefinitions { }, { name: 'get_active_tasks', - description: 'Get active tasks with optional filtering', + description: + 'Get active tasks with optional filtering. Aliases: list_tasks, tasks, sm_tasks, active_tasks', inputSchema: { type: 'object', properties: { @@ -730,7 +735,7 @@ export class MCPToolDefinitions { { name: 'smart_context', description: - 'LLM-driven context retrieval - intelligently selects relevant frames based on query', + 'LLM-driven context retrieval - intelligently selects relevant frames based on query. Aliases: smart, sm_smart, intelligent_context', inputSchema: { type: 'object', properties: { @@ -775,7 +780,7 @@ export class MCPToolDefinitions { { name: 'sm_discover', description: - 'Discover relevant files based on current context. Extracts keywords from active frames and searches codebase for related files.', + 'Discover relevant files based on current context. Extracts keywords from active frames and searches codebase for related files. Aliases: discover, sm_explore, explore', inputSchema: { type: 'object', properties: { @@ -855,7 +860,7 @@ export class MCPToolDefinitions { { name: 'sm_search', description: - 'Search across StackMemory context - frames, events, decisions, and tasks.', + 'Search across StackMemory context - frames, events, decisions, and tasks. Aliases: search, context_search, sm_find, sm_context_search', inputSchema: { type: 'object', properties: { @@ -889,7 +894,7 @@ export class MCPToolDefinitions { { name: 'sm_edit', description: - "Fuzzy file edit — fallback when Claude Code's Edit tool fails on whitespace or indentation mismatches. Uses four-tier matching: exact, whitespace-normalized, indentation-insensitive, and line-level fuzzy (Levenshtein).", + "Fuzzy file edit — fallback when Claude Code's Edit tool fails on whitespace or indentation mismatches. 
Uses four-tier matching: exact, whitespace-normalized, indentation-insensitive, and line-level fuzzy (Levenshtein). Aliases: fuzzy_edit, edit, sm_fuzzy_edit", inputSchema: { type: 'object', properties: { @@ -1185,7 +1190,7 @@ export class MCPToolDefinitions { { name: 'delegate_to_model', description: - 'Route a prompt to a specific provider/model. Uses smart cost-based routing by default.', + 'Route a prompt to a specific provider/model. Uses smart cost-based routing by default. Aliases: delegate, route, send_to_model', inputSchema: { type: 'object', properties: { @@ -1271,7 +1276,7 @@ export class MCPToolDefinitions { { name: 'sm_digest', description: - 'Generate a chronological activity digest for a time period', + 'Generate a chronological activity digest for a time period. Aliases: digest, activity_digest, daily_digest', inputSchema: { type: 'object', properties: { @@ -1295,7 +1300,7 @@ export class MCPToolDefinitions { { name: 'sm_desire_paths', description: - 'Analyze failed tool calls (desire paths) — what agents want but cannot get. Use mode "summary" for aggregated counts or "list" for recent failures.', + 'Analyze failed tool calls (desire paths) — what agents want but cannot get. Use mode "summary" for aggregated counts or "list" for recent failures. Aliases: desire_paths, desires, failed_tools', inputSchema: { type: 'object', properties: { @@ -1427,7 +1432,7 @@ export class MCPToolDefinitions { { name: 'cord_spawn', description: - 'Create a subtask with clean context (spawn). Child sees only its prompt and completed blocker results.', + 'Create a subtask with clean context (spawn). Child sees only its prompt and completed blocker results. 
Aliases: spawn, subtask', inputSchema: { type: 'object', properties: { @@ -1554,7 +1559,7 @@ export class MCPToolDefinitions { { name: 'provenant_search', description: - 'Search the decision graph for past decisions, patterns, and context by meaning', + 'Search the decision graph for past decisions, patterns, and context by meaning. Aliases: decision_search, search_decisions', inputSchema: { type: 'object', properties: { @@ -1582,7 +1587,7 @@ export class MCPToolDefinitions { { name: 'provenant_log', description: - 'Log a decision to the graph. Use when a product or technical decision is made during a session.', + 'Log a decision to the graph. Use when a product or technical decision is made during a session. Aliases: decision_log, log_decision_graph', inputSchema: { type: 'object', properties: { @@ -1650,6 +1655,89 @@ export class MCPToolDefinitions { ]; } + /** + * Cross-project search tools + */ + getCrossSearchTools(): MCPToolDefinition[] { + return [ + { + name: 'sm_cross_search', + description: + 'Search frames across all registered project databases using FTS5/BM25. 
Returns results ranked by relevance with source project attribution.', + inputSchema: { + type: 'object', + properties: { + query: { + type: 'string', + description: 'Search query (natural language or keywords)', + }, + limit: { + type: 'number', + default: 20, + description: 'Maximum results to return', + }, + exclude_current: { + type: 'boolean', + default: false, + description: + 'Exclude the current project from results (useful when searching for external context)', + }, + }, + required: ['query'], + }, + }, + { + name: 'sm_cross_discover', + description: + 'Auto-discover project databases by scanning common directories for .stackmemory/context.db files.', + inputSchema: { + type: 'object', + properties: { + paths: { + type: 'array', + items: { type: 'string' }, + description: + 'Custom directory paths to scan (defaults to ~/Dev, ~/Projects, etc.)', + }, + }, + }, + }, + { + name: 'sm_cross_register', + description: + 'Manually register a project database for cross-project search.', + inputSchema: { + type: 'object', + properties: { + name: { + type: 'string', + description: 'Project display name', + }, + path: { + type: 'string', + description: 'Project root directory path', + }, + db_path: { + type: 'string', + description: + 'Path to the SQLite context.db file (e.g. 
/path/to/project/.stackmemory/context.db)',
+            },
+          },
+          required: ['name', 'path', 'db_path'],
+        },
+      },
+      {
+        name: 'sm_cross_list',
+        description:
+          'List all project databases registered for cross-project search.',
+        inputSchema: {
+          type: 'object',
+          properties: {},
+        },
+      },
+    ];
+  }
+
   /**
    * Get tool definition by name
    */
@@ -1679,6 +1767,7 @@
       | 'cord'
       | 'digest'
       | 'provenant'
+      | 'cross_search'
   ): MCPToolDefinition[] {
     switch (category) {
       case 'context':
@@ -1717,6 +1806,8 @@
         return this.getDesirePathTools();
       case 'provenant':
         return this.getProvenantTools();
+      case 'cross_search':
+        return this.getCrossSearchTools();
       default:
         return [];
     }

From 39c1b39751aeebfe45b79333f30c2935bb8be21f Mon Sep 17 00:00:00 2001
From: "StackMemory Bot (CLI)"
Date: Mon, 13 Apr 2026 16:13:52 -0400
Subject: [PATCH 05/11] feat(shared-state): add canonical instance coordination

---
 package-lock.json                        |   4 +-
 package.json                             |   2 +-
 src/cli/claude-sm.ts                     | 323 ++++++--
 src/cli/codex-sm.ts                      | 170 ++++-
 src/cli/commands/daemon.ts               |  47 ++
 src/cli/commands/state.ts                | 380 ++++++++++
 src/cli/index.ts                         |  63 ++
 src/core/session/session-manager.ts      |  28 +
 src/core/shared-state/canonical-store.ts | 905 +++++++++++++++++++++++
 src/daemon/daemon-config.ts              |  14 +
 src/daemon/services/github-service.ts    | 158 ++++
 src/daemon/unified-daemon.ts             |  32 +
 src/features/sweep/pty-wrapper.ts        |  18 +-
 src/integrations/github/pr-state.ts      | 209 ++++++
 14 files changed, 2250 insertions(+), 103 deletions(-)
 create mode 100644 src/cli/commands/state.ts
 create mode 100644 src/core/shared-state/canonical-store.ts
 create mode 100644 src/daemon/services/github-service.ts
 create mode 100644 src/integrations/github/pr-state.ts

diff --git a/package-lock.json b/package-lock.json
index 2d9e9c39..b0dfc90c 100644
--- a/package-lock.json
+++ b/package-lock.json
@@ -1,12 +1,12 @@
 {
   "name": "@stackmemoryai/stackmemory",
-  "version": "1.10.1",
+  "version": "1.10.6",
"lockfileVersion": 3, "requires": true, "packages": { "": { "name": "@stackmemoryai/stackmemory", - "version": "1.10.1", + "version": "1.10.6", "hasInstallScript": true, "license": "BUSL-1.1", "dependencies": { diff --git a/package.json b/package.json index 40b183dc..52284729 100644 --- a/package.json +++ b/package.json @@ -1,6 +1,6 @@ { "name": "@stackmemoryai/stackmemory", - "version": "1.10.5", + "version": "1.10.6", "description": "Lossless, project-scoped memory for AI coding tools. Durable context across sessions with 56 MCP tools, FTS5 search, conductor orchestrator, loop/watch monitoring, snapshot capture, pre-flight overlap checks, Claude/Codex/OpenCode wrappers, Linear sync, and automatic hooks.", "engines": { "node": ">=20.0.0", diff --git a/src/cli/claude-sm.ts b/src/cli/claude-sm.ts index 1e1cacdc..0bc7339b 100644 --- a/src/cli/claude-sm.ts +++ b/src/cli/claude-sm.ts @@ -12,10 +12,15 @@ import { spawn, execSync, execFileSync } from 'child_process'; import * as fs from 'fs'; import * as path from 'path'; import * as os from 'os'; +import { fileURLToPath } from 'url'; import { program } from 'commander'; import { v4 as uuidv4 } from 'uuid'; import chalk from 'chalk'; import { initializeTracing, trace } from '../core/trace/index.js'; +import { + canonicalStateStore, + projectIdFromIdentifier, +} from '../core/shared-state/canonical-store.js'; import { getModelRouter, loadModelRouterConfig, @@ -50,7 +55,7 @@ import { getSettingsPath, } from '../utils/hook-installer.js'; -// __filename and __dirname are provided by esbuild banner for ESM compatibility +const runtimeDirname = path.dirname(fileURLToPath(import.meta.url)); interface ClaudeSMConfig { defaultWorktree: boolean; @@ -128,6 +133,9 @@ class ClaudeSM { private worktreeScriptPath: string; private claudeConfigDir: string; private smConfig: ClaudeSMConfig; + private sessionId: string; + private ownsSession: boolean; + private sessionEnded: boolean; constructor() { // Load persistent defaults @@ -151,10 
+159,13 @@ class ClaudeSM { this.stackmemoryPath = this.findStackMemory(); this.worktreeScriptPath = path.join( - __dirname, + runtimeDirname, '../../scripts/claude-worktree-manager.sh' ); this.claudeConfigDir = path.join(os.homedir(), '.claude'); + this.sessionId = process.env['STACKMEMORY_SESSION'] || uuidv4(); + this.ownsSession = !process.env['STACKMEMORY_SESSION']; + this.sessionEnded = false; // Ensure config directory exists if (!fs.existsSync(this.claudeConfigDir)) { @@ -236,6 +247,24 @@ class ClaudeSM { } } + private getProjectId(): string | undefined { + const root = this.getRepoRoot() || process.cwd(); + + try { + const remote = execSync('git config --get remote.origin.url', { + cwd: root, + encoding: 'utf8', + }).trim(); + if (remote) { + return projectIdFromIdentifier(remote); + } + } catch { + // Fall back to the current path below. + } + + return projectIdFromIdentifier(root); + } + private hasUncommittedChanges(): boolean { try { const status = execSync('git status --porcelain', { encoding: 'utf8' }); @@ -281,9 +310,9 @@ class ClaudeSM { // Find GEPA scripts directory (check multiple locations) const gepaPaths = [ // From dist/src/cli -> scripts/gepa (3 levels up) - path.join(__dirname, '../../../scripts/gepa/hooks/auto-optimize.js'), + path.join(runtimeDirname, '../../../scripts/gepa/hooks/auto-optimize.js'), // From src/cli -> scripts/gepa (2 levels up, for dev mode) - path.join(__dirname, '../../scripts/gepa/hooks/auto-optimize.js'), + path.join(runtimeDirname, '../../scripts/gepa/hooks/auto-optimize.js'), // Global install location path.join( os.homedir(), @@ -295,7 +324,7 @@ class ClaudeSM { ), // npm global install path.join( - __dirname, + runtimeDirname, '..', '..', 'scripts', @@ -495,10 +524,10 @@ class ClaudeSM { // 2. 
Find templates dir (dev → dist → global npm) const candidateDirs = [ - path.join(__dirname, '../../templates/claude-hooks'), - path.join(__dirname, '../../../templates/claude-hooks'), + path.join(runtimeDirname, '../../templates/claude-hooks'), + path.join(runtimeDirname, '../../../templates/claude-hooks'), path.join( - __dirname, + runtimeDirname, '..', '..', '..', @@ -613,6 +642,176 @@ class ClaudeSM { console.log(chalk.gray(`\nSession ended (exit ${exitCode ?? 0})`)); } + private async publishSessionStart(): Promise { + const projectPath = process.cwd(); + const projectId = this.getProjectId(); + const branch = this.isGitRepo() ? this.getCurrentBranch() : undefined; + + await canonicalStateStore.upsertSession({ + sessionId: this.sessionId, + tool: 'claude', + projectId, + projectPath, + branch, + instanceId: this.config.instanceId, + metadata: { + task: this.config.task, + sandbox: this.config.useSandbox, + chrome: this.config.useChrome, + }, + }); + + await canonicalStateStore.upsertInstance({ + instanceId: this.config.instanceId, + tool: 'claude', + sessionId: this.sessionId, + projectId, + projectPath, + branch, + worktreePath: this.config.worktreePath, + pid: process.pid, + status: 'active', + metadata: { + task: this.config.task, + sandbox: this.config.useSandbox, + chrome: this.config.useChrome, + }, + }); + + await canonicalStateStore.appendEvent({ + type: 'session_start', + tool: 'claude', + sessionId: this.sessionId, + instanceId: this.config.instanceId, + projectId, + projectPath, + branch, + payload: { + task: this.config.task, + worktreePath: this.config.worktreePath, + sandbox: this.config.useSandbox, + chrome: this.config.useChrome, + }, + }); + + const claimResult = await canonicalStateStore.claimPaths({ + tool: 'claude', + sessionId: this.sessionId, + instanceId: this.config.instanceId, + projectId, + projectPath, + branch, + paths: [], + metadata: { + task: this.config.task, + scope: 'branch', + }, + }); + + if (claimResult.conflicts.length > 0) 
{ + console.log(chalk.yellow('āš ļø Shared state conflict detected')); + for (const conflict of claimResult.conflicts.slice(0, 3)) { + console.log( + chalk.gray( + ` Claim ${conflict.claimId.slice(0, 8)} already owns ${conflict.branch || 'overlapping work'}` + ) + ); + } + } + } + + private async publishSessionEnd( + eventType: 'session_end' | 'session_interrupt' | 'session_terminate', + payload: Record = {} + ): Promise { + if (this.sessionEnded) { + return; + } + this.sessionEnded = true; + + const projectPath = process.cwd(); + const projectId = this.getProjectId(); + const branch = this.isGitRepo() ? this.getCurrentBranch() : undefined; + + await canonicalStateStore.appendEvent({ + type: eventType, + tool: 'claude', + sessionId: this.sessionId, + instanceId: this.config.instanceId, + projectId, + projectPath, + branch, + payload, + }); + await canonicalStateStore.releaseClaims({ + instanceId: this.config.instanceId, + reason: eventType, + }); + await canonicalStateStore.endInstance(this.config.instanceId); + if (this.ownsSession) { + await canonicalStateStore.endSession(this.sessionId); + } + } + + private async finalizeSession( + eventType: 'session_end' | 'session_interrupt' | 'session_terminate', + exitCode: number | null, + payload: Record = {} + ): Promise { + this.stopGEPAWatcher(); + + this.saveContext( + eventType === 'session_end' + ? 'Claude session ended' + : eventType === 'session_interrupt' + ? 
'Claude session interrupted' + : 'Claude session terminated', + { + action: eventType, + exitCode, + ...payload, + } + ); + + await this.publishSessionEnd(eventType, { + exitCode, + ...payload, + }); + + if (eventType === 'session_end' && process.env['LINEAR_API_KEY']) { + try { + execSync('stackmemory linear sync', { + stdio: 'ignore', + timeout: 10000, + }); + } catch { + // Non-fatal: don't block exit + } + } + + if (this.config.tracingEnabled) { + const summary = trace.getExecutionSummary(); + console.log(); + console.log(chalk.gray('─'.repeat(42))); + console.log(chalk.blue('Debug Trace Summary:')); + console.log(chalk.gray(summary)); + } + + if (eventType === 'session_end' && this.config.notifyOnDone) { + this.notifyDone(exitCode); + } + + if (this.config.worktreePath) { + console.log(); + console.log(chalk.gray('─'.repeat(42))); + console.log(chalk.blue('Session ended in worktree:')); + console.log(chalk.gray(` ${this.config.worktreePath}`)); + console.log(); + console.log(chalk.gray('To remove worktree: gd_claude')); + console.log(chalk.gray('To merge to main: cwm')); + } + } + public async run(args: string[]): Promise { // Parse arguments const claudeArgs: string[] = []; @@ -826,10 +1025,28 @@ class ClaudeSM { // Setup environment process.env['CLAUDE_INSTANCE_ID'] = this.config.instanceId; + process.env['STACKMEMORY_SESSION'] = this.sessionId; if (this.config.worktreePath) { process.env['CLAUDE_WORKTREE_PATH'] = this.config.worktreePath; } + const claudeBin = this.resolveClaudeBin(); + if (!claudeBin) { + console.error(chalk.red('āŒ Claude CLI not found.')); + console.log( + chalk.gray( + ' Install Claude CLI or set an override:\n' + + ' export CLAUDE_BIN=/path/to/claude\n' + + ' claude-sm --help\n\n' + + ' Ensure PATH includes npm global bin (npm bin -g).' 
+ ) + ); + process.exit(1); + return; + } + + await this.publishSessionStart(); console.log(chalk.gray(`šŸ¤– Instance ID: ${this.config.instanceId}`)); + console.log(chalk.gray(`🧠 Session ID: ${this.sessionId.slice(0, 8)}`)); console.log(chalk.gray(`šŸ“ Working in: ${process.cwd()}`)); if (this.config.useSandbox) { @@ -932,12 +1149,6 @@ class ClaudeSM { // ── Launch ──────────────────────────────────────────────────── // Sweep PTY wrapper: next-edit predictions (falls back to direct launch) if (this.config.useSweep) { - const claudeBin = this.resolveClaudeBin(); - if (!claudeBin) { - console.error(chalk.red('Claude CLI not found.')); - process.exit(1); - return; - } console.log( chalk.cyan('[Sweep] Launching Claude with prediction bar...') ); @@ -947,6 +1158,16 @@ class ClaudeSM { claudeBin, claudeArgs, initialInput: initialInput || undefined, + onExit: async (exitCode) => { + await this.finalizeSession('session_end', exitCode); + }, + onSignal: async (signal) => { + await this.finalizeSession( + signal === 'SIGINT' ? 'session_interrupt' : 'session_terminate', + null, + { signal } + ); + }, }); // PTY wrapper is now running — it calls process.exit() on child exit. // Return to prevent falling through to the fallback-monitor path, @@ -974,21 +1195,6 @@ class ClaudeSM { console.log(chalk.gray('Starting Claude...')); console.log(chalk.gray('─'.repeat(42))); - const claudeBin = this.resolveClaudeBin(); - if (!claudeBin) { - console.error(chalk.red('āŒ Claude CLI not found.')); - console.log( - chalk.gray( - ' Install Claude CLI or set an override:\n' + - ' export CLAUDE_BIN=/path/to/claude\n' + - ' claude-sm --help\n\n' + - ' Ensure PATH includes npm global bin (npm bin -g).' 
- ) - ); - process.exit(1); - return; - } - // Setup fallback monitor for automatic Qwen switching on Claude failures const fallbackMonitor = new FallbackMonitor({ enabled: true, @@ -1037,9 +1243,6 @@ class ClaudeSM { // Handle exit claude.on('exit', async (code) => { - // Stop GEPA watcher if running - this.stopGEPAWatcher(); - // Check if we were in fallback mode const status = fallbackMonitor.getStatus(); if (status.inFallback) { @@ -1049,63 +1252,21 @@ class ClaudeSM { ) ); } - // Save final context - this.saveContext('Claude session ended', { - action: 'session_end', - exitCode: code, - }); - - // Sync Linear on exit if configured - if (process.env['LINEAR_API_KEY']) { - try { - execSync('stackmemory linear sync', { - stdio: 'ignore', - timeout: 10000, - }); - } catch { - // Non-fatal: don't block exit - } - } - - // End tracing and show summary if enabled - if (this.config.tracingEnabled) { - const summary = trace.getExecutionSummary(); - console.log(); - console.log(chalk.gray('─'.repeat(42))); - console.log(chalk.blue('Debug Trace Summary:')); - console.log(chalk.gray(summary)); - } - - // Bell notification when done - if (this.config.notifyOnDone) { - this.notifyDone(code); - } - - // Offer to clean up worktree - if (this.config.worktreePath) { - console.log(); - console.log(chalk.gray('─'.repeat(42))); - console.log(chalk.blue('Session ended in worktree:')); - console.log(chalk.gray(` ${this.config.worktreePath}`)); - console.log(); - console.log(chalk.gray('To remove worktree: gd_claude')); - console.log(chalk.gray('To merge to main: cwm')); - } - + await this.finalizeSession('session_end', code); process.exit(code || 0); }); // Handle signals - process.on('SIGINT', () => { - this.saveContext('Claude session interrupted', { - action: 'session_interrupt', + process.on('SIGINT', async () => { + await this.finalizeSession('session_interrupt', null, { + signal: 'SIGINT', }); claude.kill('SIGINT'); }); - process.on('SIGTERM', () => { - 
this.saveContext('Claude session terminated', { - action: 'session_terminate', + process.on('SIGTERM', async () => { + await this.finalizeSession('session_terminate', null, { + signal: 'SIGTERM', }); claude.kill('SIGTERM'); }); diff --git a/src/cli/codex-sm.ts b/src/cli/codex-sm.ts index 1045043d..2a3a8d84 100644 --- a/src/cli/codex-sm.ts +++ b/src/cli/codex-sm.ts @@ -13,6 +13,10 @@ import { program } from 'commander'; import { v4 as uuidv4 } from 'uuid'; import chalk from 'chalk'; import { initializeTracing, trace } from '../core/trace/index.js'; +import { + canonicalStateStore, + projectIdFromIdentifier, +} from '../core/shared-state/canonical-store.js'; interface CodexConfig { instanceId: string; @@ -29,6 +33,9 @@ interface CodexConfig { class CodexSM { private config: CodexConfig; private stackmemoryPath: string; + private sessionId: string; + private ownsSession: boolean; + private sessionEnded: boolean; constructor() { this.config = { @@ -40,6 +47,9 @@ class CodexSM { }; this.stackmemoryPath = this.findStackMemory(); + this.sessionId = process.env['STACKMEMORY_SESSION'] || uuidv4(); + this.ownsSession = !process.env['STACKMEMORY_SESSION']; + this.sessionEnded = false; } private getRepoRoot(): string | null { @@ -113,6 +123,24 @@ class CodexSM { } } + private getProjectId(): string | undefined { + const root = this.getRepoRoot() || process.cwd(); + + try { + const remote = execSync('git config --get remote.origin.url', { + cwd: root, + encoding: 'utf8', + }).trim(); + if (remote) { + return projectIdFromIdentifier(remote); + } + } catch { + // Fall back to current path below. 
+ } + + return projectIdFromIdentifier(root); + } + private hasUncommittedChanges(): boolean { try { const status = execSync('git status --porcelain', { encoding: 'utf8' }); @@ -220,18 +248,17 @@ class CodexSM { if (!this.config.contextEnabled) return; try { console.log(chalk.blue('šŸ“š Loading previous context...')); - const cmd = `${this.stackmemoryPath} context list --limit 5 --format json`; - const output = execSync(cmd, { encoding: 'utf8' }); - const contexts = JSON.parse(output); - if (Array.isArray(contexts) && contexts.length > 0) { - console.log(chalk.gray('Recent context loaded:')); - contexts.forEach( - (ctx: { message: string; metadata?: { timestamp?: string } }) => { - console.log( - chalk.gray(` - ${ctx.message} (${ctx.metadata?.timestamp})`) - ); - } - ); + const cmd = `${this.stackmemoryPath} context show`; + const output = execSync(cmd, { + encoding: 'utf8', + stdio: ['pipe', 'pipe', 'pipe'], + }); + const lines = output + .trim() + .split('\n') + .filter((line) => line.trim()); + if (lines.length > 3) { + console.log(chalk.gray('Context stack loaded')); } } catch { // ignore @@ -247,6 +274,111 @@ class CodexSM { } } + private async publishSessionStart(): Promise { + const projectPath = process.cwd(); + const projectId = this.getProjectId(); + const branch = this.isGitRepo() ? 
this.getCurrentBranch() : undefined; + + await canonicalStateStore.upsertSession({ + sessionId: this.sessionId, + tool: 'codex', + projectId, + projectPath, + branch, + instanceId: this.config.instanceId, + metadata: { + task: this.config.task, + }, + }); + + await canonicalStateStore.upsertInstance({ + instanceId: this.config.instanceId, + tool: 'codex', + sessionId: this.sessionId, + projectId, + projectPath, + branch, + worktreePath: this.config.worktreePath, + pid: process.pid, + status: 'active', + metadata: { + task: this.config.task, + }, + }); + + await canonicalStateStore.appendEvent({ + type: 'session_start', + tool: 'codex', + sessionId: this.sessionId, + instanceId: this.config.instanceId, + projectId, + projectPath, + branch, + payload: { + task: this.config.task, + worktreePath: this.config.worktreePath, + }, + }); + + const claimResult = await canonicalStateStore.claimPaths({ + tool: 'codex', + sessionId: this.sessionId, + instanceId: this.config.instanceId, + projectId, + projectPath, + branch, + paths: [], + metadata: { + task: this.config.task, + scope: 'branch', + }, + }); + + if (claimResult.conflicts.length > 0) { + console.log(chalk.yellow('āš ļø Shared state conflict detected')); + for (const conflict of claimResult.conflicts.slice(0, 3)) { + console.log( + chalk.gray( + ` Claim ${conflict.claimId.slice(0, 8)} already owns ${conflict.branch || 'overlapping work'}` + ) + ); + } + } + } + + private async publishSessionEnd( + eventType: 'session_end' | 'session_interrupt' | 'session_terminate', + payload: Record = {} + ): Promise { + if (this.sessionEnded) { + return; + } + this.sessionEnded = true; + + const projectPath = process.cwd(); + const projectId = this.getProjectId(); + const branch = this.isGitRepo() ? 
this.getCurrentBranch() : undefined; + + await canonicalStateStore.appendEvent({ + type: eventType, + tool: 'codex', + sessionId: this.sessionId, + instanceId: this.config.instanceId, + projectId, + projectPath, + branch, + payload, + }); + await canonicalStateStore.releaseClaims({ + instanceId: this.config.instanceId, + reason: eventType, + }); + await canonicalStateStore.endInstance(this.config.instanceId); + if (this.ownsSession) { + await canonicalStateStore.endSession(this.sessionId); + } + } + public async run(args: string[]): Promise { const codexArgs: string[] = []; let i = 0; @@ -350,10 +482,13 @@ class CodexSM { this.loadContext(); process.env['CODEX_INSTANCE_ID'] = this.config.instanceId; + process.env['STACKMEMORY_SESSION'] = this.sessionId; if (this.config.worktreePath) process.env['CODEX_WORKTREE_PATH'] = this.config.worktreePath; + await this.publishSessionStart(); console.log(chalk.gray(`šŸ¤– Instance ID: ${this.config.instanceId}`)); + console.log(chalk.gray(`🧠 Session ID: ${this.sessionId.slice(0, 8)}`)); console.log(chalk.gray(`šŸ“ Working in: ${process.cwd()}`)); console.log(); @@ -401,11 +536,14 @@ class CodexSM { process.exit(1); }); - child.on('exit', (code) => { + child.on('exit', async (code) => { this.saveContext('Codex session ended', { action: 'session_end', exitCode: code, }); + await this.publishSessionEnd('session_end', { + exitCode: code, + }); // Sync Linear on exit — let sync command handle auth detection // (supports API key env var, .env files, and OAuth tokens) @@ -434,17 +572,19 @@ class CodexSM { process.exit(code || 0); }); - process.on('SIGINT', () => { + process.on('SIGINT', async () => { this.saveContext('Codex session interrupted', { action: 'session_interrupt', }); + await this.publishSessionEnd('session_interrupt'); child.kill('SIGINT'); }); - process.on('SIGTERM', () => { + process.on('SIGTERM', async () => { this.saveContext('Codex session terminated', { action: 'session_terminate', }); + await 
this.publishSessionEnd('session_terminate'); child.kill('SIGTERM'); }); } diff --git a/src/cli/commands/daemon.ts b/src/cli/commands/daemon.ts index 71f43f03..ab0a209c 100644 --- a/src/cli/commands/daemon.ts +++ b/src/cli/commands/daemon.ts @@ -140,6 +140,7 @@ The daemon provides: const services = []; if (newStatus.services.context.enabled) services.push('context'); if (newStatus.services.linear.enabled) services.push('linear'); + if (newStatus.services.github?.enabled) services.push('github'); if (newStatus.services.maintenance?.enabled) services.push('maintenance'); if (newStatus.services.memory?.enabled) services.push('memory'); @@ -303,6 +304,26 @@ The daemon provides: } } + const gh = status.services.github; + if (gh) { + console.log( + ` GitHub: ${gh.enabled ? chalk.green('Enabled') : chalk.gray('Disabled')}` + ); + if (gh.enabled) { + console.log( + chalk.gray(` Interval: ${config.github.interval} min`) + ); + if (gh.syncCount) { + console.log(chalk.gray(` Refreshes: ${gh.syncCount}`)); + } + if (gh.lastProjectionState) { + console.log( + chalk.gray(` Last PR state: ${gh.lastProjectionState}`) + ); + } + } + } + // Maintenance service const maint = status.services.maintenance; if (maint) { @@ -844,6 +865,27 @@ function getServiceHealthChecks( }); } + const gh = status.services.github; + if (gh?.enabled) { + const intervalMs = config.github.interval * 60_000; + const overdue = gh.lastRun + ? Date.now() - gh.lastRun > intervalMs * 2 + : false; + checks.push({ + name: 'GitHub Service', + status: overdue ? 'warn' : 'ok', + detail: gh.lastRun + ? `Last refresh: ${formatTimeAgo(gh.lastRun)} | Refreshes: ${gh.syncCount ?? 0}${gh.lastProjectionState ? 
` | Last PR state: ${gh.lastProjectionState}` : ''}` + : `Enabled (interval: ${config.github.interval}m) | No refreshes yet`, + }); + } else { + checks.push({ + name: 'GitHub Service', + status: 'ok', + detail: 'Disabled', + }); + } + // Maintenance service const maint = status.services.maintenance; if (maint?.enabled) { @@ -980,6 +1022,11 @@ function buildHealthReport( enabled: status.services.linear.enabled, lastRun: status.services.linear.lastRun, }, + { + key: 'github', + enabled: status.services.github?.enabled ?? false, + lastRun: status.services.github?.lastRun, + }, { key: 'maintenance', enabled: status.services.maintenance?.enabled ?? false, diff --git a/src/cli/commands/state.ts b/src/cli/commands/state.ts new file mode 100644 index 00000000..d015a0dd --- /dev/null +++ b/src/cli/commands/state.ts @@ -0,0 +1,380 @@ +import { Command } from 'commander'; +import chalk from 'chalk'; +import { + canonicalStateStore, + type SharedToolName, +} from '../../core/shared-state/canonical-store.js'; +import { + getCurrentRepoGitHubInfo, + refreshCurrentRepoPullRequestState, +} from '../../integrations/github/pr-state.js'; + +function parseJsonObject(input: string | undefined): Record { + if (!input) { + return {}; + } + + const parsed = JSON.parse(input); + if (!parsed || typeof parsed !== 'object' || Array.isArray(parsed)) { + throw new Error('Expected a JSON object'); + } + + return parsed as Record; +} + +export function createStateCommand(): Command { + const cmd = new Command('state').description( + 'Manage canonical user-scoped shared state across instances and sessions' + ); + + const instance = cmd.command('instance').description('Manage instance state'); + instance + .command('upsert') + .requiredOption('--id ', 'Instance identifier') + .requiredOption( + '--tool ', + 'Tool name (claude|codex|opencode|stackmemory)' + ) + .option('--session ', 'Session identifier') + .option('--project ', 'Project identifier') + .option('--project-path ', 'Project path') + 
.option('--branch ', 'Git branch') + .option('--worktree-path ', 'Worktree path') + .option('--pid ', 'Process id') + .option('--status ', 'Status', 'active') + .option('--metadata ', 'Metadata JSON object') + .action(async (options) => { + const record = await canonicalStateStore.upsertInstance({ + instanceId: options.id, + tool: options.tool as SharedToolName, + sessionId: options.session, + projectId: options.project, + projectPath: options.projectPath, + branch: options.branch, + worktreePath: options.worktreePath, + pid: options.pid ? Number(options.pid) : undefined, + status: options.status, + metadata: parseJsonObject(options.metadata), + }); + + console.log(JSON.stringify(record, null, 2)); + }); + + instance + .command('end') + .requiredOption('--id ', 'Instance identifier') + .action(async (options) => { + await canonicalStateStore.endInstance(options.id); + console.log(chalk.green(`Ended instance ${options.id}`)); + }); + + const session = cmd.command('session').description('Manage session state'); + session + .command('upsert') + .requiredOption('--id ', 'Session identifier') + .requiredOption( + '--tool ', + 'Tool name (claude|codex|opencode|stackmemory)' + ) + .option('--project ', 'Project identifier') + .option('--project-path ', 'Project path') + .option('--branch ', 'Git branch') + .option('--instance ', 'Associated instance identifier') + .option('--status ', 'Status', 'active') + .option('--metadata ', 'Metadata JSON object') + .action(async (options) => { + const record = await canonicalStateStore.upsertSession({ + sessionId: options.id, + tool: options.tool as SharedToolName, + projectId: options.project, + projectPath: options.projectPath, + branch: options.branch, + instanceId: options.instance, + status: options.status, + metadata: parseJsonObject(options.metadata), + }); + + console.log(JSON.stringify(record, null, 2)); + }); + + session + .command('end') + .requiredOption('--id ', 'Session identifier') + .option('--status ', 'Status', 
'closed') + .action(async (options) => { + await canonicalStateStore.endSession(options.id, options.status); + console.log(chalk.green(`Ended session ${options.id}`)); + }); + + cmd + .command('event') + .description('Append a shared-state event') + .requiredOption('--type ', 'Event type') + .option('--tool ', 'Tool name (claude|codex|opencode|stackmemory)') + .option('--instance ', 'Instance identifier') + .option('--session ', 'Session identifier') + .option('--project ', 'Project identifier') + .option('--project-path ', 'Project path') + .option('--branch ', 'Git branch') + .option('--payload ', 'Payload JSON object') + .action(async (options) => { + const event = await canonicalStateStore.appendEvent({ + type: options.type, + tool: options.tool as SharedToolName | undefined, + instanceId: options.instance, + sessionId: options.session, + projectId: options.project, + projectPath: options.projectPath, + branch: options.branch, + payload: parseJsonObject(options.payload), + }); + + console.log(JSON.stringify(event, null, 2)); + }); + + cmd + .command('show') + .description('Show canonical shared state for a project') + .option('--project ', 'Project identifier') + .option('--project-path ', 'Project path') + .option('--limit ', 'Recent event limit', '10') + .option('--json', 'Emit JSON output') + .action(async (options) => { + const summary = await canonicalStateStore.getProjectSummary({ + projectId: options.project, + projectPath: options.projectPath, + eventLimit: Number(options.limit), + }); + + if (options.json) { + console.log(JSON.stringify(summary, null, 2)); + return; + } + + console.log(chalk.bold('Canonical Shared State')); + console.log( + ` Active sessions: ${summary.activeSessions.length} | Active instances: ${summary.activeInstances.length} | Active claims: ${summary.activeClaims.length}` + ); + if (summary.projectId) { + console.log(` Project: ${summary.projectId}`); + } + if (summary.activeSessions.length > 0) { + 
console.log(chalk.bold('\nSessions')); + for (const record of summary.activeSessions) { + console.log( + ` ${record.sessionId.slice(0, 8)} ${record.tool} ${record.branch || ''}`.trim() + ); + } + } + if (summary.activeInstances.length > 0) { + console.log(chalk.bold('\nInstances')); + for (const record of summary.activeInstances) { + console.log( + ` ${record.instanceId} ${record.tool} ${record.branch || ''}`.trim() + ); + } + } + if (summary.activeClaims.length > 0) { + console.log(chalk.bold('\nClaims')); + for (const claim of summary.activeClaims) { + const scopes = [ + claim.branch ? `branch:${claim.branch}` : '', + ...claim.paths.map((item) => `path:${item}`), + ] + .filter(Boolean) + .join(', '); + console.log( + ` ${claim.claimId.slice(0, 8)} ${claim.tool} ${scopes || '(no scope)'}`.trim() + ); + } + } + if (summary.recentEvents.length > 0) { + console.log(chalk.bold('\nRecent events')); + for (const event of summary.recentEvents) { + console.log( + ` ${event.type} ${new Date(event.timestamp).toISOString()}` + ); + } + } + }); + + const claims = cmd + .command('claims') + .description('Manage shared ownership claims'); + + claims + .command('claim') + .requiredOption( + '--tool ', + 'Tool name (claude|codex|opencode|stackmemory)' + ) + .option('--session ', 'Session identifier') + .option('--instance ', 'Instance identifier') + .option('--project ', 'Project identifier') + .option('--project-path ', 'Project path') + .option('--branch ', 'Git branch') + .option('--path ', 'Claimed file or directory path(s)') + .option('--ttl-ms ', 'Time to live in milliseconds', '86400000') + .option('--metadata ', 'Metadata JSON object') + .option('--json', 'Emit JSON output') + .action(async (options) => { + const result = await canonicalStateStore.claimPaths({ + tool: options.tool as SharedToolName, + sessionId: options.session, + instanceId: options.instance, + projectId: options.project, + projectPath: options.projectPath, + branch: options.branch, + paths: options.path 
|| [], + ttlMs: Number(options.ttlMs), + metadata: parseJsonObject(options.metadata), + }); + + if (options.json) { + console.log(JSON.stringify(result, null, 2)); + return; + } + + console.log(chalk.green(`Claimed ${result.record.claimId.slice(0, 8)}`)); + if (result.conflicts.length > 0) { + console.log(chalk.yellow(`Conflicts: ${result.conflicts.length}`)); + for (const conflict of result.conflicts) { + console.log( + ` ${conflict.claimId.slice(0, 8)} ${conflict.branch || ''} ${conflict.paths.join(', ')}`.trim() + ); + } + } + }); + + claims + .command('release') + .option('--claim ', 'Claim identifier') + .option('--session ', 'Session identifier') + .option('--instance ', 'Instance identifier') + .option('--project ', 'Project identifier') + .option('--project-path ', 'Project path') + .option('--branch ', 'Git branch') + .option('--reason ', 'Release reason') + .action(async (options) => { + const released = await canonicalStateStore.releaseClaims({ + claimId: options.claim, + sessionId: options.session, + instanceId: options.instance, + projectId: options.project, + projectPath: options.projectPath, + branch: options.branch, + reason: options.reason, + }); + console.log(chalk.green(`Released ${released} claim(s)`)); + }); + + claims + .command('show') + .option('--project ', 'Project identifier') + .option('--project-path ', 'Project path') + .option('--all', 'Show released and expired claims too') + .option('--json', 'Emit JSON output') + .action(async (options) => { + const records = await canonicalStateStore.listPathClaims({ + projectId: options.project, + projectPath: options.projectPath, + activeOnly: !options.all, + }); + + if (options.json) { + console.log(JSON.stringify(records, null, 2)); + return; + } + + if (records.length === 0) { + console.log(chalk.yellow('No claims found')); + return; + } + + for (const claim of records) { + const scopes = [ + claim.branch ? 
`branch:${claim.branch}` : '', + ...claim.paths.map((item) => `path:${item}`), + ] + .filter(Boolean) + .join(', '); + console.log( + `${claim.claimId.slice(0, 8)} ${claim.status} ${claim.tool} ${scopes || '(no scope)'}` + ); + } + }); + + const github = cmd.command('github').description('GitHub projection state'); + + github + .command('refresh') + .description('Refresh current repo branch PR state from GitHub CLI') + .option('--json', 'Emit JSON output') + .action(async (options) => { + const projection = await refreshCurrentRepoPullRequestState(); + if (!projection) { + console.log( + chalk.yellow( + 'No GitHub PR projection available for current repo/branch' + ) + ); + return; + } + + if (options.json) { + console.log(JSON.stringify(projection, null, 2)); + return; + } + + console.log(chalk.green(`Refreshed PR #${projection.prNumber}`)); + console.log(`${projection.state} ${projection.title}`); + console.log(projection.url); + }); + + github + .command('show') + .description('Show cached current repo branch PR projection') + .option('--json', 'Emit JSON output') + .action(async (options) => { + const info = getCurrentRepoGitHubInfo(); + if (!info) { + console.log(chalk.yellow('Not in a GitHub repository')); + return; + } + + const projection = await canonicalStateStore.getGitHubPullRequest({ + repo: info.repo, + branch: info.branch, + }); + if (!projection) { + console.log( + chalk.yellow('No cached GitHub PR projection for current branch') + ); + return; + } + + if (options.json) { + console.log(JSON.stringify(projection, null, 2)); + return; + } + + console.log(chalk.bold(`PR #${projection.prNumber}`)); + console.log(`${projection.state} ${projection.title}`); + console.log(`Repo: ${projection.repo}`); + console.log( + `Branch: ${projection.headRefName} -> ${projection.baseRefName}` + ); + if (projection.reviewDecision) { + console.log(`Review: ${projection.reviewDecision}`); + } + if (projection.statusCheckRollup) { + console.log(`Checks: 
${projection.statusCheckRollup}`); + } + console.log(`Synced: ${new Date(projection.lastSyncedAt).toISOString()}`); + console.log(projection.url); + }); + + return cmd; +} + +export default createStateCommand; diff --git a/src/cli/index.ts b/src/cli/index.ts index deb6bf51..7ce66d2c 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -61,6 +61,7 @@ import { createPingCommand } from './commands/ping.js'; import { createAuditCommand } from './commands/audit.js'; import { createStatsCommand } from './commands/stats.js'; import { createBenchCommand } from './commands/bench.js'; +import { createStateCommand } from './commands/state.js'; import { createDigestCommands } from './commands/digest.js'; import { createTeamCommands } from './commands/team.js'; import { createDesiresCommands } from './commands/desires.js'; @@ -75,7 +76,12 @@ import chalk from 'chalk'; import * as fs from 'fs'; import * as path from 'path'; import { filterPending } from '../integrations/mcp/pending-utils.js'; +import { + getCurrentRepoGitHubInfo, + refreshCurrentRepoPullRequestState, +} from '../integrations/github/pr-state.js'; import { ProjectManager } from '../core/projects/project-manager.js'; +import { canonicalStateStore } from '../core/shared-state/canonical-store.js'; import { join } from 'path'; import { existsSync, mkdirSync } from 'fs'; import type { @@ -272,6 +278,28 @@ program projectPath: projectRoot, sessionId: options.session, }); + const sharedProjectState = await canonicalStateStore.getProjectSummary({ + projectId: session.projectId, + projectPath: projectRoot, + eventLimit: 5, + }); + const githubInfo = getCurrentRepoGitHubInfo(projectRoot); + let githubProjection = + githubInfo && + (await canonicalStateStore.getGitHubPullRequest({ + repo: githubInfo.repo, + branch: githubInfo.branch, + })); + + if ( + githubInfo && + (!githubProjection || + Date.now() - githubProjection.lastSyncedAt > 2 * 60 * 1000) + ) { + githubProjection = + (await 
refreshCurrentRepoPullRequestState(projectRoot)) || + githubProjection; + } // Auto-discover shared context on startup const contextDiscovery = await sharedContextLayer.autoDiscoverContext(); @@ -372,6 +400,24 @@ program console.log( ` Cached contexts: ${contextCount.count || 0} (global)` ); + console.log( + ` Shared sessions: ${sharedProjectState.activeSessions.length}` + ); + console.log( + ` Shared instances: ${sharedProjectState.activeInstances.length}` + ); + console.log( + ` Shared claims: ${sharedProjectState.activeClaims.length}` + ); + + const branchClaim = sharedProjectState.activeClaims.find( + (claim) => claim.branch && claim.branch === session.branch + ); + if (branchClaim) { + console.log( + ` Branch owner: ${branchClaim.tool} ${branchClaim.instanceId || branchClaim.sessionId || branchClaim.claimId.slice(0, 8)}` + ); + } // Show recent activity const recentFrames = db @@ -401,6 +447,22 @@ program }); } + if (githubProjection) { + console.log(`\n GitHub PR:`); + console.log( + ` #${githubProjection.prNumber} ${githubProjection.state} ${githubProjection.title}` + ); + console.log( + ` ${githubProjection.headRefName} -> ${githubProjection.baseRefName}` + ); + if (githubProjection.reviewDecision) { + console.log(` Review: ${githubProjection.reviewDecision}`); + } + if (githubProjection.statusCheckRollup) { + console.log(` Checks: ${githubProjection.statusCheckRollup}`); + } + } + console.log(`\n Current Session:`); console.log(` Stack depth: ${stackDepth}`); console.log(` Active frames: ${activeFrames.length}`); @@ -770,6 +832,7 @@ program.addCommand(createModelCommand()); program.addCommand(createAuditCommand()); program.addCommand(createStatsCommand()); program.addCommand(createBenchCommand()); +program.addCommand(createStateCommand()); program.addCommand(createDigestCommands()); program.addCommand(createTeamCommands()); program.addCommand(createDesiresCommands()); diff --git a/src/core/session/session-manager.ts b/src/core/session/session-manager.ts 
index 8b61ee6e..5880219b 100644 --- a/src/core/session/session-manager.ts +++ b/src/core/session/session-manager.ts @@ -9,6 +9,7 @@ import * as path from 'path'; import * as _crypto from 'crypto'; import { logger } from '../monitoring/logger.js'; import { SystemError, ErrorCode } from '../errors/index.js'; +import { canonicalStateStore } from '../shared-state/canonical-store.js'; // Type-safe environment variable access function _getEnv(key: string, defaultValue?: string): string { const value = process.env[key]; @@ -169,6 +170,16 @@ export class SessionManager { await this.saveSession(session); await this.setProjectActiveSession(params.projectId, session.sessionId); + await canonicalStateStore.appendEvent({ + type: 'session_created', + tool: 'stackmemory', + sessionId: session.sessionId, + projectId: session.projectId, + branch: session.branch, + payload: { + state: session.state, + }, + }); // Set as current session this.currentSession = session; @@ -209,6 +220,14 @@ export class SessionManager { `${session.sessionId}.json` ); await fs.writeFile(sessionPath, JSON.stringify(session, null, 2)); + await canonicalStateStore.upsertSession({ + sessionId: session.sessionId, + tool: 'stackmemory', + projectId: session.projectId, + branch: session.branch, + status: session.state, + metadata: session.metadata, + }); } async suspendSession(sessionId?: string): Promise { @@ -260,6 +279,15 @@ export class SessionManager { ); await fs.rename(sessionPath, historyPath); + await canonicalStateStore.endSession(session.sessionId, 'closed'); + await canonicalStateStore.appendEvent({ + type: 'session_closed', + tool: 'stackmemory', + sessionId: session.sessionId, + projectId: session.projectId, + branch: session.branch, + payload: {}, + }); } } diff --git a/src/core/shared-state/canonical-store.ts b/src/core/shared-state/canonical-store.ts new file mode 100644 index 00000000..77675aa6 --- /dev/null +++ b/src/core/shared-state/canonical-store.ts @@ -0,0 +1,905 @@ +import * as fs from 
'fs/promises'; +import * as path from 'path'; +import * as os from 'os'; +import { createHash, randomUUID } from 'crypto'; + +export type SharedToolName = 'stackmemory' | 'claude' | 'codex' | 'opencode'; + +export interface SharedInstanceRecord { + instanceId: string; + tool: SharedToolName; + sessionId?: string; + projectId?: string; + projectPath?: string; + branch?: string; + worktreePath?: string; + pid?: number; + startedAt: number; + lastSeenAt: number; + status: 'active' | 'ended'; + metadata?: Record; +} + +export interface SharedSessionRecord { + sessionId: string; + tool: SharedToolName; + projectId?: string; + projectPath?: string; + branch?: string; + startedAt: number; + lastSeenAt: number; + status: 'active' | 'suspended' | 'closed'; + instanceIds: string[]; + metadata?: Record; +} + +export interface SharedStateEvent { + id: string; + type: string; + timestamp: number; + tool?: SharedToolName; + instanceId?: string; + sessionId?: string; + projectId?: string; + projectPath?: string; + branch?: string; + payload: Record; +} + +export interface SharedProjectSummary { + projectId?: string; + projectPath?: string; + activeSessions: SharedSessionRecord[]; + activeInstances: SharedInstanceRecord[]; + activeClaims: SharedPathClaimRecord[]; + recentEvents: SharedStateEvent[]; +} + +export interface GitHubPullRequestProjection { + repo: string; + branch: string; + projectId?: string; + projectPath?: string; + prNumber: number; + title: string; + state: 'OPEN' | 'CLOSED' | 'MERGED'; + isDraft: boolean; + url: string; + baseRefName: string; + headRefName: string; + headRefOid?: string; + mergedAt?: string; + updatedAt: string; + reviewDecision?: string; + statusCheckRollup?: string; + lastSyncedAt: number; +} + +export interface SharedPathClaimRecord { + claimId: string; + tool: SharedToolName; + sessionId?: string; + instanceId?: string; + projectId?: string; + projectPath?: string; + branch?: string; + paths: string[]; + status: 'active' | 'released' | 
'expired'; + claimedAt: number; + lastSeenAt: number; + expiresAt: number; + releasedAt?: number; + releaseReason?: string; + metadata?: Record; +} + +export interface SharedPathClaimConflict { + claimId: string; + branch?: string; + paths: string[]; + sessionId?: string; + instanceId?: string; +} + +export interface SharedPathClaimResult { + record: SharedPathClaimRecord; + conflicts: SharedPathClaimConflict[]; +} + +function getBaseStateDir(): string { + const xdgState = process.env['XDG_STATE_HOME']?.trim(); + if (xdgState) { + return path.join(xdgState, 'stackmemory'); + } + + const homeDir = + process.env['HOME'] || process.env['USERPROFILE'] || os.homedir(); + return path.join(homeDir, '.stackmemory'); +} + +function projectIdFromIdentifier(identifier: string): string { + return identifier + .replace(/\.git$/, '') + .replace(/[^a-zA-Z0-9-]/g, '-') + .toLowerCase() + .slice(-50); +} + +function normalizeProjectId( + projectId?: string, + projectPath?: string +): string | undefined { + if (projectId && projectId.trim()) { + return projectIdFromIdentifier(projectId.trim()); + } + if (!projectPath || !projectPath.trim()) { + return undefined; + } + + return createHash('sha1') + .update(projectPath.trim().toLowerCase()) + .digest('hex') + .slice(0, 16); +} + +async function pathExists(targetPath: string): Promise { + try { + await fs.access(targetPath); + return true; + } catch { + return false; + } +} + +export class CanonicalStateStore { + private rootDir: string; + + constructor(rootDir: string = path.join(getBaseStateDir(), 'shared-state')) { + this.rootDir = rootDir; + } + + getRootDir(): string { + return this.rootDir; + } + + async initialize(): Promise { + await fs.mkdir(this.getInstancesDir(), { recursive: true }); + await fs.mkdir(this.getSessionsDir(), { recursive: true }); + await fs.mkdir(this.getEventsDir(), { recursive: true }); + await fs.mkdir(this.getGithubDir(), { recursive: true }); + await fs.mkdir(this.getClaimsDir(), { recursive: true }); + 
} + + async upsertInstance( + input: Omit & { + startedAt?: number; + lastSeenAt?: number; + } + ): Promise { + await this.initialize(); + + const filePath = this.getInstanceFile(input.instanceId); + const existing = await this.readJsonFile(filePath); + const now = Date.now(); + + const record: SharedInstanceRecord = { + instanceId: input.instanceId, + tool: input.tool, + sessionId: input.sessionId ?? existing?.sessionId, + projectId: + normalizeProjectId(input.projectId, input.projectPath) ?? + existing?.projectId, + projectPath: input.projectPath ?? existing?.projectPath, + branch: input.branch ?? existing?.branch, + worktreePath: input.worktreePath ?? existing?.worktreePath, + pid: input.pid ?? existing?.pid, + startedAt: existing?.startedAt ?? input.startedAt ?? now, + lastSeenAt: input.lastSeenAt ?? now, + status: input.status ?? existing?.status ?? 'active', + metadata: { + ...(existing?.metadata || {}), + ...(input.metadata || {}), + }, + }; + + await this.writeJsonFile(filePath, record); + + if (record.sessionId) { + await this.upsertSession({ + sessionId: record.sessionId, + tool: record.tool, + projectId: record.projectId, + projectPath: record.projectPath, + branch: record.branch, + instanceId: record.instanceId, + metadata: record.metadata, + }); + } + + return record; + } + + async endInstance(instanceId: string): Promise { + await this.initialize(); + + const filePath = this.getInstanceFile(instanceId); + const existing = await this.readJsonFile(filePath); + if (!existing) { + return; + } + + const updated: SharedInstanceRecord = { + ...existing, + status: 'ended', + lastSeenAt: Date.now(), + }; + await this.writeJsonFile(filePath, updated); + } + + async upsertSession( + input: Omit< + SharedSessionRecord, + 'startedAt' | 'lastSeenAt' | 'instanceIds' | 'status' + > & { + startedAt?: number; + lastSeenAt?: number; + instanceId?: string; + instanceIds?: string[]; + status?: SharedSessionRecord['status']; + } + ): Promise { + await this.initialize(); + 
+ const filePath = this.getSessionFile(input.sessionId); + const existing = await this.readJsonFile(filePath); + const now = Date.now(); + const nextInstanceIds = new Set(existing?.instanceIds || []); + + if (input.instanceId) { + nextInstanceIds.add(input.instanceId); + } + for (const instanceId of input.instanceIds || []) { + nextInstanceIds.add(instanceId); + } + + const record: SharedSessionRecord = { + sessionId: input.sessionId, + tool: input.tool, + projectId: + normalizeProjectId(input.projectId, input.projectPath) ?? + existing?.projectId, + projectPath: input.projectPath ?? existing?.projectPath, + branch: input.branch ?? existing?.branch, + startedAt: existing?.startedAt ?? input.startedAt ?? now, + lastSeenAt: input.lastSeenAt ?? now, + status: input.status ?? existing?.status ?? 'active', + instanceIds: Array.from(nextInstanceIds), + metadata: { + ...(existing?.metadata || {}), + ...(input.metadata || {}), + }, + }; + + await this.writeJsonFile(filePath, record); + return record; + } + + async endSession( + sessionId: string, + status: SharedSessionRecord['status'] = 'closed' + ): Promise { + await this.initialize(); + + const filePath = this.getSessionFile(sessionId); + const existing = await this.readJsonFile(filePath); + if (!existing) { + return; + } + + const updated: SharedSessionRecord = { + ...existing, + status, + lastSeenAt: Date.now(), + }; + await this.writeJsonFile(filePath, updated); + } + + async saveGitHubPullRequest( + projection: GitHubPullRequestProjection + ): Promise { + await this.initialize(); + + const normalizedProjectId = + normalizeProjectId(projection.projectId, projection.projectPath) ?? 
+ projection.projectId; + const filePath = this.getGitHubPullRequestFile( + projection.repo, + projection.branch + ); + const record: GitHubPullRequestProjection = { + ...projection, + projectId: normalizedProjectId, + lastSyncedAt: projection.lastSyncedAt || Date.now(), + }; + await this.writeJsonFile(filePath, record); + return record; + } + + async claimPaths( + input: Omit< + SharedPathClaimRecord, + 'claimId' | 'claimedAt' | 'lastSeenAt' | 'expiresAt' | 'status' + > & { + claimId?: string; + ttlMs?: number; + lastSeenAt?: number; + expiresAt?: number; + status?: SharedPathClaimRecord['status']; + } + ): Promise { + await this.initialize(); + await this.cleanupExpiredClaims(); + + const now = input.lastSeenAt ?? Date.now(); + const existing = input.claimId + ? await this.readJsonFile( + this.getClaimFile(input.claimId) + ) + : null; + const record: SharedPathClaimRecord = { + claimId: input.claimId || randomUUID(), + tool: input.tool, + sessionId: input.sessionId ?? existing?.sessionId, + instanceId: input.instanceId ?? existing?.instanceId, + projectId: + normalizeProjectId(input.projectId, input.projectPath) ?? + existing?.projectId, + projectPath: input.projectPath ?? existing?.projectPath, + branch: input.branch ?? existing?.branch, + paths: Array.from( + new Set( + (input.paths ?? existing?.paths ?? []) + .map((item) => item.trim()) + .filter(Boolean) + ) + ), + status: input.status ?? 'active', + claimedAt: existing?.claimedAt ?? now, + lastSeenAt: now, + expiresAt: + input.expiresAt ?? + now + Math.max(1, input.ttlMs ?? 
24 * 60 * 60 * 1000), + metadata: { + ...(existing?.metadata || {}), + ...(input.metadata || {}), + }, + }; + + const conflicts = ( + await this.listPathClaims({ + projectId: record.projectId, + projectPath: record.projectPath, + activeOnly: true, + }) + ) + .filter((claim) => claim.claimId !== record.claimId) + .filter((claim) => this.claimsOverlap(record, claim)) + .map((claim) => ({ + claimId: claim.claimId, + branch: claim.branch, + paths: claim.paths, + sessionId: claim.sessionId, + instanceId: claim.instanceId, + })); + + await this.writeJsonFile(this.getClaimFile(record.claimId), record); + return { record, conflicts }; + } + + async releaseClaims(options: { + claimId?: string; + instanceId?: string; + sessionId?: string; + projectId?: string; + projectPath?: string; + branch?: string; + reason?: string; + }): Promise { + await this.initialize(); + + const now = Date.now(); + let released = 0; + const claims = await this.listPathClaims({ + projectId: options.projectId, + projectPath: options.projectPath, + activeOnly: false, + }); + + for (const claim of claims) { + if (claim.status !== 'active') { + continue; + } + if (options.claimId && claim.claimId !== options.claimId) { + continue; + } + if (options.instanceId && claim.instanceId !== options.instanceId) { + continue; + } + if (options.sessionId && claim.sessionId !== options.sessionId) { + continue; + } + if (options.branch && claim.branch !== options.branch) { + continue; + } + + await this.writeJsonFile(this.getClaimFile(claim.claimId), { + ...claim, + status: 'released', + lastSeenAt: now, + releasedAt: now, + releaseReason: options.reason || claim.releaseReason, + }); + released++; + } + + return released; + } + + async listPathClaims(options?: { + projectId?: string; + projectPath?: string; + activeOnly?: boolean; + }): Promise { + await this.initialize(); + await this.cleanupExpiredClaims(); + + const projectId = normalizeProjectId( + options?.projectId, + options?.projectPath + ); + const dir = 
this.getClaimsDir(); + const entries = await fs.readdir(dir); + const claims = await Promise.all( + entries + .filter((entry) => entry.endsWith('.json')) + .map((entry) => + this.readJsonFile(path.join(dir, entry)) + ) + ); + + return (claims.filter(Boolean) as SharedPathClaimRecord[]) + .filter((claim) => !options?.activeOnly || claim.status === 'active') + .filter( + (claim) => + !projectId || + this.matchesProject( + claim.projectId, + claim.projectPath, + projectId, + options?.projectPath + ) + ) + .sort((a, b) => b.lastSeenAt - a.lastSeenAt); + } + + async listActiveProjectPaths(): Promise { + await this.initialize(); + + const projectPaths = new Set(); + const sessions = await this.listSessions(); + for (const session of sessions) { + if (session.status === 'active' && session.projectPath) { + projectPaths.add(session.projectPath); + } + } + + const instances = await this.listInstances(); + for (const instance of instances) { + if (instance.status === 'active' && instance.projectPath) { + projectPaths.add(instance.projectPath); + } + } + + const pullRequests = await this.listGitHubPullRequests(); + for (const projection of pullRequests) { + if (projection.projectPath) { + projectPaths.add(projection.projectPath); + } + } + + return Array.from(projectPaths).sort(); + } + + async getGitHubPullRequest(options: { + repo: string; + branch: string; + }): Promise { + await this.initialize(); + return this.readJsonFile( + this.getGitHubPullRequestFile(options.repo, options.branch) + ); + } + + async listGitHubPullRequests(options?: { + projectId?: string; + projectPath?: string; + }): Promise { + await this.initialize(); + + const projectId = normalizeProjectId( + options?.projectId, + options?.projectPath + ); + const dir = this.getGithubDir(); + const entries = await fs.readdir(dir); + const records = await Promise.all( + entries + .filter((entry) => entry.endsWith('.json')) + .map((entry) => + this.readJsonFile(path.join(dir, entry)) + ) + ); + + return 
(records.filter(Boolean) as GitHubPullRequestProjection[]).filter( + (record) => + !projectId || + this.matchesProject( + record.projectId, + record.projectPath, + projectId, + options?.projectPath + ) + ); + } + + async appendEvent( + input: Omit & { + id?: string; + timestamp?: number; + } + ): Promise { + await this.initialize(); + + const event: SharedStateEvent = { + id: input.id || randomUUID(), + timestamp: input.timestamp || Date.now(), + type: input.type, + tool: input.tool, + instanceId: input.instanceId, + sessionId: input.sessionId, + projectId: normalizeProjectId(input.projectId, input.projectPath), + projectPath: input.projectPath, + branch: input.branch, + payload: input.payload || {}, + }; + + const date = new Date(event.timestamp).toISOString().slice(0, 10); + const eventFile = path.join(this.getEventsDir(), `${date}.jsonl`); + await fs.appendFile(eventFile, `${JSON.stringify(event)}\n`, 'utf8'); + + if (event.instanceId) { + const instance = await this.readJsonFile( + this.getInstanceFile(event.instanceId) + ); + if (instance) { + await this.upsertInstance({ + ...instance, + lastSeenAt: event.timestamp, + }); + } + } + + if (event.sessionId) { + const session = await this.readJsonFile( + this.getSessionFile(event.sessionId) + ); + if (session) { + await this.upsertSession({ + ...session, + lastSeenAt: event.timestamp, + instanceIds: session.instanceIds, + }); + } + } + + return event; + } + + async listSessions(): Promise { + await this.initialize(); + + const dir = this.getSessionsDir(); + const entries = await fs.readdir(dir); + const sessions = await Promise.all( + entries + .filter((entry) => entry.endsWith('.json')) + .map((entry) => + this.readJsonFile(path.join(dir, entry)) + ) + ); + + return sessions.filter(Boolean) as SharedSessionRecord[]; + } + + async listInstances(): Promise { + await this.initialize(); + + const dir = this.getInstancesDir(); + const entries = await fs.readdir(dir); + const instances = await Promise.all( + entries + 
.filter((entry) => entry.endsWith('.json')) + .map((entry) => + this.readJsonFile(path.join(dir, entry)) + ) + ); + + return instances.filter(Boolean) as SharedInstanceRecord[]; + } + + async getProjectSummary(options: { + projectId?: string; + projectPath?: string; + eventLimit?: number; + }): Promise { + await this.initialize(); + + const projectId = normalizeProjectId( + options.projectId, + options.projectPath + ); + const sessions = (await this.listSessions()).filter( + (session) => + session.status === 'active' && + this.matchesProject( + session.projectId, + session.projectPath, + projectId, + options.projectPath + ) + ); + const instances = (await this.listInstances()).filter( + (instance) => + instance.status === 'active' && + this.matchesProject( + instance.projectId, + instance.projectPath, + projectId, + options.projectPath + ) + ); + const activeClaims = await this.listPathClaims({ + projectId, + projectPath: options.projectPath, + activeOnly: true, + }); + + const recentEvents = await this.listRecentEvents({ + projectId, + projectPath: options.projectPath, + limit: options.eventLimit || 10, + }); + + return { + projectId, + projectPath: options.projectPath, + activeSessions: sessions.sort((a, b) => b.lastSeenAt - a.lastSeenAt), + activeInstances: instances.sort((a, b) => b.lastSeenAt - a.lastSeenAt), + activeClaims, + recentEvents, + }; + } + + private async cleanupExpiredClaims(): Promise { + const dir = this.getClaimsDir(); + const entries = await fs.readdir(dir).catch(() => [] as string[]); + const now = Date.now(); + + for (const entry of entries) { + if (!entry.endsWith('.json')) { + continue; + } + const filePath = path.join(dir, entry); + const claim = await this.readJsonFile(filePath); + if (!claim || claim.status !== 'active' || claim.expiresAt > now) { + continue; + } + await this.writeJsonFile(filePath, { + ...claim, + status: 'expired', + releasedAt: now, + releaseReason: claim.releaseReason || 'expired', + lastSeenAt: now, + }); + } + } + 
+ private claimsOverlap( + left: SharedPathClaimRecord, + right: SharedPathClaimRecord + ): boolean { + if ( + left.branch && + right.branch && + left.branch.trim() && + right.branch.trim() && + left.branch === right.branch + ) { + return true; + } + + for (const leftPath of left.paths) { + for (const rightPath of right.paths) { + if (this.pathsOverlap(leftPath, rightPath)) { + return true; + } + } + } + + return false; + } + + private pathsOverlap(left: string, right: string): boolean { + const normalizedLeft = this.normalizeClaimPath(left); + const normalizedRight = this.normalizeClaimPath(right); + + if (!normalizedLeft || !normalizedRight) { + return false; + } + if ( + normalizedLeft === '*' || + normalizedRight === '*' || + normalizedLeft === '.' || + normalizedRight === '.' + ) { + return true; + } + if (normalizedLeft === normalizedRight) { + return true; + } + + return ( + normalizedLeft.startsWith(`${normalizedRight}/`) || + normalizedRight.startsWith(`${normalizedLeft}/`) + ); + } + + private normalizeClaimPath(value: string): string { + return value + .trim() + .replace(/\\/g, '/') + .replace(/\/\*\*$/, '') + .replace(/\/$/, ''); + } + + async listRecentEvents(options: { + projectId?: string; + projectPath?: string; + limit?: number; + }): Promise { + await this.initialize(); + + const projectId = normalizeProjectId( + options.projectId, + options.projectPath + ); + const eventDir = this.getEventsDir(); + const eventFiles = (await fs.readdir(eventDir)) + .filter((entry) => entry.endsWith('.jsonl')) + .sort() + .reverse() + .slice(0, 7); + + const events: SharedStateEvent[] = []; + for (const entry of eventFiles) { + const filePath = path.join(eventDir, entry); + const content = await fs.readFile(filePath, 'utf8'); + const lines = content + .split('\n') + .map((line) => line.trim()) + .filter(Boolean); + + for (let i = lines.length - 1; i >= 0; i--) { + try { + const parsed = JSON.parse(lines[i]) as SharedStateEvent; + if ( + this.matchesProject( + 
parsed.projectId, + parsed.projectPath, + projectId, + options.projectPath + ) + ) { + events.push(parsed); + } + } catch { + // Skip malformed event lines. + } + if (events.length >= (options.limit || 20)) { + return events; + } + } + } + + return events; + } + + private matchesProject( + candidateProjectId: string | undefined, + candidateProjectPath: string | undefined, + projectId: string | undefined, + projectPath: string | undefined + ): boolean { + if (projectId && candidateProjectId) { + return candidateProjectId === projectId; + } + if (projectPath && candidateProjectPath) { + return candidateProjectPath === projectPath; + } + return !projectId && !projectPath; + } + + private getInstancesDir(): string { + return path.join(this.rootDir, 'instances'); + } + + private getSessionsDir(): string { + return path.join(this.rootDir, 'sessions'); + } + + private getEventsDir(): string { + return path.join(this.rootDir, 'events'); + } + + private getGithubDir(): string { + return path.join(this.rootDir, 'github', 'pull-requests'); + } + + private getClaimsDir(): string { + return path.join(this.rootDir, 'claims'); + } + + private getInstanceFile(instanceId: string): string { + return path.join(this.getInstancesDir(), `${instanceId}.json`); + } + + private getSessionFile(sessionId: string): string { + return path.join(this.getSessionsDir(), `${sessionId}.json`); + } + + private getGitHubPullRequestFile(repo: string, branch: string): string { + const slug = `${repo}__${branch}` + .replace(/[\\/]/g, '__') + .replace(/[^a-zA-Z0-9_.-]/g, '-'); + return path.join(this.getGithubDir(), `${slug}.json`); + } + + private getClaimFile(claimId: string): string { + return path.join(this.getClaimsDir(), `${claimId}.json`); + } + + private async readJsonFile(filePath: string): Promise { + if (!(await pathExists(filePath))) { + return null; + } + + const content = await fs.readFile(filePath, 'utf8'); + return JSON.parse(content) as T; + } + + private async writeJsonFile(filePath: 
string, value: unknown): Promise { + const dir = path.dirname(filePath); + await fs.mkdir(dir, { recursive: true }); + + const tempPath = `${filePath}.${process.pid}.tmp`; + await fs.writeFile(tempPath, JSON.stringify(value, null, 2)); + await fs.rename(tempPath, filePath); + } +} + +export const canonicalStateStore = new CanonicalStateStore(); +export { + getBaseStateDir as getCanonicalStateBaseDir, + normalizeProjectId, + projectIdFromIdentifier, +}; diff --git a/src/daemon/daemon-config.ts b/src/daemon/daemon-config.ts index 32e2d2f6..1e846a6a 100644 --- a/src/daemon/daemon-config.ts +++ b/src/daemon/daemon-config.ts @@ -65,6 +65,7 @@ export interface DaemonConfig { version: string; context: ContextServiceConfig; linear: LinearServiceConfig; + github: DaemonServiceConfig; maintenance: MaintenanceServiceConfig; memory: MemoryServiceConfig; fileWatch: FileWatchConfig; @@ -87,6 +88,10 @@ export const DEFAULT_DAEMON_CONFIG: DaemonConfig = { retryAttempts: 3, retryDelay: 30000, }, + github: { + enabled: false, + interval: 5, + }, maintenance: { enabled: true, interval: 360, // 6 hours @@ -124,6 +129,12 @@ export interface DaemonStatus { services: { context: { enabled: boolean; lastRun?: number; saveCount?: number }; linear: { enabled: boolean; lastRun?: number; syncCount?: number }; + github: { + enabled: boolean; + lastRun?: number; + syncCount?: number; + lastProjectionState?: string; + }; maintenance: { enabled: boolean; lastRun?: number; @@ -200,6 +211,7 @@ export function loadDaemonConfig(): DaemonConfig { ...config, context: { ...DEFAULT_DAEMON_CONFIG.context, ...config.context }, linear: { ...DEFAULT_DAEMON_CONFIG.linear, ...config.linear }, + github: { ...DEFAULT_DAEMON_CONFIG.github, ...config.github }, maintenance: { ...DEFAULT_DAEMON_CONFIG.maintenance, ...config.maintenance, @@ -223,6 +235,7 @@ export function saveDaemonConfig(config: Partial): void { ...config, context: { ...currentConfig.context, ...config.context }, linear: { ...currentConfig.linear, 
...config.linear }, + github: { ...currentConfig.github, ...config.github }, maintenance: { ...currentConfig.maintenance, ...config.maintenance }, memory: { ...currentConfig.memory, ...config.memory }, fileWatch: { ...currentConfig.fileWatch, ...config.fileWatch }, @@ -241,6 +254,7 @@ export function readDaemonStatus(): DaemonStatus { services: { context: { enabled: false }, linear: { enabled: false }, + github: { enabled: false }, maintenance: { enabled: false }, memory: { enabled: false }, fileWatch: { enabled: false }, diff --git a/src/daemon/services/github-service.ts b/src/daemon/services/github-service.ts new file mode 100644 index 00000000..f1892281 --- /dev/null +++ b/src/daemon/services/github-service.ts @@ -0,0 +1,158 @@ +import { existsSync } from 'fs'; +import { join } from 'path'; +import { homedir } from 'os'; +import type { DaemonServiceConfig } from '../daemon-config.js'; +import { refreshCurrentRepoPullRequestState } from '../../integrations/github/pr-state.js'; +import { canonicalStateStore } from '../../core/shared-state/canonical-store.js'; + +export interface GitHubServiceState { + lastSyncTime: number; + syncCount: number; + errors: string[]; + nextSyncTime?: number; + lastProjectionState?: string; + lastProjectsScanned?: number; +} + +export class DaemonGitHubService { + private config: DaemonServiceConfig; + private state: GitHubServiceState; + private intervalId?: NodeJS.Timeout; + private isRunning = false; + private onLog: (level: string, message: string, data?: unknown) => void; + + constructor( + config: DaemonServiceConfig, + onLog: (level: string, message: string, data?: unknown) => void + ) { + this.config = config; + this.onLog = onLog; + this.state = { + lastSyncTime: 0, + syncCount: 0, + errors: [], + }; + } + + async start(): Promise { + if (this.isRunning || !this.config.enabled) { + return; + } + + if (!this.isGitHubConfigured()) { + this.onLog('WARN', 'GitHub CLI not configured, skipping github service'); + return; + } + + 
this.isRunning = true; + const intervalMs = this.config.interval * 60 * 1000; + + this.onLog('INFO', 'GitHub service started', { + interval: this.config.interval, + }); + + await this.performSync(); + + this.intervalId = setInterval(async () => { + await this.performSync(); + }, intervalMs); + } + + stop(): void { + if (this.intervalId) { + clearInterval(this.intervalId); + this.intervalId = undefined; + } + this.isRunning = false; + this.onLog('INFO', 'GitHub service stopped'); + } + + getState(): GitHubServiceState { + return { + ...this.state, + nextSyncTime: this.isRunning + ? this.state.lastSyncTime + this.config.interval * 60 * 1000 + : undefined, + }; + } + + async forceSync(): Promise { + await this.performSync(); + } + + private async performSync(): Promise { + if (!this.isRunning) return; + + try { + const projectRoots = await this.getProjectRoots(); + this.state.lastProjectsScanned = projectRoots.length; + if (projectRoots.length === 0) { + this.onLog('DEBUG', 'No active project roots found for GitHub sync'); + return; + } + + let synced = false; + for (const projectRoot of projectRoots) { + const projection = + await refreshCurrentRepoPullRequestState(projectRoot); + if (!projection) { + this.onLog('DEBUG', 'No GitHub PR projection available', { + projectRoot, + }); + continue; + } + + synced = true; + this.state.syncCount++; + this.state.lastSyncTime = Date.now(); + this.state.lastProjectionState = projection.state; + + this.onLog('INFO', 'GitHub PR projection refreshed', { + projectRoot, + repo: projection.repo, + branch: projection.branch, + prNumber: projection.prNumber, + state: projection.state, + }); + } + if (!synced) { + this.state.lastSyncTime = Date.now(); + } + } catch (err) { + const errorMsg = err instanceof Error ? 
err.message : String(err); + this.state.errors.push(errorMsg); + this.onLog('ERROR', 'GitHub sync failed', { error: errorMsg }); + + if (this.state.errors.length > 10) { + this.state.errors = this.state.errors.slice(-10); + } + } + } + + private isGitHubConfigured(): boolean { + try { + return existsSync(join(homedir(), '.config', 'gh', 'hosts.yml')); + } catch { + return false; + } + } + + private async getProjectRoots(): Promise { + const roots = new Set(); + const activeProjectPaths = + await canonicalStateStore.listActiveProjectPaths(); + + for (const projectPath of activeProjectPaths) { + if (existsSync(join(projectPath, '.git'))) { + roots.add(projectPath); + } + } + + const cwd = process.cwd(); + if (existsSync(join(cwd, '.git'))) { + roots.add(cwd); + } + + return Array.from(roots).sort(); + } +} diff --git a/src/daemon/unified-daemon.ts b/src/daemon/unified-daemon.ts index 08935dff..0bf7ffd4 100644 --- a/src/daemon/unified-daemon.ts +++ b/src/daemon/unified-daemon.ts @@ -26,6 +26,7 @@ import { } from './daemon-config.js'; import { DaemonContextService } from './services/context-service.js'; import { DaemonLinearService } from './services/linear-service.js'; +import { DaemonGitHubService } from './services/github-service.js'; import { DaemonMaintenanceService } from './services/maintenance-service.js'; import { DaemonMemoryService } from './services/memory-service.js'; @@ -42,6 +43,7 @@ export class UnifiedDaemon { private paths: ReturnType; private contextService: DaemonContextService; private linearService: DaemonLinearService; + private githubService: DaemonGitHubService; private maintenanceService: DaemonMaintenanceService; private memoryService: DaemonMemoryService; private heartbeatInterval?: NodeJS.Timeout; @@ -63,6 +65,11 @@ export class UnifiedDaemon { (level, msg, data) => this.log(level, 'linear', msg, data) ); + this.githubService = new DaemonGitHubService( + this.config.github, + (level, msg, data) => this.log(level, 'github', msg, data) + ); + 
this.maintenanceService = new DaemonMaintenanceService( this.config.maintenance, (level, msg, data) => this.log(level, 'maintenance', msg, data) @@ -142,6 +149,7 @@ export class UnifiedDaemon { private updateStatus(): void { const maintenanceState = this.maintenanceService.getState(); const memoryState = this.memoryService.getState(); + const githubState = this.githubService.getState(); const status: DaemonStatus = { running: true, pid: process.pid, @@ -158,6 +166,12 @@ export class UnifiedDaemon { lastRun: this.linearService.getState().lastSyncTime || undefined, syncCount: this.linearService.getState().syncCount, }, + github: { + enabled: this.config.github.enabled, + lastRun: githubState.lastSyncTime || undefined, + syncCount: githubState.syncCount, + lastProjectionState: githubState.lastProjectionState, + }, maintenance: { enabled: this.config.maintenance.enabled, lastRun: maintenanceState.lastRunTime || undefined, @@ -180,6 +194,7 @@ export class UnifiedDaemon { errors: [ ...this.contextService.getState().errors.slice(-5), ...this.linearService.getState().errors.slice(-5), + ...githubState.errors.slice(-5), ...maintenanceState.errors.slice(-5), ...memoryState.errors.slice(-5), ], @@ -240,6 +255,12 @@ export class UnifiedDaemon { enabled: false, syncCount: this.linearService.getState().syncCount, }, + github: { + enabled: false, + syncCount: this.githubService.getState().syncCount, + lastProjectionState: + this.githubService.getState().lastProjectionState, + }, maintenance: { enabled: false, staleFramesCleaned: @@ -266,6 +287,7 @@ export class UnifiedDaemon { uptime: Date.now() - this.startTime, contextSaves: this.contextService.getState().saveCount, linearSyncs: this.linearService.getState().syncCount, + githubSyncs: this.githubService.getState().syncCount, maintenanceRuns: this.maintenanceService.getState().ftsRebuilds, memoryTriggers: this.memoryService.getState().triggerCount, }); @@ -279,6 +301,7 @@ export class UnifiedDaemon { // Stop services 
this.contextService.stop(); this.linearService.stop(); + this.githubService.stop(); this.maintenanceService.stop(); this.memoryService.stop(); @@ -312,6 +335,7 @@ export class UnifiedDaemon { config: { context: this.config.context.enabled, linear: this.config.linear.enabled, + github: this.config.github.enabled, maintenance: this.config.maintenance.enabled, memory: this.config.memory.enabled, fileWatch: this.config.fileWatch.enabled, @@ -321,6 +345,7 @@ export class UnifiedDaemon { // Start services this.contextService.start(); await this.linearService.start(); + await this.githubService.start(); this.maintenanceService.start(); this.memoryService.start(); @@ -336,6 +361,7 @@ export class UnifiedDaemon { getStatus(): DaemonStatus { const maintenanceState = this.maintenanceService.getState(); const memoryState = this.memoryService.getState(); + const githubState = this.githubService.getState(); return { running: !this.isShuttingDown, pid: process.pid, @@ -352,6 +378,12 @@ export class UnifiedDaemon { lastRun: this.linearService.getState().lastSyncTime || undefined, syncCount: this.linearService.getState().syncCount, }, + github: { + enabled: this.config.github.enabled, + lastRun: githubState.lastSyncTime || undefined, + syncCount: githubState.syncCount, + lastProjectionState: githubState.lastProjectionState, + }, maintenance: { enabled: this.config.maintenance.enabled, lastRun: maintenanceState.lastRunTime || undefined, diff --git a/src/features/sweep/pty-wrapper.ts b/src/features/sweep/pty-wrapper.ts index 7b6cf5c8..57ecf744 100644 --- a/src/features/sweep/pty-wrapper.ts +++ b/src/features/sweep/pty-wrapper.ts @@ -33,6 +33,8 @@ export interface PtyWrapperConfig { claudeArgs?: string[]; stateFile?: string; initialInput?: string; + onExit?: (exitCode: number) => Promise | void; + onSignal?: (signal: 'SIGINT' | 'SIGTERM') => Promise | void; } // Minimal interface for node-pty process to avoid compile-time dep @@ -59,6 +61,8 @@ export class PtyWrapper { claudeArgs: 
config.claudeArgs || [], stateFile: config.stateFile || getSweepPath('sweep-state.json'), initialInput: config.initialInput || '', + onExit: config.onExit || (() => undefined), + onSignal: config.onSignal || (() => undefined), }; this.stateWatcher = new SweepStateWatcher(this.config.stateFile); @@ -174,8 +178,9 @@ export class PtyWrapper { }); // Handle PTY exit - this.ptyProcess.onExit(({ exitCode }) => { + this.ptyProcess.onExit(async ({ exitCode }) => { this.cleanup(); + await this.config.onExit(exitCode); // Sync Linear on exit if configured if (process.env['LINEAR_API_KEY']) { try { @@ -191,12 +196,17 @@ export class PtyWrapper { }); // Handle signals - const onSignal = () => { + const onSignal = async (signal: 'SIGINT' | 'SIGTERM') => { this.cleanup(); + await this.config.onSignal(signal); process.exit(0); }; - process.on('SIGINT', onSignal); - process.on('SIGTERM', onSignal); + process.on('SIGINT', () => { + void onSignal('SIGINT'); + }); + process.on('SIGTERM', () => { + void onSignal('SIGTERM'); + }); } private acceptPrediction(): void { diff --git a/src/integrations/github/pr-state.ts b/src/integrations/github/pr-state.ts new file mode 100644 index 00000000..cbde49b0 --- /dev/null +++ b/src/integrations/github/pr-state.ts @@ -0,0 +1,209 @@ +import { execFileSync } from 'child_process'; +import { + canonicalStateStore, + type GitHubPullRequestProjection, +} from '../../core/shared-state/canonical-store.js'; +import { projectIdFromIdentifier } from '../../core/shared-state/canonical-store.js'; + +export interface CurrentRepoGitHubInfo { + repo: string; + branch: string; + projectPath: string; + projectId: string; +} + +interface GhPrViewResult { + number: number; + title: string; + state: 'OPEN' | 'CLOSED' | 'MERGED'; + isDraft: boolean; + url: string; + baseRefName: string; + headRefName: string; + headRefOid?: string; + mergedAt?: string | null; + updatedAt: string; + reviewDecision?: string | null; + statusCheckRollup?: Array<{ + __typename?: string; + 
conclusion?: string | null; + status?: string | null; + state?: string | null; + }> | null; +} + +function runGit(args: string[], cwd: string): string { + return execFileSync('git', args, { + cwd, + encoding: 'utf8', + stdio: ['ignore', 'pipe', 'pipe'], + }).trim(); +} + +function runGh(args: string[], cwd: string): string { + return execFileSync('gh', args, { + cwd, + encoding: 'utf8', + stdio: ['ignore', 'pipe', 'pipe'], + }).trim(); +} + +function normalizeRemoteToRepo(remote: string): string { + const cleaned = remote.replace(/\.git$/, '').trim(); + + if (cleaned.startsWith('git@github.com:')) { + return cleaned.replace('git@github.com:', ''); + } + + if (cleaned.startsWith('https://github.com/')) { + return cleaned.replace('https://github.com/', ''); + } + + if (cleaned.startsWith('http://github.com/')) { + return cleaned.replace('http://github.com/', ''); + } + + throw new Error(`Unsupported GitHub remote: ${remote}`); +} + +function summarizeStatusCheckRollup( + rollup: GhPrViewResult['statusCheckRollup'] +): string | undefined { + if (!rollup || rollup.length === 0) { + return undefined; + } + + const states = rollup + .map((item) => item.conclusion || item.status || item.state) + .filter(Boolean) as string[]; + + if (states.length === 0) { + return undefined; + } + + if (states.every((state) => state === 'SUCCESS')) { + return 'SUCCESS'; + } + if (states.some((state) => state === 'FAILURE' || state === 'ERROR')) { + return 'FAILURE'; + } + if ( + states.some( + (state) => + state === 'PENDING' || state === 'IN_PROGRESS' || state === 'EXPECTED' + ) + ) { + return 'PENDING'; + } + + return states[0]; +} + +export function getCurrentRepoGitHubInfo( + cwd: string = process.cwd() +): CurrentRepoGitHubInfo | null { + try { + const projectPath = runGit(['rev-parse', '--show-toplevel'], cwd); + const branch = runGit(['rev-parse', '--abbrev-ref', 'HEAD'], projectPath); + const remote = runGit( + ['config', '--get', 'remote.origin.url'], + projectPath + ); + const 
repo = normalizeRemoteToRepo(remote); + + return { + repo, + branch, + projectPath, + projectId: projectIdFromIdentifier(remote), + }; + } catch { + return null; + } +} + +export async function refreshCurrentRepoPullRequestState( + cwd: string = process.cwd() +): Promise { + const info = getCurrentRepoGitHubInfo(cwd); + if (!info) { + return null; + } + + try { + const output = runGh( + [ + 'pr', + 'view', + '--repo', + info.repo, + '--json', + [ + 'number', + 'title', + 'state', + 'isDraft', + 'url', + 'baseRefName', + 'headRefName', + 'headRefOid', + 'mergedAt', + 'updatedAt', + 'reviewDecision', + 'statusCheckRollup', + ].join(','), + ], + info.projectPath + ); + + const parsed = JSON.parse(output) as GhPrViewResult; + const projection: GitHubPullRequestProjection = { + repo: info.repo, + branch: info.branch, + projectId: info.projectId, + projectPath: info.projectPath, + prNumber: parsed.number, + title: parsed.title, + state: + parsed.mergedAt && parsed.state === 'MERGED' ? 'MERGED' : parsed.state, + isDraft: parsed.isDraft, + url: parsed.url, + baseRefName: parsed.baseRefName, + headRefName: parsed.headRefName, + headRefOid: parsed.headRefOid, + mergedAt: parsed.mergedAt || undefined, + updatedAt: parsed.updatedAt, + reviewDecision: parsed.reviewDecision || undefined, + statusCheckRollup: summarizeStatusCheckRollup(parsed.statusCheckRollup), + lastSyncedAt: Date.now(), + }; + + await canonicalStateStore.saveGitHubPullRequest(projection); + if (projection.state === 'MERGED' || projection.state === 'CLOSED') { + await canonicalStateStore.releaseClaims({ + projectId: info.projectId, + projectPath: info.projectPath, + branch: info.branch, + reason: `github_pr_${projection.state.toLowerCase()}`, + }); + } + await canonicalStateStore.appendEvent({ + type: 'github_pr_refreshed', + tool: 'stackmemory', + projectId: info.projectId, + projectPath: info.projectPath, + branch: info.branch, + payload: { + repo: info.repo, + prNumber: projection.prNumber, + state: 
projection.state, + reviewDecision: projection.reviewDecision, + statusCheckRollup: projection.statusCheckRollup, + }, + }); + + return projection; + } catch { + return null; + } +} From 10db0938be17c1fd2f90ea7a7381d69b3b196754 Mon Sep 17 00:00:00 2001 From: "StackMemory Bot (CLI)" Date: Tue, 14 Apr 2026 17:43:37 -0400 Subject: [PATCH 06/11] feat: add deterministic harness smoke tooling --- .husky/pre-commit | 53 +-- package.json | 5 + scripts/determinism-pre-commit.sh | 67 ++++ src/cli/claude-sm.ts | 92 ++++-- src/cli/codex-sm.ts | 75 +++-- src/cli/commands/bench.ts | 261 ++++++++++++++- src/cli/commands/handoff.ts | 52 ++- src/cli/gemini-sm.ts | 53 ++- src/cli/index.ts | 17 + src/cli/opencode-sm.ts | 72 ++-- src/cli/utils/determinism-watcher.ts | 90 +++++ src/cli/utils/real-cli-bin.ts | 66 ++++ src/core/monitoring/logger.ts | 3 +- .../session/__tests__/project-handoff.test.ts | 64 ++++ src/core/session/project-handoff.ts | 85 +++++ .../multimodal/__tests__/determinism.test.ts | 103 ++++++ src/orchestrators/multimodal/determinism.ts | 309 ++++++++++++++++++ src/orchestrators/multimodal/harness.ts | 250 ++++++++------ src/orchestrators/multimodal/types.ts | 2 + 19 files changed, 1467 insertions(+), 252 deletions(-) create mode 100755 scripts/determinism-pre-commit.sh create mode 100644 src/cli/utils/determinism-watcher.ts create mode 100644 src/cli/utils/real-cli-bin.ts create mode 100644 src/core/session/__tests__/project-handoff.test.ts create mode 100644 src/core/session/project-handoff.ts create mode 100644 src/orchestrators/multimodal/__tests__/determinism.test.ts create mode 100644 src/orchestrators/multimodal/determinism.ts diff --git a/.husky/pre-commit b/.husky/pre-commit index 869a3294..b0e2844d 100755 --- a/.husky/pre-commit +++ b/.husky/pre-commit @@ -1,43 +1,14 @@ -#!/bin/sh -# GITBUTLER_MANAGED_HOOK_V1 -# This hook is managed by GitButler to prevent accidental commits on the workspace branch. 
-# Your original pre-commit hook has been preserved as 'pre-commit-user'. - -HOOKS_DIR=$(dirname "$0") - -# Run user's hook first if it exists - if it fails, stop here -if [ -x "$HOOKS_DIR/pre-commit-user" ]; then - "$HOOKS_DIR/pre-commit-user" "$@" || exit $? -fi - -# Get the current branch name -BRANCH=$(git symbolic-ref --short HEAD 2>/dev/null) - -if [ "$BRANCH" = "gitbutler/workspace" ]; then - echo "" - echo "GITBUTLER_ERROR: Cannot commit directly to gitbutler/workspace branch." - echo "" - echo "GitButler manages commits on this branch. Please use GitButler to commit your changes:" - echo " - Use the GitButler app to create commits" - echo " - Or run 'but commit' from the command line" - echo "" - echo "If you want to exit GitButler mode and use normal git:" - echo " - Run 'but teardown' to switch to a regular branch" - echo " - Or directly checkout another branch: git checkout " - echo "" - echo "If you no longer have the GitButler CLI installed, you can simply remove this hook and checkout another branch:" - printf ' rm "%s/pre-commit"\n' "$HOOKS_DIR" - echo "" - exit 1 -fi - -# Not on workspace branch - run user's original hook if it exists -if [ -x "$HOOKS_DIR/pre-commit-user" ]; then - echo "" - echo "WARNING: GitButler's pre-commit hook is still installed but you're not on gitbutler/workspace." - echo "If you're no longer using GitButler, you can restore your original hook:" - printf ' mv "%s/pre-commit-user" "%s/pre-commit"\n' "$HOOKS_DIR" "$HOOKS_DIR" - echo "" +# Use Node version from .nvmrc +export NVM_DIR="$HOME/.nvm" +if [ -s "$NVM_DIR/nvm.sh" ]; then + . 
"$NVM_DIR/nvm.sh" + nvm use 2>/dev/null +elif [ -d "$HOME/.nvm/versions/node" ]; then + NODE_VER=$(cat "$(git rev-parse --show-toplevel)/.nvmrc" 2>/dev/null || echo "20") + NODE_PATH=$(ls -d "$HOME/.nvm/versions/node/v${NODE_VER}"* 2>/dev/null | head -1) + [ -n "$NODE_PATH" ] && export PATH="$NODE_PATH/bin:$PATH" fi -exit 0 +npx lint-staged +bash scripts/determinism-pre-commit.sh +npm run build diff --git a/package.json b/package.json index 52284729..75a89d9b 100644 --- a/package.json +++ b/package.json @@ -114,6 +114,11 @@ "test:run": "vitest run", "test:pre-publish": "./scripts/test-pre-publish-quick.sh", "test:pre-commit": "vitest related --run --reporter=dot --silent --bail=1", + "determinism:smoke": "node --import tsx src/cli/index.ts bench determinism --task \"Determinism probe\" --runs 5", + "determinism:watch": "node --import tsx src/cli/index.ts bench determinism --task \"Determinism probe\" --runs 3 --watch", + "determinism:latest": "node --import tsx src/cli/index.ts bench determinism --latest --json", + "determinism:test": "npx vitest run src/orchestrators/multimodal/__tests__/determinism.test.ts --reporter=dot", + "determinism:pre-commit": "bash scripts/determinism-pre-commit.sh", "prepublishOnly": "npm run build && npm run verify:dist && npm run test:pre-publish", "quality": "npm run lint && npm run test:run && npm run build", "dev": "tsx watch src/integrations/mcp/server.ts", diff --git a/scripts/determinism-pre-commit.sh b/scripts/determinism-pre-commit.sh new file mode 100755 index 00000000..6ff8fb72 --- /dev/null +++ b/scripts/determinism-pre-commit.sh @@ -0,0 +1,67 @@ +#!/bin/bash + +set -euo pipefail + +PROJECT_ROOT="$(git rev-parse --show-toplevel 2>/dev/null || pwd)" +cd "$PROJECT_ROOT" + +BLUE='\033[0;34m' +GREEN='\033[0;32m' +YELLOW='\033[1;33m' +NC='\033[0m' + +log_info() { + echo -e "${BLUE}[determinism]${NC} $1" +} + +log_success() { + echo -e "${GREEN}[determinism]${NC} $1" +} + +log_skip() { + echo -e "${YELLOW}[determinism]${NC} $1" +} 
+ +if [ "${STACKMEMORY_DETERMINISM_SKIP:-0}" = "1" ]; then + log_skip "Skipping because STACKMEMORY_DETERMINISM_SKIP=1" + exit 0 +fi + +CHANGED_FILES="${STACKMEMORY_DETERMINISM_FILES:-}" +if [ -z "$CHANGED_FILES" ]; then + CHANGED_FILES="$(git diff --cached --name-only --diff-filter=ACMR)" +fi + +if [ -z "$CHANGED_FILES" ]; then + log_skip "No staged files detected" + exit 0 +fi + +RELEVANT_PATTERN='^(src/orchestrators/multimodal/|src/cli/commands/bench\.ts$|src/cli/index\.ts$|src/core/monitoring/logger\.ts$)' +RELEVANT_FILES="$(printf '%s\n' "$CHANGED_FILES" | rg "$RELEVANT_PATTERN" || true)" + +if [ -z "$RELEVANT_FILES" ]; then + log_skip "No harness determinism files staged" + exit 0 +fi + +RUNS="${STACKMEMORY_DETERMINISM_RUNS:-3}" +TASK="${STACKMEMORY_DETERMINISM_TASK:-Determinism pre-commit}" + +log_info "Running deterministic smoke check for staged harness files" +printf '%s\n' "$RELEVANT_FILES" | sed 's/^/ - /' + +REPORT_JSON="$(node --import tsx src/cli/index.ts bench determinism --task "$TASK" --runs "$RUNS" --json)" + +SCORE="$(printf '%s' "$REPORT_JSON" | node -e "let data='';process.stdin.on('data',d=>data+=d);process.stdin.on('end',()=>{const report=JSON.parse(data);process.stdout.write(String(report.score));});")" + +if [ "$SCORE" != "100" ] && [ "$SCORE" != "100.00" ]; then + log_skip "Determinism smoke failed with score $SCORE/100" + printf '%s\n' "$REPORT_JSON" + exit 1 +fi + +log_info "Running deterministic harness tests" +npx vitest run src/orchestrators/multimodal/__tests__/determinism.test.ts --reporter=dot + +log_success "Determinism guard passed ($SCORE/100)" diff --git a/src/cli/claude-sm.ts b/src/cli/claude-sm.ts index 0bc7339b..8bb7459c 100644 --- a/src/cli/claude-sm.ts +++ b/src/cli/claude-sm.ts @@ -17,10 +17,17 @@ import { program } from 'commander'; import { v4 as uuidv4 } from 'uuid'; import chalk from 'chalk'; import { initializeTracing, trace } from '../core/trace/index.js'; +import { resolveRealCliBin } from 
'./utils/real-cli-bin.js'; +import { + type DeterminismWatcherHandle, + startDeterminismWatcher, + stopDeterminismWatcher, +} from './utils/determinism-watcher.js'; import { canonicalStateStore, projectIdFromIdentifier, } from '../core/shared-state/canonical-store.js'; +import { loadProjectHandoff } from '../core/session/project-handoff.js'; import { getModelRouter, loadModelRouterConfig, @@ -136,6 +143,8 @@ class ClaudeSM { private sessionId: string; private ownsSession: boolean; private sessionEnded: boolean; + private determinismWatcher: DeterminismWatcherHandle | null; + private skippedHandoffReason: string | null; constructor() { // Load persistent defaults @@ -166,6 +175,8 @@ class ClaudeSM { this.sessionId = process.env['STACKMEMORY_SESSION'] || uuidv4(); this.ownsSession = !process.env['STACKMEMORY_SESSION']; this.sessionEnded = false; + this.determinismWatcher = null; + this.skippedHandoffReason = null; // Ensure config directory exists if (!fs.existsSync(this.claudeConfigDir)) { @@ -275,19 +286,16 @@ class ClaudeSM { } private resolveClaudeBin(): string | null { - // 1) CLI-specified - if (this.config.claudeBin && this.config.claudeBin.trim()) { - return this.config.claudeBin.trim(); - } - // 2) Env override - const envBin = process.env['CLAUDE_BIN']; - if (envBin && envBin.trim()) return envBin.trim(); - // 3) PATH detection - try { - execSync('which claude', { stdio: 'ignore' }); - return 'claude'; - } catch {} - return null; + return resolveRealCliBin({ + explicitBin: this.config.claudeBin, + envBin: process.env['CLAUDE_BIN'], + preferredPaths: [ + path.join(os.homedir(), '.local', 'bin', 'claude'), + '/usr/local/bin/claude', + '/opt/homebrew/bin/claude', + ], + pathCommands: ['claude'], + }); } private gepaProcesses: ReturnType[] = []; @@ -373,6 +381,30 @@ class ClaudeSM { this.gepaProcesses = []; } + private startDeterminismWatcher(): void { + this.determinismWatcher = startDeterminismWatcher({ + stackmemoryBin: this.stackmemoryPath, + cwd: 
process.cwd(), + task: this.config.task, + instanceId: this.config.instanceId, + sessionId: this.sessionId, + tool: 'claude', + }); + + if (this.determinismWatcher) { + const modeLabel = + this.determinismWatcher.mode === 'targeted' + ? 'targeted' + : 'repo-root fallback'; + console.log(chalk.gray(` Determinism: ${modeLabel}`)); + } + } + + private stopDeterminismWatcher(): void { + stopDeterminismWatcher(this.determinismWatcher); + this.determinismWatcher = null; + } + private setupWorktree(): string | null { if (!this.config.useWorktree || !this.isGitRepo()) { return null; @@ -489,19 +521,25 @@ class ClaudeSM { if (!this.config.contextEnabled) return null; try { - const handoffPath = path.join( + const handoff = loadProjectHandoff( process.cwd(), - '.stackmemory', - 'last-handoff.md' + this.isGitRepo() ? this.getCurrentBranch() : undefined ); - if (fs.existsSync(handoffPath)) { - const content = fs.readFileSync(handoffPath, 'utf8').trim(); - if (content.length > 0) { - // Cap at 8000 chars to avoid excessively long system prompts - return content.length > 8000 - ? content.substring(0, 8000) + '\n\n[...truncated]' - : content; - } + if (!handoff) { + this.skippedHandoffReason = null; + return null; + } + if (!handoff.compatible) { + this.skippedHandoffReason = handoff.mismatchReason || 'stale handoff'; + return null; + } + this.skippedHandoffReason = null; + const content = handoff.content.trim(); + if (content.length > 0) { + // Cap at 8000 chars to avoid excessively long system prompts + return content.length > 8000 + ? 
content.substring(0, 8000) + '\n\n[...truncated]' + : content; } } catch { // Silently continue - handoff loading is optional @@ -759,6 +797,7 @@ class ClaudeSM { payload: Record = {} ): Promise { this.stopGEPAWatcher(); + this.stopDeterminismWatcher(); this.saveContext( eventType === 'session_end' @@ -1045,6 +1084,7 @@ class ClaudeSM { } await this.publishSessionStart(); + this.startDeterminismWatcher(); console.log(chalk.gray(`šŸ¤– Instance ID: ${this.config.instanceId}`)); console.log(chalk.gray(`🧠 Session ID: ${this.sessionId.slice(0, 8)}`)); console.log(chalk.gray(`šŸ“ Working in: ${process.cwd()}`)); @@ -1133,6 +1173,10 @@ class ClaudeSM { if (handoffContent) { initialInput = handoffContent; console.log(chalk.gray(' Handoff context ready')); + } else if (this.skippedHandoffReason) { + console.log( + chalk.gray(` Handoff skipped: ${this.skippedHandoffReason}`) + ); } const theoryContent = this.getTheoryContent(); diff --git a/src/cli/codex-sm.ts b/src/cli/codex-sm.ts index 2a3a8d84..2718a992 100644 --- a/src/cli/codex-sm.ts +++ b/src/cli/codex-sm.ts @@ -13,6 +13,12 @@ import { program } from 'commander'; import { v4 as uuidv4 } from 'uuid'; import chalk from 'chalk'; import { initializeTracing, trace } from '../core/trace/index.js'; +import { resolveRealCliBin } from './utils/real-cli-bin.js'; +import { + type DeterminismWatcherHandle, + startDeterminismWatcher, + stopDeterminismWatcher, +} from './utils/determinism-watcher.js'; import { canonicalStateStore, projectIdFromIdentifier, @@ -36,6 +42,7 @@ class CodexSM { private sessionId: string; private ownsSession: boolean; private sessionEnded: boolean; + private determinismWatcher: DeterminismWatcherHandle | null; constructor() { this.config = { @@ -50,6 +57,7 @@ class CodexSM { this.sessionId = process.env['STACKMEMORY_SESSION'] || uuidv4(); this.ownsSession = !process.env['STACKMEMORY_SESSION']; this.sessionEnded = false; + this.determinismWatcher = null; } private getRepoRoot(): string | null { @@ -151,25 
+159,26 @@ class CodexSM { } private resolveCodexBin(): string | null { - // 1) CLI option - if (this.config.codexBin && this.config.codexBin.trim()) { - return this.config.codexBin.trim(); - } - // 2) Environment override - const envBin = process.env['CODEX_BIN']; - if (envBin && envBin.trim()) { - return envBin.trim(); - } - // 3) Detect on PATH - try { - execSync('which codex', { stdio: 'ignore' }); - return 'codex'; - } catch {} - try { - execSync('which codex-cli', { stdio: 'ignore' }); - return 'codex-cli'; - } catch {} - return null; + return resolveRealCliBin({ + explicitBin: this.config.codexBin, + envBin: process.env['CODEX_BIN'], + preferredPaths: [ + path.join( + os.homedir(), + '.nvm', + 'versions', + 'node', + 'v22.22.0', + 'bin', + 'codex' + ), + '/usr/local/bin/codex', + '/opt/homebrew/bin/codex', + '/usr/local/bin/codex-cli', + '/opt/homebrew/bin/codex-cli', + ], + pathCommands: ['codex', 'codex-cli'], + }); } private setupWorktree(): string | null { @@ -379,6 +388,30 @@ class CodexSM { } } + private startDeterminismWatcher(): void { + this.determinismWatcher = startDeterminismWatcher({ + stackmemoryBin: this.stackmemoryPath, + cwd: process.cwd(), + task: this.config.task, + instanceId: this.config.instanceId, + sessionId: this.sessionId, + tool: 'codex', + }); + + if (this.determinismWatcher) { + const modeLabel = + this.determinismWatcher.mode === 'targeted' + ? 
'targeted' + : 'repo-root fallback'; + console.log(chalk.gray(`🧪 Determinism: ${modeLabel}`)); + } + } + + private stopDeterminismWatcher(): void { + stopDeterminismWatcher(this.determinismWatcher); + this.determinismWatcher = null; + } + public async run(args: string[]): Promise { const codexArgs: string[] = []; let i = 0; @@ -486,6 +519,7 @@ class CodexSM { if (this.config.worktreePath) process.env['CODEX_WORKTREE_PATH'] = this.config.worktreePath; await this.publishSessionStart(); + this.startDeterminismWatcher(); console.log(chalk.gray(`šŸ¤– Instance ID: ${this.config.instanceId}`)); console.log(chalk.gray(`🧠 Session ID: ${this.sessionId.slice(0, 8)}`)); @@ -537,6 +571,7 @@ class CodexSM { }); child.on('exit', async (code) => { + this.stopDeterminismWatcher(); this.saveContext('Codex session ended', { action: 'session_end', exitCode: code, @@ -573,6 +608,7 @@ class CodexSM { }); process.on('SIGINT', async () => { + this.stopDeterminismWatcher(); this.saveContext('Codex session interrupted', { action: 'session_interrupt', }); @@ -581,6 +617,7 @@ class CodexSM { }); process.on('SIGTERM', async () => { + this.stopDeterminismWatcher(); this.saveContext('Codex session terminated', { action: 'session_terminate', }); diff --git a/src/cli/commands/bench.ts b/src/cli/commands/bench.ts index 4e6538bc..8991121d 100644 --- a/src/cli/commands/bench.ts +++ b/src/cli/commands/bench.ts @@ -6,6 +6,7 @@ */ import { Command } from 'commander'; +import chalk from 'chalk'; import { existsSync, readFileSync, readdirSync } from 'fs'; import { join } from 'path'; import { @@ -18,6 +19,16 @@ import { feedbackLoops, _DEFAULT_CONFIG, } from '../../core/monitoring/feedback-loops.js'; +import { + DETERMINISM_WATCH_IGNORE, + DETERMINISM_WATCH_PATTERNS, + getDeterminismWatchTargets, + persistDeterminismReport, + readLatestDeterminismReport, + runDeterminismSmoke, + type DeterminismReport, + type StoredDeterminismReport, +} from '../../orchestrators/multimodal/determinism.js'; function 
loadRunMetrics(projectRoot: string): HarnessRunMetrics[] { const metricsFile = join( @@ -66,6 +77,66 @@ function loadSpikeAudits( .filter(Boolean) as Array<{ file: string; data: any }>; } +function printDeterminismReport( + task: string, + requestedRuns: number, + report: DeterminismReport +): void { + console.log('\nHarness Determinism Smoke'); + console.log('═'.repeat(60)); + console.log(`Task: ${task}`); + console.log(`Runs: ${report.runs}`); + console.log(`Determinism score: ${report.score.toFixed(2)}/100`); + + console.log('\nDimension Scores:'); + for (const dimension of report.dimensions) { + console.log( + ` ${dimension.name.padEnd(14)} ${dimension.score.toFixed(2).padStart(6)}/100 ${dimension.details}` + ); + } + + if (report.recommendations.length > 0) { + console.log('\nRecommended Tightening:'); + for (const recommendation of report.recommendations) { + console.log(` - ${recommendation}`); + } + } else { + console.log('\nNo drift detected in deterministic fixture mode.'); + } + + const sample = report.snapshots[0]; + if (sample) { + console.log('\nReference Snapshot:'); + console.log(` resultHash: ${sample.resultHash.slice(0, 16)}`); + console.log(` planHash: ${sample.planHash.slice(0, 16)}`); + console.log(` critiqueHash: ${sample.critiqueHash.slice(0, 16)}`); + console.log(` commandsHash: ${sample.commandsHash.slice(0, 16)}`); + console.log(` iterations: ${sample.iterations}`); + console.log(` contextTokens: ${sample.contextTokens}`); + } + + if (report.runs !== requestedRuns) { + console.log( + `\nNote: requested ${requestedRuns} runs, completed ${report.runs}.` + ); + } + + console.log(''); +} + +function printStoredDeterminismReport(stored: StoredDeterminismReport): void { + console.log('\nCached Determinism Result'); + console.log('═'.repeat(60)); + console.log(`Task: ${stored.task}`); + console.log(`Trigger: ${stored.trigger}`); + console.log(`Timestamp: ${stored.timestamp}`); + console.log(`Determinism score: 
${stored.report.score.toFixed(2)}/100`); + if (stored.changedPaths.length > 0) { + console.log(`Changed paths: ${stored.changedPaths.join(', ')}`); + } + console.log(''); +} + export function createBenchCommand(): Command { const bench = new Command('bench') .description( @@ -243,17 +314,203 @@ console.log(''); }); + // Sub-command: bench determinism + bench + .command('determinism') + .description( + 'Run deterministic fixture smoke checks for the multimodal harness' + ) + .option( + '-t, --task ', + 'Task description to run through the harness', + 'Add a small auth guard' + ) + .option('--runs ', 'Number of repeated runs', '5') + .option( + '--planner-model ', + 'Planner model label to include in the run config', + 'claude-sonnet-4-20250514' + ) + .option( + '--reviewer-model ', + 'Reviewer model label to include in the run config', + 'claude-sonnet-4-20250514' + ) + .option('--implementer ', 'codex|claude', 'codex') + .option('--max-iters ', 'Retry loop iterations', '2') + .option( + '--watch', + 'Watch harness-critical files and rerun on changes', + false + ) + .option( + '--debounce-ms ', + 'Debounce window for write completion in watch mode', + '3000' + ) + .option('--latest', 'Show the latest cached determinism result', false) + .option('--json', 'Output as JSON', false) + .action(async function () { + const command = this as Command; + const options = command.opts(); + const json = Boolean(options.json || command.parent?.opts().json); + const projectRoot = process.cwd(); + const runs = Math.max(1, parseInt(options.runs, 10) || 5); + const debounceMs = Math.max( + 250, + parseInt(options.debounceMs, 10) || 3000 + ); + + if (options.latest) { + const stored = readLatestDeterminismReport(projectRoot); + if (!stored) { + console.error('No cached determinism result found.'); + process.exitCode = 1; + return; + } + + if (json) { + console.log(JSON.stringify(stored, null, 2)); + return; + } + + 
printStoredDeterminismReport(stored); + return; + } + + const runCheck = async ( + trigger: string, + changedPaths: string[] = [] + ): Promise => { + const report = await runDeterminismSmoke( + { + task: options.task, + repoPath: projectRoot, + }, + { + runs, + plannerModel: options.plannerModel, + reviewerModel: options.reviewerModel, + implementer: options.implementer, + maxIters: parseInt(options.maxIters, 10) || 2, + } + ); + + const stored = persistDeterminismReport(projectRoot, report, { + task: options.task, + trigger, + changedPaths, + }); + + if (json) { + console.log(JSON.stringify(stored, null, 2)); + } else { + printDeterminismReport(options.task, runs, report); + } + + return stored; + }; + + if (options.watch) { + const chokidar = await import('chokidar'); + const watchTargets = getDeterminismWatchTargets(projectRoot); + const watchPatterns = watchTargets.map((pattern) => + join(projectRoot, pattern) + ); + const watcher = chokidar.watch(watchPatterns, { + ignoreInitial: true, + ignored: DETERMINISM_WATCH_IGNORE.map((pattern) => + join(projectRoot, pattern) + ), + awaitWriteFinish: { + stabilityThreshold: debounceMs, + pollInterval: 100, + }, + }); + + let running = false; + let rerunRequested = false; + const pendingPaths = new Set(); + + const maybeRun = async (trigger: string) => { + if (running) { + rerunRequested = true; + return; + } + + running = true; + const changedPaths = Array.from(pendingPaths).sort(); + pendingPaths.clear(); + + try { + await runCheck(trigger, changedPaths); + } finally { + running = false; + if (rerunRequested) { + rerunRequested = false; + await maybeRun('watch:debounced-rerun'); + } + } + }; + + const onFileEvent = async (trigger: string, filePath: string) => { + const relativePath = filePath.startsWith(projectRoot) + ? 
filePath.slice(projectRoot.length + 1) + : filePath; + pendingPaths.add(relativePath); + if (!json) { + console.log( + chalk.gray(`determinism watcher: ${trigger} ${relativePath}`) + ); + } + await maybeRun(`watch:${trigger}`); + }; + + watcher.on('all', async (eventName: string, filePath: string) => { + if (eventName !== 'add' && eventName !== 'change') { + return; + } + await onFileEvent(eventName, filePath); + }); + + if (!json) { + console.log('\nHarness Determinism Watch'); + console.log('═'.repeat(60)); + console.log(`Task: ${options.task}`); + console.log(`Watching: ${watchTargets.join(', ')}`); + console.log(`Debounce: ${debounceMs}ms`); + console.log(chalk.gray('Press Ctrl+C to stop.\n')); + } + + await runCheck('watch:initial'); + await new Promise((resolve) => { + const stop = () => { + void watcher.close(); + resolve(); + }; + process.once('SIGINT', stop); + process.once('SIGTERM', stop); + }); + return; + } + + await runCheck('manual'); + }); + // Sub-command: bench loops bench .command('loops') .description('Show feedback loop configuration, status, and recent events') .option('--json', 'Output as JSON', false) - .action((options) => { + .action(function () { + const command = this as Command; + const options = command.opts(); + const json = Boolean(options.json || command.parent?.opts().json); const config = feedbackLoops.getConfig(); const stats = feedbackLoops.getStats(); const history = feedbackLoops.getHistory(undefined, 20); - if (options.json) { + if (json) { console.log(JSON.stringify({ config, stats, history }, null, 2)); return; } diff --git a/src/cli/commands/handoff.ts b/src/cli/commands/handoff.ts index 3dc1eb85..959e8b95 100644 --- a/src/cli/commands/handoff.ts +++ b/src/cli/commands/handoff.ts @@ -20,6 +20,10 @@ import { FrameManager } from '../../core/context/index.js'; import { LinearTaskManager } from '../../features/tasks/linear-task-manager.js'; import { logger } from '../../core/monitoring/logger.js'; import { 
EnhancedHandoffGenerator } from '../../core/session/handoff.js'; +import { + getProjectHandoffPaths, + loadProjectHandoff, +} from '../../core/session/project-handoff.js'; // Simple token estimation (avg 3.5 chars per token for English) const countTokens = (text: string): number => Math.ceil(text.length / 3.5); @@ -360,7 +364,8 @@ Generated by stackmemory capture at ${timestamp} if (!existsSync(stackmemoryDir)) { mkdirSync(stackmemoryDir, { recursive: true }); } - const handoffPath = join(stackmemoryDir, 'last-handoff.md'); + const { handoffPath, metadataPath } = + getProjectHandoffPaths(projectRoot); writeFileSync(handoffPath, handoffPrompt); // Save versioned copy @@ -379,6 +384,18 @@ Generated by stackmemory capture at ${timestamp} branch, handoffPrompt ); + writeFileSync( + metadataPath, + JSON.stringify( + { + branch, + capturedAt: new Date().toISOString(), + projectRoot, + }, + null, + 2 + ) + ); console.log( `Versioned: ${versionedPath.split('/').slice(-2).join('/')}` ); @@ -433,14 +450,10 @@ export function createRestoreCommand(): Command { cmd .description('Restore context from last handoff') .option('--no-copy', 'Do not copy prompt to clipboard') + .option('--force', 'Restore even if the handoff branch does not match') .action(async (options) => { try { const projectRoot = process.cwd(); - const handoffPath = join( - projectRoot, - '.stackmemory', - 'last-handoff.md' - ); const metaPath = join( process.env['HOME'] || '~', '.stackmemory', @@ -448,14 +461,35 @@ export function createRestoreCommand(): Command { 'last-handoff-meta.json' ); - if (!existsSync(handoffPath)) { + const currentBranch = (() => { + try { + return execSync('git rev-parse --abbrev-ref HEAD', { + encoding: 'utf-8', + cwd: projectRoot, + stdio: ['pipe', 'pipe', 'pipe'], + }).trim(); + } catch { + return undefined; + } + })(); + + const handoff = loadProjectHandoff(projectRoot, currentBranch); + if (!handoff) { console.log('āŒ No handoff found in this project'); console.log('šŸ’” Run 
"stackmemory capture" to create one'); return; } - // Read handoff prompt - const handoffPrompt = readFileSync(handoffPath, 'utf-8'); + if (!handoff.compatible && !options.force) { + console.log('āš ļø Skipping stale handoff'); + console.log(` ${handoff.mismatchReason}`); + console.log( + ' Run "stackmemory restore --force" to inspect it anyway' + ); + return; + } + + const handoffPrompt = handoff.content; // Display the prompt console.log('\n' + '='.repeat(60)); diff --git a/src/cli/gemini-sm.ts b/src/cli/gemini-sm.ts index b0bbf9c6..cea9d63b 100644 --- a/src/cli/gemini-sm.ts +++ b/src/cli/gemini-sm.ts @@ -16,6 +16,7 @@ import { program } from 'commander'; import { v4 as uuidv4 } from 'uuid'; import chalk from 'chalk'; import { initializeTracing, trace } from '../core/trace/index.js'; +import { resolveRealCliBin } from './utils/real-cli-bin.js'; interface GeminiSMConfig { defaultWorktree: boolean; @@ -167,40 +168,24 @@ class GeminiSM { } private resolveGeminiBin(): string | null { - if (this.config.geminiBin && this.config.geminiBin.trim()) { - return this.config.geminiBin.trim(); - } - const envBin = process.env['GEMINI_BIN']; - if (envBin && envBin.trim()) return envBin.trim(); - - const possiblePaths = [ - path.join( - os.homedir(), - '.nvm', - 'versions', - 'node', - 'v22.22.0', - 'bin', - 'gemini' - ), - '/usr/local/bin/gemini', - '/opt/homebrew/bin/gemini', - ]; - - for (const binPath of possiblePaths) { - if (fs.existsSync(binPath)) { - return binPath; - } - } - - // Try PATH - try { - execSync('which gemini', { stdio: 'ignore' }); - return 'gemini'; - } catch { - // Not found - } - return null; + return resolveRealCliBin({ + explicitBin: this.config.geminiBin, + envBin: process.env['GEMINI_BIN'], + preferredPaths: [ + path.join( + os.homedir(), + '.nvm', + 'versions', + 'node', + 'v22.22.0', + 'bin', + 'gemini' + ), + '/usr/local/bin/gemini', + '/opt/homebrew/bin/gemini', + ], + pathCommands: ['gemini'], + }); } private setupWorktree(): string | null { 
diff --git a/src/cli/index.ts b/src/cli/index.ts index 7ce66d2c..f6153228 100644 --- a/src/cli/index.ts +++ b/src/cli/index.ts @@ -7,6 +7,12 @@ // Set environment flag for CLI usage to skip async context bridge process.env['STACKMEMORY_CLI'] = 'true'; +// Machine-readable CLI output should not be prefixed by INFO banners unless +// the caller explicitly opted into a log level. +if (!process.env['STACKMEMORY_LOG_LEVEL'] && process.argv.includes('--json')) { + process.env['STACKMEMORY_LOG_LEVEL'] = 'ERROR'; +} + // Load environment variables (quiet mode to suppress logging) import { config as loadDotenv } from 'dotenv'; loadDotenv({ quiet: true }); @@ -878,6 +884,11 @@ program .option('--audit-dir ', 'Persist spike results to directory') .option('--record-frame', 'Record as real frame with anchors', false) .option('--record', 'Record plan & critique into StackMemory context', false) + .option( + '--deterministic-fixture', + 'Use deterministic fixture planner/critic for replayable smoke runs', + false + ) .option('--json', 'Emit single JSON result (UI-friendly)', false) .option('--quiet', 'Minimal output (default)', true) .option('--verbose', 'Verbose sectioned output', false) @@ -904,6 +915,7 @@ program auditDir: opts.auditDir, recordFrame: Boolean(opts.recordFrame), record: Boolean(opts.record), + deterministicFixture: Boolean(opts.deterministicFixture), } ); @@ -978,6 +990,10 @@ program .option('--audit-dir ', 'Persist spike results to directory') .option('--record-frame', 'Record as real frame with anchors') .option('--record', 'Record plan & critique into StackMemory context') + .option( + '--deterministic-fixture', + 'Use deterministic fixture planner/critic for replayable smoke runs' + ) .option('--json', 'Emit single JSON result (UI-friendly)') .option('--quiet', 'Minimal output') .option('--verbose', 'Verbose sectioned output') @@ -1032,6 +1048,7 @@ program auditDir: opts.auditDir, recordFrame: Boolean(opts.recordFrame), record: Boolean(opts.record), + 
deterministicFixture: Boolean(opts.deterministicFixture), } ); diff --git a/src/cli/opencode-sm.ts b/src/cli/opencode-sm.ts index 6b5f5f61..adf15beb 100644 --- a/src/cli/opencode-sm.ts +++ b/src/cli/opencode-sm.ts @@ -16,6 +16,12 @@ import { program } from 'commander'; import { v4 as uuidv4 } from 'uuid'; import chalk from 'chalk'; import { initializeTracing, trace } from '../core/trace/index.js'; +import { resolveRealCliBin } from './utils/real-cli-bin.js'; +import { + type DeterminismWatcherHandle, + startDeterminismWatcher, + stopDeterminismWatcher, +} from './utils/determinism-watcher.js'; interface OpencodeSMConfig { defaultWorktree: boolean; @@ -70,6 +76,7 @@ class OpencodeSM { private config: OpencodeConfig; private stackmemoryPath: string; private smConfig: OpencodeSMConfig; + private determinismWatcher: DeterminismWatcherHandle | null; constructor() { this.smConfig = loadSMConfig(); @@ -84,6 +91,7 @@ class OpencodeSM { }; this.stackmemoryPath = this.findStackMemory(); + this.determinismWatcher = null; } private getRepoRoot(): string | null { @@ -169,33 +177,16 @@ class OpencodeSM { } private resolveOpencodeBin(): string | null { - if (this.config.opencodeBin && this.config.opencodeBin.trim()) { - return this.config.opencodeBin.trim(); - } - const envBin = process.env['OPENCODE_BIN']; - if (envBin && envBin.trim()) return envBin.trim(); - - // Check common OpenCode locations - const possiblePaths = [ - path.join(os.homedir(), '.opencode', 'bin', 'opencode'), - '/usr/local/bin/opencode', - '/opt/homebrew/bin/opencode', - ]; - - for (const binPath of possiblePaths) { - if (fs.existsSync(binPath)) { - return binPath; - } - } - - // Try PATH - try { - execSync('which opencode', { stdio: 'ignore' }); - return 'opencode'; - } catch { - // Not found - } - return null; + return resolveRealCliBin({ + explicitBin: this.config.opencodeBin, + envBin: process.env['OPENCODE_BIN'], + preferredPaths: [ + path.join(os.homedir(), '.opencode', 'bin', 'opencode'), + 
'/usr/local/bin/opencode', + '/opt/homebrew/bin/opencode', + ], + pathCommands: ['opencode'], + }); } private setupWorktree(): string | null { @@ -333,6 +324,29 @@ class OpencodeSM { } } + private startDeterminismWatcher(): void { + this.determinismWatcher = startDeterminismWatcher({ + stackmemoryBin: this.stackmemoryPath, + cwd: process.cwd(), + task: this.config.task, + instanceId: this.config.instanceId, + tool: 'opencode', + }); + + if (this.determinismWatcher) { + const modeLabel = + this.determinismWatcher.mode === 'targeted' + ? 'targeted' + : 'repo-root fallback'; + console.log(chalk.gray(`Determinism: ${modeLabel}`)); + } + } + + private stopDeterminismWatcher(): void { + stopDeterminismWatcher(this.determinismWatcher); + this.determinismWatcher = null; + } + public async run(args: string[]): Promise { const opencodeArgs: string[] = []; let i = 0; @@ -455,6 +469,7 @@ class OpencodeSM { if (this.config.worktreePath) { process.env['OPENCODE_WORKTREE_PATH'] = this.config.worktreePath; } + this.startDeterminismWatcher(); console.log(chalk.gray(`Instance: ${this.config.instanceId}`)); console.log(chalk.gray(`Working in: ${process.cwd()}`)); @@ -501,6 +516,7 @@ class OpencodeSM { }); opencode.on('exit', async (code) => { + this.stopDeterminismWatcher(); this.saveContext('OpenCode session ended', { action: 'session_end', exitCode: code, @@ -525,6 +541,7 @@ class OpencodeSM { }); process.on('SIGINT', () => { + this.stopDeterminismWatcher(); this.saveContext('OpenCode session interrupted', { action: 'session_interrupt', }); @@ -532,6 +549,7 @@ class OpencodeSM { }); process.on('SIGTERM', () => { + this.stopDeterminismWatcher(); this.saveContext('OpenCode session terminated', { action: 'session_terminate', }); diff --git a/src/cli/utils/determinism-watcher.ts b/src/cli/utils/determinism-watcher.ts new file mode 100644 index 00000000..8453484f --- /dev/null +++ b/src/cli/utils/determinism-watcher.ts @@ -0,0 +1,90 @@ +import { spawn } from 'child_process'; +import * 
as fs from 'fs'; +import * as path from 'path'; + +import { getDeterminismWatchTargets } from '../../orchestrators/multimodal/determinism.js'; + +export interface DeterminismWatcherOptions { + stackmemoryBin: string; + cwd: string; + task?: string; + instanceId?: string; + sessionId?: string; + tool: 'claude' | 'codex' | 'opencode'; +} + +export interface DeterminismWatcherHandle { + child: ReturnType; + mode: 'targeted' | 'repo-root'; + targets: string[]; +} + +export function shouldAutoStartDeterminismWatcher(cwd: string): boolean { + if (process.env['STACKMEMORY_DETERMINISM_AUTO'] === '0') { + return false; + } + + return fs.existsSync(path.join(cwd, '.git')); +} + +export function startDeterminismWatcher( + options: DeterminismWatcherOptions +): DeterminismWatcherHandle | null { + if (!shouldAutoStartDeterminismWatcher(options.cwd)) { + return null; + } + + const runs = process.env['STACKMEMORY_DETERMINISM_RUNS'] || '3'; + const task = + options.task || + process.env['STACKMEMORY_DETERMINISM_TASK'] || + 'Determinism probe'; + const targets = getDeterminismWatchTargets(options.cwd); + const mode: 'targeted' | 'repo-root' = + targets.length === 1 && targets[0] === '.' ? 'repo-root' : 'targeted'; + + const child = spawn( + options.stackmemoryBin, + [ + 'bench', + 'determinism', + '--task', + task, + '--runs', + runs, + '--watch', + '--json', + ], + { + cwd: options.cwd, + stdio: 'ignore', + env: { + ...process.env, + STACKMEMORY_DETERMINISM_PARENT_TOOL: options.tool, + STACKMEMORY_DETERMINISM_PARENT_INSTANCE: options.instanceId || '', + STACKMEMORY_DETERMINISM_PARENT_SESSION: options.sessionId || '', + }, + } + ); + + return { + child, + mode, + targets, + }; +} + +export function stopDeterminismWatcher( + handle: DeterminismWatcherHandle | null +): void { + const child = handle?.child ?? null; + if (!child || child.killed) { + return; + } + + try { + child.kill('SIGTERM'); + } catch { + // Best-effort only. 
+ } +} diff --git a/src/cli/utils/real-cli-bin.ts b/src/cli/utils/real-cli-bin.ts new file mode 100644 index 00000000..c1a0eff3 --- /dev/null +++ b/src/cli/utils/real-cli-bin.ts @@ -0,0 +1,66 @@ +import { execSync } from 'child_process'; +import * as fs from 'fs'; + +const DEFAULT_WRAPPER_PATH_SNIPPETS = [ + '/Applications/cmux.app/Contents/Resources/bin/', +]; + +function isWrapperPath( + candidate: string, + wrapperPathSnippets: string[] +): boolean { + const normalized = candidate.trim(); + return wrapperPathSnippets.some((snippet) => normalized.includes(snippet)); +} + +export interface ResolveRealCliBinOptions { + explicitBin?: string; + envBin?: string; + preferredPaths?: string[]; + pathCommands: string[]; + wrapperPathSnippets?: string[]; +} + +export function resolveRealCliBin( + options: ResolveRealCliBinOptions +): string | null { + if (options.explicitBin?.trim()) { + return options.explicitBin.trim(); + } + if (options.envBin?.trim()) { + return options.envBin.trim(); + } + + const wrapperPathSnippets = + options.wrapperPathSnippets || DEFAULT_WRAPPER_PATH_SNIPPETS; + + for (const candidate of options.preferredPaths || []) { + if ( + fs.existsSync(candidate) && + !isWrapperPath(candidate, wrapperPathSnippets) + ) { + return candidate; + } + } + + for (const command of options.pathCommands) { + try { + const output = execSync(`which -a ${command}`, { + encoding: 'utf8', + stdio: ['ignore', 'pipe', 'ignore'], + }); + const resolved = output + .split('\n') + .map((line) => line.trim()) + .filter(Boolean) + .find((candidate) => !isWrapperPath(candidate, wrapperPathSnippets)); + if (resolved) { + return resolved; + } + } catch { + // Continue searching. 
+ } + } + + return null; +} diff --git a/src/core/monitoring/logger.ts b/src/core/monitoring/logger.ts index 030490dd..be75dc00 100644 --- a/src/core/monitoring/logger.ts +++ b/src/core/monitoring/logger.ts @@ -105,6 +105,7 @@ export class Logger { private constructor() { // Set log level from environment const envLevel = process.env['STACKMEMORY_LOG_LEVEL']?.toUpperCase(); + const jsonCliMode = !envLevel && process.argv.includes('--json'); switch (envLevel) { case 'ERROR': this.logLevel = LogLevel.ERROR; @@ -116,7 +117,7 @@ export class Logger { this.logLevel = LogLevel.DEBUG; break; default: - this.logLevel = LogLevel.INFO; + this.logLevel = jsonCliMode ? LogLevel.ERROR : LogLevel.INFO; } // Set up log file if in debug mode or if specified diff --git a/src/core/session/__tests__/project-handoff.test.ts b/src/core/session/__tests__/project-handoff.test.ts new file mode 100644 index 00000000..5e393c68 --- /dev/null +++ b/src/core/session/__tests__/project-handoff.test.ts @@ -0,0 +1,64 @@ +import { afterEach, describe, expect, it } from 'vitest'; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'fs'; +import { join } from 'path'; +import { tmpdir } from 'os'; +import { + loadProjectHandoff, + parseBranchFromHandoffContent, +} from '../project-handoff.js'; + +describe('project handoff compatibility', () => { + const tempDirs: string[] = []; + + afterEach(() => { + for (const dir of tempDirs.splice(0)) { + rmSync(dir, { recursive: true, force: true }); + } + }); + + function makeProject(): string { + const dir = mkdtempSync(join(tmpdir(), 'stackmemory-handoff-')); + mkdirSync(join(dir, '.stackmemory'), { recursive: true }); + tempDirs.push(dir); + return dir; + } + + it('parses branch from compact handoff content', () => { + expect( + parseBranchFromHandoffContent( + '# Handoff: stackmemory@feature/test-branch\n## Work: test' + ) + ).toBe('feature/test-branch'); + }); + + it('loads a compatible handoff for the current branch', () => { + const project = 
makeProject(); + writeFileSync( + join(project, '.stackmemory', 'last-handoff.md'), + '# Handoff: stackmemory@feature/test-branch\n## Work: test' + ); + writeFileSync( + join(project, '.stackmemory', 'last-handoff-meta.json'), + JSON.stringify({ branch: 'feature/test-branch' }, null, 2) + ); + + const handoff = loadProjectHandoff(project, 'feature/test-branch'); + expect(handoff).not.toBeNull(); + expect(handoff?.compatible).toBe(true); + expect(handoff?.branch).toBe('feature/test-branch'); + }); + + it('rejects a stale handoff from another branch', () => { + const project = makeProject(); + writeFileSync( + join(project, '.stackmemory', 'last-handoff.md'), + '# Handoff: stackmemory@feature/old-branch\n## Work: test' + ); + + const handoff = loadProjectHandoff(project, 'release/current'); + expect(handoff).not.toBeNull(); + expect(handoff?.compatible).toBe(false); + expect(handoff?.mismatchReason).toContain('feature/old-branch'); + expect(handoff?.mismatchReason).toContain('release/current'); + }); +}); diff --git a/src/core/session/project-handoff.ts b/src/core/session/project-handoff.ts new file mode 100644 index 00000000..38a58f66 --- /dev/null +++ b/src/core/session/project-handoff.ts @@ -0,0 +1,85 @@ +import { existsSync, readFileSync } from 'fs'; +import { join } from 'path'; + +export interface ProjectHandoffMetadata { + branch?: string; + capturedAt?: string; + gitHead?: string; + projectRoot?: string; +} + +export interface LoadedProjectHandoff { + content: string; + branch: string | null; + compatible: boolean; + mismatchReason?: string; +} + +export function getProjectHandoffPaths(projectRoot: string): { + handoffPath: string; + metadataPath: string; +} { + return { + handoffPath: join(projectRoot, '.stackmemory', 'last-handoff.md'), + metadataPath: join(projectRoot, '.stackmemory', 'last-handoff-meta.json'), + }; +} + +export function parseBranchFromHandoffContent(content: string): string | null { + const compactMatch = content.match(/^# 
Handoff:\s+.+?@([^\n]+)$/m); + if (compactMatch?.[1]) { + return compactMatch[1].trim(); + } + + const verboseMatch = content.match(/^\*\*Branch\*\*:\s+([^\n]+)$/m); + if (verboseMatch?.[1]) { + return verboseMatch[1].trim(); + } + + const ultraMatch = content.match(/^\[H\].+?@([^|\n]+)\|/m); + if (ultraMatch?.[1]) { + return ultraMatch[1].trim(); + } + + return null; +} + +export function loadProjectHandoff( + projectRoot: string, + currentBranch?: string +): LoadedProjectHandoff | null { + const { handoffPath, metadataPath } = getProjectHandoffPaths(projectRoot); + if (!existsSync(handoffPath)) { + return null; + } + + const content = readFileSync(handoffPath, 'utf8').trim(); + if (!content) { + return null; + } + + let metadata: ProjectHandoffMetadata | null = null; + if (existsSync(metadataPath)) { + try { + metadata = JSON.parse(readFileSync(metadataPath, 'utf8')); + } catch { + metadata = null; + } + } + + const branch = metadata?.branch || parseBranchFromHandoffContent(content); + if (currentBranch && branch && branch !== currentBranch) { + return { + content, + branch, + compatible: false, + mismatchReason: `handoff is for branch ${branch}, current branch is ${currentBranch}`, + }; + } + + return { + content, + branch: branch || null, + compatible: true, + }; +} diff --git a/src/orchestrators/multimodal/__tests__/determinism.test.ts b/src/orchestrators/multimodal/__tests__/determinism.test.ts new file mode 100644 index 00000000..17d2f1ec --- /dev/null +++ b/src/orchestrators/multimodal/__tests__/determinism.test.ts @@ -0,0 +1,103 @@ +import { afterEach, describe, expect, it } from 'vitest'; +import { existsSync, mkdtempSync, rmSync } from 'fs'; +import { join } from 'path'; +import { tmpdir } from 'os'; + +import { + persistDeterminismReport, + readLatestDeterminismReport, + runDeterminismSmoke, +} from '../determinism.js'; +import { runSpike } from '../harness.js'; + +const tempDirs: string[] = []; + +function makeTempRepo(): string { + const dir = 
mkdtempSync(join(tmpdir(), 'stackmemory-determinism-')); + tempDirs.push(dir); + return dir; +} + +afterEach(() => { + while (tempDirs.length > 0) { + const dir = tempDirs.pop(); + if (dir) { + rmSync(dir, { recursive: true, force: true }); + } + } +}); + +describe('multimodal determinism', () => { + it('scores deterministic fixture runs at 100 out of 100', async () => { + const repoPath = makeTempRepo(); + + const report = await runDeterminismSmoke( + { + task: 'Add a small auth guard', + repoPath, + }, + { + runs: 5, + implementer: 'codex', + maxIters: 2, + } + ); + + expect(report.runs).toBe(5); + expect(report.score).toBe(100); + expect( + report.dimensions.every((dimension) => dimension.score === 100) + ).toBe(true); + expect(report.recommendations).toEqual([]); + expect( + new Set(report.snapshots.map((snapshot) => snapshot.resultHash)).size + ).toBe(1); + }); + + it('can skip audit persistence for deterministic or replay runs', async () => { + const repoPath = makeTempRepo(); + + await runSpike( + { + task: 'Add a small auth guard', + repoPath, + }, + { + dryRun: true, + deterministicFixture: true, + persistAudit: false, + } + ); + + expect(existsSync(join(repoPath, '.stackmemory', 'build'))).toBe(false); + }); + + it('persists and reloads the latest cached determinism report', async () => { + const repoPath = makeTempRepo(); + const report = await runDeterminismSmoke( + { + task: 'Add a small auth guard', + repoPath, + }, + { + runs: 3, + } + ); + + const stored = persistDeterminismReport(repoPath, report, { + task: 'Add a small auth guard', + trigger: 'test', + changedPaths: ['src/orchestrators/multimodal/harness.ts'], + }); + const reloaded = readLatestDeterminismReport(repoPath); + + expect(reloaded).not.toBeNull(); + expect(reloaded?.task).toBe('Add a small auth guard'); + expect(reloaded?.trigger).toBe('test'); + expect(reloaded?.changedPaths).toEqual([ + 'src/orchestrators/multimodal/harness.ts', + ]); + expect(reloaded?.report.score).toBe(100); + 
expect(stored.report.score).toBe(100); + }); +}); diff --git a/src/orchestrators/multimodal/determinism.ts b/src/orchestrators/multimodal/determinism.ts new file mode 100644 index 00000000..3d4163f7 --- /dev/null +++ b/src/orchestrators/multimodal/determinism.ts @@ -0,0 +1,309 @@ +import { createHash } from 'crypto'; +import { + appendFileSync, + existsSync, + mkdirSync, + readFileSync, + writeFileSync, +} from 'fs'; +import { join } from 'path'; +import type { HarnessOptions, HarnessResult, PlanningInput } from './types.js'; +import { compactPlan } from './utils.js'; +import { runSpike } from './harness.js'; + +export const DETERMINISM_WATCH_PATTERNS = [ + 'src/orchestrators/multimodal', + 'src/cli/commands/bench.ts', + 'src/cli/index.ts', + 'src/core/monitoring/logger.ts', +]; + +export const DETERMINISM_WATCH_IGNORE = [ + '.git/**', + 'node_modules/**', + 'dist/**', + 'build/**', + '.next/**', + '.turbo/**', + 'coverage/**', + '.stackmemory/**', +]; + +export interface DeterminismSnapshot { + index: number; + approved: boolean; + iterations: number; + planHash: string; + critiqueHash: string; + commandsHash: string; + resultHash: string; + contextTokens: number; +} + +export interface DeterminismDimensionScore { + name: string; + score: number; + weight: number; + details: string; +} + +export interface DeterminismReport { + runs: number; + score: number; + snapshots: DeterminismSnapshot[]; + dimensions: DeterminismDimensionScore[]; + recommendations: string[]; +} + +export interface StoredDeterminismReport { + timestamp: string; + task: string; + trigger: string; + changedPaths: string[]; + report: DeterminismReport; +} + +function stableStringify(value: unknown): string { + return JSON.stringify(canonicalize(value)); +} + +function canonicalize(value: unknown): unknown { + if (Array.isArray(value)) { + return value.map((item) => canonicalize(item)); + } + if (value && typeof value === 'object') { + const entries = Object.entries(value as Record).sort( + ([a], 
[b]) => a.localeCompare(b) + ); + return Object.fromEntries( + entries.map(([key, entryValue]) => [key, canonicalize(entryValue)]) + ); + } + return value; +} + +function hashValue(value: unknown): string { + return createHash('sha256').update(stableStringify(value)).digest('hex'); +} + +function modeAgreement(values: T[]): number { + if (values.length === 0) return 1; + const counts = new Map(); + for (const value of values) { + counts.set(value, (counts.get(value) || 0) + 1); + } + const maxCount = Math.max(...counts.values()); + return maxCount / values.length; +} + +function normalizeResult(result: HarnessResult) { + return { + plan: compactPlan(result.plan), + critique: canonicalize(result.critique), + implementation: { + success: result.implementation.success, + summary: result.implementation.summary, + commands: [...(result.implementation.commands || [])], + }, + iterations: (result.iterations || []).map((iteration) => ({ + command: iteration.command, + ok: iteration.ok, + critique: canonicalize(iteration.critique), + outputPreviewHash: hashValue(iteration.outputPreview), + })), + }; +} + +function estimateContextTokens(result: HarnessResult): number { + const normalized = normalizeResult(result); + return Math.ceil(stableStringify(normalized).length / 4); +} + +function toSnapshot(result: HarnessResult, index: number): DeterminismSnapshot { + const normalized = normalizeResult(result); + return { + index, + approved: result.critique.approved, + iterations: (result.iterations || []).length, + planHash: hashValue(compactPlan(result.plan)), + critiqueHash: hashValue(canonicalize(result.critique)), + commandsHash: hashValue(result.implementation.commands || []), + resultHash: hashValue(normalized), + contextTokens: estimateContextTokens(result), + }; +} + +function computeNumericStability(values: number[]): number { + if (values.length <= 1) return 1; + const min = Math.min(...values); + const max = Math.max(...values); + if (max === min) return 1; + return 
Math.max(0, 1 - (max - min) / Math.max(max, 1)); +} + +function scoreReport(snapshots: DeterminismSnapshot[]): DeterminismReport { + const dimensions: DeterminismDimensionScore[] = [ + { + name: 'result', + weight: 40, + score: modeAgreement(snapshots.map((item) => item.resultHash)) * 100, + details: 'Full normalized result hash agreement', + }, + { + name: 'plan', + weight: 20, + score: modeAgreement(snapshots.map((item) => item.planHash)) * 100, + details: 'Plan structure hash agreement', + }, + { + name: 'critique', + weight: 15, + score: modeAgreement(snapshots.map((item) => item.critiqueHash)) * 100, + details: 'Critique hash agreement', + }, + { + name: 'commands', + weight: 10, + score: modeAgreement(snapshots.map((item) => item.commandsHash)) * 100, + details: 'Implementer command sequence agreement', + }, + { + name: 'iterations', + weight: 10, + score: modeAgreement(snapshots.map((item) => item.iterations)) * 100, + details: 'Retry-count agreement', + }, + { + name: 'context_tokens', + weight: 5, + score: + computeNumericStability(snapshots.map((item) => item.contextTokens)) * + 100, + details: 'Token-footprint stability', + }, + ]; + + const weightedScore = dimensions.reduce((sum, dimension) => { + return sum + dimension.score * dimension.weight; + }, 0); + const totalWeight = dimensions.reduce( + (sum, dimension) => sum + dimension.weight, + 0 + ); + const score = totalWeight > 0 ? weightedScore / totalWeight : 0; + + const recommendations: string[] = []; + if (dimensions[0].score < 100) { + recommendations.push( + 'Pin planner/critic outputs behind deterministic fixtures or replay traces.' + ); + } + if (dimensions[1].score < 100) { + recommendations.push( + 'Canonicalize plan generation further and remove any model-dependent fields from smoke checks.' + ); + } + if (dimensions[4].score < 100) { + recommendations.push( + 'Tighten retry rules so the same failure mode produces the same iteration count.' 
+ ); + } + if (dimensions[5].score < 100) { + recommendations.push( + 'Reduce context assembly drift by sorting symbols and fixing token accounting.' + ); + } + + return { + runs: snapshots.length, + score: Math.round(score * 100) / 100, + snapshots, + dimensions, + recommendations, + }; +} + +export async function runDeterminismSmoke( + input: PlanningInput, + options: HarnessOptions & { runs?: number } = {} +): Promise { + const runs = Math.max(1, options.runs ?? 5); + const snapshots: DeterminismSnapshot[] = []; + + for (let index = 0; index < runs; index++) { + const result = await runSpike(input, { + ...options, + dryRun: options.dryRun ?? true, + deterministicFixture: options.deterministicFixture ?? true, + persistAudit: false, + record: false, + recordFrame: false, + }); + snapshots.push(toSnapshot(result, index + 1)); + } + + return scoreReport(snapshots); +} + +export function getDeterminismWatchTargets(repoPath: string): string[] { + const existingTargets = DETERMINISM_WATCH_PATTERNS.filter((target) => + existsSync(join(repoPath, target)) + ); + + if (existingTargets.length > 0) { + return existingTargets; + } + + // Fallback: watch the current repo root, but rely on ignore globs so the + // watcher remains contained to the repo without scanning generated/vendor dirs. 
+ return ['.']; +} + +function getDeterminismDir(repoPath: string): string { + return join(repoPath, '.stackmemory', 'determinism'); +} + +export function persistDeterminismReport( + repoPath: string, + report: DeterminismReport, + meta: { + task: string; + trigger: string; + changedPaths?: string[]; + } +): StoredDeterminismReport { + const dir = getDeterminismDir(repoPath); + mkdirSync(dir, { recursive: true }); + + const stored: StoredDeterminismReport = { + timestamp: new Date().toISOString(), + task: meta.task, + trigger: meta.trigger, + changedPaths: meta.changedPaths || [], + report, + }; + + writeFileSync( + join(dir, 'latest.json'), + JSON.stringify(stored, null, 2) + '\n' + ); + appendFileSync(join(dir, 'history.jsonl'), JSON.stringify(stored) + '\n'); + return stored; +} + +export function readLatestDeterminismReport( + repoPath: string +): StoredDeterminismReport | null { + const latestPath = join(getDeterminismDir(repoPath), 'latest.json'); + if (!existsSync(latestPath)) { + return null; + } + + try { + return JSON.parse( + readFileSync(latestPath, 'utf8') + ) as StoredDeterminismReport; + } catch { + return null; + } +} diff --git a/src/orchestrators/multimodal/harness.ts b/src/orchestrators/multimodal/harness.ts index 3259d170..c071f4e2 100644 --- a/src/orchestrators/multimodal/harness.ts +++ b/src/orchestrators/multimodal/harness.ts @@ -58,6 +58,48 @@ function heuristicPlan(input: PlanningInput): ImplementationPlan { }; } +function deterministicCritique(args: { + plan: ImplementationPlan; + ok: boolean; + diff: string; + checks: ReturnType | null; +}): CritiqueResult { + const issues: string[] = []; + const suggestions: string[] = []; + + if (!args.ok) { + issues.push('Implementer command failed'); + suggestions.push('Fix the command invocation before retrying'); + } + + if (args.diff.includes('<<<<<<<') || args.diff.includes('>>>>>>>')) { + issues.push('Merge conflict markers detected in diff'); + suggestions.push('Resolve conflict markers before 
approval'); + } + + if (args.checks && !args.checks.lintOk) { + issues.push('Lint checks failed'); + suggestions.push('Address lint failures before approval'); + } + + if (args.checks && !args.checks.testsOk) { + issues.push('Tests failed'); + suggestions.push('Fix failing tests before approval'); + } + + if (!args.diff || args.diff.startsWith('(no changes detected)')) { + suggestions.push( + 'No code changes detected; verify the task can be satisfied without edits' + ); + } + + return { + approved: issues.length === 0, + issues, + suggestions, + }; +} + export async function runSpike( input: PlanningInput, options: HarnessOptions = {} @@ -71,24 +113,28 @@ export async function runSpike( const t0 = Date.now(); let plan: ImplementationPlan; - try { - const raw = await callClaude(plannerPrompt, { - model: options.plannerModel, - system: plannerSystem, - }); + if (options.deterministicFixture) { + plan = heuristicPlan(input); + } else { try { - // Strip markdown code fences if present - const cleaned = raw - .replace(/^```(?:json)?\s*\n?/i, '') - .replace(/\n?```\s*$/i, '') - .trim(); - plan = JSON.parse(cleaned); + const raw = await callClaude(plannerPrompt, { + model: options.plannerModel, + system: plannerSystem, + }); + try { + // Strip markdown code fences if present + const cleaned = raw + .replace(/^```(?:json)?\s*\n?/i, '') + .replace(/\n?```\s*$/i, '') + .trim(); + plan = JSON.parse(cleaned); + } catch { + // Fall back to heuristic if model returned text + plan = heuristicPlan(input); + } } catch { - // Fall back to heuristic if model returned text plan = heuristicPlan(input); } - } catch { - plan = heuristicPlan(input); } const planLatencyMs = Date.now() - t0; @@ -155,23 +201,32 @@ export async function runSpike( // Critic reviews the diff, not the CLI log const criticSystem = `You are a strict code reviewer. Review the git diff against the plan. Check for: correctness, missing steps, unrelated changes, bugs, security issues. 
Also review lint and test results if provided. Return raw JSON only (no markdown fences): { "approved": boolean, "issues": ["string"], "suggestions": ["string"] }`; const criticPrompt = `Plan: ${plan.summary}\nAcceptance criteria:\n${plan.steps.map((s) => s.acceptanceCriteria?.join(', ') || s.title).join('\n')}\n\nAttempt ${i + 1}/${maxIters}\nImplementer exit: ${ok ? 'success' : 'failed'}\n\nGit diff:\n${diff}${checksSection}`; - try { - const raw = await callClaude(criticPrompt, { - model: options.reviewerModel, - system: criticSystem, + if (options.deterministicFixture) { + lastCritique = deterministicCritique({ + plan, + ok, + diff, + checks, }); - // Strip markdown code fences if present - const cleaned = raw - .replace(/^```(?:json)?\s*\n?/i, '') - .replace(/\n?```\s*$/i, '') - .trim(); - lastCritique = JSON.parse(cleaned); - } catch { - lastCritique = { - approved: ok, - issues: ok ? [] : ['Critique failed'], - suggestions: [], - }; + } else { + try { + const raw = await callClaude(criticPrompt, { + model: options.reviewerModel, + system: criticSystem, + }); + // Strip markdown code fences if present + const cleaned = raw + .replace(/^```(?:json)?\s*\n?/i, '') + .replace(/\n?```\s*$/i, '') + .trim(); + lastCritique = JSON.parse(cleaned); + } catch { + lastCritique = { + approved: ok, + issues: ok ? 
[] : ['Critique failed'], + suggestions: [], + }; + } } iterations.push({ @@ -213,75 +268,77 @@ export async function runSpike( contextTokens: Math.ceil(finalDiff.length / 4), }; - // Persist audit + metrics - try { - const dir = - options.auditDir || path.join(input.repoPath, '.stackmemory', 'build'); - fs.mkdirSync(dir, { recursive: true }); - const stamp = new Date().toISOString().replace(/[:.]/g, '-'); - const file = path.join(dir, `spike-${stamp}.json`); - fs.writeFileSync( - file, - JSON.stringify( - { - input, - options: { ...options, auditDir: undefined }, - plan, - iterations, - metrics: runMetrics, - }, - null, - 2 - ) - ); - - // Append to metrics JSONL for time-series analysis - const metricsFile = path.join(dir, 'harness-metrics.jsonl'); - fs.appendFileSync(metricsFile, JSON.stringify(runMetrics) + '\n'); - - // LOOP 5: Harness Regression — check rolling window against targets + // Persist audit + metrics unless explicitly disabled for replay/smoke runs. + if (options.persistAudit !== false) { try { - const lines = fs - .readFileSync(metricsFile, 'utf-8') - .split('\n') - .filter((l) => l.trim()); - const recent = lines - .slice(-10) - .map((l) => JSON.parse(l) as HarnessRunMetrics); - if (recent.length >= 3) { - const summary = summarizeRuns(recent); - if (summary.approvalRate < HARNESS_TARGETS.firstPassApprovalRate) { - feedbackLoops.fire( - 'harnessRegression', - 'metrics_append', - { - metric: 'approvalRate', - current: summary.approvalRate, - target: HARNESS_TARGETS.firstPassApprovalRate, - window: recent.length, - }, - 'regression_alert' - ); - } - if (summary.p95TotalLatencyMs > HARNESS_TARGETS.totalLatencyP95Ms) { - feedbackLoops.fire( - 'harnessRegression', - 'metrics_append', - { - metric: 'totalLatencyP95', - current: summary.p95TotalLatencyMs, - target: HARNESS_TARGETS.totalLatencyP95Ms, - window: recent.length, - }, - 'regression_alert' - ); + const dir = + options.auditDir || path.join(input.repoPath, '.stackmemory', 'build'); + 
fs.mkdirSync(dir, { recursive: true }); + const stamp = new Date().toISOString().replace(/[:.]/g, '-'); + const file = path.join(dir, `spike-${stamp}.json`); + fs.writeFileSync( + file, + JSON.stringify( + { + input, + options: { ...options, auditDir: undefined }, + plan, + iterations, + metrics: runMetrics, + }, + null, + 2 + ) + ); + + // Append to metrics JSONL for time-series analysis + const metricsFile = path.join(dir, 'harness-metrics.jsonl'); + fs.appendFileSync(metricsFile, JSON.stringify(runMetrics) + '\n'); + + // LOOP 5: Harness Regression — check rolling window against targets + try { + const lines = fs + .readFileSync(metricsFile, 'utf-8') + .split('\n') + .filter((l) => l.trim()); + const recent = lines + .slice(-10) + .map((l) => JSON.parse(l) as HarnessRunMetrics); + if (recent.length >= 3) { + const summary = summarizeRuns(recent); + if (summary.approvalRate < HARNESS_TARGETS.firstPassApprovalRate) { + feedbackLoops.fire( + 'harnessRegression', + 'metrics_append', + { + metric: 'approvalRate', + current: summary.approvalRate, + target: HARNESS_TARGETS.firstPassApprovalRate, + window: recent.length, + }, + 'regression_alert' + ); + } + if (summary.p95TotalLatencyMs > HARNESS_TARGETS.totalLatencyP95Ms) { + feedbackLoops.fire( + 'harnessRegression', + 'metrics_append', + { + metric: 'totalLatencyP95', + current: summary.p95TotalLatencyMs, + target: HARNESS_TARGETS.totalLatencyP95Ms, + window: recent.length, + }, + 'regression_alert' + ); + } } + } catch { + // best-effort } } catch { - // best-effort + // best-effort only } - } catch { - // best-effort only } // Optionally record to local context DB @@ -418,12 +475,15 @@ async function recordAsFrame( // Lightweight planner: returns only the plan without implementation/critique export async function runPlanOnly( input: PlanningInput, - options: { plannerModel?: string } = {} + options: { plannerModel?: string; deterministicFixture?: boolean } = {} ): Promise { const plannerSystem = `You write concise, 
actionable implementation plans. Output raw JSON only (no markdown code fences). Schema: { "summary": "string", "steps": [{ "id": "step-1", "title": "string", "rationale": "string", "acceptanceCriteria": ["string"] }], "risks": ["string"] }`; const contextSummary = getLocalContextSummary(input.repoPath); const plannerPrompt = `Task: ${input.task}\nRepo: ${input.repoPath}\nNotes: ${input.contextNotes || '(none)'}\n${contextSummary}\nConstraints: Keep the plan minimal and implementable in a single PR.`; + if (options.deterministicFixture) { + return heuristicPlan(input); + } try { const raw = await callClaude(plannerPrompt, { model: options.plannerModel, diff --git a/src/orchestrators/multimodal/types.ts b/src/orchestrators/multimodal/types.ts index e6535d74..1eb6ad1d 100644 --- a/src/orchestrators/multimodal/types.ts +++ b/src/orchestrators/multimodal/types.ts @@ -30,8 +30,10 @@ export interface HarnessOptions { implementer?: 'codex' | 'claude'; maxIters?: number; // retry loop for critique → fix cycles auditDir?: string; // where to persist spike results + persistAudit?: boolean; // if false, skip writing audit artifacts/metrics record?: boolean; // store plan/critique in local context DB recordFrame?: boolean; // create a real frame and anchors + deterministicFixture?: boolean; // force deterministic fixture mode for smoke/replay checks } export interface ImplementationResult { From 6bf62e96f2524cfc0f5fb39317f833adcedbca12 Mon Sep 17 00:00:00 2001 From: "StackMemory Bot (CLI)" Date: Tue, 14 Apr 2026 17:49:36 -0400 Subject: [PATCH 07/11] docs: add design principles architecture note --- docs/architecture/DESIGN_PRINCIPLES.md | 91 +++++++++++++++++++++ docs/architecture/TECHNICAL_ARCHITECTURE.md | 2 + 2 files changed, 93 insertions(+) create mode 100644 docs/architecture/DESIGN_PRINCIPLES.md diff --git a/docs/architecture/DESIGN_PRINCIPLES.md b/docs/architecture/DESIGN_PRINCIPLES.md new file mode 100644 index 00000000..f2fb12be --- /dev/null +++ 
b/docs/architecture/DESIGN_PRINCIPLES.md @@ -0,0 +1,91 @@ +# Design Principles + +## The Three-Layer Architecture + +``` +ā”Œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā” +│ FAT SKILLS (intelligence) │ +│ Markdown procedures that encode │ +│ judgment, process, domain knowledge. │ +│ This is where 90% of the value lives. │ +ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤ +│ THIN HARNESS (routing) │ +│ ~200 lines of code. JSON in, text out. │ +│ Read-only by default. State machine. │ +ā”œā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”¤ +│ DETERMINISTIC FOUNDATION (execution) │ +│ QueryDB, ReadDoc, Search, Timeline │ +│ — the tools that never fail ambiguously│ +ā””ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”€ā”˜ +``` + +### The Principle + +**Push intelligence UP into skills. Push execution DOWN into deterministic tooling. Keep the harness THIN.** + +When you do this: +- Every model improvement automatically improves every skill +- The deterministic layer stays perfectly reliable +- The harness never accumulates complexity + +### How This Maps to StackMemory + +| Layer | StackMemory Component | Examples | +|-------|----------------------|----------| +| **Fat Skills** | `.claude/skills/`, CLAUDE.md, wiki articles | Context engineering, code conventions, deploy recipes | +| **Thin Harness** | MCP server, CLI, hooks, handoff script | `stackmemory restore`, `stackmemory snap`, frame lifecycle | +| **Deterministic Foundation** | SQLite, file system, git, embeddings | `contexts` table, `.stackmemory/` directory, decision log files | + +### Anti-Patterns + +- **Fat harness**: Logic in the MCP server that should be a skill. 
If you're writing `if/else` chains in the harness, move it to a skill. +- **Thin skills**: Skills that just call tools. If a skill has no judgment, it's a tool wrapper — push it down. +- **Smart foundation**: Database queries that encode business logic. Keep the foundation dumb — SELECT/INSERT/UPDATE only. + +## Cross-Agent Memory Strategies + +When multiple agents need shared state, choose the mechanism that matches the bottleneck: + +| Need | Strategy | StackMemory Component | +|------|----------|----------------------| +| Survive session restart | **Persistent context** | `stackmemory restore` / handoff script | +| Share decisions across agents | **Decision log** | `.stackmemory/decisions/` files | +| Transfer orchestrator state to worker | **Text handoff** (current) | `-smd` wrapper, structured notes | +| Transfer latent state without text | **KV cache compaction** (research) | Not yet — requires runtime KV access | +| Find relevant prior context | **Semantic search** | Embeddings + vector index | +| Replicate exact prior state | **Snapshot** | `stackmemory snap save/restore` | + +### Current Default: Text Handoff + +The `-smd` wrapper (`stackmemory-auto-handoff.sh`) does text-level handoff: +1. Saves current session state before exit +2. Restores prior context on next session start +3. Injects structured notes (decisions, corrections, task state) + +This is the **"structured notes" strategy** — human-readable, auditable, portable across model families. It works with any API (Claude, Codex, local models). + +### Future: Latent Briefing (Research) + +For systems that control the inference runtime (self-hosted models, custom Cloudflare workers), **Latent Briefing** offers a more efficient path: + +- Compact orchestrator KV cache using Attention Matching +- Task-guided scoring retains only positions relevant to the current worker +- Eliminates text serialization overhead + +**Status**: Research reference. Blocked by API access — Claude API doesn't expose KV state. 
Viable for self-hosted models or custom inference runtimes. + +**When to revisit**: When StackMemory supports self-hosted model backends, or when Substrate Cloud ships a custom inference runtime. + +**Reference**: See skill doc `latent-briefing.skill.md` for the full technical treatment, decision framework, and gotchas. + +## Compaction Hierarchy + +When context is too large, apply these strategies in order: + +1. **Observation masking** — Hide tool outputs that aren't relevant to the current task (cheapest) +2. **Prefix caching** — Reuse identical prompt prefixes across calls (free with API support) +3. **Structured notes** — Summarize prior sessions into decision/correction format (current default) +4. **Semantic retrieval** — Pull only relevant chunks from prior context (needs embeddings) +5. **KV cache compaction** — Transfer latent state directly (requires runtime access) + +Each level is more powerful but harder to implement. Start from the top. Only move down when the level above is insufficient. diff --git a/docs/architecture/TECHNICAL_ARCHITECTURE.md b/docs/architecture/TECHNICAL_ARCHITECTURE.md index 85a0efa6..eaba5874 100644 --- a/docs/architecture/TECHNICAL_ARCHITECTURE.md +++ b/docs/architecture/TECHNICAL_ARCHITECTURE.md @@ -47,6 +47,8 @@ The outer system that: > **Harness = runtime. Frames = call stack. Tools = syscalls. Digests = return values.** +**Design principle**: Push intelligence UP into skills. Push execution DOWN into deterministic tooling. Keep the harness THIN. See `DESIGN_PRINCIPLES.md` for the full three-layer architecture and cross-agent memory strategy hierarchy. 
+ --- ## Database Design From b6c3afbe6147abd56fc198268fd6eb2e5dde9843 Mon Sep 17 00:00:00 2001 From: "StackMemory Bot (CLI)" Date: Tue, 14 Apr 2026 18:06:02 -0400 Subject: [PATCH 08/11] chore: update gepa baselines and clean GitButler hooks --- .husky/post-checkout | 72 ------ .husky/pre-commit-user | 13 -- scripts/gepa/.before-optimize.md | 228 ++++-------------- scripts/gepa/generations/gen-000/baseline.md | 230 +++++-------------- scripts/gepa/generations/gen-001/baseline.md | 230 +++++-------------- 5 files changed, 157 insertions(+), 616 deletions(-) delete mode 100755 .husky/post-checkout delete mode 100755 .husky/pre-commit-user diff --git a/.husky/post-checkout b/.husky/post-checkout deleted file mode 100755 index fd875bc5..00000000 --- a/.husky/post-checkout +++ /dev/null @@ -1,72 +0,0 @@ -#!/bin/sh -# GITBUTLER_MANAGED_HOOK_V1 -# This hook auto-cleans GitButler hooks when you checkout away from gitbutler/workspace. - -PREV_HEAD=$1 -NEW_HEAD=$2 -BRANCH_CHECKOUT=$3 - -# Only act on branch checkouts (not file checkouts) -if [ "$BRANCH_CHECKOUT" != "1" ]; then - # Run user's hook if it exists - if [ -x "$(dirname "$0")/post-checkout-user" ]; then - exec "$(dirname "$0")/post-checkout-user" "$@" - fi - exit 0 -fi - -# Get the new branch name -NEW_BRANCH=$(git symbolic-ref --short HEAD 2>/dev/null) - -# If we just left gitbutler/workspace (and aren't coming back to it) -PREV_BRANCH=$(git name-rev --name-only "$PREV_HEAD" 2>/dev/null | sed 's|^remotes/||') -if echo "$PREV_BRANCH" | grep -q "gitbutler/workspace"; then - if [ "$NEW_BRANCH" != "gitbutler/workspace" ]; then - echo "" - echo "NOTE: You have left GitButler's managed workspace branch." - echo "Cleaning up GitButler hooks..." 
- - HOOKS_DIR=$(dirname "$0") - - # Restore pre-commit - but only if it's GitButler-managed - if [ -f "$HOOKS_DIR/pre-commit-user" ]; then - mv "$HOOKS_DIR/pre-commit-user" "$HOOKS_DIR/pre-commit" - echo " Restored: pre-commit" - elif [ -f "$HOOKS_DIR/pre-commit" ]; then - # Only remove if it's GitButler-managed (has our signature) - if grep -q "GITBUTLER_MANAGED_HOOK_V1" "$HOOKS_DIR/pre-commit"; then - rm "$HOOKS_DIR/pre-commit" - echo " Removed: pre-commit (GitButler managed)" - else - echo " Warning: pre-commit hook is not GitButler-managed, leaving it untouched" - fi - fi - - # Run user's post-checkout if it exists, then clean up - if [ -x "$HOOKS_DIR/post-checkout-user" ]; then - "$HOOKS_DIR/post-checkout-user" "$@" - mv "$HOOKS_DIR/post-checkout-user" "$HOOKS_DIR/post-checkout" - echo " Restored: post-checkout" - else - # Only remove self if we're GitButler-managed (we should be, but check anyway) - if grep -q "GITBUTLER_MANAGED_HOOK_V1" "$HOOKS_DIR/post-checkout"; then - rm "$HOOKS_DIR/post-checkout" - echo " Removed: post-checkout (GitButler managed)" - else - echo " Warning: post-checkout hook is not GitButler-managed, leaving it untouched" - fi - fi - - echo "" - echo "To return to GitButler mode, run: but setup" - echo "" - exit 0 - fi -fi - -# Run user's hook if it exists -if [ -x "$(dirname "$0")/post-checkout-user" ]; then - exec "$(dirname "$0")/post-checkout-user" "$@" -fi - -exit 0 diff --git a/.husky/pre-commit-user b/.husky/pre-commit-user deleted file mode 100755 index d8089be9..00000000 --- a/.husky/pre-commit-user +++ /dev/null @@ -1,13 +0,0 @@ -# Use Node version from .nvmrc -export NVM_DIR="$HOME/.nvm" -if [ -s "$NVM_DIR/nvm.sh" ]; then - . 
"$NVM_DIR/nvm.sh" - nvm use 2>/dev/null -elif [ -d "$HOME/.nvm/versions/node" ]; then - NODE_VER=$(cat "$(git rev-parse --show-toplevel)/.nvmrc" 2>/dev/null || echo "20") - NODE_PATH=$(ls -d "$HOME/.nvm/versions/node/v${NODE_VER}"* 2>/dev/null | head -1) - [ -n "$NODE_PATH" ] && export PATH="$NODE_PATH/bin:$PATH" -fi - -npx lint-staged -npm run build diff --git a/scripts/gepa/.before-optimize.md b/scripts/gepa/.before-optimize.md index 4dc0ebb0..2388f26a 100644 --- a/scripts/gepa/.before-optimize.md +++ b/scripts/gepa/.before-optimize.md @@ -1,198 +1,72 @@ -# CLAUDE.md +# croissant.ai — Agent Guide -You are a senior full-stack engineer working on **Sol**, the monorepo for Rize — an automatic time tracking application. Read the relevant code before making changes. Quote the specific code you're modifying when explaining changes. +Tool-agnostic reference for AI coding agents working in this repository. -## Project Overview +## Stack -- **api/** — Rails 7.1 GraphQL backend (Ruby 3.3.5) -- **web/** — Next.js 14 React web app (Node 22) -- **electron/** — Electron desktop app (Node 22) -- **services/** — Bun-based TypeScript event consumers/workers -- **voyager/** — Marketing website and landing pages (Next.js) -- **scripts/** — Automation scripts (categorized by side-effect type) -- **puppet/** — Puppeteer server for images/PDFs -- **chrome/** — Chrome browser extension -- **docs/** — Docusaurus documentation site -- **zapier/** — Zapier integration +Node.js / Express / PostgreSQL / Redis +Railway deployment | Stripe / Salesforce / QuickBooks integrations -## Development Commands +## Project Structure -```bash -# Start all services (requires iTerm2 on macOS) -./scripts/run-dev.sh - -# Or individually: -cd api && hivemind Procfile.dev # Rails + AnyCable + Sidekiq + Clockwork -cd web && npm run dev # Next.js dev server -cd electron && npm run dev # Electron with hot reload -cd services && hivemind Procfile.dev # Bun services -cd voyager && npm run dev # Marketing site 
(port 3003) ``` - -### Docker (start first) -```bash -cd api && docker-compose up -d -# TimescaleDB :15432 | Redis :16379 | Kafka :9092 | MySQL :13306 +src/ + api/ # Route handlers + core/ # monitoring-service, cache-service, queue-service, master-agent, api-validation + features/ # Feature modules + shared/ # Shared utilities + integrations/ # Third-party connectors +docs/ # Documentation +scripts/ # Automation scripts +docker/ # Container configs +prompts/ # Externalized LLM prompt templates ``` -### Testing -```bash -cd api && bundle exec rspec # Full API suite -cd api && bundle exec rspec spec/path/to/file_spec.rb # Single file -cd api && bundle exec rspec spec/path/to/file_spec.rb:42 # Single line -cd electron && npm test # Electron (Jest) -# Web — no active tests -``` +## Commands -### Building ```bash -cd api && bundle install && rake db:migrate -cd web && npm run build # gql-gen + tailwind + next build -cd electron && npm run build # Electron Forge make -cd services && bun install +npm run dev # Start dev server +npm run test # Run test suites (3 parallel Jest workers, maxWorkers=4) +npm run lint # Lint check +npm run migrate # Run DB migrations +docker-compose up -d # Start local DBs ``` -## Architecture - -### GraphQL API -Two endpoints: `api/v1` (public — OAuth, Zapier) and `private/v1` (web, electron). Located at `api/app/graphql/{api,private}/v1/`. - -### Background Processing -- **Sidekiq** for async jobs (`api/config/sidekiq.yml`) — use `perform_async`, not `perform_later` (ApplicationJob uses Sidekiq::Worker, not ActiveJob) -- **Clockwork** for scheduled jobs (`api/config/clock.rb`) -- **Kafka** for event streaming (`services/consumers/`) - -### Databases -PostgreSQL (primary) + TimescaleDB (time-series, separate connection) + MySQL (legacy) + Redis (cache, ActionCable, Sidekiq) - -### Real-time -AnyCable WebSocket server for subscriptions. Channels in `api/app/channels/`. 
- -## Code Patterns - -### Ruby/Rails -- Controllers validate + enqueue async jobs. Jobs handle business logic. Models handle delivery. -- Webhook controllers: `skip_before_action :authenticate_user!` + shared secret verification -- `CanonicalEmail.find_by_canonical(email:)` — uses `email_address` gem canonicalization; stub in tests -- `Identity#first_name` is a computed method (from `name` via `Nameable::Latin`), not a column -- `generate_hash_authentication_settings_url` calls `update!` internally — stub in tests via `allow_any_instance_of(Identity)` -- Test env uses `cache_store: :null_store` — swap to `MemoryStore` in `around` block for cache tests -- Postmark emails: all go through `PostmarkClient.deliver_in_batches_with_templates` with required keys: `email_enabled`, `email_bounced`, `message_stream` -- Prefer `be_between(before, after)` for time assertions (no `freeze_time` or `travel_to`) - -### JavaScript/TypeScript -- Use `test()` instead of `it()` in Jest tests -- Use `toBeCalled()` instead of `toHaveBeenCalledWith()` in assertions -- ESM: add `.js` extension to relative imports - -### Error Handling -- Prefer returning undefined over throwing exceptions -- Log and continue rather than crashing — filter nulls at boundaries -- Validate inputs at system boundaries (user input, external APIs, webhooks) - -## Scripts (`scripts/`) - -Standalone Node.js `.mjs` automation — outreach, content, analytics, CRM sync. Organized by side-effect type: - -- **`scripts/commit/`** — Scripts that produce repo artifacts (PRs, committed files). Includes `feedback/` for feedback collection and `profound-briefs/` for AEO pulse output. -- **`scripts/ops/`** — Marketing motions with external side effects (CRM sync, outreach, social content). -- **`scripts/diag/`** — Read-only diagnostics (pipeline health checks, demo scorecards). -- **`scripts/data/`** — Committed data artifacts (ICP keywords, pipeline config, profound learnings/snapshots). 
-- **`scripts/lib/`** — Shared utilities (Attio, Claude, Fathom, Slack, dates, prompts). - -Scheduled via GitHub Actions cron. All scheduled workflows support `workflow_dispatch` for manual runs. - -**GitHub Actions limit:** `workflow_dispatch` allows max 25 `inputs`. `weekly-start.yml` has 22/25 inputs. Feedback is consolidated into a single JSON `feedback` input: `{"social":"...","aeo":"...","blog":"...","snitcher":"..."}`. - -### Slack `/run` command -When adding or renaming GitHub Actions workflows that should be triggerable via Slack, update the `WORKFLOWS` hash in `api/app/jobs/trigger_github_workflow_job.rb`. When deleting a workflow, remove it from the hash. The Slack `/run` command reads this mapping to dispatch workflows. - -### Workflow → Script mapping - -| Workflow | Script path | Category | -|---|---|---| -| `weekly-start.yml` | `voyager/scripts/content-brief.mjs` + `voyager/scripts/content-audit.mjs` + `ops/fathom-social-content.mjs` + `ops/fathom-testimonial-scan.mjs` + `ops/perplexity-citation-audit.mjs` + `commit/profound-aeo-pulse.mjs` + `voyager/scripts/generate-blog-scaffold.mjs` + `ops/ahrefs-firehose-digest.mjs` + `ops/export-dripify.mjs` + `commit/prospect-discovery.mjs` + `ops/repush-clay-leads.mjs` + `ops/snitcher-outreach.mjs` | GHA cron (Mon) | -| `weekly-end.yml` | `diag/fathom-demo-scorecard.mjs` + `commit/feedback/collect-*.mjs` | GHA cron (Fri) | -| `anneal-keywords.yml` | `commit/anneal-keywords.mjs` | GHA cron (Sun) | -| `g2-review-monitor.yml` | `ops/g2-to-senja.mjs` | GHA cron (Daily) | -| `testimonial-pipeline.yml` | `commit/testimonial-pipeline.mjs` | Manual | -| `video-pipeline.yml` | `ops/video-clips.mjs` | Manual | -| `pagespeed-audit.yml` | `diag/pagespeed-audit.mjs` + `commit/pagespeed-improvements.mjs` | GHA cron (1st of month) | -| `daily-ops.yml` | `ops/slack-digest.mjs` + `ops/fathom-meeting-digest.mjs` + `ops/ops-daily-briefing.mjs` | GHA cron (weekdays) | -| `indexnow-submit.yml` | (inline curl) | Push to master 
(voyager) / Manual | - -## GitHub Actions (`.github/workflows/`) - -### CI/CD (PR-triggered) -- `test-api.yml` — RSpec on PR to `api/` -- `review-voyager-seo.yml` — SEO/AEO/GEO review on PR to `voyager/` -- `main.yml` — Deploy API/Web/Services/Docs/Voyager to staging on merge to master -- `deploy-production.yml` — Manual sequential prod deploy (API → Services → Web) - -### GitHub Actions gotcha -In `actions/github-script@v7`, `github.rest.issues.createComment` posts plain issue comments on PRs (PRs are issues in GitHub's API). For inline code suggestions on specific files/lines, use `github.rest.pulls.createReview` or `github.rest.pulls.createReviewComment` instead. - -### Scheduled (cron) -- `weekly-start.yml` — Mon 9am ET (content review, social content, testimonial scan, Perplexity audit, AEO pulse → blog scaffold, Ahrefs digest, Dripify export, prospect discovery → snitcher outreach) -- `weekly-end.yml` — Fri 9am ET (demo scorecard + pipeline health) -- `anneal-keywords.yml` — Sun 11am ET (keyword annealing + kill pattern updates) -- `g2-review-monitor.yml` — Daily 10am ET -- `pagespeed-audit.yml` — 1st of month 9am ET (PSI audit → Claude recommendations → PR) -- `daily-ops.yml` — Weekdays 10am ET (signal monitor, G2 reviews, review intercept, Slack digest → meeting digest → daily briefing) -- `indexnow-submit.yml` — On push to master (voyager pages) + manual (`/run indexnow urls=...`) +## Git Conventions -## Deployments +- Branch prefixes: `feature/`, `fix/`, `chore/` +- Commit format: `type(scope): message` +- Do NOT add `Co-Authored-By` lines to commits +- Pre-commit hook runs: `npm run lint` + `npm run test` + E2E browser screenshots -### Staging (auto on merge to master) -- **API, Web, Services** — GCP Cloud Run via Docker (Artifact Registry) -- **Voyager** — GCP Cloud Run -- **Docs** — Heroku +## Testing Rules -### Production (manual `workflow_dispatch` only) -- Sequential: API → 5min wait → Services → 5min wait → Web -- `gh workflow run 
deploy-production.yml --ref master` +- **Framework**: Jest + SWC +- **DB mocking**: Use dependency injection (DI), not global mocks +- **Supertest**: Pass `app` (NOT `server`) to supertest +- **Global jest**: src/ tests use global `jest` — do NOT import from `@jest/globals` (causes redeclaration errors) +- **Mock reset**: `jest.clearAllMocks()` resets `mockReturnValue` — always re-set mocks in `beforeEach` +- **Test runner**: `npm test` is long-running; run in a background process or sub-agent, not inline -## Voyager Content +## ESLint Rules -Blog posts in `voyager/src/content/blog/*.mdx`. See `voyager/CLAUDE.md` for tone of voice, banned words, and content rules. - -Key patterns: -- Blog JSON-LD (BlogPosting) in `voyager/src/modules/blogJsonLd.js` -- FAQ structured data via `faqs` frontmatter array in blog MDX files -- Sitemap auto-includes all posts via `voyager/src/app/sitemap.js` -- Blog scaffold: `voyager/scripts/generate-blog-scaffold.mjs` (or `npm run content:scaffold`) -- Analytics events: `voyager/src/modules/analytics.js` -- Route paths: `voyager/src/utils/locations.js` - -## Style - -### Commits -- Plain imperative sentences, no conventional commit prefixes -- Short and direct — describe what, not why - -### Code -- Read before writing. Edit over rewrite. No docs unless asked. -- KISS / YAGNI / SOLID. Under 20 lines per function. -- Comments only for complex logic. No emojis in code. -- When blocked, try an alternative approach before asking. Explain what you tried and why it failed. -- Review your changes against the task requirements before reporting completion. - -## Knowledge Skills (.claude/skills/knowledge/) +- Use `catch {}` not `catch (_err) {}` — underscore prefix not in the allowed pattern +- CJS format for JS files in `src/` -Project-specific knowledge skills load automatically when prompts match `activates_on` keywords. They provide current API patterns, SDK versions, and gotchas that prevent hallucination. 
- -**When to suggest a new skill:** If you encounter a repeatable workflow where you got something wrong (wrong API shape, deprecated pattern, incorrect filter field), suggest creating a knowledge skill for it. Format: "This would be a good candidate for a `.claude/skills/knowledge/.skill.md` — want me to create one?" +## Key Patterns -Current skills: `postmark-email`, `nextjs-app-router`, `profound-mcp`, `greptile-review`, `tailwind-v4-design`, `rails-graphql-mutations`, `rails-sidekiq-clockwork`, `rails-billing-identity`, `electron-store-ipc`, `chrome-extension`, `blog-hero-images` +- Provenance tracking: every data point includes source, timestamp, lineage +- Multi-tenant container isolation +- DI route factories for testability +- Error handling: return undefined over throwing; log and continue over crashing +- Add `.js` extension to relative ESM imports -## Key Files +## StackMemory Context Rule -- `api/config/database.yml` — DB connections (primary + timescale) -- `api/config/sidekiq.yml` — Job queues and concurrency -- `api/config/clock.rb` — Scheduled jobs (Clockwork) -- `api/Procfile.dev` — Dev processes -- `api/app/services/postmark_client.rb` — Email delivery (all Postmark goes through here) -- `api/app/services/drip_campaign_config.rb` — Drip email templates + required keys -- `voyager/CLAUDE.md` — Blog tone, banned words, content rules -- `sol.code-workspace` — VS Code workspace -- Each project requires its own `.env` file (not in repo) +- When an agent fetches conversation context for active work, it must pass the exact current assignment or question as `task_query`. +- Prefer the MCP shape: + - `org_id` + - `conversation_id` + - `task_query` + - `recover_on_low_signal: true` +- Do not fetch raw `get_conversation` context for worker execution unless full transcript behavior is explicitly required. 
diff --git a/scripts/gepa/generations/gen-000/baseline.md b/scripts/gepa/generations/gen-000/baseline.md index 5fd37e77..0c86cace 100644 --- a/scripts/gepa/generations/gen-000/baseline.md +++ b/scripts/gepa/generations/gen-000/baseline.md @@ -1,198 +1,74 @@ -# CLAUDE.md +# croissant.ai — Agent Guide -You are a senior full-stack engineer working on **Sol**, the monorepo for Rize — an automatic time tracking application. Read the relevant code before making changes. Quote the specific code you're modifying when explaining changes. +Tool-agnostic reference for AI coding agents working in this repository. -## Project Overview +## Stack -- **api/** — Rails 7.1 GraphQL backend (Ruby 3.3.5) -- **web/** — Next.js 14 React web app (Node 22) -- **electron/** — Electron desktop app (Node 22) -- **services/** — Bun-based TypeScript event consumers/workers -- **voyager/** — Marketing website and landing pages (Next.js) -- **scripts/** — Automation scripts (categorized by side-effect type) -- **puppet/** — Puppeteer server for images/PDFs -- **chrome/** — Chrome browser extension -- **docs/** — Docusaurus documentation site -- **zapier/** — Zapier integration +Node.js / Express / PostgreSQL / Redis +Railway deployment | Stripe / Salesforce / QuickBooks integrations -## Development Commands +## Project Structure -```bash -# Start all services (requires iTerm2 on macOS) -./scripts/run-dev.sh - -# Or individually: -cd api && hivemind Procfile.dev # Rails + AnyCable + Sidekiq + Clockwork -cd web && npm run dev # Next.js dev server -cd electron && npm run dev # Electron with hot reload -cd services && hivemind Procfile.dev # Bun services -cd voyager && npm run dev # Marketing site (port 3003) ``` - -### Docker (start first) -```bash -cd api && docker-compose up -d -# TimescaleDB :15432 | Redis :16379 | Kafka :9092 | MySQL :13306 +src/ + api/ # Route handlers + core/ # monitoring-service, cache-service, queue-service, master-agent, api-validation + features/ # Feature modules + 
shared/ # Shared utilities + integrations/ # Third-party connectors +docs/ # Documentation +scripts/ # Automation scripts +docker/ # Container configs +prompts/ # Externalized LLM prompt templates ``` -### Testing -```bash -cd api && bundle exec rspec # Full API suite -cd api && bundle exec rspec spec/path/to/file_spec.rb # Single file -cd api && bundle exec rspec spec/path/to/file_spec.rb:42 # Single line -cd electron && npm test # Electron (Jest) -# Web — no active tests -``` +## Commands -### Building ```bash -cd api && bundle install && rake db:migrate -cd web && npm run build # gql-gen + tailwind + next build -cd electron && npm run build # Electron Forge make -cd services && bun install +npm run dev # Start dev server +npm run test # Run test suites (3 parallel Jest workers, maxWorkers=4) +npm run lint # Lint check +npm run migrate # Run DB migrations +docker-compose up -d # Start local DBs ``` -## Architecture - -### GraphQL API -Two endpoints: `api/v1` (public — OAuth, Zapier) and `private/v1` (web, electron). Located at `api/app/graphql/{api,private}/v1/`. - -### Background Processing -- **Sidekiq** for async jobs (`api/config/sidekiq.yml`) — use `perform_async`, not `perform_later` (ApplicationJob uses Sidekiq::Worker, not ActiveJob) -- **Clockwork** for scheduled jobs (`api/config/clock.rb`) -- **Kafka** for event streaming (`services/consumers/`) - -### Databases -PostgreSQL (primary) + TimescaleDB (time-series, separate connection) + MySQL (legacy) + Redis (cache, ActionCable, Sidekiq) - -### Real-time -AnyCable WebSocket server for subscriptions. Channels in `api/app/channels/`. - -## Code Patterns - -### Ruby/Rails -- Controllers validate + enqueue async jobs. Jobs handle business logic. Models handle delivery. 
-- Webhook controllers: `skip_before_action :authenticate_user!` + shared secret verification -- `CanonicalEmail.find_by_canonical(email:)` — uses `email_address` gem canonicalization; stub in tests -- `Identity#first_name` is a computed method (from `name` via `Nameable::Latin`), not a column -- `generate_hash_authentication_settings_url` calls `update!` internally — stub in tests via `allow_any_instance_of(Identity)` -- Test env uses `cache_store: :null_store` — swap to `MemoryStore` in `around` block for cache tests -- Postmark emails: all go through `PostmarkClient.deliver_in_batches_with_templates` with required keys: `email_enabled`, `email_bounced`, `message_stream` -- Prefer `be_between(before, after)` for time assertions (no `freeze_time` or `travel_to`) - -### JavaScript/TypeScript -- Use `test()` instead of `it()` in Jest tests -- Use `toBeCalled()` instead of `toHaveBeenCalledWith()` in assertions -- ESM: add `.js` extension to relative imports - -### Error Handling -- Prefer returning undefined over throwing exceptions -- Log and continue rather than crashing — filter nulls at boundaries -- Validate inputs at system boundaries (user input, external APIs, webhooks) - -## Scripts (`scripts/`) - -Standalone Node.js `.mjs` automation — outreach, content, analytics, CRM sync. Organized by side-effect type: - -- **`scripts/commit/`** — Scripts that produce repo artifacts (PRs, committed files). Includes `feedback/` for feedback collection and `profound-briefs/` for AEO pulse output. -- **`scripts/ops/`** — Marketing motions with external side effects (CRM sync, outreach, social content). -- **`scripts/diag/`** — Read-only diagnostics (pipeline health checks, demo scorecards). -- **`scripts/data/`** — Committed data artifacts (ICP keywords, pipeline config, profound learnings/snapshots). -- **`scripts/lib/`** — Shared utilities (Attio, Claude, Fathom, Slack, dates, prompts). - -Scheduled via GitHub Actions cron. 
All scheduled workflows support `workflow_dispatch` for manual runs. - -**GitHub Actions limit:** `workflow_dispatch` allows max 25 `inputs`. `weekly-start.yml` has 22/25 inputs. Feedback is consolidated into a single JSON `feedback` input: `{"social":"...","aeo":"...","blog":"...","snitcher":"..."}`. - -### Slack `/run` command -When adding or renaming GitHub Actions workflows that should be triggerable via Slack, update the `WORKFLOWS` hash in `api/app/jobs/trigger_github_workflow_job.rb`. When deleting a workflow, remove it from the hash. The Slack `/run` command reads this mapping to dispatch workflows. - -### Workflow → Script mapping - -| Workflow | Script path | Category | -|---|---|---| -| `weekly-start.yml` | `voyager/scripts/content-brief.mjs` + `voyager/scripts/content-audit.mjs` + `ops/fathom-social-content.mjs` + `ops/fathom-testimonial-scan.mjs` + `ops/perplexity-citation-audit.mjs` + `commit/profound-aeo-pulse.mjs` + `voyager/scripts/generate-blog-scaffold.mjs` + `ops/ahrefs-firehose-digest.mjs` + `ops/export-dripify.mjs` + `commit/prospect-discovery.mjs` + `ops/repush-clay-leads.mjs` + `ops/snitcher-outreach.mjs` | GHA cron (Mon) | -| `weekly-end.yml` | `diag/fathom-demo-scorecard.mjs` + `commit/feedback/collect-*.mjs` + `commit/feedback/collect-ops-feedback.mjs` + `diag/weekly-retro.mjs` | GHA cron (Fri) | -| `anneal-keywords.yml` | `commit/anneal-keywords.mjs` | GHA cron (Sun) | -| `g2-review-monitor.yml` | `ops/g2-to-senja.mjs` | GHA cron (Daily) | -| `testimonial-pipeline.yml` | `commit/testimonial-pipeline.mjs` | Manual | -| `video-pipeline.yml` | `ops/video-clips.mjs` | Manual | -| `pagespeed-audit.yml` | `diag/pagespeed-audit.mjs` + `commit/pagespeed-improvements.mjs` | GHA cron (1st of month) | -| `daily-ops.yml` | `ops/slack-digest.mjs` + `ops/fathom-meeting-digest.mjs` + `ops/ops-daily-briefing.mjs` | GHA cron (weekdays) | -| `indexnow-submit.yml` | (inline curl) | Push to master (voyager) / Manual | - -## GitHub Actions 
(`.github/workflows/`) - -### CI/CD (PR-triggered) -- `test-api.yml` — RSpec on PR to `api/` -- `review-voyager-seo.yml` — SEO/AEO/GEO review on PR to `voyager/` -- `main.yml` — Deploy API/Web/Services/Docs/Voyager to staging on merge to master -- `deploy-production.yml` — Manual sequential prod deploy (API → Services → Web) - -### GitHub Actions gotcha -In `actions/github-script@v7`, `github.rest.issues.createComment` posts plain issue comments on PRs (PRs are issues in GitHub's API). For inline code suggestions on specific files/lines, use `github.rest.pulls.createReview` or `github.rest.pulls.createReviewComment` instead. - -### Scheduled (cron) -- `weekly-start.yml` — Mon 9am ET (content review, social content, testimonial scan, Perplexity audit, AEO pulse → blog scaffold, Ahrefs digest, Dripify export, prospect discovery → snitcher outreach) -- `weekly-end.yml` — Fri 9am ET (demo scorecard + pipeline health) -- `anneal-keywords.yml` — Sun 11am ET (keyword annealing + kill pattern updates) -- `g2-review-monitor.yml` — Daily 10am ET -- `pagespeed-audit.yml` — 1st of month 9am ET (PSI audit → Claude recommendations → PR) -- `daily-ops.yml` — Weekdays 10am ET (signal monitor, G2 reviews, review intercept, Slack digest → meeting digest → daily briefing) -- `indexnow-submit.yml` — On push to master (voyager pages) + manual (`/run indexnow urls=...`) +## Git Conventions -## Deployments +- Branch prefixes: `feature/`, `fix/`, `chore/` +- Commit format: `type(scope): message` +- Do NOT add `Co-Authored-By` lines to commits +- Pre-commit hook runs: `npm run lint` + `npm run test` + E2E browser screenshots -### Staging (auto on merge to master) -- **API, Web, Services** — GCP Cloud Run via Docker (Artifact Registry) -- **Voyager** — GCP Cloud Run -- **Docs** — Heroku +## Testing Rules -### Production (manual `workflow_dispatch` only) -- Sequential: API → 5min wait → Services → 5min wait → Web -- `gh workflow run deploy-production.yml --ref master` +- **Framework**: Jest 
+ SWC +- **DB mocking**: Use dependency injection (DI), not global mocks +- **Supertest**: Pass `app` (NOT `server`) to supertest +- **Global jest**: src/ tests use global `jest` — do NOT import from `@jest/globals` (causes redeclaration errors) +- **Mock reset**: `jest.clearAllMocks()` resets `mockReturnValue` — always re-set mocks in `beforeEach` +- **Test runner**: `npm test` is long-running; run in a background process or sub-agent, not inline -## Voyager Content +## ESLint Rules -Blog posts in `voyager/src/content/blog/*.mdx`. See `voyager/CLAUDE.md` for tone of voice, banned words, and content rules. - -Key patterns: -- Blog JSON-LD (BlogPosting) in `voyager/src/modules/blogJsonLd.js` -- FAQ structured data via `faqs` frontmatter array in blog MDX files -- Sitemap auto-includes all posts via `voyager/src/app/sitemap.js` -- Blog scaffold: `voyager/scripts/generate-blog-scaffold.mjs` (or `npm run content:scaffold`) -- Analytics events: `voyager/src/modules/analytics.js` -- Route paths: `voyager/src/utils/locations.js` - -## Style - -### Commits -- Plain imperative sentences, no conventional commit prefixes -- Short and direct — describe what, not why - -### Code -- Read before writing. Edit over rewrite. No docs unless asked. -- KISS / YAGNI / SOLID. Under 20 lines per function. -- Comments only for complex logic. No emojis in code. -- When blocked, try an alternative approach before asking. Explain what you tried and why it failed. -- Review your changes against the task requirements before reporting completion. - -## Knowledge Skills (.claude/skills/knowledge/) +- Use `catch {}` not `catch (_err) {}` — underscore prefix not in the allowed pattern +- CJS format for JS files in `src/` -Project-specific knowledge skills load automatically when prompts match `activates_on` keywords. They provide current API patterns, SDK versions, and gotchas that prevent hallucination. 
- -**When to suggest a new skill:** If you encounter a repeatable workflow where you got something wrong (wrong API shape, deprecated pattern, incorrect filter field), suggest creating a knowledge skill for it. Format: "This would be a good candidate for a `.claude/skills/knowledge/.skill.md` — want me to create one?" +## Key Patterns -Current skills: `postmark-email`, `nextjs-app-router`, `profound-mcp`, `greptile-review`, `tailwind-v4-design`, `rails-graphql-mutations`, `rails-sidekiq-clockwork`, `rails-billing-identity`, `electron-store-ipc`, `chrome-extension`, `blog-hero-images` +- Provenance tracking: every data point includes source, timestamp, lineage +- Multi-tenant container isolation +- DI route factories for testability +- Error handling: return undefined over throwing; log and continue over crashing +- Add `.js` extension to relative ESM imports -## Key Files +## StackMemory Context Rule -- `api/config/database.yml` — DB connections (primary + timescale) -- `api/config/sidekiq.yml` — Job queues and concurrency -- `api/config/clock.rb` — Scheduled jobs (Clockwork) -- `api/Procfile.dev` — Dev processes -- `api/app/services/postmark_client.rb` — Email delivery (all Postmark goes through here) -- `api/app/services/drip_campaign_config.rb` — Drip email templates + required keys -- `voyager/CLAUDE.md` — Blog tone, banned words, content rules -- `sol.code-workspace` — VS Code workspace -- Each project requires its own `.env` file (not in repo) +- When an agent fetches conversation context for active work, it must pass the exact current assignment or question as `task_query`. +- Prefer the MCP shape: + - `org_id` + - `conversation_id` + - `worker_mode: true` + - `task_query` + - `recover_on_low_signal: true` +- Do not fetch raw `get_conversation` context for worker execution unless full transcript behavior is explicitly required. 
+- The current assignment is persisted under `.stackmemory/worker-context/current-assignment.json` so wrappers and hooks can auto-fill or enforce `task_query`. diff --git a/scripts/gepa/generations/gen-001/baseline.md b/scripts/gepa/generations/gen-001/baseline.md index 5fd37e77..0c86cace 100644 --- a/scripts/gepa/generations/gen-001/baseline.md +++ b/scripts/gepa/generations/gen-001/baseline.md @@ -1,198 +1,74 @@ -# CLAUDE.md +# croissant.ai — Agent Guide -You are a senior full-stack engineer working on **Sol**, the monorepo for Rize — an automatic time tracking application. Read the relevant code before making changes. Quote the specific code you're modifying when explaining changes. +Tool-agnostic reference for AI coding agents working in this repository. -## Project Overview +## Stack -- **api/** — Rails 7.1 GraphQL backend (Ruby 3.3.5) -- **web/** — Next.js 14 React web app (Node 22) -- **electron/** — Electron desktop app (Node 22) -- **services/** — Bun-based TypeScript event consumers/workers -- **voyager/** — Marketing website and landing pages (Next.js) -- **scripts/** — Automation scripts (categorized by side-effect type) -- **puppet/** — Puppeteer server for images/PDFs -- **chrome/** — Chrome browser extension -- **docs/** — Docusaurus documentation site -- **zapier/** — Zapier integration +Node.js / Express / PostgreSQL / Redis +Railway deployment | Stripe / Salesforce / QuickBooks integrations -## Development Commands +## Project Structure -```bash -# Start all services (requires iTerm2 on macOS) -./scripts/run-dev.sh - -# Or individually: -cd api && hivemind Procfile.dev # Rails + AnyCable + Sidekiq + Clockwork -cd web && npm run dev # Next.js dev server -cd electron && npm run dev # Electron with hot reload -cd services && hivemind Procfile.dev # Bun services -cd voyager && npm run dev # Marketing site (port 3003) ``` - -### Docker (start first) -```bash -cd api && docker-compose up -d -# TimescaleDB :15432 | Redis :16379 | Kafka :9092 | MySQL 
:13306 +src/ + api/ # Route handlers + core/ # monitoring-service, cache-service, queue-service, master-agent, api-validation + features/ # Feature modules + shared/ # Shared utilities + integrations/ # Third-party connectors +docs/ # Documentation +scripts/ # Automation scripts +docker/ # Container configs +prompts/ # Externalized LLM prompt templates ``` -### Testing -```bash -cd api && bundle exec rspec # Full API suite -cd api && bundle exec rspec spec/path/to/file_spec.rb # Single file -cd api && bundle exec rspec spec/path/to/file_spec.rb:42 # Single line -cd electron && npm test # Electron (Jest) -# Web — no active tests -``` +## Commands -### Building ```bash -cd api && bundle install && rake db:migrate -cd web && npm run build # gql-gen + tailwind + next build -cd electron && npm run build # Electron Forge make -cd services && bun install +npm run dev # Start dev server +npm run test # Run test suites (3 parallel Jest workers, maxWorkers=4) +npm run lint # Lint check +npm run migrate # Run DB migrations +docker-compose up -d # Start local DBs ``` -## Architecture - -### GraphQL API -Two endpoints: `api/v1` (public — OAuth, Zapier) and `private/v1` (web, electron). Located at `api/app/graphql/{api,private}/v1/`. - -### Background Processing -- **Sidekiq** for async jobs (`api/config/sidekiq.yml`) — use `perform_async`, not `perform_later` (ApplicationJob uses Sidekiq::Worker, not ActiveJob) -- **Clockwork** for scheduled jobs (`api/config/clock.rb`) -- **Kafka** for event streaming (`services/consumers/`) - -### Databases -PostgreSQL (primary) + TimescaleDB (time-series, separate connection) + MySQL (legacy) + Redis (cache, ActionCable, Sidekiq) - -### Real-time -AnyCable WebSocket server for subscriptions. Channels in `api/app/channels/`. - -## Code Patterns - -### Ruby/Rails -- Controllers validate + enqueue async jobs. Jobs handle business logic. Models handle delivery. 
-- Webhook controllers: `skip_before_action :authenticate_user!` + shared secret verification -- `CanonicalEmail.find_by_canonical(email:)` — uses `email_address` gem canonicalization; stub in tests -- `Identity#first_name` is a computed method (from `name` via `Nameable::Latin`), not a column -- `generate_hash_authentication_settings_url` calls `update!` internally — stub in tests via `allow_any_instance_of(Identity)` -- Test env uses `cache_store: :null_store` — swap to `MemoryStore` in `around` block for cache tests -- Postmark emails: all go through `PostmarkClient.deliver_in_batches_with_templates` with required keys: `email_enabled`, `email_bounced`, `message_stream` -- Prefer `be_between(before, after)` for time assertions (no `freeze_time` or `travel_to`) - -### JavaScript/TypeScript -- Use `test()` instead of `it()` in Jest tests -- Use `toBeCalled()` instead of `toHaveBeenCalledWith()` in assertions -- ESM: add `.js` extension to relative imports - -### Error Handling -- Prefer returning undefined over throwing exceptions -- Log and continue rather than crashing — filter nulls at boundaries -- Validate inputs at system boundaries (user input, external APIs, webhooks) - -## Scripts (`scripts/`) - -Standalone Node.js `.mjs` automation — outreach, content, analytics, CRM sync. Organized by side-effect type: - -- **`scripts/commit/`** — Scripts that produce repo artifacts (PRs, committed files). Includes `feedback/` for feedback collection and `profound-briefs/` for AEO pulse output. -- **`scripts/ops/`** — Marketing motions with external side effects (CRM sync, outreach, social content). -- **`scripts/diag/`** — Read-only diagnostics (pipeline health checks, demo scorecards). -- **`scripts/data/`** — Committed data artifacts (ICP keywords, pipeline config, profound learnings/snapshots). -- **`scripts/lib/`** — Shared utilities (Attio, Claude, Fathom, Slack, dates, prompts). - -Scheduled via GitHub Actions cron. 
All scheduled workflows support `workflow_dispatch` for manual runs. - -**GitHub Actions limit:** `workflow_dispatch` allows max 25 `inputs`. `weekly-start.yml` has 22/25 inputs. Feedback is consolidated into a single JSON `feedback` input: `{"social":"...","aeo":"...","blog":"...","snitcher":"..."}`. - -### Slack `/run` command -When adding or renaming GitHub Actions workflows that should be triggerable via Slack, update the `WORKFLOWS` hash in `api/app/jobs/trigger_github_workflow_job.rb`. When deleting a workflow, remove it from the hash. The Slack `/run` command reads this mapping to dispatch workflows. - -### Workflow → Script mapping - -| Workflow | Script path | Category | -|---|---|---| -| `weekly-start.yml` | `voyager/scripts/content-brief.mjs` + `voyager/scripts/content-audit.mjs` + `ops/fathom-social-content.mjs` + `ops/fathom-testimonial-scan.mjs` + `ops/perplexity-citation-audit.mjs` + `commit/profound-aeo-pulse.mjs` + `voyager/scripts/generate-blog-scaffold.mjs` + `ops/ahrefs-firehose-digest.mjs` + `ops/export-dripify.mjs` + `commit/prospect-discovery.mjs` + `ops/repush-clay-leads.mjs` + `ops/snitcher-outreach.mjs` | GHA cron (Mon) | -| `weekly-end.yml` | `diag/fathom-demo-scorecard.mjs` + `commit/feedback/collect-*.mjs` + `commit/feedback/collect-ops-feedback.mjs` + `diag/weekly-retro.mjs` | GHA cron (Fri) | -| `anneal-keywords.yml` | `commit/anneal-keywords.mjs` | GHA cron (Sun) | -| `g2-review-monitor.yml` | `ops/g2-to-senja.mjs` | GHA cron (Daily) | -| `testimonial-pipeline.yml` | `commit/testimonial-pipeline.mjs` | Manual | -| `video-pipeline.yml` | `ops/video-clips.mjs` | Manual | -| `pagespeed-audit.yml` | `diag/pagespeed-audit.mjs` + `commit/pagespeed-improvements.mjs` | GHA cron (1st of month) | -| `daily-ops.yml` | `ops/slack-digest.mjs` + `ops/fathom-meeting-digest.mjs` + `ops/ops-daily-briefing.mjs` | GHA cron (weekdays) | -| `indexnow-submit.yml` | (inline curl) | Push to master (voyager) / Manual | - -## GitHub Actions 
(`.github/workflows/`) - -### CI/CD (PR-triggered) -- `test-api.yml` — RSpec on PR to `api/` -- `review-voyager-seo.yml` — SEO/AEO/GEO review on PR to `voyager/` -- `main.yml` — Deploy API/Web/Services/Docs/Voyager to staging on merge to master -- `deploy-production.yml` — Manual sequential prod deploy (API → Services → Web) - -### GitHub Actions gotcha -In `actions/github-script@v7`, `github.rest.issues.createComment` posts plain issue comments on PRs (PRs are issues in GitHub's API). For inline code suggestions on specific files/lines, use `github.rest.pulls.createReview` or `github.rest.pulls.createReviewComment` instead. - -### Scheduled (cron) -- `weekly-start.yml` — Mon 9am ET (content review, social content, testimonial scan, Perplexity audit, AEO pulse → blog scaffold, Ahrefs digest, Dripify export, prospect discovery → snitcher outreach) -- `weekly-end.yml` — Fri 9am ET (demo scorecard + pipeline health) -- `anneal-keywords.yml` — Sun 11am ET (keyword annealing + kill pattern updates) -- `g2-review-monitor.yml` — Daily 10am ET -- `pagespeed-audit.yml` — 1st of month 9am ET (PSI audit → Claude recommendations → PR) -- `daily-ops.yml` — Weekdays 10am ET (signal monitor, G2 reviews, review intercept, Slack digest → meeting digest → daily briefing) -- `indexnow-submit.yml` — On push to master (voyager pages) + manual (`/run indexnow urls=...`) +## Git Conventions -## Deployments +- Branch prefixes: `feature/`, `fix/`, `chore/` +- Commit format: `type(scope): message` +- Do NOT add `Co-Authored-By` lines to commits +- Pre-commit hook runs: `npm run lint` + `npm run test` + E2E browser screenshots -### Staging (auto on merge to master) -- **API, Web, Services** — GCP Cloud Run via Docker (Artifact Registry) -- **Voyager** — GCP Cloud Run -- **Docs** — Heroku +## Testing Rules -### Production (manual `workflow_dispatch` only) -- Sequential: API → 5min wait → Services → 5min wait → Web -- `gh workflow run deploy-production.yml --ref master` +- **Framework**: Jest 
+ SWC +- **DB mocking**: Use dependency injection (DI), not global mocks +- **Supertest**: Pass `app` (NOT `server`) to supertest +- **Global jest**: src/ tests use global `jest` — do NOT import from `@jest/globals` (causes redeclaration errors) +- **Mock reset**: `jest.clearAllMocks()` resets `mockReturnValue` — always re-set mocks in `beforeEach` +- **Test runner**: `npm test` is long-running; run in a background process or sub-agent, not inline -## Voyager Content +## ESLint Rules -Blog posts in `voyager/src/content/blog/*.mdx`. See `voyager/CLAUDE.md` for tone of voice, banned words, and content rules. - -Key patterns: -- Blog JSON-LD (BlogPosting) in `voyager/src/modules/blogJsonLd.js` -- FAQ structured data via `faqs` frontmatter array in blog MDX files -- Sitemap auto-includes all posts via `voyager/src/app/sitemap.js` -- Blog scaffold: `voyager/scripts/generate-blog-scaffold.mjs` (or `npm run content:scaffold`) -- Analytics events: `voyager/src/modules/analytics.js` -- Route paths: `voyager/src/utils/locations.js` - -## Style - -### Commits -- Plain imperative sentences, no conventional commit prefixes -- Short and direct — describe what, not why - -### Code -- Read before writing. Edit over rewrite. No docs unless asked. -- KISS / YAGNI / SOLID. Under 20 lines per function. -- Comments only for complex logic. No emojis in code. -- When blocked, try an alternative approach before asking. Explain what you tried and why it failed. -- Review your changes against the task requirements before reporting completion. - -## Knowledge Skills (.claude/skills/knowledge/) +- Use `catch {}` not `catch (_err) {}` — underscore prefix not in the allowed pattern +- CJS format for JS files in `src/` -Project-specific knowledge skills load automatically when prompts match `activates_on` keywords. They provide current API patterns, SDK versions, and gotchas that prevent hallucination. 
- -**When to suggest a new skill:** If you encounter a repeatable workflow where you got something wrong (wrong API shape, deprecated pattern, incorrect filter field), suggest creating a knowledge skill for it. Format: "This would be a good candidate for a `.claude/skills/knowledge/.skill.md` — want me to create one?" +## Key Patterns -Current skills: `postmark-email`, `nextjs-app-router`, `profound-mcp`, `greptile-review`, `tailwind-v4-design`, `rails-graphql-mutations`, `rails-sidekiq-clockwork`, `rails-billing-identity`, `electron-store-ipc`, `chrome-extension`, `blog-hero-images` +- Provenance tracking: every data point includes source, timestamp, lineage +- Multi-tenant container isolation +- DI route factories for testability +- Error handling: return undefined over throwing; log and continue over crashing +- Add `.js` extension to relative ESM imports -## Key Files +## StackMemory Context Rule -- `api/config/database.yml` — DB connections (primary + timescale) -- `api/config/sidekiq.yml` — Job queues and concurrency -- `api/config/clock.rb` — Scheduled jobs (Clockwork) -- `api/Procfile.dev` — Dev processes -- `api/app/services/postmark_client.rb` — Email delivery (all Postmark goes through here) -- `api/app/services/drip_campaign_config.rb` — Drip email templates + required keys -- `voyager/CLAUDE.md` — Blog tone, banned words, content rules -- `sol.code-workspace` — VS Code workspace -- Each project requires its own `.env` file (not in repo) +- When an agent fetches conversation context for active work, it must pass the exact current assignment or question as `task_query`. +- Prefer the MCP shape: + - `org_id` + - `conversation_id` + - `worker_mode: true` + - `task_query` + - `recover_on_low_signal: true` +- Do not fetch raw `get_conversation` context for worker execution unless full transcript behavior is explicitly required. 
+- The current assignment is persisted under `.stackmemory/worker-context/current-assignment.json` so wrappers and hooks can auto-fill or enforce `task_query`. From 2f8ed5f67a4fdf250d39ca724ae847877e01914f Mon Sep 17 00:00:00 2001 From: "StackMemory Bot (CLI)" Date: Thu, 16 Apr 2026 15:15:08 -0400 Subject: [PATCH 09/11] fix(conductor): harden lane mode cleanup --- .../commands/__tests__/conductor-lane.test.ts | 198 ++++++++++++ src/cli/commands/orchestrate.ts | 301 +++++++++++++++++- src/cli/commands/orchestrator.ts | 39 ++- 3 files changed, 533 insertions(+), 5 deletions(-) create mode 100644 src/cli/commands/__tests__/conductor-lane.test.ts diff --git a/src/cli/commands/__tests__/conductor-lane.test.ts b/src/cli/commands/__tests__/conductor-lane.test.ts new file mode 100644 index 00000000..6ac7ab7f --- /dev/null +++ b/src/cli/commands/__tests__/conductor-lane.test.ts @@ -0,0 +1,198 @@ +import { afterEach, beforeEach, describe, expect, it, vi } from 'vitest'; +import { Command } from 'commander'; +import { mkdtempSync, mkdirSync, rmSync, writeFileSync } from 'fs'; +import { join } from 'path'; +import { tmpdir } from 'os'; + +describe('conductor lane mode', () => { + let tempDir: string; + let sigintBefore: Function[]; + let sigtermBefore: Function[]; + + beforeEach(() => { + tempDir = mkdtempSync(join(tmpdir(), 'sm-conductor-lane-')); + sigintBefore = process.listeners('SIGINT'); + sigtermBefore = process.listeners('SIGTERM'); + vi.resetModules(); + }); + + afterEach(() => { + for (const listener of process.listeners('SIGINT')) { + if (!sigintBefore.includes(listener)) { + process.removeListener('SIGINT', listener); + } + } + for (const listener of process.listeners('SIGTERM')) { + if (!sigtermBefore.includes(listener)) { + process.removeListener('SIGTERM', listener); + } + } + vi.restoreAllMocks(); + vi.resetModules(); + rmSync(tempDir, { recursive: true, force: true }); + }); + + it('skips lane cleanup when worktree cleanliness cannot be verified', async () => 
{ + const execSync = vi.fn((cmd: string) => { + if (cmd === 'git branch --show-current') return 'lane/main\n'; + if (cmd === `git branch --list 'worktree-agent-*'`) { + return ' worktree-agent-123\n'; + } + if ( + cmd === 'git merge-base --is-ancestor "worktree-agent-123" "lane/main"' + ) { + throw Object.assign(new Error('not ancestor'), { status: 1 }); + } + if (cmd === 'git cherry "lane/main" "worktree-agent-123"') return ''; + if (cmd === 'git worktree list --porcelain') { + return [ + 'worktree /tmp/worktree-agent-123', + 'branch refs/heads/worktree-agent-123', + '', + ].join('\n'); + } + if (cmd === 'git -C "/tmp/worktree-agent-123" status --short') { + throw new Error('status unavailable'); + } + throw new Error(`Unexpected execSync: ${cmd}`); + }); + + vi.doMock('child_process', async () => { + const actual = + await vi.importActual('child_process'); + return { ...actual, execSync }; + }); + + const consoleLog = vi.spyOn(console, 'log').mockImplementation(() => {}); + const { createConductorCommands } = await import('../orchestrate.js'); + + const program = new Command(); + program.exitOverride(); + program.addCommand(createConductorCommands()); + + await program.parseAsync([ + 'node', + 'stackmemory', + 'conductor', + 'lane', + 'cleanup', + '--repo', + tempDir, + ]); + + const output = consoleLog.mock.calls + .map((call) => String(call[0])) + .join('\n'); + expect(output).toContain('unknown'); + expect(output).toContain('could not verify clean state'); + expect( + execSync.mock.calls.some(([cmd]) => + String(cmd).includes('git worktree remove') + ) + ).toBe(false); + }); + + it('uses worktrees in auto mode when lane mode is enabled', async () => { + const repoRoot = join(tempDir, 'repo'); + const workspaceRoot = join(tempDir, 'workspaces'); + const appServerPath = join(tempDir, 'claude-app-server.cjs'); + + mkdirSync(join(repoRoot, '.git', 'gitbutler'), { recursive: true }); + writeFileSync(appServerPath, 'module.exports = {};'); + + const execSync = 
vi.fn((cmd: string) => { + if (cmd === 'but --version') return 'gitbutler 1.0.0\n'; + throw new Error(`Unexpected execSync: ${cmd}`); + }); + + vi.doMock('child_process', async () => { + const actual = + await vi.importActual('child_process'); + return { ...actual, execSync }; + }); + + const { Conductor } = await import('../orchestrator.js'); + const conductor = new Conductor({ + activeStates: ['Todo'], + terminalStates: ['Done', 'Cancelled'], + inProgressState: 'In Progress', + inReviewState: 'In Review', + pollIntervalMs: 1, + maxConcurrent: 1, + workspaceRoot, + repoRoot, + baseBranch: 'main', + appServerPath, + turnTimeoutMs: 1, + maxRetries: 0, + hookTimeoutMs: 1, + agentMode: 'cli', + workspaceMode: 'auto', + laneBranch: 'lane/main', + }); + + (conductor as unknown as Record).createLinearClient = vi + .fn() + .mockResolvedValue(null); + (conductor as unknown as Record).cacheWorkflowStates = vi + .fn() + .mockResolvedValue(undefined); + (conductor as unknown as Record).writeStatusFile = vi.fn(); + (conductor as unknown as Record).poll = vi + .fn() + .mockResolvedValue(undefined); + (conductor as unknown as Record).schedulePoll = vi + .fn() + .mockResolvedValue(undefined); + + await conductor.start(); + + expect( + execSync.mock.calls.some(([cmd]) => String(cmd) === 'but --version') + ).toBe(false); + expect( + (conductor as unknown as { useGitButler: boolean }).useGitButler + ).toBe(false); + }); + + it('rejects explicit gitbutler mode when lane mode is enabled', async () => { + const repoRoot = join(tempDir, 'repo'); + const workspaceRoot = join(tempDir, 'workspaces'); + const appServerPath = join(tempDir, 'claude-app-server.cjs'); + + mkdirSync(repoRoot, { recursive: true }); + writeFileSync(appServerPath, 'module.exports = {};'); + + const execSync = vi.fn(); + + vi.doMock('child_process', async () => { + const actual = + await vi.importActual('child_process'); + return { ...actual, execSync }; + }); + + const { Conductor } = await 
import('../orchestrator.js'); + const conductor = new Conductor({ + activeStates: ['Todo'], + terminalStates: ['Done', 'Cancelled'], + inProgressState: 'In Progress', + inReviewState: 'In Review', + pollIntervalMs: 1, + maxConcurrent: 1, + workspaceRoot, + repoRoot, + baseBranch: 'main', + appServerPath, + turnTimeoutMs: 1, + maxRetries: 0, + hookTimeoutMs: 1, + agentMode: 'cli', + workspaceMode: 'gitbutler', + laneBranch: 'lane/main', + }); + + await expect(conductor.start()).rejects.toThrow( + '--lane is only supported with git worktrees' + ); + }); +}); diff --git a/src/cli/commands/orchestrate.ts b/src/cli/commands/orchestrate.ts index 405c41d4..af8a34c1 100644 --- a/src/cli/commands/orchestrate.ts +++ b/src/cli/commands/orchestrate.ts @@ -1421,6 +1421,300 @@ export function createConductorCommands(): Command { } }); + // --- lane --- + // Inspect / clean up disposable worktree-agent-* branches that have been + // merged back into a human-curated lane branch. + + // Check whether is effectively merged into . + // Two-phase: + // 1) Fast: `git merge-base --is-ancestor` — catches regular merges + ff. + // 2) Fallback: `git cherry` — patch-id matching catches squash-merged + // branches, which BOTH `git branch --merged` and `--is-ancestor` miss. + // Conductor's auto-PR flow often ends in a squash, so this matters. 
+ const isMerged = (repo: string, branch: string, lane: string): boolean => { + try { + execSync(`git merge-base --is-ancestor "${branch}" "${lane}"`, { + cwd: repo, + stdio: 'pipe', + timeout: 5000, + }); + return true; + } catch (err) { + const status = (err as { status?: number }).status; + if (status !== 1) throw err; // genuine error, not "false" + // not a direct ancestor — fall through to patch-id check + } + try { + const out = execSync(`git cherry "${lane}" "${branch}"`, { + cwd: repo, + encoding: 'utf-8', + timeout: 10000, + }).trim(); + if (!out) return true; // no unique commits (rare non-ancestor case) + const lines = out.split('\n').filter(Boolean); + // `-` = commit's patch is already in lane; `+` = not in lane. + return lines.every((l) => l.startsWith('-')); + } catch { + return false; + } + }; + + // List all worktree-agent-* branches and bucket by ancestry vs lane. + const bucketLaneBranches = ( + repo: string, + lane: string + ): { merged: string[]; unmerged: string[] } => { + let all: string[] = []; + try { + all = execSync(`git branch --list 'worktree-agent-*'`, { + cwd: repo, + encoding: 'utf-8', + timeout: 10000, + }) + .split('\n') + .map((s) => s.trim().replace(/^[*+]\s*/, '')) + .filter(Boolean); + } catch { + // non-fatal — no branches found + } + const merged: string[] = []; + const unmerged: string[] = []; + for (const b of all) { + (isMerged(repo, b, lane) ? merged : unmerged).push(b); + } + return { merged, unmerged }; + }; + + // Return worktree cleanliness so cleanup can refuse removal when git status + // cannot prove the tree is clean. + const getWorktreeCleanliness = ( + repo: string, + wtPath: string + ): 'clean' | 'dirty' | 'unknown' => { + try { + const out = execSync(`git -C "${wtPath}" status --short`, { + cwd: repo, + encoding: 'utf-8', + timeout: 5000, + }); + return out.trim().length > 0 ? 
'dirty' : 'clean'; + } catch { + return 'unknown'; + } + }; + + const laneCmd = cmd + .command('lane') + .description( + 'Inspect or clean up worktree-agent-* branches against a lane' + ); + + laneCmd + .command('status') + .description( + 'List worktree-agent-* branches and their merge state vs the lane' + ) + .option( + '--lane ', + 'Lane branch to compare against (default: current branch)' + ) + .option('--repo ', 'Git repo root', process.cwd()) + .action((options) => { + const repo: string = options.repo; + const lane: string = + options.lane || + execSync('git branch --show-current', { + cwd: repo, + encoding: 'utf-8', + }).trim(); + + if (!lane) { + console.error( + `${c.red}Could not resolve lane branch.${c.r} Pass --lane.` + ); + process.exit(1); + } + + const { merged, unmerged } = bucketLaneBranches(repo, lane); + + console.log(`\n ${c.b}Lane:${c.r} ${c.cyan}${lane}${c.r}\n`); + if (merged.length) { + console.log(` ${c.green}merged (${merged.length}):${c.r}`); + for (const b of merged) console.log(` ${c.gray}āœ“${c.r} ${b}`); + } + if (unmerged.length) { + console.log(`\n ${c.orange}unmerged (${unmerged.length}):${c.r}`); + for (const b of unmerged) console.log(` ${c.orange}•${c.r} ${b}`); + } + if (!merged.length && !unmerged.length) { + console.log(` ${c.gray}No worktree-agent-* branches found.${c.r}`); + } + }); + + laneCmd + .command('cleanup') + .description( + 'Remove worktree-agent-* branches and worktrees already merged into the lane' + ) + .option( + '--lane ', + 'Lane branch to compare against (default: current branch)' + ) + .option('--repo ', 'Git repo root', process.cwd()) + .option('--dry-run', 'Show what would be removed without doing it', false) + .option( + '--force', + 'Remove worktrees even if they have uncommitted changes', + false + ) + .action((options) => { + const repo: string = options.repo; + const lane: string = + options.lane || + execSync('git branch --show-current', { + cwd: repo, + encoding: 'utf-8', + }).trim(); + + if 
(!lane) { + console.error( + `${c.red}Could not resolve lane branch.${c.r} Pass --lane.` + ); + process.exit(1); + } + + const { merged } = bucketLaneBranches(repo, lane); + + if (merged.length === 0) { + console.log( + `${c.green}Nothing to clean up for lane ${c.cyan}${lane}${c.r}.` + ); + return; + } + + // Build map of branch → worktree path (if any) + const worktreeMap = new Map(); + try { + const wt = execSync('git worktree list --porcelain', { + cwd: repo, + encoding: 'utf-8', + timeout: 10000, + }); + let currentPath = ''; + for (const line of wt.split('\n')) { + if (line.startsWith('worktree ')) { + currentPath = line.slice('worktree '.length).trim(); + } else if (line.startsWith('branch ')) { + const ref = line.slice('branch '.length).trim(); + const b = ref.replace(/^refs\/heads\//, ''); + if (currentPath) worktreeMap.set(b, currentPath); + } + } + } catch { + // non-fatal — fall back to branch-only cleanup + } + + console.log( + `\n ${c.b}Lane:${c.r} ${c.cyan}${lane}${c.r} ${c.d}(${merged.length} merged branches)${c.r}\n` + ); + + let skippedDirty = 0; + let skippedUnknown = 0; + for (const branch of merged) { + const wtPath = worktreeMap.get(branch); + const cleanliness = wtPath + ? getWorktreeCleanliness(repo, wtPath) + : 'unknown'; + const dirty = cleanliness === 'dirty'; + + // Refuse to destroy dirty worktrees without --force — prevents + // racing with a still-working agent and losing uncommitted work. + if (dirty && !options.force) { + skippedDirty++; + console.log( + ` ${c.orange}⚠${c.r} ${c.orange}dirty${c.r} ${wtPath} ${c.d}(skipping ${branch} — pass --force to remove)${c.r}` + ); + continue; + } + + // If git status could not prove the worktree is clean, refuse cleanup + // unless the operator explicitly opts into forceful removal. 
+ if (wtPath && cleanliness === 'unknown' && !options.force) { + skippedUnknown++; + console.log( + ` ${c.orange}⚠${c.r} ${c.orange}unknown${c.r} ${wtPath} ${c.d}(skipping ${branch} — could not verify clean state; pass --force to remove)${c.r}` + ); + continue; + } + + const dirtyTag = dirty ? ` ${c.orange}[dirty]${c.r}` : ''; + const unknownTag = + cleanliness === 'unknown' ? ` ${c.orange}[unknown]${c.r}` : ''; + const action = wtPath + ? `${c.gray}worktree remove${c.r} ${wtPath}${dirtyTag}${unknownTag} + ${c.gray}branch -D${c.r} ${branch}` + : `${c.gray}branch -D${c.r} ${branch}`; + console.log(` ${c.green}āœ“${c.r} ${action}`); + + if (options.dryRun) continue; + + if (wtPath) { + // --force on `git worktree remove` is always passed because we + // already decided above whether dirty state is tolerable. + try { + execSync(`git worktree remove "${wtPath}" --force`, { + cwd: repo, + stdio: 'pipe', + timeout: 15000, + }); + } catch (err) { + console.log( + ` ${c.red}worktree remove failed:${c.r} ${(err as Error).message}` + ); + } + } + try { + execSync(`git branch -D "${branch}"`, { + cwd: repo, + stdio: 'pipe', + timeout: 5000, + }); + } catch (err) { + console.log( + ` ${c.red}branch -D failed:${c.r} ${(err as Error).message}` + ); + } + } + + if (!options.dryRun) { + try { + execSync('git worktree prune', { + cwd: repo, + stdio: 'pipe', + timeout: 10000, + }); + } catch { + // non-fatal + } + const skipNote = skippedDirty + ? ` ${c.orange}(${skippedDirty} dirty skipped — pass --force)${c.r}` + : ''; + const unknownNote = skippedUnknown + ? ` ${c.orange}(${skippedUnknown} unknown skipped — pass --force)${c.r}` + : ''; + console.log(`\n ${c.green}Done.${c.r}${skipNote}${unknownNote}`); + } else { + const skipNote = skippedDirty + ? ` ${c.orange}(${skippedDirty} dirty would be skipped — pass --force to include)${c.r}` + : ''; + const unknownNote = skippedUnknown + ? 
` ${c.orange}(${skippedUnknown} unknown would be skipped — pass --force to include)${c.r}` + : ''; + console.log( + `\n ${c.d}Dry run — no changes made. Remove --dry-run to execute.${c.r}${skipNote}${unknownNote}` + ); + } + }); + // --- logs --- cmd .command('logs') @@ -2050,6 +2344,10 @@ export function createConductorCommands(): Command { 'Workspace mode: "auto" (detect GitButler), "gitbutler", or "worktree"', 'auto' ) + .option( + '--lane ', + 'Optional lane branch. When set, conductor uses git worktrees rooted at the lane as worktree-agent- and suppresses PRs (lane is human-curated).' + ) .action(async (options) => { // Ensure default prompt template exists on first start ensureDefaultPromptTemplate(); @@ -2069,8 +2367,9 @@ export function createConductorCommands(): Command { turnTimeoutMs: parseInt(options.turnTimeout, 10), agentMode: options.mode === 'adapter' ? 'adapter' : 'cli', model: options.model, - autoPR: options.pr, + autoPR: options.lane ? false : options.pr, workspaceMode: options.workspaceMode, + laneBranch: options.lane, }); await conductor.start(); diff --git a/src/cli/commands/orchestrator.ts b/src/cli/commands/orchestrator.ts index 396b2711..cf1c1ab3 100644 --- a/src/cli/commands/orchestrator.ts +++ b/src/cli/commands/orchestrator.ts @@ -88,6 +88,13 @@ export interface ConductorConfig { autoPR?: boolean; /** Workspace mode: 'auto' (detect GitButler), 'gitbutler', or 'worktree' (default: 'auto') */ workspaceMode?: 'auto' | 'gitbutler' | 'worktree'; + /** + * Optional lane branch (e.g. "feature/STA-500-retrieval"). 
When set: + * - agent worktrees branch from laneBranch (not baseBranch) + * - branch naming uses "worktree-agent-" (not "conductor/") + * - autoPR is suppressed; merge target is the lane, curated by a human + */ + laneBranch?: string; } export interface RunningIssue { @@ -758,9 +765,18 @@ export class Conductor { } } - // Detect workspace mode: GitButler virtual branches or git worktrees const wsMode = this.config.workspaceMode || 'auto'; - if (wsMode === 'gitbutler' || wsMode === 'auto') { + const laneMode = Boolean(this.config.laneBranch); + if (laneMode && wsMode === 'gitbutler') { + throw new Error( + '--lane is only supported with git worktrees. Use --workspace-mode worktree or omit --workspace-mode.' + ); + } + + // Detect workspace mode: GitButler virtual branches or git worktrees. + // Lane mode always uses worktrees because branches are created from the + // lane itself and later inspected/cleaned via `conductor lane`. + if (!laneMode && (wsMode === 'gitbutler' || wsMode === 'auto')) { try { const butVersion = execSync('but --version', { cwd: this.config.repoRoot, @@ -785,6 +801,10 @@ export class Conductor { } // auto mode: fall through to worktrees } + } else if (laneMode) { + logger.info('Lane mode enabled; using git worktrees', { + laneBranch: this.config.laneBranch, + }); } // Ensure workspace root exists (only needed for worktree mode) @@ -1747,6 +1767,12 @@ export class Conductor { } private createGitButlerBranch(issue: LinearIssue, wsKey: string): string { + if (this.config.laneBranch) { + throw new Error( + 'Lane mode requires git worktrees; GitButler virtual branches are not supported.' + ); + } + const branchName = `conductor/${wsKey}`; try { @@ -1795,7 +1821,11 @@ export class Conductor { return wsPath; } - const branchName = `conductor/${wsKey}`; + // Lane mode: subagents get disposable worktree-agent- branches + // rooted at the lane branch. Base mode: conductor/ off baseBranch. 
+ const lane = this.config.laneBranch; + const branchName = lane ? `worktree-agent-${wsKey}` : `conductor/${wsKey}`; + const startPoint = lane ? lane : `origin/${this.config.baseBranch}`; try { execSync('git fetch origin', { @@ -1805,7 +1835,7 @@ export class Conductor { }); execSync( - `git worktree add "${wsPath}" -b "${branchName}" "origin/${this.config.baseBranch}"`, + `git worktree add "${wsPath}" -b "${branchName}" "${startPoint}"`, { cwd: this.config.repoRoot, stdio: 'pipe', @@ -1817,6 +1847,7 @@ export class Conductor { identifier: issue.identifier, path: wsPath, branch: branchName, + lane: lane || null, }); } catch (err) { try { From 079b395fcba5280b122c9c20087f9b0fad890b0f Mon Sep 17 00:00:00 2001 From: "StackMemory Bot (CLI)" Date: Thu, 16 Apr 2026 19:40:11 -0400 Subject: [PATCH 10/11] chore: reorganize root for clarity Consolidate duplicate docs, relocate wandering files, and tighten .gitignore for agent scratch dirs. - Move SPEC.md, RELEASE_NOTES.md, tomorrow.md, vision.md to docs/ (replacing stale docs/ copies with the up-to-date root versions) - Move mcp_review_config.json to config/ - Untrack .lint-fix-log.json (ephemeral lint artifact) - Delete stale .tsbuildinfo-* and .lint-errors.log - Ignore agent scratch dirs (.ralph/, .swarm/, .bjarne/, .entire/, .opencode/, .git.backup/) and local trees (archive/, site/, voyager/, plugins/) - Update README.md Vision link to docs/vision.md --- .gitignore | 19 + .lint-fix-log.json | 22 - README.md | 2 +- RELEASE_NOTES.md | 80 -- SPEC.md | 1247 ----------------- .../mcp_review_config.json | 0 docs/RELEASE_NOTES.md | 2 +- docs/SPEC.md | 528 ++++++- tomorrow.md => docs/tomorrow.md | 0 vision.md => docs/vision.md | 0 10 files changed, 519 insertions(+), 1381 deletions(-) delete mode 100644 .lint-fix-log.json delete mode 100644 RELEASE_NOTES.md delete mode 100644 SPEC.md rename mcp_review_config.json => config/mcp_review_config.json (100%) rename tomorrow.md => docs/tomorrow.md (100%) rename vision.md => 
docs/vision.md (100%) diff --git a/.gitignore b/.gitignore index 85b0c6f6..9b63e497 100644 --- a/.gitignore +++ b/.gitignore @@ -135,3 +135,22 @@ scripts/gepa/results/scores.jsonl scripts/gepa/state.json scripts/gepa/results/ scripts/gepa/generations/ + +# Agent tool working dirs (untracked, per-tool scratch) +.ralph/ +.swarm/ +.bjarne/ +.entire/ +.opencode/ + +# Local backups and lint artifacts +.git.backup/ +.lint-errors.log +.lint-fix-log.json +.lint-fix-log.*.json + +# Local scratch / generated trees +archive/ +site/ +voyager/ +plugins/ diff --git a/.lint-fix-log.json b/.lint-fix-log.json deleted file mode 100644 index a227e230..00000000 --- a/.lint-fix-log.json +++ /dev/null @@ -1,22 +0,0 @@ -[ - { - "timestamp": "2026-01-05T19:42:12.135Z", - "level": "info", - "message": "šŸ”§ Starting auto-fix loop..." - }, - { - "timestamp": "2026-01-05T19:42:12.138Z", - "level": "info", - "message": "šŸ“ Auto-fix attempt 1/3" - }, - { - "timestamp": "2026-01-05T19:42:15.745Z", - "level": "info", - "message": "Running ESLint auto-fix..." - }, - { - "timestamp": "2026-01-05T19:42:21.910Z", - "level": "success", - "message": "āœ… All fixable lint errors resolved! 
(warnings are ok for commits)" - } -] \ No newline at end of file diff --git a/README.md b/README.md index eca2b11c..74a1676e 100644 --- a/README.md +++ b/README.md @@ -423,7 +423,7 @@ Options: `--until`, `--until-not`, `--until-empty`, `--until-non-empty`, `--unti - [Development Guide](./docs/DEVELOPMENT.md) — Contributing and development - [Architecture](./docs/architecture.md) — System design - [API Reference](./docs/API_REFERENCE.md) — API documentation -- [Vision](./vision.md) — Product vision and principles +- [Vision](./docs/vision.md) — Product vision and principles - [Status](./docs/status.md) — Current project status - [Roadmap](./docs/roadmap.md) — Future plans diff --git a/RELEASE_NOTES.md b/RELEASE_NOTES.md deleted file mode 100644 index bba8bb97..00000000 --- a/RELEASE_NOTES.md +++ /dev/null @@ -1,80 +0,0 @@ -# Release Notes - v0.2.8 - -## LLM-Driven Context Retrieval System (STA-95) - -This release introduces intelligent context retrieval that uses LLM analysis to select the most relevant frames for any query. 
- -### New Features - -#### Smart Context Retrieval (`smart_context` MCP tool) - -- **Natural language queries**: Ask for context in plain English -- **LLM-driven analysis**: Intelligently selects relevant frames based on query semantics -- **Token budget management**: Stays within specified token limits -- **Auditable reasoning**: Every retrieval decision is explained -- **Heuristic fallback**: Works even without LLM provider - -#### Compressed Memory Summary (`get_summary` MCP tool) - -- **Recent session summary**: Frames, operations, files touched, errors -- **Historical patterns**: Topic counts, key decisions, recurring issues -- **Queryable indices**: By error, time, contributor, topic, file -- **Summary statistics**: Frame counts, event counts, anchor totals - -### Architecture - -``` -context_retrieval: - compressed_summary: - recent_session: frames, operations, files, errors - historical_patterns: topic counts, key decisions, recurring issues - queryable_indices: by error, timeframe, contributor - - llm_analysis: - inputs: current_query, compressed_summary, token_budget - output: reasoning (auditable), frames_to_retrieve, confidence_score -``` - -### New MCP Tools - -| Tool | Description | -| --------------- | -------------------------------------------------------- | -| `smart_context` | LLM-driven context retrieval with natural language query | -| `get_summary` | Compressed summary of project memory | - -### Other Changes - -- **Trace Detection**: Improved persistence and bundling -- **Model-Aware Compaction**: Handlers for context window management -- **Linear Sync**: Enhanced sync manager for Linear integration -- **Query Parser**: Extended natural language query parsing - -### Files Added - -- `src/core/retrieval/` - Complete retrieval system - - `types.ts` - Type definitions - - `summary-generator.ts` - Compressed summary generation - - `llm-context-retrieval.ts` - Main retrieval orchestrator - - `index.ts` - Module exports -- 
`src/core/context/compaction-handler.ts` - Autocompaction detection -- `src/core/context/model-aware-compaction.ts` - Model-specific handling -- `src/core/trace/trace-store.ts` - Trace persistence -- `src/integrations/linear/sync-manager.ts` - Enhanced Linear sync - -## Installation - -```bash -npm install -g @stackmemoryai/stackmemory@0.2.8 -``` - -## Usage - -```bash -# In Claude Desktop or MCP client: -smart_context "What did we work on related to authentication?" -get_summary -``` - ---- - -_Built with LLM-driven context retrieval_ diff --git a/SPEC.md b/SPEC.md deleted file mode 100644 index 41796aaf..00000000 --- a/SPEC.md +++ /dev/null @@ -1,1247 +0,0 @@ -# StackMemory Specification v1.0 - -## Executive Summary - -StackMemory is a **lossless, project-scoped memory runtime** for AI coding and writing tools that preserves full project context across sessions using a call stack metaphor instead of linear chat logs. It organizes memory as nested frames with smart retrieval, enabling AI tools to maintain context across thread resets, model switches, and long-running projects. - -## Core Architecture - -### 1. Memory Model - -#### 1.1 Frame Stack Structure -```yaml -memory_model: - structure: "call_stack" # Not linear chat log - max_depth: 10000 # Maximum frames in stack - retention: 30_days # Local retention window - storage: - local: "SQLite" # Fast local storage - remote: "TimeSeries DB + S3" # Infinite remote storage -``` - -#### 1.2 Frame Composition -```yaml -frame: - metadata: - id: "uuid" - title: "descriptive_name" - type: "task|debug|feature|architecture" - owner: "user_id" - created: "timestamp" - - contents: - events: [] # Tool calls, messages, observations - anchors: [] # Decisions, constraints, interfaces - digest: {} # 60% deterministic, 40% AI-generated summary - score: 0.0-1.0 # Importance score for retention -``` - -### 2. 
Storage Architecture - -#### 2.1 Two-Tier Storage System -```yaml -local_storage: - young: - age: "< 1 day" - retention: "complete" # Full events, all tool calls - memory_strategy: "hot" # RAM for instant access - compression: "none" - - mature: - age: "1-7 days" - retention: "selective" # Digests + anchors + high-score events - memory_strategy: "warm" # SQLite with memory cache - compression: "lz4" - score_threshold: 0.4 - - old: - age: "7-30 days" - retention: "critical" # Anchors + decisions only - memory_strategy: "cold" # SQLite, no cache - compression: "zstd" - score_threshold: 0.7 - - max_size: 2GB - overflow_strategy: "promote_to_remote" - -remote_storage: - retention: "infinite" - indexing: - primary: "timeseries" # ClickHouse/TimescaleDB - secondary: "inverted" # Elasticsearch - graph: "relationships" # Neo4j for frame deps - - retrieval: - cache_layer: "redis" - p50_latency: 50ms - p99_latency: 500ms - prefetch: true - - cost_model: - storage: "$0.02/GB/month" - retrieval: "$0.0004/1000_reads" - - tiers: - hot: "< 7 days" # S3 Standard - warm: "7-90 days" # S3 Standard-IA - cold: "> 90 days" # S3 Glacier - archive: "> 1 year" # Glacier Deep Archive -``` - -#### 2.2 Migration Strategy -```yaml -local_to_remote_migration: - triggers: - age_based: - schedule: "0 */6 * * *" # Every 6 hours - migrate_after: 24h - - size_pressure: - soft_limit: 75% # Start migration - hard_limit: 90% # Force migration - strategy: "lowest_score_first" - - importance_based: - score_thresholds: - "< 0.3": 2h # Low importance - "< 0.5": 12h # Medium - "< 0.7": 24h # High - ">= 0.7": 7d # Critical - - upload_strategy: - mode: "hybrid" - continuous_streaming: - for_events: ["decision", "constraint", "api_change"] - latency: "< 1 minute" - - batch_upload: - for_events: ["tool_call", "observation", "message"] - batch_size: 100 - interval: 300s - compression: true - - smart_batching: - group_by: "frame" - wait_for_frame_close: true - max_wait: 1h -``` - -### 3. 
Importance Scoring System - -#### 3.1 Tool Call Scoring (Deterministic) -```yaml -tool_scores: - # Discovery & Intelligence (0.8-1.0) - search: 0.95 # Finding context/code - task_creation: 0.90 # Planning work - decision_recording: 0.90 # Architectural choices - context_retrieval: 0.85 # Loading memory - - # Structural Changes (0.6-0.8) - write_new_file: 0.75 - major_refactor: 0.70 - api_change: 0.70 - - # Modifications (0.3-0.6) - edit: 0.50 - test: 0.45 - bash_execution: 0.40 - - # Simple Reads (0.1-0.3) - read: 0.25 - ls: 0.20 - grep: 0.15 # Simple pattern matching -``` - -#### 3.2 Scoring Formula -```yaml -scoring: - formula: | - score = (base_score * weights.base) + - (impact_multiplier * weights.impact) + - (persistence_bonus * weights.persistence) + - (reference_count * weights.reference) - - weights: - configurable: true # Per-project tuning - defaults: - base: 0.4 - impact: 0.3 - persistence: 0.2 - reference: 0.1 - - profiles: - security_focused: - impact: 0.5 # Changes matter more - exploration_heavy: - reference: 0.5 # Discovery paths matter - production_system: - persistence: 0.3 # Permanent changes critical -``` - -### 4. 
Smart Context Retrieval - -#### 4.1 LLM-Driven Retrieval -```yaml -context_retrieval: - compressed_summary: - # Provided to LLM for analysis - recent_session: - frames: 15 - dominant_operations: [] - files_touched: [] - errors_encountered: [] - - historical_patterns: - topic_frame_counts: {} - key_decisions: [] - recurring_issues: [] - - queryable_indices: - by_error_type: {} - by_timeframe: {} - by_contributor: {} - - llm_analysis: - inputs: - - current_query - - compressed_summary - - token_budget - - output: - reasoning: "visible/auditable" - frames_to_retrieve: [] - confidence_score: 0.0-1.0 - - generation: - when: "on_demand" # Not pre-computed - visibility: "settings/on_request" # Auditable -``` - -#### 4.2 Query Language - -##### 4.2.1 Natural Language Queries -```yaml -nlp_queries: - time_based: - - "provide context from the last day" - - "show me what happened yesterday" - - "get all work from December 15-20" - - "what did Alice work on last week" - - topic_based: - - "find all authentication work" - - "show database migration frames" - - "get frames about the login bug" - - "what decisions were made about caching" - - combined: - - "show Alice's auth work from last week" - - "get high-priority bug fixes from yesterday" - - "find security decisions in the last month" -``` - -##### 4.2.2 Structured Query Format -```typescript -interface StackMemoryQuery { - // Time filters - time?: { - last?: string; // "1d", "3h", "1w", "2m" - since?: Date; // ISO timestamp - until?: Date; - between?: [Date, Date]; - specific?: Date; // Exact date - }; - - // Content filters - content?: { - topic?: string[]; // ["auth", "database"] - files?: string[]; // ["src/*.ts", "tests/*"] - errors?: string[]; // ["timeout", "null pointer"] - tools?: string[]; // ["search", "edit", "test"] - }; - - // Frame filters - frame?: { - type?: FrameType[]; // ["bug", "feature", "refactor"] - status?: Status[]; // ["open", "closed", "stalled"] - score?: { - min?: number; // 0.0-1.0 - max?: 
number; - }; - depth?: { - min?: number; // Stack depth - max?: number; - }; - }; - - // People filters - people?: { - owner?: string[]; // ["alice", "bob"] - contributors?: string[]; - team?: string; // "backend-team" - }; - - // Output control - output?: { - limit?: number; // Max frames to return - sort?: SortBy; // "time" | "score" | "relevance" - include?: string[]; // ["digests", "events", "anchors"] - format?: Format; // "full" | "summary" | "ids" - }; -} -``` - -##### 4.2.3 Query Examples -```typescript -// Last day's context -{ - time: { last: "1d" }, - output: { format: "summary" } -} - -// High-importance auth work -{ - content: { topic: ["auth", "oauth"] }, - frame: { score: { min: 0.7 } }, - output: { sort: "score", limit: 20 } -} - -// Team's recent critical work -{ - time: { last: "3d" }, - people: { team: "backend-team" }, - frame: { score: { min: 0.8 } }, - output: { sort: "time" } -} -``` - -##### 4.2.4 Hybrid Query Syntax -```bash -# Command-line style -stackmemory query "auth work" --since="2024-12-20" --owner=alice - -# Inline modifiers -"show auth work @alice #high-priority since:yesterday depth:10" - -# Template style -"context from {time.last=1d} about {topic=authentication}" -``` - -##### 4.2.5 Query Shortcuts -```yaml -shortcuts: - # Time shortcuts - "today": { time: { last: "24h" } } - "yesterday": { time: { between: ["yesterday 00:00", "yesterday 23:59"] } } - "this week": { time: { last: "7d" } } - - # Topic shortcuts - "bugs": { frame: { type: ["bug", "error", "fix"] } } - "features": { frame: { type: ["feature", "enhancement"] } } - "critical": { frame: { score: { min: 0.8 } } } - - # Workflow shortcuts - "my work": { people: { owner: ["$current_user"] } } - "team work": { people: { team: "$current_team" } } - "recent": { time: { last: "4h" } } -``` - -##### 4.2.6 Query Response Format -```typescript -interface QueryResponse { - query: { - original: string; // User's input - interpreted: Query; // Parsed query - expanded: Query; // 
After expansion - }; - - results: { - frames: Frame[]; // Matching frames - count: number; // Total matches - score: number; // Query confidence - }; - - metadata: { - execution_time: number; // ms - tokens_used: number; - cache_hit: boolean; - }; - - suggestions: { - refine: string[]; // "Try adding time filter" - related: string[]; // "See also: auth decisions" - }; -} -``` - -#### 4.3 Trace Bundling -```yaml -trace_detection: - definition: "Chain of related tool calls" - - boundaries: - time_proximity: 30s # Tools within 30 seconds - same_target: true # Same file/directory - causal_relationship: true # Error → fix → test - - compression: - strategy: "single_trace" # Bundle as one unit - scoring: "max(all_tools)" # Use highest score - - example: - raw: "Search → Read(10) → Edit(3) → Test → Fix → Test" - compressed: "Fixed auth bug via search-driven refactor [0.95]" -``` - -### 5. Garbage Collection - -#### 5.1 Incremental GC Strategy -```yaml -garbage_collection: - type: "incremental" # Avoid stop-the-world - - process: - frames_per_cycle: 100 # Process in chunks - cycle_interval: 60s # Every minute - - generational: - young: "< 1 day" - mature: "1-7 days" - old: "7-30 days" - - priorities: - protect: - - current_session - - pinned_frames - - unsynced_changes - - high_score_frames - - evict_first: - - low_score_frames - - orphaned_frames - - duplicate_traces -``` - -### 6. Digest Generation - -#### 6.1 Hybrid Approach (60/40) -```yaml -digest_generation: - deterministic: 60% # Reliable extraction - ai_generated: 40% # AI-generated summary - - deterministic_fields: - - files_modified - - tests_run - - errors_encountered - - tool_call_count - - duration - - exit_status - - ai_generated_fields: - - summary # 1-2 sentences - - key_decisions - - learned_insights - - next_steps - - processing: - when: "batch_during_idle" # Not immediate - max_tokens: 200 - fallback: "deterministic_only" -``` - -### 7. 
Team Collaboration - -#### 7.1 Dual Stack Architecture -```yaml -stack_types: - individual: - owner: "single_user" - visibility: "private" - can_promote: true - - shared: - team: "team_id" - visibility: "team" - participants: [] - handoff_enabled: true - - interaction: - promote: "individual → shared" - fork: "shared → individual" - merge: "individual → shared" - handoff: "alice → bob" -``` - -#### 7.2 Frame Ownership -```yaml -frame_ownership: - creator: "original_author" - contributors: [] - last_active: "current_user" - - permissions: - read: "team" - continue: "team" - close: "owner_or_admin" - delete: "owner_only" - - handoff: - explicit: "transfer_command" - implicit: "continue_working" - timeout: "idle_24h" -``` - -### 8. Configuration System - -#### 8.1 Configuration File -```yaml -# .stackmemory/config.yaml -version: 1.0 - -scoring: - weights: - base: 0.4 - impact: 0.3 - persistence: 0.2 - reference: 0.1 - - tool_scores: - # Custom overrides - custom_tool: 0.75 - -retention: - local: - young: 1d - mature: 7d - old: 30d - max_size: 2GB - - remote: - enabled: true - retention: infinite - -performance: - max_stack_depth: 10000 - retrieval_timeout_ms: 500 - -profiles: - environment: "production" -``` - -#### 8.2 Configuration Validation -```bash -$ stackmemory config validate - -validation_checks: - - syntax_validation - - semantic_validation - - performance_analysis - - compatibility_check - - environment_verification - -output: - errors: [] - warnings: [] - suggestions: [] - auto_fix_available: true -``` - -### 9. 
MCP Integration - -#### 9.1 Available Tools -```yaml -mcp_tools: - # Context Management - - get_context # Smart retrieval with LLM - - add_decision # Record decisions - - start_frame # Begin new frame - - close_frame # Close with digest - - # Task Management - - create_task - - update_task_status - - get_active_tasks - - get_task_metrics - - # Linear Integration - - linear_sync - - linear_update_task - - linear_get_tasks - - # Analytics - - get_metrics - - get_frame_history - - search_frames -``` - -#### 9.2 Context Bundle Format -```json -{ - "compressed_summary": { - "recent_activity": {}, - "historical_patterns": {}, - "statistics": {} - }, - "hot_frames": [], - "relevant_anchors": [], - "query_endpoints": { - "deep_search": "endpoint", - "replay_session": "endpoint", - "get_specific_frames": "endpoint" - } -} -``` - -### 10. Security & Privacy - -#### 10.1 Secret Detection -```yaml -secret_detection: - patterns: - - api_keys: "regex_patterns" - - passwords: "regex_patterns" - - tokens: "regex_patterns" - - custom: "user_defined" - - action: - detection: "real_time" - handling: "redact" # Not block - notification: "warn_user" - - storage: - hashed: true - reversible: false -``` - -#### 10.2 Privacy Controls -```yaml -privacy: - data_residency: "configurable" - encryption: - at_rest: "AES-256" - in_transit: "TLS 1.3" - - retention: - deletion_on_request: true - audit_trail: "maintained" - - sharing: - default: "private" - team_opt_in: true - org_visibility: "admin_only" -``` - -### 11. Performance Targets - -#### 11.1 SLAs -```yaml -performance_slas: - retrieval: - p50: 50ms - p95: 200ms - p99: 500ms - - storage: - write_throughput: "10K events/sec" - batch_upload: "100MB/min" - - availability: - uptime: "99.9%" - data_durability: "99.999999999%" # 11 nines - - scale: - max_frames: 10000 - max_events_per_frame: 5000 - max_storage_per_project: "unlimited" -``` - -### 12. 
Advanced Memory Patterns - -#### 12.1 Episodic Memory System -```yaml -episodic_memory: - definition: "Capture and reuse past agent experiences" - - episode_structure: - trigger: "significant_event" # Decision, error, breakthrough - context_snapshot: - - pre_state # State before episode - - action_sequence # Tools and decisions - - outcome # Result and impact - - learned_pattern # Extracted insight - - retrieval_strategy: - similarity_matching: - current_context: true - embedding_distance: "cosine" - threshold: 0.85 - - temporal_relevance: - recent_weight: 0.7 - historical_weight: 0.3 - - injection_mechanism: - when: "similar_context_detected" - format: "Past episode: {summary} led to {outcome}" - max_episodes: 3 -``` - -#### 12.2 Memory Synthesis from Execution Logs -```yaml -log_synthesis: - pattern_extraction: - frequency_analysis: - - common_error_sequences - - repeated_tool_patterns - - decision_reversals - - causality_detection: - error_to_fix_chains: true - search_to_discovery: true - test_to_refactor: true - - synthesis_output: - workflow_patterns: - - "Search → Read → Edit → Test → Fix" - - "Error → Analyze → Search → Solution" - - anti_patterns: - - "Repeated failed attempts" - - "Circular dependencies" - - optimization_opportunities: - - "Batch similar operations" - - "Cache frequent queries" -``` - -### 13. 
Feedback Loop Architecture - -#### 13.1 Reflection Loop Pattern -```yaml -reflection_loop: - trigger_conditions: - - frame_completion - - significant_error - - milestone_reached - - context_switch - - reflection_process: - analyze: - - what_worked: "successful patterns" - - what_failed: "error patterns" - - alternative_approaches: "unexplored paths" - - synthesize: - key_insights: [] - patterns_identified: [] - improvements_suggested: [] - - persist: - to_anchors: true # Save as decisions - to_digest: true # Include in summary - score_boost: 0.2 # Important for learning -``` - -#### 13.2 Self-Critique Evaluation System -```yaml -self_critique: - evaluation_dimensions: - code_quality: - - correctness: "Does it work?" - - efficiency: "Is it optimal?" - - maintainability: "Is it clean?" - - decision_quality: - - rationale: "Was reasoning sound?" - - alternatives: "Were options considered?" - - evidence: "Was it data-driven?" - - process_quality: - - methodology: "Was approach systematic?" - - tool_usage: "Were tools used effectively?" - - time_management: "Was effort proportional?" - - critique_storage: - attach_to_frame: true - influence_scoring: true - guide_future_retrieval: true - - continuous_improvement: - track_critique_patterns: true - adjust_weights_based_on_outcomes: true - share_learnings_across_team: true -``` - -#### 13.3 Rich Feedback Integration -```yaml -feedback_sources: - automated: - - test_results - - linting_output - - performance_metrics - - security_scans - - human: - - code_review_comments - - user_satisfaction - - explicit_feedback - - environmental: - - build_success_rate - - deployment_outcomes - - production_incidents - - integration: - collection: "multi_channel" - correlation: "cross_reference" - weight_by_reliability: true - - feedback_to_memory: - positive: "boost_frame_score" - negative: "annotate_with_lessons" - neutral: "record_for_pattern" -``` - -### 14. 
Context Optimization Strategies - -#### 14.1 Context Minimization Pattern -```yaml -context_minimization: - strategies: - intelligent_filtering: - remove_redundant: true - compress_similar: true - prioritize_relevant: true - - hierarchical_summarization: - detail_levels: - - full: "complete events" - - medium: "key operations" - - summary: "outcomes only" - - dynamic_windowing: - expand_on: "high_relevance" - contract_on: "low_relevance" - adaptive_sizing: true - - benefits: - reduced_token_usage: "40-60%" - faster_processing: true - clearer_focus: true -``` - -#### 14.2 Dynamic Context Injection -```yaml -dynamic_injection: - triggers: - - context_switch_detected - - new_error_type - - unfamiliar_codebase_area - - performance_degradation - - injection_sources: - - relevant_documentation - - similar_past_solutions - - team_knowledge_base - - external_references - - injection_timing: - just_in_time: true # Right before needed - predictive: true # Anticipate needs - on_demand: true # User requested - - injection_format: - inline_hints: "minimal disruption" - sidebar_context: "additional detail" - full_frame: "comprehensive context" -``` - -#### 14.3 Context Window Anxiety Management -```yaml -anxiety_management: - monitoring: - track_usage: "continuous" - alert_threshold: 70% - critical_threshold: 90% - - mitigation_strategies: - progressive_compression: - - summarize_old_frames - - drop_low_score_events - - archive_to_retrieval - - selective_loading: - - load_only_relevant - - defer_deep_history - - use_pointers_not_content - - smart_truncation: - preserve: "decisions_and_outcomes" - truncate: "intermediate_steps" - compress: "repetitive_patterns" -``` - -### 15. 
Tool Orchestration Patterns - -#### 15.1 Progressive Tool Discovery -```yaml -tool_discovery: - learning_progression: - basic: ["read", "write", "search"] - intermediate: ["edit", "test", "analyze"] - advanced: ["refactor", "optimize", "architect"] - - discovery_mechanism: - observation: "watch_usage_patterns" - suggestion: "recommend_when_relevant" - education: "explain_tool_benefits" - - tool_introduction: - gradual: true - context_appropriate: true - with_examples: true -``` - -#### 15.2 Conditional Parallel Execution -```yaml -parallel_execution: - conditions: - can_parallelize: - - independent_files - - different_subsystems - - non_conflicting_operations - - must_serialize: - - dependent_changes - - shared_resources - - ordered_operations - - orchestration: - plan: "identify_parallelizable" - execute: "batch_similar_operations" - synchronize: "merge_results" - handle_conflicts: "retry_or_serialize" - - benefits: - speed: "3-5x improvement" - efficiency: "reduced_overhead" - atomicity: "group_related_changes" -``` - -### 16. 
Multi-Agent Coordination - -#### 16.1 Sub-Agent Spawning Pattern -```yaml -sub_agent_spawning: - spawn_triggers: - - complex_subtask - - specialized_domain - - parallel_workstream - - exploratory_analysis - - agent_types: - analyzer: "deep_investigation" - builder: "implementation" - reviewer: "quality_check" - documenter: "knowledge_capture" - - coordination: - handoff: "clear_context_transfer" - results: "structured_return" - state: "shared_memory_access" - - lifecycle: - spawn: "with_specific_context" - execute: "autonomous_operation" - report: "structured_findings" - terminate: "clean_resource_release" -``` - -#### 16.2 Multi-Agent Debate Pattern -```yaml -debate_pattern: - participants: - proposer: "suggests_solution" - critic: "identifies_issues" - synthesizer: "merges_perspectives" - - debate_process: - rounds: 3 - convergence_required: true - consensus_threshold: 0.8 - - decision_recording: - all_perspectives: true - final_consensus: true - dissenting_opinions: true - - benefits: - better_decisions: "multiple viewpoints" - error_reduction: "critical analysis" - learning: "exposed reasoning" -``` - -### 17. Evaluation and Scoring Evolution - -#### 17.1 Anti-Reward-Hacking Design -```yaml -anti_reward_hacking: - diverse_metrics: - - outcome_based: "actual_results" - - process_based: "methodology_quality" - - efficiency_based: "resource_usage" - - learning_based: "knowledge_gained" - - dynamic_weights: - adjust_based_on: - - gaming_detection - - metric_reliability - - context_importance - - validation: - cross_check_metrics: true - human_spot_checks: true - anomaly_detection: true -``` - -#### 17.2 Continuous Calibration -```yaml -calibration: - feedback_loop: - collect: "outcome_data" - analyze: "prediction_vs_actual" - adjust: "scoring_weights" - - calibration_frequency: - minor: "daily" - major: "weekly" - reset: "monthly" - - drift_detection: - monitor: "score_distributions" - alert: "significant_changes" - auto_adjust: "within_bounds" -``` - -### 18. 
Future Extensibility - -#### 18.1 Roadmap Features (Enhanced) -```yaml -planned_features: - # Original features - - cross_repository_memory - - team_memory_spaces - - background_project_compilers - - fine_grained_retention_policies - - ml_based_importance_scoring - - predictive_context_loading - - ide_frame_boundary_visualization - - # New pattern-based features - - episodic_memory_retrieval - - reflection_loop_automation - - multi_agent_orchestration - - context_anxiety_management - - progressive_tool_discovery - - debate_based_decision_making - - continuous_self_improvement -``` - -#### 18.2 Integration Points -```yaml -integrations: - current: - - claude_code - - linear - - github - - planned: - - vscode - - cursor - - jetbrains - - gitlab - - jira - - slack - - pattern_integrations: - - langchain: "memory_patterns" - - autogen: "multi_agent" - - guidance: "structured_generation" - - dspy: "optimization_loops" -``` - -## Implementation Priorities - -### Phase 1: Core Runtime (Current) -- [x] Frame stack management -- [x] Local SQLite storage -- [x] MCP server -- [x] Basic scoring -- [x] Claude Code integration - -### Phase 2: Intelligence Layer -- [ ] LLM-driven retrieval -- [ ] Hybrid digest generation -- [ ] Smart trace detection -- [ ] Configurable scoring - -### Phase 3: Collaboration -- [ ] Shared team stacks -- [ ] Frame handoff -- [ ] Merge conflict resolution -- [ ] Team analytics - -### Phase 4: Scale -- [ ] Remote infinite storage -- [ ] Incremental GC -- [ ] Performance optimization -- [ ] Enterprise features - -## Success Metrics - -```yaml -adoption: - - daily_active_projects: 10000 - - frames_created_per_day: 1M - - context_retrievals_per_day: 10M - -quality: - - retrieval_relevance: "> 90%" - - digest_accuracy: "> 85%" - - user_satisfaction: "> 4.5/5" - -performance: - - retrieval_latency: "< 100ms p50" - - zero_context_loss: true - - uptime: "> 99.9%" -``` - -## Configuration Examples - -### Example 1: Security-Focused Project -```yaml -scoring: 
- weights: - impact: 0.5 - persistence: 0.3 - tool_scores: - security_scan: 0.95 - -retention: - local: - old: 90d # Keep security decisions longer -``` - -### Example 2: Exploration-Heavy Project -```yaml -scoring: - weights: - reference: 0.5 - base: 0.2 - tool_scores: - search: 0.99 - -performance: - retrieval_timeout_ms: 1000 # Allow deeper searches -``` - -## Implementation Guidance - -### Pattern Implementation Priority Matrix -```yaml -high_impact_easy: - # Implement first - quick wins - - context_minimization # 40-60% token savings - - reflection_loop # Improves decision quality - - parallel_tool_execution # 3-5x speed improvement - - episodic_memory # Reuse past solutions - -high_impact_complex: - # Phase 2 - significant value - - self_critique_system # Continuous improvement - - multi_agent_debate # Better decisions - - dynamic_context_injection # Just-in-time context - - log_synthesis # Learn from patterns - -moderate_impact: - # Phase 3 - refinements - - progressive_tool_discovery # Gradual capability - - anti_reward_hacking # Robust metrics - - sub_agent_spawning # Task delegation - - context_anxiety_mgmt # Proactive optimization -``` - -### Key Design Principles from Patterns -```yaml -principles: - 1_externalize_state: - rationale: "Enable persistence across sessions" - implementation: "Filesystem + database hybrid" - - 2_minimize_context: - rationale: "Maximize efficiency and clarity" - implementation: "Hierarchical summarization" - - 3_learn_continuously: - rationale: "Improve over time" - implementation: "Reflection loops + pattern extraction" - - 4_orchestrate_intelligently: - rationale: "Use right tool for task" - implementation: "Progressive discovery + conditional execution" - - 5_critique_systematically: - rationale: "Ensure quality" - implementation: "Multi-dimensional evaluation" -``` - -### Practical Implementation Steps -```yaml -step_1_baseline: - - implement_frame_stack - - add_basic_scoring - - create_sqlite_storage - - 
build_mcp_interface - -step_2_memory_patterns: - - add_episodic_retrieval - - implement_log_synthesis - - create_reflection_loops - - build_pattern_detection - -step_3_optimization: - - add_context_minimization - - implement_dynamic_injection - - create_parallel_execution - - optimize_retrieval_speed - -step_4_intelligence: - - add_self_critique - - implement_debate_patterns - - create_continuous_calibration - - build_learning_system - -step_5_scale: - - add_multi_agent_coordination - - implement_distributed_memory - - create_team_collaboration - - optimize_for_production -``` - -## Conclusion - -StackMemory provides a revolutionary approach to AI tool memory management through: -- **Lossless storage** with smart retrieval -- **Frame-based organization** replacing linear chat logs -- **Two-tier storage** balancing performance and capacity -- **LLM-driven context selection** for optimal relevance -- **Team collaboration** through shared and individual stacks -- **Configurable scoring** adapting to project needs -- **Advanced patterns** from agentic AI research -- **Continuous learning** through reflection and synthesis -- **Intelligent orchestration** of tools and agents -- **Context optimization** for efficiency at scale - -The system ensures AI tools never lose context while maintaining performance at scale, incorporating state-of-the-art patterns from the agentic AI community. 
\ No newline at end of file diff --git a/mcp_review_config.json b/config/mcp_review_config.json similarity index 100% rename from mcp_review_config.json rename to config/mcp_review_config.json diff --git a/docs/RELEASE_NOTES.md b/docs/RELEASE_NOTES.md index ff14ab8f..bba8bb97 100644 --- a/docs/RELEASE_NOTES.md +++ b/docs/RELEASE_NOTES.md @@ -77,4 +77,4 @@ get_summary --- -_Built with LLM-driven intelligent context retrieval_ +_Built with LLM-driven context retrieval_ diff --git a/docs/SPEC.md b/docs/SPEC.md index 1941fb48..41796aaf 100644 --- a/docs/SPEC.md +++ b/docs/SPEC.md @@ -2,7 +2,7 @@ ## Executive Summary -StackMemory is a **lossless, project-scoped memory runtime** for development tools that preserves full project context across sessions using a call stack metaphor instead of linear chat logs. It organizes memory as nested frames with smart retrieval, maintaining context across thread resets, model switches, and long-running projects. +StackMemory is a **lossless, project-scoped memory runtime** for AI coding and writing tools that preserves full project context across sessions using a call stack metaphor instead of linear chat logs. It organizes memory as nested frames with smart retrieval, enabling AI tools to maintain context across thread resets, model switches, and long-running projects. ## Core Architecture @@ -120,7 +120,7 @@ local_to_remote_migration: interval: 300s compression: true - intelligent_batching: + smart_batching: group_by: "frame" wait_for_frame_close: true max_wait: 1h @@ -179,7 +179,7 @@ scoring: persistence: 0.3 # Permanent changes critical ``` -### 4. Intelligent Context Retrieval +### 4. 
Smart Context Retrieval #### 4.1 LLM-Driven Retrieval ```yaml @@ -429,7 +429,7 @@ garbage_collection: ```yaml digest_generation: deterministic: 60% # Reliable extraction - ai_generated: 40% # Intelligent summary + ai_generated: 40% # AI-generated summary deterministic_fields: - files_modified @@ -657,11 +657,380 @@ performance_slas: max_storage_per_project: "unlimited" ``` -### 12. Future Extensibility +### 12. Advanced Memory Patterns -#### 12.1 Roadmap Features +#### 12.1 Episodic Memory System +```yaml +episodic_memory: + definition: "Capture and reuse past agent experiences" + + episode_structure: + trigger: "significant_event" # Decision, error, breakthrough + context_snapshot: + - pre_state # State before episode + - action_sequence # Tools and decisions + - outcome # Result and impact + - learned_pattern # Extracted insight + + retrieval_strategy: + similarity_matching: + current_context: true + embedding_distance: "cosine" + threshold: 0.85 + + temporal_relevance: + recent_weight: 0.7 + historical_weight: 0.3 + + injection_mechanism: + when: "similar_context_detected" + format: "Past episode: {summary} led to {outcome}" + max_episodes: 3 +``` + +#### 12.2 Memory Synthesis from Execution Logs +```yaml +log_synthesis: + pattern_extraction: + frequency_analysis: + - common_error_sequences + - repeated_tool_patterns + - decision_reversals + + causality_detection: + error_to_fix_chains: true + search_to_discovery: true + test_to_refactor: true + + synthesis_output: + workflow_patterns: + - "Search → Read → Edit → Test → Fix" + - "Error → Analyze → Search → Solution" + + anti_patterns: + - "Repeated failed attempts" + - "Circular dependencies" + + optimization_opportunities: + - "Batch similar operations" + - "Cache frequent queries" +``` + +### 13. 
Feedback Loop Architecture + +#### 13.1 Reflection Loop Pattern +```yaml +reflection_loop: + trigger_conditions: + - frame_completion + - significant_error + - milestone_reached + - context_switch + + reflection_process: + analyze: + - what_worked: "successful patterns" + - what_failed: "error patterns" + - alternative_approaches: "unexplored paths" + + synthesize: + key_insights: [] + patterns_identified: [] + improvements_suggested: [] + + persist: + to_anchors: true # Save as decisions + to_digest: true # Include in summary + score_boost: 0.2 # Important for learning +``` + +#### 13.2 Self-Critique Evaluation System +```yaml +self_critique: + evaluation_dimensions: + code_quality: + - correctness: "Does it work?" + - efficiency: "Is it optimal?" + - maintainability: "Is it clean?" + + decision_quality: + - rationale: "Was reasoning sound?" + - alternatives: "Were options considered?" + - evidence: "Was it data-driven?" + + process_quality: + - methodology: "Was approach systematic?" + - tool_usage: "Were tools used effectively?" + - time_management: "Was effort proportional?" + + critique_storage: + attach_to_frame: true + influence_scoring: true + guide_future_retrieval: true + + continuous_improvement: + track_critique_patterns: true + adjust_weights_based_on_outcomes: true + share_learnings_across_team: true +``` + +#### 13.3 Rich Feedback Integration +```yaml +feedback_sources: + automated: + - test_results + - linting_output + - performance_metrics + - security_scans + + human: + - code_review_comments + - user_satisfaction + - explicit_feedback + + environmental: + - build_success_rate + - deployment_outcomes + - production_incidents + + integration: + collection: "multi_channel" + correlation: "cross_reference" + weight_by_reliability: true + + feedback_to_memory: + positive: "boost_frame_score" + negative: "annotate_with_lessons" + neutral: "record_for_pattern" +``` + +### 14. 
Context Optimization Strategies + +#### 14.1 Context Minimization Pattern +```yaml +context_minimization: + strategies: + intelligent_filtering: + remove_redundant: true + compress_similar: true + prioritize_relevant: true + + hierarchical_summarization: + detail_levels: + - full: "complete events" + - medium: "key operations" + - summary: "outcomes only" + + dynamic_windowing: + expand_on: "high_relevance" + contract_on: "low_relevance" + adaptive_sizing: true + + benefits: + reduced_token_usage: "40-60%" + faster_processing: true + clearer_focus: true +``` + +#### 14.2 Dynamic Context Injection +```yaml +dynamic_injection: + triggers: + - context_switch_detected + - new_error_type + - unfamiliar_codebase_area + - performance_degradation + + injection_sources: + - relevant_documentation + - similar_past_solutions + - team_knowledge_base + - external_references + + injection_timing: + just_in_time: true # Right before needed + predictive: true # Anticipate needs + on_demand: true # User requested + + injection_format: + inline_hints: "minimal disruption" + sidebar_context: "additional detail" + full_frame: "comprehensive context" +``` + +#### 14.3 Context Window Anxiety Management +```yaml +anxiety_management: + monitoring: + track_usage: "continuous" + alert_threshold: 70% + critical_threshold: 90% + + mitigation_strategies: + progressive_compression: + - summarize_old_frames + - drop_low_score_events + - archive_to_retrieval + + selective_loading: + - load_only_relevant + - defer_deep_history + - use_pointers_not_content + + smart_truncation: + preserve: "decisions_and_outcomes" + truncate: "intermediate_steps" + compress: "repetitive_patterns" +``` + +### 15. 
Tool Orchestration Patterns + +#### 15.1 Progressive Tool Discovery +```yaml +tool_discovery: + learning_progression: + basic: ["read", "write", "search"] + intermediate: ["edit", "test", "analyze"] + advanced: ["refactor", "optimize", "architect"] + + discovery_mechanism: + observation: "watch_usage_patterns" + suggestion: "recommend_when_relevant" + education: "explain_tool_benefits" + + tool_introduction: + gradual: true + context_appropriate: true + with_examples: true +``` + +#### 15.2 Conditional Parallel Execution +```yaml +parallel_execution: + conditions: + can_parallelize: + - independent_files + - different_subsystems + - non_conflicting_operations + + must_serialize: + - dependent_changes + - shared_resources + - ordered_operations + + orchestration: + plan: "identify_parallelizable" + execute: "batch_similar_operations" + synchronize: "merge_results" + handle_conflicts: "retry_or_serialize" + + benefits: + speed: "3-5x improvement" + efficiency: "reduced_overhead" + atomicity: "group_related_changes" +``` + +### 16. 
Multi-Agent Coordination + +#### 16.1 Sub-Agent Spawning Pattern +```yaml +sub_agent_spawning: + spawn_triggers: + - complex_subtask + - specialized_domain + - parallel_workstream + - exploratory_analysis + + agent_types: + analyzer: "deep_investigation" + builder: "implementation" + reviewer: "quality_check" + documenter: "knowledge_capture" + + coordination: + handoff: "clear_context_transfer" + results: "structured_return" + state: "shared_memory_access" + + lifecycle: + spawn: "with_specific_context" + execute: "autonomous_operation" + report: "structured_findings" + terminate: "clean_resource_release" +``` + +#### 16.2 Multi-Agent Debate Pattern +```yaml +debate_pattern: + participants: + proposer: "suggests_solution" + critic: "identifies_issues" + synthesizer: "merges_perspectives" + + debate_process: + rounds: 3 + convergence_required: true + consensus_threshold: 0.8 + + decision_recording: + all_perspectives: true + final_consensus: true + dissenting_opinions: true + + benefits: + better_decisions: "multiple viewpoints" + error_reduction: "critical analysis" + learning: "exposed reasoning" +``` + +### 17. Evaluation and Scoring Evolution + +#### 17.1 Anti-Reward-Hacking Design +```yaml +anti_reward_hacking: + diverse_metrics: + - outcome_based: "actual_results" + - process_based: "methodology_quality" + - efficiency_based: "resource_usage" + - learning_based: "knowledge_gained" + + dynamic_weights: + adjust_based_on: + - gaming_detection + - metric_reliability + - context_importance + + validation: + cross_check_metrics: true + human_spot_checks: true + anomaly_detection: true +``` + +#### 17.2 Continuous Calibration +```yaml +calibration: + feedback_loop: + collect: "outcome_data" + analyze: "prediction_vs_actual" + adjust: "scoring_weights" + + calibration_frequency: + minor: "daily" + major: "weekly" + reset: "monthly" + + drift_detection: + monitor: "score_distributions" + alert: "significant_changes" + auto_adjust: "within_bounds" +``` + +### 18. 
Future Extensibility + +#### 18.1 Roadmap Features (Enhanced) ```yaml planned_features: + # Original features - cross_repository_memory - team_memory_spaces - background_project_compilers @@ -669,9 +1038,18 @@ planned_features: - ml_based_importance_scoring - predictive_context_loading - ide_frame_boundary_visualization + + # New pattern-based features + - episodic_memory_retrieval + - reflection_loop_automation + - multi_agent_orchestration + - context_anxiety_management + - progressive_tool_discovery + - debate_based_decision_making + - continuous_self_improvement ``` -#### 12.2 Integration Points +#### 18.2 Integration Points ```yaml integrations: current: @@ -686,37 +1064,40 @@ integrations: - gitlab - jira - slack + + pattern_integrations: + - langchain: "memory_patterns" + - autogen: "multi_agent" + - guidance: "structured_generation" + - dspy: "optimization_loops" ``` ## Implementation Priorities -### Phase 1: Core Runtime āœ… COMPLETE (v0.1.x) +### Phase 1: Core Runtime (Current) - [x] Frame stack management - [x] Local SQLite storage - [x] MCP server - [x] Basic scoring - [x] Claude Code integration -### Phase 2: Intelligence Layer āœ… COMPLETE (v0.2.x) -- [x] LLM-driven retrieval -- [x] Hybrid digest generation (60/40 deterministic/AI) -- [x] Smart trace detection and bundling -- [x] Configurable scoring with weight profiles -- [x] Railway storage optimization (3-tier) - -### Phase 3: Collaboration āœ… COMPLETE (v0.3.x) -- [x] Shared team stacks (dual stack architecture) -- [x] Frame handoff workflows (v0.3.4) -- [x] Context bridge for cross-session sync -- [x] Linear integration with bidirectional sync -- [x] Claude Skills for workflow automation - -### Phase 4: Scale (Next - v0.4.x) -- [ ] Remote infinite storage (S3/GCS) -- [ ] Incremental garbage collection -- [ ] Performance optimization (<100ms p50) -- [ ] Enterprise features (SSO, audit logs) -- [ ] Multi-repository support +### Phase 2: Intelligence Layer +- [ ] LLM-driven retrieval +- [ ] Hybrid 
digest generation +- [ ] Smart trace detection +- [ ] Configurable scoring + +### Phase 3: Collaboration +- [ ] Shared team stacks +- [ ] Frame handoff +- [ ] Merge conflict resolution +- [ ] Team analytics + +### Phase 4: Scale +- [ ] Remote infinite storage +- [ ] Incremental GC +- [ ] Performance optimization +- [ ] Enterprise features ## Success Metrics @@ -766,14 +1147,101 @@ performance: retrieval_timeout_ms: 1000 # Allow deeper searches ``` +## Implementation Guidance + +### Pattern Implementation Priority Matrix +```yaml +high_impact_easy: + # Implement first - quick wins + - context_minimization # 40-60% token savings + - reflection_loop # Improves decision quality + - parallel_tool_execution # 3-5x speed improvement + - episodic_memory # Reuse past solutions + +high_impact_complex: + # Phase 2 - significant value + - self_critique_system # Continuous improvement + - multi_agent_debate # Better decisions + - dynamic_context_injection # Just-in-time context + - log_synthesis # Learn from patterns + +moderate_impact: + # Phase 3 - refinements + - progressive_tool_discovery # Gradual capability + - anti_reward_hacking # Robust metrics + - sub_agent_spawning # Task delegation + - context_anxiety_mgmt # Proactive optimization +``` + +### Key Design Principles from Patterns +```yaml +principles: + 1_externalize_state: + rationale: "Enable persistence across sessions" + implementation: "Filesystem + database hybrid" + + 2_minimize_context: + rationale: "Maximize efficiency and clarity" + implementation: "Hierarchical summarization" + + 3_learn_continuously: + rationale: "Improve over time" + implementation: "Reflection loops + pattern extraction" + + 4_orchestrate_intelligently: + rationale: "Use right tool for task" + implementation: "Progressive discovery + conditional execution" + + 5_critique_systematically: + rationale: "Ensure quality" + implementation: "Multi-dimensional evaluation" +``` + +### Practical Implementation Steps +```yaml +step_1_baseline: + - 
implement_frame_stack + - add_basic_scoring + - create_sqlite_storage + - build_mcp_interface + +step_2_memory_patterns: + - add_episodic_retrieval + - implement_log_synthesis + - create_reflection_loops + - build_pattern_detection + +step_3_optimization: + - add_context_minimization + - implement_dynamic_injection + - create_parallel_execution + - optimize_retrieval_speed + +step_4_intelligence: + - add_self_critique + - implement_debate_patterns + - create_continuous_calibration + - build_learning_system + +step_5_scale: + - add_multi_agent_coordination + - implement_distributed_memory + - create_team_collaboration + - optimize_for_production +``` + ## Conclusion StackMemory provides a revolutionary approach to AI tool memory management through: -- **Lossless storage** with intelligent retrieval +- **Lossless storage** with smart retrieval - **Frame-based organization** replacing linear chat logs - **Two-tier storage** balancing performance and capacity - **LLM-driven context selection** for optimal relevance - **Team collaboration** through shared and individual stacks - **Configurable scoring** adapting to project needs +- **Advanced patterns** from agentic AI research +- **Continuous learning** through reflection and synthesis +- **Intelligent orchestration** of tools and agents +- **Context optimization** for efficiency at scale -The system ensures AI tools never lose context while maintaining performance at scale. \ No newline at end of file +The system ensures AI tools never lose context while maintaining performance at scale, incorporating state-of-the-art patterns from the agentic AI community. 
\ No newline at end of file diff --git a/tomorrow.md b/docs/tomorrow.md similarity index 100% rename from tomorrow.md rename to docs/tomorrow.md diff --git a/vision.md b/docs/vision.md similarity index 100% rename from vision.md rename to docs/vision.md From dbe9856bcd6593b2c042ca023f7c649e40158485 Mon Sep 17 00:00:00 2001 From: "StackMemory Bot (CLI)" Date: Fri, 17 Apr 2026 08:47:53 -0400 Subject: [PATCH 11/11] fix(test): mock canonicalStateStore in session tests MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit Session tests mocked fs/promises but not the canonical-store module. The canonicalStateStore singleton inherited the mocked fs, causing pathExists to return true while readFile returned undefined — crashing JSON.parse. Mock the entire canonical-store module with stubs for upsertSession, appendEvent, and endSession. --- src/core/session/__tests__/session-manager.test.ts | 9 +++++++++ src/core/session/__tests__/session.test.ts | 9 +++++++++ 2 files changed, 18 insertions(+) diff --git a/src/core/session/__tests__/session-manager.test.ts b/src/core/session/__tests__/session-manager.test.ts index 2d24eee2..93ef884c 100644 --- a/src/core/session/__tests__/session-manager.test.ts +++ b/src/core/session/__tests__/session-manager.test.ts @@ -21,6 +21,15 @@ vi.mock('child_process', () => ({ execSync: vi.fn().mockReturnValue('main\n'), })); +vi.mock('../../shared-state/canonical-store.js', () => ({ + canonicalStateStore: { + upsertSession: vi.fn().mockResolvedValue({}), + appendEvent: vi.fn().mockResolvedValue(undefined), + endSession: vi.fn().mockResolvedValue(undefined), + initialize: vi.fn().mockResolvedValue(undefined), + }, +})); + describe('SessionManager', () => { let manager: SessionManager; diff --git a/src/core/session/__tests__/session.test.ts b/src/core/session/__tests__/session.test.ts index 0fb3d35f..7c502887 100644 --- a/src/core/session/__tests__/session.test.ts +++ b/src/core/session/__tests__/session.test.ts 
@@ -26,6 +26,15 @@ vi.mock('fs/promises', () => ({ access: vi.fn(), })); +vi.mock('../../shared-state/canonical-store.js', () => ({ + canonicalStateStore: { + upsertSession: vi.fn().mockResolvedValue({}), + appendEvent: vi.fn().mockResolvedValue(undefined), + endSession: vi.fn().mockResolvedValue(undefined), + initialize: vi.fn().mockResolvedValue(undefined), + }, +})); + vi.mock('child_process', () => ({ execSync: vi.fn().mockReturnValue('main\n'), }));