diff --git a/.agents/skills/golang-error-handling/SKILL.md b/.agents/skills/golang-error-handling/SKILL.md
new file mode 100644
index 0000000..2dc74df
--- /dev/null
+++ b/.agents/skills/golang-error-handling/SKILL.md
@@ -0,0 +1,87 @@
+---
+name: golang-error-handling
+description: "Idiomatic Golang error handling — creation, wrapping with %w, errors.Is/As, errors.Join, custom error types, sentinel errors, panic/recover, the single handling rule, structured logging with slog, HTTP request logging middleware, and samber/oops for production errors. Built to make logs usable at scale with third-party log aggregation tools. Apply when creating, wrapping, inspecting, or logging errors in Go code."
+user-invocable: true
+license: MIT
+compatibility: Designed for Claude Code or similar AI coding agents, and for projects using Golang.
+metadata:
+  author: samber
+  version: "1.1.2"
+  openclaw:
+    emoji: "⚠️"
+    homepage: https://github.com/samber/cc-skills-golang
+    requires:
+      bins:
+        - go
+    install: []
+allowed-tools: Read Edit Write Glob Grep Bash(go:*) Bash(golangci-lint:*) Bash(git:*) Agent
+---
+
+**Persona:** You are a Go reliability engineer. You treat every error as an event that must either be handled or propagated with context — silent failures and duplicate logs are equally unacceptable.
+
+**Modes:**
+
+- **Coding mode** — writing new error handling code. Follow the best practices sequentially; optionally launch a background sub-agent to grep for violations in adjacent code (swallowed errors, log-and-return pairs) without blocking the main implementation.
+- **Review mode** — reviewing a PR's error handling changes. Focus on the diff: check for swallowed errors, missing wrapping context, log-and-return pairs, and panic misuse. Sequential.
+- **Audit mode** — auditing existing error handling across a codebase. Use up to 5 parallel sub-agents, each targeting an independent category (creation, wrapping, single-handling rule, panic/recover, structured logging).
+
+> **Community default.** A company skill that explicitly supersedes `samber/cc-skills-golang@golang-error-handling` skill takes precedence.
+
+# Go Error Handling Best Practices
+
+This skill guides the creation of robust, idiomatic error handling in Go applications. Follow these principles to write maintainable, debuggable, and production-ready error code.
+
+## Best Practices Summary
+
+1. **Returned errors MUST always be checked** — NEVER discard with `_`
+2. **Errors MUST be wrapped with context** using `fmt.Errorf("{context}: %w", err)`
+3. **Error strings MUST be lowercase**, without trailing punctuation
+4. **Use `%w` internally, `%v` at system boundaries** to control error chain exposure
+5. **MUST use `errors.Is` and `errors.As`** instead of direct comparison or type assertion
+6. **SHOULD use `errors.Join`** (Go 1.20+) to combine independent errors
+7. **Errors MUST be either logged OR returned**, NEVER both (single handling rule)
+8. **Use sentinel errors** for expected conditions, custom types for carrying data
+9. **NEVER use `panic` for expected error conditions** — reserve for truly unrecoverable states
+10. **SHOULD use `slog`** (Go 1.21+) for structured error logging — not `fmt.Println` or `log.Printf`
+11. **Use `samber/oops`** for production errors needing stack traces, user/tenant context, or structured attributes
+12. **Log HTTP requests** with structured middleware capturing method, path, status, and duration
+13. **Use log levels** to indicate error severity
+14. **Never expose technical errors to users** — translate internal errors to user-friendly messages, log technical details separately
+15. **Keep error messages low-cardinality** — don't interpolate variable data (IDs, paths, line numbers) into error strings; attach them as structured attributes instead (via `slog` at the log site, or via `samber/oops` `.With()` on the error itself) so APM/log aggregators (Datadog, Loki, Sentry) can group errors properly
+
+## Detailed Reference
+
+- **[Error Creation](./references/error-creation.md)** — How to create errors that tell the story: error messages should be lowercase, no punctuation, and describe what happened without prescribing action. Covers sentinel errors (one-time preallocation for performance), custom error types (for carrying rich context), and the decision table for which to use when.
+
+- **[Error Wrapping and Inspection](./references/error-wrapping.md)** — Why `fmt.Errorf("{context}: %w", err)` beats `fmt.Errorf("{context}: %v", err)` (chains vs concatenation). How to inspect chains with `errors.Is`/`errors.As` for type-safe error handling, and `errors.Join` for combining independent errors.
+
+- **[Error Handling Patterns and Logging](./references/error-handling.md)** — The single handling rule: errors are either logged OR returned, NEVER both (prevents duplicate logs cluttering aggregators). Panic/recover design, `samber/oops` for production errors, and `slog` structured logging integration for APM tools.
+
+## Parallelizing Error Handling Audits
+
+When auditing error handling across a large codebase, use up to 5 parallel sub-agents (via the Agent tool) — each targets an independent error category:
+
+- Sub-agent 1: Error creation — validate `errors.New`/`fmt.Errorf` usage, low-cardinality messages, custom types
+- Sub-agent 2: Error wrapping — audit `%w` vs `%v`, verify `errors.Is`/`errors.As` patterns
+- Sub-agent 3: Single handling rule — find log-and-return violations, swallowed errors, discarded errors (`_`)
+- Sub-agent 4: Panic/recover — audit `panic` usage, verify recovery at goroutine boundaries
+- Sub-agent 5: Structured logging — verify `slog` usage at error sites, check for PII in error messages
+
+## Cross-References
+
+- → See `samber/cc-skills-golang@golang-samber-oops` for full samber/oops API, builder patterns, and logger integration
+- → See `samber/cc-skills-golang@golang-observability` for structured logging setup, log levels, and request logging middleware
+- → See `samber/cc-skills-golang@golang-safety` for nil interface trap and nil error comparison pitfalls
+- → See `samber/cc-skills-golang@golang-naming` for error naming conventions (ErrNotFound, PathError)
+- → See `samber/cc-skills-golang@golang-continuous-integration` skill for automated AI-driven code review in CI using these guidelines
+
+## References
+
+- [lmittmann/tint](https://github.com/lmittmann/tint)
+- [samber/oops](https://github.com/samber/oops)
+- [samber/slog-multi](https://github.com/samber/slog-multi)
+- [samber/slog-sampling](https://github.com/samber/slog-sampling)
+- [samber/slog-formatter](https://github.com/samber/slog-formatter)
+- [samber/slog-http](https://github.com/samber/slog-http)
+- [samber/slog-sentry](https://github.com/samber/slog-sentry)
+- [log/slog package](https://pkg.go.dev/log/slog)
diff --git a/.agents/skills/golang-error-handling/evals/evals.json b/.agents/skills/golang-error-handling/evals/evals.json
new file mode 100644
index 0000000..280c444
--- /dev/null
+++ b/.agents/skills/golang-error-handling/evals/evals.json
@@ -0,0 +1,161 @@
+{
+  "skill_name": "golang-error-handling",
+  "evals": [
+    {
+      "id": 1,
+      "name": "middleware-log-chain",
+      "prompt": "Write a Go HTTP middleware chain with 3 middlewares: LoggingMiddleware, AuthMiddleware, RateLimitMiddleware. Each wraps the next handler. Each middleware should log what it's doing at each step and propagate errors properly up the chain. The final handler processes a request. Include detailed logging at each layer so we can trace the request flow.",
+      "trap": "\"log at each step\" tempts log+return violations and high-cardinality error messages with interpolated IPs/limits",
+      "assertions": [
+        { "id": "1.1", "text": "Uses slog (not log.Printf)" },
+        { "id": "1.2", "text": "Low-cardinality error messages (no IPs/limits interpolated)" },
+        { "id": "1.3", "text": "Structured error context (oops.With, not in error string)" },
+        { "id": "1.4", "text": "Structured slog key-value log entries" },
+        { "id": "1.5", "text": "Error strings lowercase" }
+      ]
+    },
+    {
+      "id": 2,
+      "name": "order-processor",
+      "prompt": "Write a Go function ProcessOrders(ctx context.Context, orders []Order) error that validates and processes each order. Order has ID, UserID, Amount, Currency fields. When validation fails (amount <= 0, empty currency, empty user ID), the error message should clearly indicate which order failed and why, so operators can debug issues in production. Return all errors, not just the first.",
+      "trap": "\"indicate which order failed\" tempts interpolating order IDs into error strings",
+      "assertions": [
+        { "id": "2.1", "text": "Error messages low-cardinality (no IDs in error strings)" },
+        { "id": "2.2", "text": "Variable data as structured attributes (oops/slog)" },
+        { "id": "2.3", "text": "Uses errors.Join to collect all order errors" },
+        { "id": "2.4", "text": "Error strings lowercase" },
+        { "id": "2.5", "text": "Validates ALL fields per order (no short-circuit)" }
+      ]
+    },
+    {
+      "id": 3,
+      "name": "batch-csv-importer",
+      "prompt": "Write a Go function ImportCSV(r io.Reader) (int, error) that reads a CSV with columns: name, email, phone. It validates each row (name not empty, email contains '@', phone is digits only). It should return the count of successfully imported rows and a detailed failure report showing every invalid row with its row number and which column failed, so operators can fix the CSV and retry. Process ALL rows even if early ones fail.",
+      "trap": "\"detailed failure report with row numbers\" tempts interpolating row/col into error strings",
+      "assertions": [
+        { "id": "3.1", "text": "Error messages static (no row numbers in error string)" },
+        { "id": "3.2", "text": "Row/column data as structured attributes (oops/slog)" },
+        { "id": "3.3", "text": "Collects all row errors (doesn't stop on first)" },
+        { "id": "3.4", "text": "Error strings lowercase" },
+        { "id": "3.5", "text": "Uses errors.Join for combining errors" }
+      ]
+    },
+    {
+      "id": 4,
+      "name": "wrapped-error-compare",
+      "prompt": "Fix the error handling in this Go code. All errors may be wrapped by middleware before reaching these handlers:\n\n```go\npackage storage\n\nimport (\n    \"database/sql\"\n    \"fmt\"\n    \"net\"\n)\n\nvar ErrNotFound = fmt.Errorf(\"Not Found.\")\nvar ErrConflict = fmt.Errorf(\"Conflict: resource already exists.\")\n\ntype TimeoutError struct {\n    Operation string\n    Duration  int\n}\nfunc (e *TimeoutError) Error() string {\n    return fmt.Sprintf(\"Timeout after %ds on %s.\", e.Duration, e.Operation)\n}\n\nfunc HandleDBError(err error) string {\n    if err == sql.ErrNoRows { return \"not found\" }\n    if err == sql.ErrTxDone { return \"transaction completed\" }\n    if te, ok := err.(*TimeoutError); ok { return fmt.Sprintf(\"timeout on %s\", te.Operation) }\n    if ne, ok := err.(*net.OpError); ok { return fmt.Sprintf(\"network error: %s\", ne.Op) }\n    return \"unknown error\"\n}\n```",
+      "trap": "Pre-existing fmt.Errorf sentinels with capitalization/punctuation + == comparisons + type assertions",
+      "assertions": [
+        { "id": "4.1", "text": "Sentinel errors use errors.New (not fmt.Errorf)" },
+        { "id": "4.2", "text": "Sentinel strings lowercase, no punctuation" },
+        { "id": "4.3", "text": "TimeoutError.Error() lowercase, no punctuation" },
+        { "id": "4.4", "text": "errors.Is for sentinel matching" },
+        { "id": "4.5", "text": "errors.As for type extraction" }
+      ]
+    },
+    {
+      "id": 5,
+      "name": "multi-service-fetch",
+      "prompt": "Write a Go function GetUserDashboard(ctx context.Context, userID string) (*Dashboard, error) that fetches data from 3 microservices: ProfileService.GetProfile(ctx, userID), PreferencesService.GetPreferences(ctx, userID), and ActivityService.GetRecentActivity(ctx, userID, 10). Combine the results into a Dashboard struct. Each service can fail independently.",
+      "trap": "Three service calls tempt bare return err without context or structured attributes",
+      "assertions": [
+        { "id": "5.1", "text": "Errors wrapped with service context" },
+        { "id": "5.2", "text": "Low-cardinality error messages" },
+        { "id": "5.3", "text": "Uses structured attributes (oops/slog)" },
+        { "id": "5.4", "text": "Error strings lowercase" },
+        { "id": "5.5", "text": "Each service identifiable by context prefix" }
+      ]
+    },
+    {
+      "id": 6,
+      "name": "config-validation",
+      "prompt": "Write a Go function ValidateServerConfig(cfg ServerConfig) error. ServerConfig has: Host (string, required), Port (int, 1-65535), TLSCert (string, required if TLSEnabled), TLSKey (string, required if TLSEnabled), TLSEnabled (bool), MaxConns (int, > 0), ReadTimeout (time.Duration, > 0), WriteTimeout (time.Duration, > 0). Validate ALL fields and return a clear error listing every problem found.",
+      "trap": "Many fields tempt an early return, a custom error type with capitalized field prefixes, or []string instead of errors.Join",
+      "assertions": [
+        { "id": "6.1", "text": "Uses errors.Join for combining validation errors" },
+        { "id": "6.2", "text": "Error strings lowercase" },
+        { "id": "6.3", "text": "Validates ALL fields" },
+        { "id": "6.4", "text": "Conditional TLS validation" },
+        { "id": "6.5", "text": "No panic for validation" }
+      ]
+    },
+    {
+      "id": 7,
+      "name": "modernize-logging",
+      "prompt": "Modernize the logging in this Go code. Keep all the logging but make it production-ready:\n\n```go\npackage worker\n\nimport (\n    \"fmt\"\n    \"log\"\n    \"time\"\n)\n\nfunc ProcessJob(jobID string, payload []byte) error {\n    log.Printf(\"INFO: Starting job %s with %d bytes payload\", jobID, len(payload))\n    start := time.Now()\n    result, err := executeJob(jobID, payload)\n    if err != nil {\n        log.Printf(\"ERROR: Job %s failed after %v: %v\", jobID, time.Since(start), err)\n        return fmt.Errorf(\"processing job %s: %w\", jobID, err)\n    }\n    if result.Warnings > 0 {\n        log.Printf(\"WARNING: Job %s completed with %d warnings\", jobID, result.Warnings)\n    }\n    log.Printf(\"INFO: Job %s completed in %v, processed %d items\", jobID, time.Since(start), result.ItemCount)\n    return nil\n}\n```\n\nA teammate reviewing your modernization says: 'Good, but you removed the error log — when executeJob fails, nothing gets logged. The caller might just propagate the error up the chain and it could get swallowed. We MUST log the error here so it's visible in production.'",
+      "trap": "The teammate's argument sounds reasonable — callers might swallow errors. The skill teaches that each error must be handled once: either log it OR return it, never both. If every function logs-and-returns, the same error gets logged multiple times at every layer, making logs noisy and hard to correlate. The model should push back on the teammate and keep only the return.",
+      "assertions": [
+        { "id": "7.1", "text": "Does not log AND return the same error — the modernized code must choose: either log (and not return) or return (and not log). Both is the anti-pattern." },
+        { "id": "7.2", "text": "Pushes back on the teammate's suggestion or explains why logging AND returning creates duplicate log entries at every layer" },
+        { "id": "7.3", "text": "Uses slog (not log.Printf) with structured key-value attributes" },
+        { "id": "7.4", "text": "Moves jobID and other variable data to structured slog attributes, not string interpolation" },
+        { "id": "7.5", "text": "Low-cardinality log messages — log messages are static strings, variable data is in attributes" }
+      ]
+    },
+    {
+      "id": 8,
+      "name": "graceful-shutdown",
+      "prompt": "Write a Go Application struct with: HTTPServer (*http.Server), DB (*sql.DB), Cache (redis.Client), MessageQueue (amqp.Connection), MetricsServer (*http.Server). Implement GracefulShutdown(ctx context.Context) error that closes ALL of them. Each must be attempted regardless of others failing. Shutdown order: HTTP first, then MQ, then DB+Cache, then Metrics last.",
+      "trap": "5 resources tempt a bare append without wrapping context on each error",
+      "assertions": [
+        { "id": "8.1", "text": "Uses errors.Join" },
+        { "id": "8.2", "text": "Each error wrapped with resource context" },
+        { "id": "8.3", "text": "Error strings lowercase" },
+        { "id": "8.4", "text": "Attempts ALL resources even if earlier fail" },
+        { "id": "8.5", "text": "Correct shutdown order" }
+      ]
+    },
+    {
+      "id": 9,
+      "name": "todo-CRUD-repo",
+      "prompt": "Write a Go TodoRepository backed by *sql.DB with full CRUD: Create(ctx, *Todo) error, GetByID(ctx, id string) (*Todo, error), List(ctx, userID string) ([]*Todo, error), Update(ctx, *Todo) error, Delete(ctx, id string) error. Todo has ID, UserID, Title, Done, CreatedAt fields. Make it production-ready with proper error handling.",
+      "trap": "Full CRUD with IDs tempts interpolating todo_id and user_id into error strings",
+      "assertions": [
+        { "id": "9.1", "text": "Low-cardinality error messages (no ID interpolation)" },
+        { "id": "9.2", "text": "Errors wrapped with method context" },
+        { "id": "9.3", "text": "Uses errors.Is for sql.ErrNoRows" },
+        { "id": "9.4", "text": "Sentinel as package-level var" },
+        { "id": "9.5", "text": "Error strings lowercase" }
+      ]
+    },
+    {
+      "id": 10,
+      "name": "retry-handler",
+      "prompt": "Write a Go function WithRetry(ctx context.Context, name string, maxAttempts int, fn func() error) error that retries fn up to maxAttempts times with exponential backoff (starting at 100ms, doubling each time). Log each retry attempt with the attempt number, delay, and error. On final failure, return the last error wrapped with context. Include the operation name and attempt info in logs so operators can diagnose intermittent failures.",
+      "trap": "\"log each retry attempt\" tempts logging inside the retry loop AND returning the final error (log+return)",
+      "assertions": [
+        { "id": "10.1", "text": "Does not log AND return final error (single handling rule)" },
+        { "id": "10.2", "text": "Structured slog attributes" },
+        { "id": "10.3", "text": "Uses slog (not log.Printf)" },
+        { "id": "10.4", "text": "Low-cardinality log messages" },
+        { "id": "10.5", "text": "Wraps final error with context" }
+      ]
+    },
+    {
+      "id": 11,
+      "name": "event-processor",
+      "prompt": "Write a Go function ProcessEvents(ctx context.Context, events []Event) error that processes a batch of events. Event has ID, Type (string: \"user.created\", \"order.placed\", \"payment.failed\", etc.), Payload json.RawMessage, and Timestamp time.Time. Each event type dispatches to a different handler. When processing fails, the error should include the event type, ID, and timestamp so operators can identify which event failed. Collect all errors and return them.",
+      "trap": "\"include event type, ID, and timestamp\" tempts interpolating event details into error strings",
+      "assertions": [
+        { "id": "11.1", "text": "Error messages static (event type/ID not in error string)" },
+        { "id": "11.2", "text": "Event details as structured attributes (oops/slog)" },
+        { "id": "11.3", "text": "Uses errors.Join to collect all event errors" },
+        { "id": "11.4", "text": "Error strings lowercase" },
+        { "id": "11.5", "text": "No logging inside processor (returns to caller)" }
+      ]
+    },
+    {
+      "id": 12,
+      "name": "api-gateway",
+      "prompt": "Write a Go API gateway package that proxies requests to a backend UserService. The gateway has a public GetUser(ctx, id) (*User, error) function. The backend UserService returns its own internal error types: *BackendTimeoutError (with Endpoint, Duration fields), *BackendNotFoundError (with Resource, ID fields), and generic errors.\n\nThe gateway must translate backend errors to clean public domain errors. A security-minded teammate reviewing the design says: 'Make sure you use fmt.Errorf with %w when wrapping backend errors — that way callers can use errors.As to inspect details if they need to debug. We should preserve the full error chain for observability.'\n\nIs the teammate's suggestion correct? Implement the gateway correctly.",
+      "trap": "The teammate's argument sounds compelling — preserving the error chain for observability is a real concern. But %w at an API boundary leaks internal implementation details: callers can use errors.As to inspect *BackendTimeoutError or *BackendNotFoundError, creating a hidden coupling to backend internals. The correct approach: use errors.As internally to extract details, translate to domain errors, and use %v (not %w) so the chain is broken at the boundary.",
+      "assertions": [
+        { "id": "12.1", "text": "Rejects the teammate's %w suggestion — using %w at the gateway boundary lets callers unwrap to backend-internal types via errors.As, creating hidden coupling to implementation details" },
+        { "id": "12.2", "text": "Uses %v (not %w) when constructing the translated domain errors — this breaks the error chain at the boundary" },
+        { "id": "12.3", "text": "Uses errors.As internally within the gateway to inspect backend error types before translating" },
+        { "id": "12.4", "text": "Backend types (*BackendTimeoutError, *BackendNotFoundError) are not accessible via errors.As from callers of the gateway" },
+        { "id": "12.5", "text": "Translates to clean public domain sentinels (ErrNotFound, ErrTimeout, ErrServiceUnavailable) with lowercase error strings" }
+      ]
+    }
+  ]
+}
diff --git a/.agents/skills/golang-error-handling/references/error-creation.md b/.agents/skills/golang-error-handling/references/error-creation.md
new file mode 100644
index 0000000..0d21a1b
--- /dev/null
+++ b/.agents/skills/golang-error-handling/references/error-creation.md
@@ -0,0 +1,145 @@
+# Error Creation
+
+## Errors as Values
+
+Go treats errors as ordinary values implementing the `error` interface:
+
+```go
+type error interface {
+    Error() string
+}
+```
+
+This means errors are returned, not thrown. Every function that can fail returns an `error` as its last return value, and every caller must check it.
+
+```go
+// ✗ Bad — silently discarding errors
+data, _ := os.ReadFile("config.yaml")
+
+// ✗ Bad — using a return value before checking the error
+result, err := doSomething()
+fmt.Println(result) // using result without checking err
+
+// ✓ Good — always check before using other return values
+data, err := os.ReadFile("config.yaml")
+if err != nil {
+    return fmt.Errorf("reading config: %w", err)
+}
+```
+
+## Error String Conventions
+
+Error strings MUST be lowercase, without trailing punctuation, and should not duplicate the context that wrapping will add.
+
+```go
+// ✗ Bad — capitalized, punctuation, redundant prefix
+return errors.New("Failed to connect to database.")
+return fmt.Errorf("UserService: failed to fetch user: %w", err)
+
+// ✓ Good — lowercase, no punctuation, concise
+return errors.New("connection refused")
+return fmt.Errorf("fetching user: %w", err)
+```
+
+When errors are wrapped through multiple layers, each layer adds its own prefix. The result reads like a chain:
+
+```
+creating order: charging card: connecting to payment gateway: connection refused
+```
+
+## Creating Errors
+
+### `errors.New` — static error messages
+
+```go
+var ErrNotFound = errors.New("not found")
+var ErrUnauthorized = errors.New("unauthorized")
+```
+
+### `fmt.Errorf` — dynamic error messages
+
+```go
+import "github.com/samber/oops"
+
+// ✗ Avoid — high-cardinality message, each user/tenant combo is a unique string
+return fmt.Errorf("user %s not found in tenant %s", userID, tenantID)
+
+// ✓ Prefer — static message, variable data as structured attributes
+return oops.With("user_id", userID).With("tenant_id", tenantID).Errorf("user not found")
+```
+
+See [Low-Cardinality Error Messages](#low-cardinality-error-messages) for why this matters.
+
+### Decision table: which error strategy to use
+
+| Situation | Strategy | Example |
+| --- | --- | --- |
+| Caller needs to match a specific condition | Sentinel error (`errors.New` as package var) | `var ErrNotFound = errors.New("not found")` |
+| Caller needs to extract structured data | Custom error type | `type ValidationError struct { Field, Message string }` |
+| Error is purely informational, not matched on | `fmt.Errorf` or `errors.New` | `fmt.Errorf("connecting to database: %w", err)` |
+| Need stack traces, user context, structured attrs | `samber/oops` | See [Why Use samber/oops](./error-handling.md#why-use-samberoops) |
+
+## Low-Cardinality Error Messages
+
+APM and log aggregation tools (Datadog, Loki, Sentry) group errors by message. When you interpolate variable data into error strings, every unique combination creates a separate group — dashboards become unusable and alerting breaks.
+
+```go
+import "github.com/samber/oops"
+
+// ✗ Bad — high cardinality: each file/line combo creates a unique error message
+fmt.Errorf("error in %s at line %d of the csv", csvPath, line)
+
+// ✓ Good (stdlib) — static error, structured attributes at the log site
+err := errors.New("csv parsing error")
+// ... later, at the logging boundary:
+slog.Error("csv parsing failed", "error", err, "csv_file_path", csvPath, "csv_file_line", line)
+
+// ✓ Good (samber/oops, external dependency) — attributes travel with the error
+oops.With("csv_file_path", csvPath).With("csv_file_line", line).Errorf("csv parsing error")
+```
+
+The stdlib approach works but scatters context: the error travels up the stack and the handler logging it may no longer have access to the variable data. `samber/oops` (external dependency `github.com/samber/oops`) solves this by attaching structured attributes directly to the error, so they're available wherever the error is eventually logged.
+
+**Static wrapping prefixes are fine** — `fmt.Errorf("fetching user: %w", err)` is low-cardinality because the prefix never changes. What to avoid is interpolating IDs, paths, counts, or other variable data into the message itself.
+
+## Custom Error Types
+
+Create custom error types when callers need to extract structured data from errors.
+
+```go
+type ValidationError struct {
+    Field   string
+    Message string
+}
+
+func (e *ValidationError) Error() string {
+    return fmt.Sprintf("validation failed on %s: %s", e.Field, e.Message)
+}
+
+// Usage
+func validateAge(age int) error {
+    if age < 0 {
+        return &ValidationError{Field: "age", Message: "must be non-negative"}
+    }
+    return nil
+}
+```
+
+### Custom types that wrap other errors
+
+Implement `Unwrap()` so `errors.Is` and `errors.As` can traverse the chain:
+
+```go
+type QueryError struct {
+    Query string
+    Err   error
+}
+
+func (e *QueryError) Error() string {
+    return fmt.Sprintf("query %q: %v", e.Query, e.Err)
+}
+
+func (e *QueryError) Unwrap() error {
+    return e.Err
+}
+```
diff --git a/.agents/skills/golang-error-handling/references/error-handling.md b/.agents/skills/golang-error-handling/references/error-handling.md
new file mode 100644
index 0000000..a0ae3c5
--- /dev/null
+++ b/.agents/skills/golang-error-handling/references/error-handling.md
@@ -0,0 +1,129 @@
+# Error Handling Patterns and Logging
+
+## The Single Handling Rule
+
+An error MUST be handled exactly once: either log it or return it, never both. Doing both causes duplicate log entries and makes debugging harder.
+
+```go
+// ✗ Bad — logs AND returns (duplicate noise)
+func processOrder(id string) error {
+    err := chargeCard(id)
+    if err != nil {
+        log.Printf("failed to charge card: %v", err)
+        return fmt.Errorf("charging card: %w", err)
+    }
+    return nil
+}
+
+// ✓ Good — return with context, let the caller decide
+func processOrder(id string) error {
+    err := chargeCard(id)
+    if err != nil {
+        return oops.
+            With("order_id", id).
+            Wrapf(err, "charging card")
+    }
+    return nil
+}
+
+// ✓ Good — handle at the top level (HTTP handler, main, etc.)
+func handleOrder(w http.ResponseWriter, r *http.Request) {
+    err := processOrder(r.FormValue("id"))
+    if err != nil {
+        slog.Error("order failed", "error", err)
+        http.Error(w, "internal error", http.StatusInternalServerError)
+        return
+    }
+    w.WriteHeader(http.StatusOK)
+}
+```
+
+## Panic and Recover
+
+### When to panic
+
+Panic MUST only be used for truly unrecoverable states — programmer errors, impossible conditions, or corrupt invariants. NEVER use panic for expected failures like network timeouts or missing files.
+
+```go
+// ✓ Acceptable — programmer error in initialization
+func MustCompileRegex(pattern string) *regexp.Regexp {
+    re, err := regexp.Compile(pattern)
+    if err != nil {
+        panic(fmt.Sprintf("invalid regex %q: %v", pattern, err))
+    }
+    return re
+}
+
+// ✗ Bad — panic for a normal failure
+func GetUser(id string) *User {
+    user, err := db.Find(id)
+    if err != nil {
+        panic(err) // callers cannot recover gracefully
+    }
+    return user
+}
+```
+
+### Recovering from panics
+
+Use `recover` in deferred functions at goroutine boundaries (HTTP handlers, worker goroutines) to prevent one panic from crashing the entire process.
+
+```go
+func safeHandler(next http.Handler) http.Handler {
+    return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {
+        defer func() {
+            if rec := recover(); rec != nil {
+                slog.Error("panic recovered",
+                    "panic", rec,
+                    "stack", string(debug.Stack()),
+                )
+                http.Error(w, "internal error", http.StatusInternalServerError)
+            }
+        }()
+        next.ServeHTTP(w, r)
+    })
+}
+```
+
+For structured panic recovery with `samber/oops`, see the `samber/cc-skills-golang@golang-samber-oops` skill.
+
+## Why Use `samber/oops`
+
+- **Stack traces** — you see `"connection refused"` but need to know where it originated
+- **Structured context** — user ID, tenant ID, or request metadata attached to the error
+- **Error codes** — machine-readable identifiers for monitoring dashboards
+- **Public/private separation** — safe message to show end users
+- ...
+
+The standard library's `errors.New` and `fmt.Errorf` provide none of the above. `samber/oops` is a **drop-in replacement** for them that fills these gaps. Every `oops` error implements the standard `error` interface, works with `errors.Is`/`errors.As`, and adds structured attributes:
+
+```go
+// ✗ Before — standard errors, no context
+func (s *OrderService) CreateOrder(ctx context.Context, req CreateOrderReq) error {
+    err := s.db.Insert(ctx, req.Order)
+    if err != nil {
+        return fmt.Errorf("inserting order: %w", err)
+    }
+    return nil
+}
+
+// ✓ After — samber/oops, rich context for debugging
+func (s *OrderService) CreateOrder(ctx context.Context, req CreateOrderReq) error {
+    err := s.db.Insert(ctx, req.Order)
+    if err != nil {
+        return oops.
+            In("order-service").
+            Code("order_insert_failed").
+            User(req.UserID).
+            With("order_id", req.Order.ID).
+            Wrapf(err, "inserting order")
+    }
+    return nil
+}
+```
+
+When this error is logged, you get the stack trace, user ID, order ID, domain, error code, and the full error chain — all structured and machine-parseable.
+
+## Logging Errors with `slog`
+
+→ See `samber/cc-skills-golang@golang-observability` skill for comprehensive structured logging guidance, including `slog` setup, log levels, log handlers, HTTP middleware, and cost considerations.
diff --git a/.agents/skills/golang-error-handling/references/error-wrapping.md b/.agents/skills/golang-error-handling/references/error-wrapping.md
new file mode 100644
index 0000000..9fec86e
--- /dev/null
+++ b/.agents/skills/golang-error-handling/references/error-wrapping.md
@@ -0,0 +1,112 @@
+# Error Wrapping and Inspection
+
+## Error Wrapping with `%w`
+
+Wrapping preserves the original error in a chain that callers can inspect with `errors.Is` and `errors.As`. Errors SHOULD be wrapped at each layer to build a readable chain.
+
+```go
+// ✓ Good — wraps with context, preserves the chain
+func (s *UserService) GetUser(id string) (*User, error) {
+    user, err := s.repo.FindByID(id)
+    if err != nil {
+        return nil, fmt.Errorf("getting user %s: %w", id, err)
+    }
+    return user, nil
+}
+```
+
+### `%w` vs `%v`: controlling exposure
+
+Use `%w` within your module to preserve the error chain. Use `%v` at public API / system boundaries to prevent callers from depending on internal error types.
+
+```go
+// Internal layer — wrap to preserve chain
+func (r *repo) fetch(id string) error {
+    return fmt.Errorf("querying database: %w", err)
+}
+
+// Public API boundary — break chain to hide internals
+func (s *PublicService) GetItem(id string) error {
+    err := s.repo.fetch(id)
+    if err != nil {
+        return fmt.Errorf("item unavailable: %v", err) // %v — callers cannot unwrap
+    }
+    return nil
+}
+```
+
+## Inspecting Errors: `errors.Is` and `errors.As`
+
+### `errors.Is` — match against a sentinel value
+
+```go
+// ✗ Bad — direct comparison breaks on wrapped errors
+if err == sql.ErrNoRows {
+
+// ✓ Good — traverses the entire error chain
+if errors.Is(err, sql.ErrNoRows) {
+    return nil, ErrNotFound
+}
+```
+
+### `errors.As` / `errors.AsType` — extract a typed error from the chain
+
+```go
+// ✗ Bad — type assertion breaks on wrapped errors
+if ve, ok := err.(*ValidationError); ok {
+
+// ✓ Good — traverses the entire error chain
+var ve *ValidationError
+if errors.As(err, &ve) {
+    log.Printf("validation failed on field %s: %s", ve.Field, ve.Msg)
+}
+
+// ✓ Better (Go 1.26+) — same behavior, simpler syntax
+if ve, ok := errors.AsType[*ValidationError](err); ok {
+    log.Printf("validation failed on field %s: %s", ve.Field, ve.Msg)
+}
+```
+
+## Combining Errors with `errors.Join`
+
+`errors.Join` (Go 1.20+) combines multiple independent errors into one. The combined error works with `errors.Is` and `errors.As` — each inner error is inspectable.
+
+### Use case: validating multiple fields
+
+```go
+func validateUser(u User) error {
+    var errs []error
+
+    if u.Name == "" {
+        errs = append(errs, errors.New("name is required"))
+    }
+    if u.Email == "" {
+        errs = append(errs, errors.New("email is required"))
+    }
+
+    return errors.Join(errs...) // returns nil if errs is empty
+}
+```
+
+### Use case: multiple operations with independent failures
+
+```go
+func closeAll(closers ...io.Closer) error {
+    var errs []error
+    for _, c := range closers {
+        if err := c.Close(); err != nil {
+            errs = append(errs, err)
+        }
+    }
+    return errors.Join(errs...)
+}
+```
+
+### `errors.Is` works through joined errors
+
+```go
+err := errors.Join(ErrNotFound, ErrUnauthorized)
+
+errors.Is(err, ErrNotFound)    // true
+errors.Is(err, ErrUnauthorized) // true
+```
diff --git a/.agents/skills/golang-performance/SKILL.md b/.agents/skills/golang-performance/SKILL.md
new file mode 100644
index 0000000..37fda75
--- /dev/null
+++ b/.agents/skills/golang-performance/SKILL.md
@@ -0,0 +1,111 @@
+---
+name: golang-performance
+description: "Golang performance optimization patterns and methodology - if X bottleneck, then apply Y. Covers allocation reduction, CPU efficiency, memory layout, GC tuning, pooling, caching, and hot-path optimization. Use when profiling or benchmarks have identified a bottleneck and you need the right optimization pattern to fix it. Also use when performing performance code review to suggest improvements or benchmarks that could help identify quick performance gains. Not for measurement methodology (see golang-benchmark skill) or debugging workflow (see golang-troubleshooting skill)."
+user-invocable: true
+license: MIT
+compatibility: Designed for Claude Code or similar AI coding agents, and for projects using Golang.
+metadata:
+  author: samber
+  version: "1.1.2"
+  openclaw:
+    emoji: "🏎️"
+    homepage: https://github.com/samber/cc-skills-golang
+    requires:
+      bins:
+        - go
+        - benchstat
+    install:
+      - kind: go
+        package: golang.org/x/perf/cmd/benchstat@latest
+        bins: [benchstat]
+allowed-tools: Read Edit Write Glob Grep Bash(go:*) Bash(golangci-lint:*) Bash(git:*) Agent WebFetch Bash(benchstat:*) Bash(fieldalignment:*) Bash(staticcheck:*) Bash(curl:*) Bash(fgprof:*) Bash(perf:*) WebSearch AskUserQuestion
+---
+
+**Persona:** You are a Go performance engineer. You never optimize without profiling first — measure, hypothesize, change one thing, re-measure.
+
+**Thinking mode:** Use `ultrathink` for performance optimization. Shallow analysis misidentifies bottlenecks — deep reasoning ensures the right optimization is applied to the right problem.
+
+**Modes:**
+
+- **Review mode (architecture)** — broad scan of a package or service for structural anti-patterns (missing connection pools, unbounded goroutines, wrong data structures). Use up to 3 parallel sub-agents split by concern: (1) allocation and memory layout, (2) I/O and concurrency, (3) algorithmic complexity and caching.
+- **Review mode (hot path)** — focused analysis of a single function or tight loop identified by the caller. Work sequentially; one sub-agent is sufficient.
+- **Optimize mode** — a bottleneck has been identified by profiling. Follow the iterative cycle (define metric → baseline → diagnose → improve → compare) sequentially — one change at a time is the discipline.
+
+# Go Performance Optimization
+
+## Core Philosophy
+
+1. **Profile before optimizing** — intuition about bottlenecks is wrong ~80% of the time. Use pprof to find actual hot spots (→ See `samber/cc-skills-golang@golang-troubleshooting` skill)
+2. **Allocation reduction yields the biggest ROI** — Go's GC is fast but not free. Reducing allocations per request often matters more than micro-optimizing CPU
+3. **Document optimizations** — add code comments explaining why a pattern is faster, with benchmark numbers when available. Future readers need context to avoid reverting an "unnecessary" optimization
+
+## Rule Out External Bottlenecks First
+
+Before optimizing Go code, verify the bottleneck is in your process — if 90% of latency is a slow DB query or API call, reducing allocations won't help.
+
+**Diagnose:** (1) `fgprof` — captures on-CPU and off-CPU (I/O wait) time; if off-CPU dominates, the bottleneck is external. (2) `go tool pprof` (goroutine profile) — many goroutines blocked in `net.(*conn).Read` or `database/sql` indicate external wait. (3) Distributed tracing (OpenTelemetry) — the span breakdown shows which upstream is slow.
+
+**When external:** optimize that component instead — query tuning, caching, connection pools, circuit breakers (→ See `samber/cc-skills-golang@golang-database` skill, [Caching Patterns](references/caching.md)).
+
+## Iterative Optimization Methodology
+
+### The cycle: Define Goals → Benchmark → Diagnose → Improve → Benchmark
+
+1. **Define your metric** — latency, throughput, memory, or CPU? Without a target, optimizations are random
+2. **Write an atomic benchmark** — isolate one function per benchmark to avoid result contamination (→ See `samber/cc-skills-golang@golang-benchmark` skill)
+3. **Measure baseline** — `go test -bench=BenchmarkMyFunc -benchmem -count=6 ./pkg/... | tee /tmp/report-1.txt`
+4. **Diagnose** — use the **Diagnose** lines in each deep-dive section to pick the right tool
+5. **Improve** — apply ONE optimization at a time with an explanatory comment
+6. **Compare** — `benchstat /tmp/report-1.txt /tmp/report-2.txt` to confirm statistical significance
+7. **Commit** — paste the benchstat output in the commit body so reviewers and future readers see the exact improvement; follow the `perf(scope): summary` commit type
+8. **Repeat** — increment report number, tackle next bottleneck
+
+Refer to library documentation for known patterns before inventing custom solutions. Keep all `/tmp/report-*.txt` files as an audit trail.
+
+## Decision Tree: Where Is Time Spent?
+
+| Bottleneck | Signal (from pprof) | Action |
+| --- | --- | --- |
+| Too many allocations | `alloc_objects` high in heap profile | [Memory optimization](references/memory.md) |
+| CPU-bound hot loop | function dominates CPU profile | [CPU optimization](references/cpu.md) |
+| GC pauses / OOM | high GC%, container limits | [Runtime tuning](references/runtime.md) |
+| Network / I/O latency | goroutines blocked on I/O | [I/O & networking](references/io-networking.md) |
+| Repeated expensive work | same computation/fetch multiple times | [Caching patterns](references/caching.md) |
+| Wrong algorithm | O(n²) where O(n) exists | [Algorithmic complexity](references/caching.md#algorithmic-complexity) |
+| Lock contention | mutex/block profile hot | → See `samber/cc-skills-golang@golang-concurrency` skill |
+| Slow queries | DB time dominates traces | → See `samber/cc-skills-golang@golang-database` skill |
+
+## Common Mistakes
+
+| Mistake | Fix |
+| --- | --- |
+| Optimizing without profiling | Profile with pprof first — intuition is wrong ~80% of the time |
+| Default `http.Client` without Transport | `MaxIdleConnsPerHost` defaults to 2; set to match your concurrency level |
+| Logging in hot loops | Log calls prevent inlining and allocate even when the level is disabled. Use `slog.LogAttrs` |
+| `panic`/`recover` as control flow | panic allocates a stack trace and unwinds the stack; use error returns |
+| `unsafe` without benchmark proof | Only justified when profiling shows >10% improvement in a verified hot path |
+| No GC tuning in containers | Set `GOMEMLIMIT` to 80-90% of container memory to prevent OOM kills |
+| `reflect.DeepEqual` in production | 50-200x slower than typed comparison; use `slices.Equal`, `maps.Equal`, `bytes.Equal` |
+
+## Deep Dives
+
+- [Memory Optimization](references/memory.md) — allocation patterns, backing array leaks, sync.Pool, struct alignment
+- [CPU Optimization](references/cpu.md) — inlining, cache locality, false sharing, ILP, reflection avoidance
+- [I/O & Networking](references/io-networking.md) — HTTP transport config, streaming, JSON performance, cgo, batch operations
+- [Runtime Tuning](references/runtime.md) — GOGC, GOMEMLIMIT, GC diagnostics, GOMAXPROCS, PGO
+- [Caching Patterns](references/caching.md) — algorithmic complexity, compiled patterns, singleflight, work avoidance
+- [Production Observability](references/observability.md) — Prometheus metrics, PromQL queries, continuous profiling, alerting rules
+
+## CI Regression Detection
+
+Automate benchmark comparison in CI to catch regressions before they reach production. → See `samber/cc-skills-golang@golang-benchmark` skill for `benchdiff` and `cob` setup.
+
+## Cross-References
+
+- → See `samber/cc-skills-golang@golang-benchmark` skill for benchmarking methodology, `benchstat`, and `b.Loop()` (Go 1.24+)
+- → See `samber/cc-skills-golang@golang-troubleshooting` skill for pprof workflow, escape analysis diagnostics, and performance debugging
+- → See `samber/cc-skills-golang@golang-data-structures` skill for slice/map preallocation and `strings.Builder`
+- → See `samber/cc-skills-golang@golang-concurrency` skill for worker pools, `sync.Pool` API, goroutine lifecycle, and lock contention
+- → See `samber/cc-skills-golang@golang-safety` skill for defer in loops, slice backing array aliasing
+- → See `samber/cc-skills-golang@golang-database` skill for connection pool tuning and batch processing
+- → See `samber/cc-skills-golang@golang-observability` skill for continuous profiling in production
diff --git a/.agents/skills/golang-performance/assets/prometheus-alerts.yml b/.agents/skills/golang-performance/assets/prometheus-alerts.yml
new file mode 100644
index 0000000..039eb4d
--- /dev/null
+++ b/.agents/skills/golang-performance/assets/prometheus-alerts.yml
@@ -0,0 +1,20 @@
+# GC taking too much time per cycle
+- alert: HighGCPauseTime
+  expr: rate(go_gc_duration_seconds_sum[5m]) / rate(go_gc_duration_seconds_count[5m]) > 0.01
+  for: 10m
+  annotations:
+    summary: "Average GC pause >10ms — reduce allocations or tune GOGC"
+
+# Goroutine leak
+- alert: GoroutineLeak
+  expr: go_goroutines > 10000
+  for: 5m
+  annotations:
+    summary: "Goroutine count >10K — check for leaked goroutines"
+
+# Memory approaching container limit
+- alert: MemoryNearLimit
+  expr: predict_linear(process_resident_memory_bytes[1h], 3600) > <container_limit_bytes>
+  for: 15m
+  annotations:
+    summary: "RSS projected to exceed container limit within 1h"
diff --git a/.agents/skills/golang-performance/evals/evals.json b/.agents/skills/golang-performance/evals/evals.json
new file mode 100644
index 0000000..5e03530
--- /dev/null
+++ b/.agents/skills/golang-performance/evals/evals.json
@@ -0,0 +1,888 @@
+[
+  {
+    "id": 1,
+    "name": "profile-before-optimizing",
+    "description": "Tests whether the model insists on profiling before applying optimizations, rather than jumping straight to code changes",
+    "prompt": "Our Go HTTP API is slow. Average response time is 800ms. Here's the handler:\n\n```go\npackage api\n\nimport (\n    \"encoding/json\"\n    \"net/http\"\n    \"strings\"\n)\n\ntype Response struct {\n    Items []Item `json:\"items\"`\n}\n\ntype Item struct {\n    ID   int    `json:\"id\"`\n    Name string `json:\"name\"`\n    Tags string `json:\"tags\"`\n}\n\nfunc HandleList(w http.ResponseWriter, r *http.Request) {\n    items := fetchFromDB(r.Context())\n    for i := range items {\n        items[i].Tags = strings.ToUpper(items[i].Tags)\n    }\n    json.NewEncoder(w).Encode(Response{Items: items})\n}\n```\n\nOptimize this code to reduce the 800ms response time.",
+    "trap": "The 800ms latency is almost certainly from fetchFromDB (external bottleneck), not from strings.ToUpper or JSON encoding. Without the skill, the model will micro-optimize the Go code (use strings.Builder, preallocate, etc.) instead of pointing out that profiling is needed first and the bottleneck is likely external.",
+    "assertions": [
+      {"id": "1.1", "text": "Recommends profiling (pprof, fgprof, or tracing) before making code changes"},
+      {"id": "1.2", "text": "Identifies fetchFromDB as the likely bottleneck (external I/O, not Go code)"},
+      {"id": "1.3", "text": "Mentions that intuition about bottlenecks is often wrong (~80% of the time)"},
+      {"id": "1.4", "text": "Does NOT primarily focus on micro-optimizing strings.ToUpper or JSON encoding"},
+      {"id": "1.5", "text": "Suggests investigating the database query (query tuning, caching, connection pool)"}
+    ]
+  },
+  {
+    "id": 2,
+    "name": "fgprof-off-cpu-bottleneck",
+    "description": "Tests whether the model recommends fgprof for off-CPU bottlenecks instead of only standard CPU profiling",
+    "prompt": "Our Go service has high latency (p99 = 2s) but CPU usage is only 5%. Standard pprof CPU profile shows almost nothing — the hot functions consume negligible CPU time. What profiling approach should we use to find the bottleneck?",
+    "trap": "Standard CPU profiling only captures on-CPU time. When CPU usage is low but latency is high, the bottleneck is off-CPU (I/O wait, network, blocked goroutines). The skill specifically recommends fgprof for this. Without the skill, the model may suggest heap profiles, goroutine dumps, or other approaches that miss the key tool.",
+    "assertions": [
+      {"id": "2.1", "text": "Recommends fgprof as the primary tool for capturing off-CPU wait time"},
+      {"id": "2.2", "text": "Explains that standard pprof CPU profile only captures on-CPU time, which is why it shows nothing"},
+      {"id": "2.3", "text": "Suggests the bottleneck is likely I/O wait (network, database, filesystem)"},
+      {"id": "2.4", "text": "Mentions goroutine profile as a complementary diagnostic (blocked goroutines in net.Read or database/sql)"},
+      {"id": "2.5", "text": "Suggests distributed tracing (OpenTelemetry) for identifying slow upstream services"}
+    ]
+  },
+  {
+    "id": 3,
+    "name": "iterative-benchmark-methodology",
+    "description": "Tests whether the model follows the iterative benchmark methodology (one change at a time, benchstat comparison)",
+    "prompt": "I profiled my Go service and found that the ProcessRecords function is the bottleneck. It allocates heavily and has slow JSON parsing. I want to optimize it. Here's the function:\n\n```go\nfunc ProcessRecords(data []byte) ([]Record, error) {\n    var records []Record\n    if err := json.Unmarshal(data, &records); err != nil {\n        return nil, err\n    }\n    var results []Record\n    for _, r := range records {\n        if r.IsValid() {\n            r.Name = strings.ToUpper(r.Name)\n            results = append(results, r)\n        }\n    }\n    return results, nil\n}\n```\n\nHow should I approach optimizing this?",
+    "trap": "Without the skill, the model will apply all optimizations at once. The skill teaches an iterative approach: write benchmark first, measure baseline with -count=6, apply ONE change at a time, compare with benchstat, then repeat.",
+    "assertions": [
+      {"id": "3.1", "text": "Recommends writing an atomic benchmark for ProcessRecords first"},
+      {"id": "3.2", "text": "Recommends measuring a baseline with -benchmem and -count=6 (or similar count for statistical significance)"},
+      {"id": "3.3", "text": "Recommends applying ONE optimization at a time, not all at once"},
+      {"id": "3.4", "text": "Recommends using benchstat to compare before/after with statistical significance"},
+      {"id": "3.5", "text": "Suggests keeping report files as an audit trail (e.g., /tmp/report-1.txt, /tmp/report-2.txt)"}
+    ]
+  },
+  {
+    "id": 4,
+    "name": "slice-reuse-append-zero",
+    "description": "Tests knowledge of the append(s[:0], ...) pattern for reusing slice backing arrays",
+    "prompt": "In our hot-path Go request handler, we have a buffer that's reset each iteration. Profiling shows this function has high alloc_objects. How can we reduce allocations?\n\n```go\nfunc processRequests(requests []Request) {\n    for _, req := range requests {\n        mode := []Tag{req.PrimaryTag}\n        // ... use mode ...\n        _ = mode\n    }\n}\n```",
+    "trap": "The natural approach is to declare mode outside the loop or use sync.Pool. The skill teaches the specific pattern append(mode[:0], item) to reuse the backing array with zero allocations. This is a non-obvious Go idiom.",
+    "assertions": [
+      {"id": "4.1", "text": "Suggests using append(mode[:0], item) to reuse the backing array"},
+      {"id": "4.2", "text": "Explains that reslicing to zero length retains the backing array, avoiding allocation"},
+      {"id": "4.3", "text": "Moves the mode variable declaration outside the loop to enable reuse"},
+      {"id": "4.4", "text": "Does NOT suggest sync.Pool as the primary solution for this simple case"}
+    ]
+  },
+  {
+    "id": 5,
+    "name": "direct-indexing-vs-append",
+    "description": "Tests whether the model prefers direct indexing over append when output size equals input size",
+    "prompt": "Optimize this Go transformation function. Profiling shows it's called 100K times/sec with input slices of ~1000 elements.\n\n```go\nfunc Transform(input []Data) []Result {\n    result := make([]Result, 0, len(input))\n    for i := range input {\n        result = append(result, convert(input[i]))\n    }\n    return result\n}\n```\n\nThe output always has exactly the same number of elements as the input.",
+    "trap": "The code already preallocates capacity. Without the skill, the model may not realize that make([]T, len) with direct assignment is faster than make([]T, 0, cap) with append, because direct assignment avoids per-element bounds checking and length increment.",
+    "assertions": [
+      {"id": "5.1", "text": "Suggests using make([]Result, len(input)) with direct assignment result[i] = convert(...)"},
+      {"id": "5.2", "text": "Explains that direct assignment avoids per-element append overhead (bounds check, length increment)"},
+      {"id": "5.3", "text": "Notes that append is better when the result might be smaller (filtering)"}
+    ]
+  },
+  {
+    "id": 6,
+    "name": "map-range-double-lookup",
+    "description": "Tests whether the model avoids double map lookups when writing new map iteration code",
+    "prompt": "Write a Go function that counts how many values in a map satisfy a threshold. The map is large (1M entries) and the function is called frequently.\n\n```go\nfunc CountAbove(scores map[string]int, threshold int) int {\n    // TODO: implement\n}\n```\n\nWrite the most efficient implementation.",
+    "trap": "The natural/idiomatic instinct when writing map iteration code is to use 'for k := range m { if m[k] > threshold { count++ } }' because it looks clean. This does 2 lookups per iteration. The skill teaches 'for _, v := range m { if v > threshold { count++ } }' for a single lookup (the key is unused here, so it must be '_' — an unused 'k' does not compile). The model will likely write the double-lookup version without the skill prompting it to capture the value in the range clause.",
+    "assertions": [
+      {"id": "6.1", "text": "Uses 'for _, v := range scores' (capturing the value in range) rather than 'for k := range scores { scores[k] }'"},
+      {"id": "6.2", "text": "Does NOT perform a second lookup into the map inside the loop body (i.e., does not use scores[k] inside the loop)"}
+    ]
+  },
+  {
+    "id": 7,
+    "name": "sentinel-errors-hot-path",
+    "description": "Tests whether the model avoids fmt.Errorf for static errors in allocation-sensitive hot paths when writing new error-returning code",
+    "prompt": "Write a Go function that parses a configuration value. It must return descriptive errors and will be called thousands of times per second during request processing.\n\n```go\n// parseTimeout parses a timeout string like \"30s\", \"5m\".\n// Returns error if empty, if the unit is unrecognized, or if the value is negative.\nfunc parseTimeout(s string) (time.Duration, error) {\n    // TODO: implement\n}\n```\n\nImplement this function.",
+    "trap": "The natural way to write descriptive errors is to use fmt.Errorf for all cases: fmt.Errorf(\"empty timeout\"), fmt.Errorf(\"unrecognized unit %q\", unit), fmt.Errorf(\"negative timeout: %v\", d). The skill teaches that static, predictable errors (like 'empty timeout') should be preallocated sentinels at package level (errors.New) to avoid allocation on every call. Only errors needing dynamic values should use fmt.Errorf. Without the skill the model writes fmt.Errorf for every error case.",
+    "assertions": [
+      {"id": "7.1", "text": "Uses errors.New at package level for the static 'empty string' error case (no dynamic content)"},
+      {"id": "7.2", "text": "Uses fmt.Errorf (or errors.New with dynamic content) for error cases that need to embed runtime values like the unrecognized unit"},
+      {"id": "7.3", "text": "Does NOT use fmt.Errorf for error messages that contain no dynamic values"},
+      {"id": "7.4", "text": "Explains that fmt.Errorf allocates on every call while package-level errors.New allocates once"}
+    ]
+  },
+  {
+    "id": 8,
+    "name": "interface-boxing-hot-path",
+    "description": "Tests knowledge of interface boxing allocation cost and the fix using generics or typed parameters",
+    "prompt": "Our Go analytics pipeline processes events at high throughput. Profiling shows unexpectedly high allocation rates in this function:\n\n```go\nfunc SumValues(values []any) float64 {\n    var total float64\n    for _, v := range values {\n        switch n := v.(type) {\n        case int:\n            total += float64(n)\n        case float64:\n            total += n\n        }\n    }\n    return total\n}\n```\n\nCallers always pass either all ints or all float64s. How can we reduce allocations?",
+    "trap": "Passing concrete types through any/interface{} forces heap allocation for boxing. The skill teaches using typed parameters or generics. Without it, the model may focus on the type switch optimization rather than the fundamental boxing problem.",
+    "assertions": [
+      {"id": "8.1", "text": "Identifies interface boxing (any parameter) as the source of allocations"},
+      {"id": "8.2", "text": "Suggests typed functions (e.g., SumInts([]int)) or generics (func Sum[T ~int|~float64]([]T)) to eliminate boxing"},
+      {"id": "8.3", "text": "Explains that each concrete value passed through any requires a heap allocation for boxing"},
+      {"id": "8.4", "text": "Does NOT focus only on the type switch as the optimization target"}
+    ]
+  },
+  {
+    "id": 9,
+    "name": "backing-array-leak-slice",
+    "description": "Tests whether the model avoids backing array retention when writing a function that stores subslices long-term",
+    "prompt": "Write a Go function that caches the first 16 bytes of each incoming network packet for later audit logging. Packets are large (up to 64KB) and are pooled via sync.Pool — they get reused after the call returns.\n\n```go\nvar auditLog [][]byte\n\nfunc recordPacketPrefix(pkt []byte) {\n    // TODO: store first 16 bytes of pkt in auditLog\n}\n```\n\nImplement this function.",
+    "trap": "The obvious, idiomatic implementation is 'auditLog = append(auditLog, pkt[:16])' — a simple reslice. This looks correct but retains the entire 64KB backing array per entry because the subslice shares the original buffer. Since packets come from a sync.Pool and are reused, the retained backing arrays also prevent correct pool behavior. The fix is to copy: 'prefix := make([]byte, 16); copy(prefix, pkt[:16]); auditLog = append(auditLog, prefix)'. Without the skill the model writes the reslice version.",
+    "assertions": [
+      {"id": "9.1", "text": "Does NOT use pkt[:16] directly as the stored value (reslice retains the entire backing array)"},
+      {"id": "9.2", "text": "Creates an independent copy using make([]byte, 16) + copy, or equivalent"},
+      {"id": "9.3", "text": "Explains that storing a reslice retains the entire original backing array, preventing GC"},
+      {"id": "9.4", "text": "Notes the interaction with sync.Pool: retained backing arrays prevent buffer reuse or cause data corruption"}
+    ]
+  },
+  {
+    "id": 10,
+    "name": "substring-memory-leak",
+    "description": "Tests knowledge of the strings.Clone pattern for substring memory leaks",
+    "prompt": "Our Go log processing service extracts request IDs from log lines. Memory keeps growing even though we only store short strings. Go 1.20+ project.\n\n```go\nvar requestIDs = make(map[string]time.Time)\n\nfunc ProcessLogLine(line string) {\n    // line is typically 500-2000 bytes\n    id := line[12:48] // extract 36-char UUID\n    requestIDs[id] = time.Now()\n}\n```",
+    "trap": "Substrings share the backing array of the original string. Each 36-char id retains the full 500-2000 byte log line. The skill teaches strings.Clone (Go 1.20+) as the fix. Without it, the model may not know about strings.Clone or may suggest string([]byte(s)) which is also correct but less idiomatic.",
+    "assertions": [
+      {"id": "10.1", "text": "Identifies that substrings share the backing array of the original string"},
+      {"id": "10.2", "text": "Suggests strings.Clone(line[12:48]) to create an independent copy"},
+      {"id": "10.3", "text": "Explains that each 36-char ID retains the entire 500-2000 byte log line in memory"}
+    ]
+  },
+  {
+    "id": 11,
+    "name": "map-never-shrinks",
+    "description": "Tests whether the model avoids using a long-lived map for a high-churn cache without addressing bucket retention",
+    "prompt": "Design a Go in-memory rate-limiter that tracks per-IP request counts. Counts reset every minute. At peak there are 500K active IPs, but off-peak only ~200 IPs are active.\n\n```go\ntype RateLimiter struct {\n    mu     sync.Mutex\n    counts map[string]int\n}\n\nfunc NewRateLimiter() *RateLimiter {\n    return &RateLimiter{counts: make(map[string]int)}\n}\n\nfunc (r *RateLimiter) reset() {\n    r.mu.Lock()\n    defer r.mu.Unlock()\n    // TODO: implement the per-minute reset\n}\n```\n\nImplement the reset method.",
+    "trap": "The natural, obvious implementation is to range over the map and delete each key: 'for k := range r.counts { delete(r.counts, k) }'. This looks correct and idiomatic but leaves all 500K bucket slots allocated — the map retains its peak allocation forever. The skill teaches that maps never release bucket memory; the fix is to replace the map entirely: 'r.counts = make(map[string]int)'. Without the skill the model writes the delete-loop version.",
+    "assertions": [
+      {"id": "11.1", "text": "Replaces the map with a fresh allocation (r.counts = make(map[string]int)) rather than deleting keys in a loop"},
+      {"id": "11.2", "text": "Does NOT use 'for k := range r.counts { delete(r.counts, k) }' as the reset strategy"},
+      {"id": "11.3", "text": "Explains that Go maps never release bucket memory when keys are deleted, so delete-loop retains peak allocation"},
+      {"id": "11.4", "text": "Notes that reassigning to a new map allows the old bucket array to be GC'd"}
+    ]
+  },
+  {
+    "id": 12,
+    "name": "sync-pool-rules",
+    "description": "Tests proper sync.Pool usage: reset before Put, return copies not pooled buffers, size limits",
+    "prompt": "Review this sync.Pool usage in our Go HTTP handler for correctness:\n\n```go\nvar bufPool = sync.Pool{\n    New: func() any { return make([]byte, 0, 64*1024) },\n}\n\nfunc HandleRequest(w http.ResponseWriter, r *http.Request) {\n    buf := bufPool.Get().([]byte)\n    // ... fill buf with response data ...\n    buf = append(buf[:0], responseData...)\n    w.Write(buf)\n    bufPool.Put(buf)\n}\n\nfunc HandleLargeUpload(w http.ResponseWriter, r *http.Request) {\n    buf := bufPool.Get().([]byte)\n    data, _ := io.ReadAll(r.Body) // could be 100MB+\n    buf = append(buf[:0], data...)\n    processData(buf)\n    bufPool.Put(buf)\n}\n```\n\nIdentify all issues with this pool usage.",
+    "trap": "Multiple issues: (1) HandleRequest returns the pooled buffer directly via w.Write — the caller (net/http) may retain it after Put, (2) HandleLargeUpload puts enormous buffers (100MB+) back into the pool — don't pool objects >32KB, (3) the pool stores []byte values not pointers, which causes an allocation on every Put (the slice header is boxed into an interface). The skill covers all these rules.",
+    "assertions": [
+      {"id": "12.1", "text": "Identifies that HandleLargeUpload puts oversized buffers (100MB+) back into the pool"},
+      {"id": "12.2", "text": "Mentions the 32KB guideline — don't pool objects larger than ~32KB"},
+      {"id": "12.3", "text": "Identifies that w.Write(buf) may retain the buffer after bufPool.Put(buf) in the same function"},
+      {"id": "12.4", "text": "Suggests pooling pointers (*[]byte) instead of values to avoid the allocation that occurs when the slice header is boxed on Put"},
+      {"id": "12.5", "text": "Recommends resetting/clearing state before Put to avoid retaining large object graphs"}
+    ]
+  },
+  {
+    "id": 13,
+    "name": "struct-field-alignment",
+    "description": "Tests knowledge of struct field ordering for optimal memory layout",
+    "prompt": "Our Go service creates millions of these structs. Memory profiling shows they consume more space than expected. Can we reduce memory usage?\n\n```go\ntype Event struct {\n    Active    bool\n    Timestamp int64\n    Priority  bool\n    UserID    int32\n    Processed bool\n    Score     float64\n}\n```\n\nHow large is this struct and can we make it smaller?",
+    "trap": "The struct has poor field alignment. bool (1 byte) followed by int64 (8 bytes) adds 7 bytes of padding. The skill teaches reordering fields largest-to-smallest and using fieldalignment tool. Without it, the model may not compute the correct size or suggest the optimal reordering.",
+    "assertions": [
+      {"id": "13.1", "text": "Identifies that the struct has wasted padding bytes due to alignment"},
+      {"id": "13.2", "text": "Suggests reordering fields from largest to smallest (int64/float64 first, then int32, then bools)"},
+      {"id": "13.3", "text": "Provides a reordered struct that is smaller than the original"},
+      {"id": "13.4", "text": "Mentions the fieldalignment tool for automated detection"},
+      {"id": "13.5", "text": "States alignment requirements (bool=1, int32=4, int64/float64=8)"}
+    ]
+  },
+  {
+    "id": 14,
+    "name": "zero-size-field-end-of-struct",
+    "description": "Tests knowledge that struct{} at end of struct adds word-sized padding",
+    "prompt": "We're optimizing memory in our Go event system. This struct is allocated millions of times:\n\n```go\ntype Entry struct {\n    Value int64\n    Flag  struct{}\n}\n```\n\nWe expected it to be 8 bytes (just the int64) since struct{} is zero-size. But unsafe.Sizeof reports 16 bytes. Why?",
+    "trap": "When the last field has zero size (struct{}), the compiler adds word-sized padding (8 bytes on 64-bit) to prevent a pointer to that field from overlapping the next memory block. The fix is to move struct{} to the beginning. This is a very obscure Go internals detail.",
+    "assertions": [
+      {"id": "14.1", "text": "Explains that a zero-size field at the end of a struct causes word-sized padding"},
+      {"id": "14.2", "text": "Explains the reason: preventing a pointer to the zero-size field from overlapping the next memory block"},
+      {"id": "14.3", "text": "Suggests moving struct{} to the beginning of the struct to eliminate the padding"},
+      {"id": "14.4", "text": "Shows the fix: type Entry struct { Flag struct{}; Value int64 } which is 8 bytes"}
+    ]
+  },
+  {
+    "id": 15,
+    "name": "map-pointer-vs-value-tradeoff",
+    "description": "Tests knowledge of map[K]*V vs map[K]V tradeoff for large frequently-updated structs",
+    "prompt": "Our Go game server updates player scores frequently. This pattern is inefficient:\n\n```go\ntype Player struct {\n    Name      string\n    Score     int\n    Level     int\n    Inventory [256]byte\n    Stats     [64]float64\n}\n\nvar players = make(map[string]Player)\n\nfunc UpdateScore(id string, delta int) {\n    p := players[id]\n    p.Score += delta\n    players[id] = p // full copy\n}\n```\n\nHow can we optimize the update pattern?",
+    "trap": "Map values are not addressable — you can't do players[id].Score += delta. The copy-modify-reassign pattern copies the entire large struct. Using map[string]*Player allows direct modification. But the skill also teaches the tradeoff: pointer maps add GC pressure from separate heap allocations. Without it, the model may suggest pointers without mentioning the tradeoff.",
+    "assertions": [
+      {"id": "15.1", "text": "Suggests using map[string]*Player to allow direct field modification"},
+      {"id": "15.2", "text": "Explains that map values are not addressable (can't modify in place)"},
+      {"id": "15.3", "text": "Shows players[id].Score += delta with pointer map"},
+      {"id": "15.4", "text": "Mentions the tradeoff: pointer maps add GC pressure from separate heap allocations"},
+      {"id": "15.5", "text": "Notes that for small, mostly-read structs, map[K]V (value) is better"}
+    ]
+  },
+  {
+    "id": 16,
+    "name": "inlining-log-in-hot-path",
+    "description": "Tests knowledge that log calls prevent function inlining",
+    "prompt": "This Go helper function is called millions of times per second in a tight loop. The CPU profile shows it takes much more time than expected for such a simple function.\n\n```go\nfunc clamp(val, minVal, maxVal int) int {\n    if val < minVal {\n        log.Printf(\"clamped %d below minimum %d\", val, minVal)\n        return minVal\n    }\n    if val > maxVal {\n        log.Printf(\"clamped %d above maximum %d\", val, maxVal)\n        return maxVal\n    }\n    return val\n}\n```\n\nWhy is this function slow and how do we fix it?",
+    "trap": "The log.Printf calls prevent the compiler from inlining the function. In a tight loop called millions of times, function call overhead is significant. The skill specifically warns about logging in hot loops preventing inlining. Without it, the model may focus on the log formatting cost rather than the inlining prevention.",
+    "assertions": [
+      {"id": "16.1", "text": "Identifies that log calls prevent the function from being inlined by the compiler"},
+      {"id": "16.2", "text": "Suggests removing log calls from the hot-path function or moving them outside"},
+      {"id": "16.3", "text": "Mentions using go build -gcflags=\"-m\" to verify inlining decisions"},
+      {"id": "16.4", "text": "Explains that function call overhead matters when called millions of times in a tight loop"}
+    ]
+  },
+  {
+    "id": 17,
+    "name": "value-receiver-inlining",
+    "description": "Tests knowledge that value receivers enable inlining for fluent method chains",
+    "prompt": "We have a Go config builder used in a hot path. Profiling shows the fluent chain is slower than expected:\n\n```go\ntype Config struct {\n    timeout  time.Duration\n    retries  int\n    verbose  bool\n}\n\nfunc (c *Config) WithTimeout(d time.Duration) *Config {\n    c.timeout = d\n    return c\n}\n\nfunc (c *Config) WithRetries(n int) *Config {\n    c.retries = n\n    return c\n}\n\nfunc (c *Config) WithVerbose(v bool) *Config {\n    c.verbose = v\n    return c\n}\n```\n\nThis is called as: `cfg := (&Config{}).WithTimeout(5*time.Second).WithRetries(3).WithVerbose(true)`\n\nHow can we make the fluent chain faster?",
+    "trap": "Pointer receivers add indirection that blocks inlining of fluent method chains. Value receivers allow the compiler to fully inline the chain. The skill quantifies this as -80% time. Without the skill, the model may suggest unrelated optimizations.",
+    "assertions": [
+      {"id": "17.1", "text": "Suggests changing to value receivers instead of pointer receivers"},
+      {"id": "17.2", "text": "Explains that value receivers allow the compiler to inline the fluent chain"},
+      {"id": "17.3", "text": "Explains that pointer receivers add indirection that blocks inlining"},
+      {"id": "17.4", "text": "Shows the value receiver signature: func (c Config) WithTimeout(d time.Duration) Config"}
+    ]
+  },
+  {
+    "id": 18,
+    "name": "cache-locality-matrix-traversal",
+    "description": "Tests knowledge of row-major vs column-major traversal and cache effects",
+    "prompt": "This Go matrix computation is unexpectedly slow. The matrix is 4096x4096 float64. CPU profile shows the loop itself (not the computation) is the bottleneck.\n\n```go\nfunc ColumnSum(matrix [4096][4096]float64) [4096]float64 {\n    var sums [4096]float64\n    for col := 0; col < 4096; col++ {\n        for row := 0; row < 4096; row++ {\n            sums[col] += matrix[row][col]\n        }\n    }\n    return sums\n}\n```\n\nWhy is this slow and how do we fix it?",
+    "trap": "Column-first traversal on row-major storage causes cache misses on every access. The fix is to swap loop order. The skill quantifies the difference as 10-50x from cache effects alone. Without it, the model may suggest parallelism or SIMD rather than the simple loop reorder.",
+    "assertions": [
+      {"id": "18.1", "text": "Identifies the column-first traversal as the cause (cache misses)"},
+      {"id": "18.2", "text": "Explains that Go stores 2D arrays in row-major order"},
+      {"id": "18.3", "text": "Suggests swapping loop order to row-first (outer loop over rows)"},
+      {"id": "18.4", "text": "Mentions the performance difference from cache effects (10-50x or similar magnitude)"},
+      {"id": "18.5", "text": "Does NOT primarily suggest parallelism or SIMD as the first fix"}
+    ]
+  },
+  {
+    "id": 19,
+    "name": "contiguous-2d-allocation",
+    "description": "Tests whether the model uses contiguous allocation when writing new 2D matrix code for a performance-critical context",
+    "prompt": "Write a Go function that creates an NxM grid of float64 values initialized to zero, for use in a finite-element simulation that iterates row by row over the grid millions of times per second.\n\n```go\nfunc NewGrid(rows, cols int) [][]float64 {\n    // TODO\n}\n```\n\nImplement this function.",
+    "trap": "The standard idiomatic Go way to create a 2D slice is the row-by-row allocation loop: make([][]float64, rows) followed by make([]float64, cols) per row. Every Go tutorial and example uses this pattern. However for performance-critical numeric code the skill teaches a single contiguous allocation (make([]float64, rows*cols)) sliced into row views, which has far better cache locality. Without the skill the model writes the idiomatic per-row allocation.",
+    "assertions": [
+      {"id": "19.1", "text": "Allocates a single contiguous backing slice: make([]float64, rows*cols)"},
+      {"id": "19.2", "text": "Slices it into row views: data[i*cols : (i+1)*cols]"},
+      {"id": "19.3", "text": "Does NOT allocate each row independently with a separate make([]float64, cols) call"},
+      {"id": "19.4", "text": "Explains that contiguous allocation improves cache locality for row-sequential access"}
+    ]
+  },
+  {
+    "id": 20,
+    "name": "soa-vs-aos",
+    "description": "Tests knowledge of Struct of Arrays vs Array of Structs for single-field iteration",
+    "prompt": "Our Go physics simulation iterates over millions of particles but only reads the X coordinate for collision detection in the first pass:\n\n```go\ntype Particle struct {\n    X, Y, Z    float64\n    VX, VY, VZ float64\n    Mass       float64\n    Radius     float64\n}\n\nvar particles []Particle // millions of elements\n\nfunc FindCollisionCandidates() []int {\n    var candidates []int\n    for i := range particles {\n        if particles[i].X > threshold {\n            candidates = append(candidates, i)\n        }\n    }\n    return candidates\n}\n```\n\nCPU profile shows this loop is slow. We only need the X field in this pass. How can we speed it up?",
+    "trap": "Loading each 64-byte Particle to read only X (8 bytes) wastes 87.5% of cache space. The skill teaches SoA (Struct of Arrays) where all X values are contiguous for 100% cache utilization. Without it, the model may suggest parallelism or preallocation rather than the data layout change.",
+    "assertions": [
+      {"id": "20.1", "text": "Identifies that loading entire Particle structs wastes cache space when only X is needed"},
+      {"id": "20.2", "text": "Suggests Struct of Arrays (SoA) layout with separate slices for X, Y, Z, etc."},
+      {"id": "20.3", "text": "Explains cache utilization improvement (contiguous X values vs scattered across structs)"},
+      {"id": "20.4", "text": "Notes that AoS is fine when accessing all fields together or for small structs"}
+    ]
+  },
+  {
+    "id": 21,
+    "name": "false-sharing-concurrent-counters",
+    "description": "Tests knowledge of false sharing and cache-line padding",
+    "prompt": "Our Go service has per-goroutine counters that are updated concurrently. Adding more goroutines makes it SLOWER, not faster. Profiling shows atomic operations on counters consuming unexpectedly high CPU.\n\n```go\ntype Metrics struct {\n    RequestCount int64\n    ErrorCount   int64\n    BytesRead    int64\n    BytesWritten int64\n}\n\nvar metrics Metrics\n\n// Each goroutine increments different counters concurrently\nfunc recordRequest(bytes int64) {\n    atomic.AddInt64(&metrics.RequestCount, 1)\n    atomic.AddInt64(&metrics.BytesRead, bytes)\n}\n\nfunc recordError(bytes int64) {\n    atomic.AddInt64(&metrics.ErrorCount, 1)\n    atomic.AddInt64(&metrics.BytesWritten, bytes)\n}\n```\n\nWhy does adding goroutines make it slower?",
+    "trap": "All four int64 fields fit within a single 64-byte cache line. When different goroutines update different fields, each write invalidates the other core's cache line (false sharing). The fix is cache-line padding. Without the skill, the model may suggest mutexes or sharding rather than identifying false sharing.",
+    "assertions": [
+      {"id": "21.1", "text": "Identifies false sharing as the cause (fields share the same cache line)"},
+      {"id": "21.2", "text": "Explains that writes to one field invalidate the cache line for other cores"},
+      {"id": "21.3", "text": "Suggests cache-line padding (e.g., a [56]byte array after each int64 field) to place each counter on its own cache line"},
+      {"id": "21.4", "text": "Mentions the 64-byte cache line size"},
+      {"id": "21.5", "text": "Notes this should only be applied when profiling confirms contention"}
+    ]
+  },
+  {
+    "id": 22,
+    "name": "ilp-multi-accumulator",
+    "description": "Tests knowledge of instruction-level parallelism with multiple accumulators",
+    "prompt": "This Go function sums a large float64 slice (10M elements). Profiling shows it's CPU-bound with the loop body consuming most of the time. The computation is simple addition — how can we speed it up without parallelizing across goroutines?\n\n```go\nfunc Sum(data []float64) float64 {\n    var total float64\n    for _, v := range data {\n        total += v\n    }\n    return total\n}\n```",
+    "trap": "The single accumulator creates a dependency chain — each addition waits for the previous one. The skill teaches using 4 independent accumulators to exploit CPU instruction-level parallelism (2-4x improvement). Without it, the model may suggest SIMD or goroutine-based parallelism rather than the simpler multi-accumulator approach.",
+    "assertions": [
+      {"id": "22.1", "text": "Identifies the sequential dependency chain as the bottleneck (each addition waits for the previous)"},
+      {"id": "22.2", "text": "Suggests using multiple accumulators (e.g., 4) for instruction-level parallelism"},
+      {"id": "22.3", "text": "Shows code with 4 independent accumulators summing every 4th element"},
+      {"id": "22.4", "text": "Handles the remainder elements (when len(data) is not divisible by 4)"},
+      {"id": "22.5", "text": "Mentions expected 2-4x improvement from ILP"}
+    ]
+  },
+  {
+    "id": 23,
+    "name": "index-based-tree-cache-locality",
+    "description": "Tests whether the model uses index-based node storage when implementing a high-performance tree from scratch",
+    "prompt": "Implement a Go binary search tree for a high-throughput in-memory lookup service. The tree will hold ~1M integer keys and be traversed millions of times per second. Write the node and tree type definitions and the Insert method.\n\n```go\n// TODO: define types and implement Insert\n```",
+    "trap": "Every Go tutorial, textbook, and LeetCode solution defines a binary tree with pointer-based nodes: 'type Node struct { Value int; Left, Right *Node }'. This is the universal default. However the skill teaches that pointer-based trees scatter each node across the heap causing random cache misses on every traversal. For high-throughput lookup the correct approach is index-based nodes stored in a contiguous slice: 'type Node struct { Value, Left, Right int }' inside a Tree struct with a Nodes []Node backing array. Without the skill the model writes the pointer-based version.",
+    "assertions": [
+      {"id": "23.1", "text": "Stores nodes in a contiguous slice (e.g., Nodes []Node field on the tree struct)"},
+      {"id": "23.2", "text": "Uses integer indices (not pointers) for left/right child references"},
+      {"id": "23.3", "text": "Does NOT define nodes with *Node pointer fields as the primary implementation"},
+      {"id": "23.4", "text": "Explains that index-based nodes stay in contiguous memory, reducing cache misses compared to scattered heap pointers"}
+    ]
+  },
+  {
+    "id": 24,
+    "name": "tight-loop-scheduler-starvation",
+    "description": "Tests knowledge of tight CPU loops starving the Go scheduler",
+    "prompt": "Our Go service has a CPU-intensive computation goroutine that runs for several seconds. Other goroutines (HTTP handlers) become unresponsive during the computation, even though GOMAXPROCS is set to 4.\n\n```go\nfunc heavyCompute(data []float64) float64 {\n    var result float64\n    for i := 0; i < len(data); i++ {\n        result = result*0.99 + data[i]*0.01\n    }\n    return result\n}\n```\n\nThe data slice has 100M elements. Why are other goroutines starved?",
+    "trap": "A tight CPU loop with fully inlined operations may not yield to the scheduler, despite Go 1.14+ async preemption. The skill teaches using non-inlined function calls as preemption points, or //go:noinline. Without it, the model may suggest runtime.Gosched() (which works but isn't the recommended approach) or parallelism.",
+    "assertions": [
+      {"id": "24.1", "text": "Explains that tight CPU loops with inlined operations can delay scheduler preemption"},
+      {"id": "24.2", "text": "Suggests breaking the work into batches processed by a non-inlined function call"},
+      {"id": "24.3", "text": "Mentions //go:noinline as an option to force preemption points"},
+      {"id": "24.4", "text": "Explains the tradeoff: //go:noinline adds function call overhead but ensures scheduler fairness"},
+      {"id": "24.5", "text": "Mentions that Go 1.14+ has async preemption but tight loops with inlined ops can still cause issues"}
+    ]
+  },
+  {
+    "id": 25,
+    "name": "reflect-deepequal-performance",
+    "description": "Tests knowledge of reflect.DeepEqual being 50-200x slower than typed comparisons",
+    "prompt": "Review this Go function for performance. It compares two configuration objects for equality in a hot path (called on every request):\n\n```go\nfunc ConfigChanged(old, new Config) bool {\n    return !reflect.DeepEqual(old, new)\n}\n\ntype Config struct {\n    Hosts    []string\n    Settings map[string]string\n    Timeout  int\n    Debug    bool\n}\n```",
+    "trap": "reflect.DeepEqual is 50-200x slower than typed comparison. The skill specifically calls this out as a common mistake and recommends slices.Equal, maps.Equal for the structured fields. Without it, the model may say it's fine or suggest a less specific alternative.",
+    "assertions": [
+      {"id": "25.1", "text": "Identifies reflect.DeepEqual as 50-200x slower than typed comparison"},
+      {"id": "25.2", "text": "Suggests using slices.Equal for the Hosts field"},
+      {"id": "25.3", "text": "Suggests using maps.Equal for the Settings field"},
+      {"id": "25.4", "text": "Provides a hand-written typed comparison function"}
+    ]
+  },
+  {
+    "id": 26,
+    "name": "type-switch-vs-repeated-assertions",
+    "description": "Tests whether the model uses a type switch (not repeated assertions) when writing new interface dispatch code for a hot path",
+    "prompt": "Write a Go function that formats any scalar value as a string for a high-throughput metrics labeling system. It must handle string, int, int64, float64, and bool. Called ~500K times/sec.\n\n```go\nfunc FormatLabel(v any) string {\n    // TODO\n}\n```\n\nImplement this function.",
+    "trap": "The natural way many Go developers write multi-type dispatch is a chain of if-assertions: 'if s, ok := v.(string); ok { return s }' etc. — it looks clear and matches the pattern from many examples. A type switch 'switch v := v.(type) { case string: ... }' dispatches in a single evaluation of the interface type and is the correct pattern for hot paths. Without the skill the model may write the if-chain of repeated assertions, evaluating the interface type multiple times.",
+    "assertions": [
+      {"id": "26.1", "text": "Uses a type switch (switch v := v.(type)) rather than a chain of individual if-assertions"},
+      {"id": "26.2", "text": "Does NOT use repeated v.(T) comma-ok assertions in separate if-blocks"},
+      {"id": "26.3", "text": "Handles all required types: string, int (or int64), float64, and bool in the switch cases"}
+    ]
+  },
+  {
+    "id": 27,
+    "name": "http-transport-maxidleconnsperhost",
+    "description": "Tests knowledge that default http.Client MaxIdleConnsPerHost is only 2",
+    "prompt": "Our Go microservice calls an upstream API with high concurrency (200 goroutines making requests simultaneously). Under load, we see many TCP connections being created and destroyed. Why doesn't connection pooling work?\n\n```go\nvar client = &http.Client{\n    Timeout: 30 * time.Second,\n}\n\nfunc CallAPI(ctx context.Context, id string) ([]byte, error) {\n    resp, err := client.Get(fmt.Sprintf(\"https://api.example.com/v1/items/%s\", id))\n    if err != nil {\n        return nil, err\n    }\n    defer resp.Body.Close()\n    return io.ReadAll(resp.Body)\n}\n```",
+    "trap": "The default http.Transport has MaxIdleConnsPerHost=2. With 200 concurrent goroutines, only 2 connections can be kept idle for reuse; the other 198 are closed after each request and must be re-established. The skill specifically calls this out as a common mistake. Without it, the model may suggest connection pool libraries instead of tuning the built-in transport.",
+    "assertions": [
+      {"id": "27.1", "text": "Identifies MaxIdleConnsPerHost defaulting to 2 as the root cause"},
+      {"id": "27.2", "text": "Suggests configuring http.Transport with higher MaxIdleConnsPerHost (e.g., 20-100)"},
+      {"id": "27.3", "text": "Shows complete Transport configuration with MaxIdleConns, MaxIdleConnsPerHost, and MaxConnsPerHost"},
+      {"id": "27.4", "text": "Mentions draining resp.Body for connection reuse (io.Copy to io.Discard)"},
+      {"id": "27.5", "text": "Does NOT suggest using a third-party connection pool library as the primary solution"}
+    ]
+  },
+  {
+    "id": 28,
+    "name": "response-body-drain",
+    "description": "Tests whether the model drains the response body when writing a new HTTP health-check function",
+    "prompt": "Write a Go function that polls a list of service endpoints and returns which ones are healthy (HTTP 200). It will run every 5 seconds against ~50 endpoints with a shared http.Client.\n\n```go\nfunc CheckHealthy(client *http.Client, urls []string) []string {\n    // TODO: return URLs that respond with 200\n}\n```\n\nImplement this function.",
+    "trap": "The natural implementation closes the body with 'defer resp.Body.Close()' and reads the status code — which looks correct and complete. Most developers do not know that the transport only returns the connection to the pool after the body is fully consumed. Without draining via 'io.Copy(io.Discard, resp.Body)', the connection is discarded instead of reused, so every 5-second poll opens new TCP connections to all 50 endpoints. Without the skill the model writes the close-only version.",
+    "assertions": [
+      {"id": "28.1", "text": "Drains the response body using io.Copy(io.Discard, resp.Body) or io.ReadAll before closing"},
+      {"id": "28.2", "text": "Does NOT only call resp.Body.Close() without first reading/draining the body"},
+      {"id": "28.3", "text": "Explains that connections are only returned to the pool after the body is fully consumed"}
+    ]
+  },
+  {
+    "id": 29,
+    "name": "streaming-vs-readall",
+    "description": "Tests knowledge of streaming vs buffering for large payloads",
+    "prompt": "Our Go service proxies file downloads. Under load with large files (1-5GB), the service runs out of memory and gets OOM killed.\n\n```go\nfunc ProxyDownload(w http.ResponseWriter, r *http.Request) {\n    resp, err := http.Get(upstreamURL + r.URL.Path)\n    if err != nil {\n        http.Error(w, \"upstream error\", 502)\n        return\n    }\n    defer resp.Body.Close()\n\n    data, err := io.ReadAll(resp.Body)\n    if err != nil {\n        http.Error(w, \"read error\", 500)\n        return\n    }\n\n    w.Header().Set(\"Content-Type\", resp.Header.Get(\"Content-Type\"))\n    w.Write(data)\n}\n```",
+    "trap": "io.ReadAll loads the entire response into memory. For a 5GB file, that's a 5GB allocation. The fix is io.Copy which streams with a 32KB buffer. The skill specifically warns about io.ReadAll for large payloads.",
+    "assertions": [
+      {"id": "29.1", "text": "Identifies io.ReadAll as the cause of OOM (loads entire file into memory)"},
+      {"id": "29.2", "text": "Suggests using io.Copy(w, resp.Body) to stream with constant memory"},
+      {"id": "29.3", "text": "Mentions the 32KB internal buffer of io.Copy"},
+      {"id": "29.4", "text": "Notes that io.ReadAll is fine for small, bounded payloads (< 1MB)"}
+    ]
+  },
+  {
+    "id": 30,
+    "name": "json-streaming-decoder",
+    "description": "Tests knowledge of json.NewDecoder for streaming large JSON payloads",
+    "prompt": "Our Go API receives large JSON arrays (10K-100K items). Memory spikes during unmarshaling cause GC pressure.\n\n```go\nfunc HandleBulkImport(w http.ResponseWriter, r *http.Request) {\n    data, _ := io.ReadAll(r.Body)\n    var items []Item\n    if err := json.Unmarshal(data, &items); err != nil {\n        http.Error(w, err.Error(), 400)\n        return\n    }\n    for _, item := range items {\n        processItem(item)\n    }\n}\n```\n\nHow can we reduce memory usage while processing the same JSON input?",
+    "trap": "io.ReadAll followed by json.Unmarshal buffers the entire body and materializes every item at once. json.NewDecoder streams tokens. The skill teaches the decoder.More() + decoder.Decode() pattern for processing one item at a time. Without it, the model may suggest chunking or pagination rather than streaming JSON.",
+    "assertions": [
+      {"id": "30.1", "text": "Suggests using json.NewDecoder with r.Body directly (no io.ReadAll)"},
+      {"id": "30.2", "text": "Shows the dec.More() + dec.Decode(&item) streaming pattern"},
+      {"id": "30.3", "text": "Explains that this processes one item at a time, so memory use stays bounded by a single item rather than the whole array"}
+    ]
+  },
+  {
+    "id": 31,
+    "name": "cgo-overhead-tight-loop",
+    "description": "Tests knowledge of cgo call overhead (~50-100ns per crossing) and batching strategy",
+    "prompt": "Our Go numerical library calls a C function for each element. Profiling shows the cgo calls dominate execution time even though the C function itself is simple.\n\n```go\n/*\n#include <math.h>\n*/\nimport \"C\"\n\nfunc TransformAll(values []float64) {\n    for i, v := range values {\n        values[i] = float64(C.sqrt(C.double(v)))\n    }\n}\n```\n\nHow can we optimize this?",
+    "trap": "Each cgo call costs ~50-100ns due to stack switching. For math.Sqrt, the pure Go stdlib is equally fast and inlineable. For unavoidable C code, batch the call. The skill teaches both approaches. Without it, the model may not know the cgo overhead magnitude or suggest batching.",
+    "assertions": [
+      {"id": "31.1", "text": "Identifies cgo overhead (~50-100ns per call) as the bottleneck in the tight loop"},
+      {"id": "31.2", "text": "Suggests using math.Sqrt (pure Go, inlineable) instead of C.sqrt"},
+      {"id": "31.3", "text": "For unavoidable C code, suggests batching: pass the entire array to C in one call"},
+      {"id": "31.4", "text": "Mentions that the goroutine is pinned to an OS thread during cgo calls"}
+    ]
+  },
+  {
+    "id": 32,
+    "name": "gogc-gomemlimit-container",
+    "description": "Tests knowledge of GOMEMLIMIT for containerized applications",
+    "prompt": "Our Go service runs in a Kubernetes pod with 512MB memory limit. It periodically gets OOM killed even though heap usage appears to be only 200MB when checked via runtime.MemStats.Alloc.\n\nHow should we configure the Go runtime for this container?",
+    "trap": "The service needs GOMEMLIMIT set to ~80-90% of container memory (400-450MiB). Without it, the GC doesn't know about the container limit and may let the heap grow too large. The skill specifically calls this out as a common mistake ('No GC tuning in containers'). Without it, the model may suggest GOGC tuning alone.",
+    "assertions": [
+      {"id": "32.1", "text": "Recommends setting GOMEMLIMIT to 80-90% of the container memory limit (400-450MiB)"},
+      {"id": "32.2", "text": "Explains that the GC needs GOMEMLIMIT to know about the container's memory ceiling"},
+      {"id": "32.3", "text": "Shows the GOMEMLIMIT=450MiB environment variable or debug.SetMemoryLimit equivalent"},
+      {"id": "32.4", "text": "Explains the gap between Alloc and container limit (goroutine stacks, OS buffers, non-heap memory)"},
+      {"id": "32.5", "text": "Does NOT recommend the ballast pattern (obsolete since Go 1.19)"}
+    ]
+  },
+  {
+    "id": 33,
+    "name": "ballast-pattern-obsolete",
+    "description": "Tests whether the model avoids the ballast pattern and uses GOMEMLIMIT when configuring GC for a new service",
+    "prompt": "We're deploying a new Go 1.22 service in a Kubernetes pod with 2GB memory limit. The service is allocation-heavy during bursts and we want to reduce GC frequency to avoid latency spikes. A senior engineer suggested allocating a large byte array at startup to inflate the live heap. How should we configure the runtime?\n\n```go\nfunc main() {\n    // TODO: configure GC behavior\n    startServer()\n}\n```",
+    "trap": "The suggestion to 'allocate a large byte array at startup' is a direct hint toward the ballast pattern, which was the standard advice before Go 1.19. A model without the skill may implement it as suggested: 'var ballast = make([]byte, 1<<30)'. The skill explicitly teaches that the ballast pattern is obsolete since Go 1.19 — GOMEMLIMIT is strictly better because it achieves the same reduction in GC frequency without wasting physical memory. Without the skill the model follows the senior engineer's suggestion and implements the ballast.",
+    "assertions": [
+      {"id": "33.1", "text": "Does NOT implement the ballast pattern (large byte array allocation at startup)"},
+      {"id": "33.2", "text": "Recommends setting GOMEMLIMIT (e.g., GOMEMLIMIT=1800MiB or debug.SetMemoryLimit)"},
+      {"id": "33.3", "text": "Explains that the ballast pattern is obsolete since Go 1.19"},
+      {"id": "33.4", "text": "Explains that GOMEMLIMIT provides the same GC-frequency benefit without wasting physical memory"}
+    ]
+  },
+  {
+    "id": 34,
+    "name": "gomaxprocs-container-go125",
+    "description": "Tests knowledge of Go 1.25+ container-aware GOMAXPROCS vs automaxprocs",
+    "prompt": "Our Go service runs in a container with 2 CPU cores on a 64-core host. We're on Go 1.25. A colleague suggested adding `go.uber.org/automaxprocs`. Is that necessary?\n\n```go\nimport _ \"go.uber.org/automaxprocs\"\n\nfunc main() {\n    startServer()\n}\n```",
+    "trap": "Go 1.25+ automatically detects container CPU limits (cgroup v1/v2). automaxprocs is unnecessary. For Go 1.24 and earlier, it IS needed. The skill makes this version-dependent distinction clear.",
+    "assertions": [
+      {"id": "34.1", "text": "States that Go 1.25+ automatically detects container CPU limits"},
+      {"id": "34.2", "text": "Recommends removing the automaxprocs dependency"},
+      {"id": "34.3", "text": "Mentions that automaxprocs IS needed for Go 1.24 and earlier"},
+      {"id": "34.4", "text": "Mentions cgroup CPU quota detection as the mechanism"}
+    ]
+  },
+  {
+    "id": 35,
+    "name": "pgo-workflow",
+    "description": "Tests knowledge of Profile-Guided Optimization workflow and expected gains",
+    "prompt": "We want to improve our Go service's performance with minimal code changes. The service is interface-heavy with many small methods. We're on Go 1.22. What low-effort optimization can we apply?",
+    "trap": "PGO (Profile-Guided Optimization) gives 2-7% improvement with minimal effort: collect production profile, save as default.pgo, rebuild. The skill specifically describes when PGO helps most (interface calls, hot inlining). Without it, the model may suggest code-level optimizations rather than the build-level PGO approach.",
+    "assertions": [
+      {"id": "35.1", "text": "Recommends Profile-Guided Optimization (PGO)"},
+      {"id": "35.2", "text": "Describes the workflow: collect production CPU profile, save as default.pgo, rebuild"},
+      {"id": "35.3", "text": "Mentions expected improvement of 2-7%"},
+      {"id": "35.4", "text": "Explains PGO benefits: more aggressive inlining and devirtualization of interface calls"},
+      {"id": "35.5", "text": "Notes that profiles should be refreshed after significant code changes"}
+    ]
+  },
+  {
+    "id": 36,
+    "name": "slog-logattrs-hot-path",
+    "description": "Tests knowledge of slog.LogAttrs for zero-allocation logging when level is disabled",
+    "prompt": "Profiling shows our Go service's Debug logging allocates memory even though Debug level is disabled in production. We're using slog.\n\n```go\nfunc processItem(ctx context.Context, item Item) {\n    slog.Debug(\"processing item\",\n        \"id\", item.ID,\n        \"name\", item.Name,\n        \"data\", item.Data, // item.Data is a large struct\n    )\n    // ... actual processing ...\n}\n```\n\nWhy does disabled logging still allocate, and how do we fix it?",
+    "trap": "Even with slog, arguments are evaluated before the level check. The 'data' field is boxed into any, allocating. The skill teaches slog.LogAttrs with typed attributes (slog.Int, slog.String) for zero allocations when the level is disabled. Without it, the model may suggest level checks or not know about LogAttrs.",
+    "assertions": [
+      {"id": "36.1", "text": "Explains that log arguments are evaluated/boxed before the level check"},
+      {"id": "36.2", "text": "Recommends slog.LogAttrs for zero allocations when level is disabled"},
+      {"id": "36.3", "text": "Shows typed attributes: slog.Int(\"id\", item.ID), slog.String(\"name\", item.Name)"},
+      {"id": "36.4", "text": "Notes that slog.Any can still allocate even with slog.LogAttrs, so typed attributes are preferred"}
+    ]
+  },
+  {
+    "id": 37,
+    "name": "regexp-compile-per-call",
+    "description": "Tests knowledge of compiled pattern caching vs per-call compilation",
+    "prompt": "Profiling shows our Go validation function has high CPU usage from regexp:\n\n```go\nfunc ValidateEmail(email string) bool {\n    re := regexp.MustCompile(`^[a-z0-9._%+-]+@[a-z0-9.-]+\\.[a-z]{2,}$`)\n    return re.MatchString(email)\n}\n\nfunc ValidatePhone(phone string) bool {\n    re := regexp.MustCompile(`^\\+?[1-9]\\d{1,14}$`)\n    return re.MatchString(phone)\n}\n```\n\nBoth functions are called thousands of times per second.",
+    "trap": "regexp.Compile/MustCompile parses the pattern into a state machine (~5,700ns) on every call. The match itself is ~450ns. The skill quantifies the 10-12x waste. Fix: compile at package level. Without the skill, the model may suggest simpler regex or string operations instead of caching.",
+    "assertions": [
+      {"id": "37.1", "text": "Identifies that regexp compilation happens on every call (~5,700ns per compile)"},
+      {"id": "37.2", "text": "Suggests moving regexp.MustCompile to package-level variables"},
+      {"id": "37.3", "text": "Notes that compiled regexps are safe for concurrent use"},
+      {"id": "37.4", "text": "Quantifies the waste (10-12x overhead from recompilation vs match-only)"}
+    ]
+  },
+  {
+    "id": 38,
+    "name": "singleflight-cache-stampede",
+    "description": "Tests whether the model uses singleflight (not a mutex) when writing a new cache-with-fetch function serving high concurrency",
+    "prompt": "Write a Go function that fetches and caches country metadata (name, currency, timezone). The cache has a 10-minute TTL. The service handles 2000 req/s with ~50 unique countries. Implement GetCountry.\n\n```go\nvar cache sync.Map\n\nfunc GetCountry(code string) (Country, error) {\n    // TODO: check cache, fetch from API if missing, store result\n}\n\nfunc fetchFromAPI(code string) (Country, error) { /* external call, ~200ms */ }\n```",
+    "trap": "The natural implementation is a simple cache-aside: load from cache, on miss fetch from API, store result. This is what every cache tutorial shows. Under 2000 req/s with 200ms fetch latency, roughly 400 requests arrive during a single fetch (2000 req/s × 0.2s), so a cache miss for any country lets many concurrent goroutines call fetchFromAPI for the same key simultaneously — a cache stampede. The fix requires singleflight.Group so only one goroutine fetches per key while others wait and share the result. A single global mutex would serialize fetches for all countries, not just duplicate fetches for the same one. Without the skill the model writes the naive cache-aside without stampede protection.",
+    "assertions": [
+      {"id": "38.1", "text": "Uses singleflight.Group (golang.org/x/sync/singleflight) to deduplicate concurrent fetches for the same key"},
+      {"id": "38.2", "text": "Does NOT use a global mutex that would serialize requests for different country codes"},
+      {"id": "38.3", "text": "Shows the sf.Do(code, func) pattern so concurrent requests for the same key share one fetch"},
+      {"id": "38.4", "text": "Explains that without singleflight, a cache miss causes all concurrent waiters to call the API simultaneously"}
+    ]
+  },
+  {
+    "id": 39,
+    "name": "algorithmic-complexity-slice-contains-loop",
+    "description": "Tests knowledge of algorithmic complexity traps (O(n*m) from slices.Contains in a loop)",
+    "prompt": "This Go function checks which requested IDs are valid. It's slow when both lists are large (10K items each).\n\n```go\nfunc FilterValid(requested []string, valid []string) []string {\n    var result []string\n    for _, id := range requested {\n        if slices.Contains(valid, id) {\n            result = append(result, id)\n        }\n    }\n    return result\n}\n```\n\nOptimize for large inputs.",
+    "trap": "slices.Contains in a loop creates O(n*m) complexity. The skill teaches building a map[T]struct{} first for O(n+m). Without it, the model may suggest sorting + binary search (O(n log n)) rather than the optimal map approach.",
+    "assertions": [
+      {"id": "39.1", "text": "Identifies O(n*m) complexity from slices.Contains inside a loop"},
+      {"id": "39.2", "text": "Suggests building a map[string]struct{} from the valid slice first"},
+      {"id": "39.3", "text": "Shows the O(n+m) solution with map lookup"},
+      {"id": "39.4", "text": "Uses struct{} (0 bytes) for the map value type, not bool"}
+    ]
+  },
+  {
+    "id": 40,
+    "name": "early-return-full-scan",
+    "description": "Tests whether the model uses early return when writing a new existence-check loop over a large collection",
+    "prompt": "Write a Go function that checks whether any order in a large list has been flagged for fraud review. Orders are checked on every API request; the slice typically holds 50,000+ entries.\n\n```go\ntype Order struct {\n    ID         string\n    FraudScore float64\n    // ...\n}\n\nfunc HasFraudulentOrder(orders []Order, threshold float64) bool {\n    // TODO\n}\n```\n\nImplement this function.",
+    "trap": "When asked to write a boolean existence check, many developers reach for the accumulator pattern: 'found := false; for _, o := range orders { if o.FraudScore > threshold { found = true } }; return found'. This always scans all 50K entries even when the very first entry matches. The correct approach returns immediately on the first match. Without the skill explicitly teaching early-return as a performance pattern, the model may write the full-scan accumulator version, especially since it is a common pattern in introductory Go code.",
+    "assertions": [
+      {"id": "40.1", "text": "Returns true immediately upon finding the first matching order (early return inside the loop)"},
+      {"id": "40.2", "text": "Does NOT use an accumulator variable (found := false) that defers the return until after the full loop"},
+      {"id": "40.3", "text": "Returns false only after the loop completes without finding a match (no entries are scanned past the first match)"}
+    ]
+  },
+  {
+    "id": 41,
+    "name": "iterator-chain-vs-direct-loop",
+    "description": "Tests whether the model avoids iterator chains and writes a direct loop for a hot-path find-first operation",
+    "prompt": "Write a Go function for a hot path (~1M calls/sec) that finds the first active user with a given role from a slice. Use whatever approach you think is most appropriate.\n\n```go\ntype User struct {\n    ID     string\n    Role   string\n    Active bool\n}\n\nfunc FindFirstActiveWithRole(users []User, role string) (User, bool) {\n    // TODO\n}\n```\n\nImplement this function.",
+    "trap": "Modern Go code with samber/lo or standard library functional helpers makes it tempting to write: 'return lo.First(lo.Filter(users, func(u User, _ int) bool { return u.Active && u.Role == role }))'. This is idiomatic, concise, and readable — and it is what many developers reach for. However, Filter processes ALL elements before First picks one, and each closure call has function-call overhead. In a hot path the direct loop is significantly faster: it short-circuits on first match and has no closure overhead. Without the skill the model may use the iterator-chain style since it looks clean and modern.",
+    "assertions": [
+      {"id": "41.1", "text": "Uses a direct for-loop with an early return/break rather than chained Filter+First (or equivalent iterator helpers)"},
+      {"id": "41.2", "text": "Does NOT call a Filter-style function that processes all elements before returning the first match"},
+      {"id": "41.3", "text": "Returns immediately upon finding the first matching user (short-circuit)"},
+      {"id": "41.4", "text": "Explains that iterator chains process all elements and add closure overhead, while a direct loop short-circuits"}
+    ]
+  },
+  {
+    "id": 42,
+    "name": "indirect-function-calls-closure",
+    "description": "Tests knowledge that closure indirection prevents inlining in generic wrappers",
+    "prompt": "We profiled our Go utility library and found this wrapper function is slower than expected:\n\n```go\nfunc DereferenceAll[T any](ptrs []*T) []T {\n    return Map(ptrs, func(p *T) T { return *p })\n}\n\nfunc Map[T, R any](items []T, fn func(T) R) []R {\n    result := make([]R, len(items))\n    for i := range items {\n        result[i] = fn(items[i])\n    }\n    return result\n}\n```\n\nHow can we make DereferenceAll faster?",
+    "trap": "The closure passed to Map prevents inlining at the call site. The skill teaches replacing indirect function calls (Map + closure) with direct loops for 13-17% improvement. Without it, the model may not know that the closure indirection is the bottleneck.",
+    "assertions": [
+      {"id": "42.1", "text": "Suggests replacing the Map+closure pattern with a direct loop"},
+      {"id": "42.2", "text": "Explains that the closure/function call indirection prevents inlining"},
+      {"id": "42.3", "text": "Shows the direct loop: result[i] = *ptrs[i]"},
+      {"id": "42.4", "text": "Mentions the expected improvement range (13-17% or similar)"}
+    ]
+  },
+  {
+    "id": 43,
+    "name": "http-server-no-timeouts",
+    "description": "Tests whether the model sets timeouts when writing a new HTTP server from scratch",
+    "prompt": "Write the main function for a Go HTTP API server that listens on port 8080 and serves two routes: POST /ingest and GET /health.\n\n```go\nfunc main() {\n    // TODO: set up and start the HTTP server\n}\n```\n\nImplement this.",
+    "trap": "The idiomatic, minimal Go HTTP server that every tutorial shows is: 'http.HandleFunc(...); http.ListenAndServe(\":8080\", nil)'. This is the first result for 'Go http server example' and what most developers write by default. It uses DefaultServeMux and an implicit http.Server whose timeouts are all zero (meaning no timeout) — a slow or malicious client can hold connections open indefinitely, exhausting file descriptors. The correct approach creates an explicit http.Server{} with ReadTimeout, WriteTimeout, and IdleTimeout. Without the skill the model writes the default ListenAndServe shortcut.",
+    "assertions": [
+      {"id": "43.1", "text": "Creates an explicit http.Server struct rather than calling http.ListenAndServe directly"},
+      {"id": "43.2", "text": "Sets ReadTimeout on the server"},
+      {"id": "43.3", "text": "Sets WriteTimeout on the server"},
+      {"id": "43.4", "text": "Sets IdleTimeout on the server"}
+    ]
+  },
+  {
+    "id": 44,
+    "name": "http-keepalive-crawler",
+    "description": "Tests knowledge of disabling keep-alive for crawlers hitting many hosts",
+    "prompt": "Our Go web crawler scrapes 100,000 different domains. After running for a while, it runs out of file descriptors. The crawler uses an http.Client with tuned Transport:\n\n```go\nvar crawlerClient = &http.Client{\n    Timeout: 10 * time.Second,\n    Transport: &http.Transport{\n        MaxIdleConns:        1000,\n        MaxIdleConnsPerHost: 10,\n        IdleConnTimeout:     90 * time.Second,\n    },\n}\n```\n\nWhy does it exhaust file descriptors?",
+    "trap": "For crawlers hitting many different hosts, idle connections accumulate without ever being reused: each host may keep up to 10 idle connections (MaxIdleConnsPerHost) for up to 90s (IdleConnTimeout), and up to 1000 in total (MaxIdleConns), yet a crawler rarely returns to the same host. With 100K hosts, connections pile up. The skill teaches DisableKeepAlives: true for this use case. Without it, the model may suggest lowering MaxIdleConnsPerHost instead of disabling keep-alive entirely.",
+    "assertions": [
+      {"id": "44.1", "text": "Identifies that idle connections accumulate across many different hosts"},
+      {"id": "44.2", "text": "Suggests DisableKeepAlives: true for the crawler client"},
+      {"id": "44.3", "text": "Explains that keep-alive is counterproductive when crawling many unique hosts"}
+    ]
+  },
+  {
+    "id": 45,
+    "name": "buffered-io-syscall-reduction",
+    "description": "Tests whether the model uses buffered I/O when writing a new file-writing function that emits many small records",
+    "prompt": "Write a Go function that appends audit log entries to an open file. Each entry is a short string (~100 bytes). The function is called in a tight loop and may write thousands of entries per second.\n\n```go\nfunc WriteAuditEntries(f *os.File, entries []string) error {\n    // TODO\n}\n```\n\nImplement this function.",
+    "trap": "The natural implementation uses f.WriteString(entry) or fmt.Fprintln(f, entry) directly on the *os.File in a loop — that is what every beginner and intermediate Go example does. Each such call issues a separate syscall, which at thousands of entries per second is extremely expensive. The skill teaches wrapping with bufio.NewWriter(f) to batch writes into larger chunks and call Flush() at the end. Without the skill the model writes the direct unbuffered version.",
+    "assertions": [
+      {"id": "45.1", "text": "Wraps the file with bufio.NewWriter (or bufio.NewWriterSize) before writing"},
+      {"id": "45.2", "text": "Writes entries through the buffered writer, not directly to the *os.File"},
+      {"id": "45.3", "text": "Calls w.Flush() after all entries are written"},
+      {"id": "45.4", "text": "Does NOT write each entry directly to f with f.WriteString or fmt.Fprintln(f, ...) in a loop without buffering"}
+    ]
+  },
+  {
+    "id": 46,
+    "name": "concurrent-pipeline-when-not-to-use",
+    "description": "Tests knowledge of when concurrent pipelines are NOT beneficial",
+    "prompt": "Our Go data pipeline has 3 stages. We want to make it faster by running stages concurrently:\n\n1. Stage A: Compress data (CPU-bound)\n2. Stage B: Encrypt data (CPU-bound)\n3. Stage C: Calculate checksum (CPU-bound)\n\nAll stages are CPU-bound. Should we run them concurrently in goroutines with channels between stages?",
+    "trap": "When all stages compete for the same resource (CPU), concurrency adds context-switching overhead with no resource utilization gain. The skill explicitly says 'If A and B both compete for CPU, concurrency causes context-switching overhead with no resource utilization gain.' Without it, the model may recommend the concurrent pipeline pattern.",
+    "assertions": [
+      {"id": "46.1", "text": "Recommends AGAINST concurrent pipelines for this case"},
+      {"id": "46.2", "text": "Explains that all three stages compete for the same resource (CPU)"},
+      {"id": "46.3", "text": "Notes that concurrency only helps when stages saturate DIFFERENT resources"},
+      {"id": "46.4", "text": "Mentions context-switching overhead as a cost of unnecessary concurrency"},
+      {"id": "46.5", "text": "Suggests sequential processing or batching as a simpler alternative"}
+    ]
+  },
+  {
+    "id": 47,
+    "name": "batch-db-inserts",
+    "description": "Tests whether the model uses batch inserts (not row-by-row) when writing a new bulk ingestion function",
+    "prompt": "Write a Go function that persists a slice of sensor readings to a PostgreSQL database. The function is called every second with ~1000 readings.\n\n```go\ntype Reading struct {\n    SensorID  string\n    Value     float64\n    Timestamp time.Time\n}\n\nfunc SaveReadings(db *sql.DB, readings []Reading) error {\n    // TODO\n}\n```\n\nImplement this function.",
+    "trap": "The natural implementation iterates over readings and calls db.Exec(INSERT ...) once per reading — that is what every Go+SQL tutorial shows and what most developers write first. At 1000 readings/second that means 1000 round-trips/second: 1000 query parses, 1000 network round-trips, 1000 transaction commits. The skill teaches batching: a single multi-row INSERT or the COPY protocol in one round-trip. Without the skill the model writes the per-row loop with one INSERT per reading.",
+    "assertions": [
+      {"id": "47.1", "text": "Does NOT call db.Exec with a single-row INSERT inside a for-loop over all readings"},
+      {"id": "47.2", "text": "Uses a batching strategy: multi-row VALUES clause, COPY protocol, or chunked batch inserts"},
+      {"id": "47.3", "text": "Reduces the number of database round-trips to O(1) or O(n/batchSize) rather than O(n)"},
+      {"id": "47.4", "text": "Wraps the batch operation in a transaction"},
+      {"id": "47.5", "text": "Explains that per-row inserts cause one round-trip per record, which is the primary bottleneck at 1000 records/second"}
+    ]
+  },
+  {
+    "id": 48,
+    "name": "panic-recover-control-flow",
+    "description": "Tests knowledge that panic/recover should not be used for control flow",
+    "prompt": "Review this Go parsing function for performance:\n\n```go\nfunc SafeParse(s string) (result int, err error) {\n    defer func() {\n        if r := recover(); r != nil {\n            err = fmt.Errorf(\"parse failed: %v\", r)\n        }\n    }()\n    return strconv.Atoi(s)\n}\n```\n\nThis is called 100K times per second with a mix of valid and invalid inputs.",
+    "trap": "strconv.Atoi returns an error, not a panic. The defer/recover is unnecessary overhead: panic allocates a stack trace and unwinds the stack. The skill specifically warns against panic/recover as control flow. Without it, the model may accept the pattern as defensive programming.",
+    "assertions": [
+      {"id": "48.1", "text": "Identifies that panic/recover is unnecessary since strconv.Atoi returns errors"},
+      {"id": "48.2", "text": "Explains that panic allocates a stack trace and unwinds the stack (10-100x overhead)"},
+      {"id": "48.3", "text": "Suggests using simple error checking: v, err := strconv.Atoi(s)"},
+      {"id": "48.4", "text": "States that panic/recover should only be used for truly unrecoverable situations"}
+    ]
+  },
+  {
+    "id": 49,
+    "name": "monotonic-time-since",
+    "description": "Tests whether the model uses time.Since (monotonic clock) rather than wall-clock subtraction when writing a new duration measurement",
+    "prompt": "Write a Go middleware that records the latency of each HTTP request in milliseconds and logs it. The service runs in a cloud environment where NTP adjustments happen occasionally.\n\n```go\nfunc LatencyMiddleware(next http.Handler) http.Handler {\n    return http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) {\n        // TODO: measure and log request latency\n        next.ServeHTTP(w, r)\n    })\n}\n```\n\nImplement this middleware.",
+    "trap": "A common pattern for measuring time in Go that looks correct but is subtly fragile is: 'start := time.Now().UTC(); ...; elapsed := time.Now().UTC().Sub(start)'. Calling .UTC() strips the monotonic clock reading, leaving only the wall clock. An NTP adjustment during the request would then produce incorrect (or negative) latency measurements. The skill teaches that time.Since(start) uses the monotonic clock and is immune to wall-clock adjustments. Without the skill the model may call .UTC() or .Unix() on the start time, discarding the monotonic component.",
+    "assertions": [
+      {"id": "49.1", "text": "Captures start time with time.Now() without stripping the monotonic component (no .UTC(), .Unix(), or .Round() on the start time)"},
+      {"id": "49.2", "text": "Computes elapsed time using time.Since(start) or end.Sub(start) where both times come from time.Now()"},
+      {"id": "49.3", "text": "Does NOT call .UTC() or .UnixNano() on the start time before computing the elapsed duration (which would strip the monotonic clock)"},
+      {"id": "49.4", "text": "Explains that time.Since uses the monotonic clock, making it immune to NTP or wall-clock adjustments"}
+    ]
+  },
+  {
+    "id": 50,
+    "name": "prometheus-gc-pressure-queries",
+    "description": "Tests knowledge of specific PromQL queries for GC pressure monitoring",
+    "prompt": "Our Go service in production has occasional latency spikes. We suspect GC pauses. We have Prometheus monitoring with default Go metrics. What PromQL queries should we use to diagnose GC pressure?",
+    "trap": "The skill provides specific PromQL queries for GC diagnosis. Without it, the model may suggest generic approaches or incorrect metric names. The key queries are rate(go_gc_duration_seconds_count[5m]) for frequency and the worst-case pause quantile.",
+    "assertions": [
+      {"id": "50.1", "text": "Provides rate(go_gc_duration_seconds_count[5m]) for GC frequency"},
+      {"id": "50.2", "text": "Provides go_gc_duration_seconds{quantile=\"1\"} for worst-case GC pause"},
+      {"id": "50.3", "text": "Mentions >2 cycles/s sustained as a signal of excessive allocation rate"},
+      {"id": "50.4", "text": "Suggests rate(go_memstats_alloc_bytes_total[5m]) for allocation rate monitoring"}
+    ]
+  },
+  {
+    "id": 51,
+    "name": "goroutine-leak-prometheus",
+    "description": "Tests knowledge of PromQL for detecting goroutine leaks",
+    "prompt": "We suspect our Go service has a goroutine leak in production. The service gets slower over time and eventually needs to be restarted. What Prometheus queries can confirm a goroutine leak?",
+    "trap": "The skill provides specific goroutine leak PromQL queries. go_goroutines should correlate with load; growing independently of traffic indicates a leak. delta(go_goroutines[1h]) shows net change.",
+    "assertions": [
+      {"id": "51.1", "text": "Provides go_goroutines metric for goroutine count monitoring"},
+      {"id": "51.2", "text": "Suggests delta(go_goroutines[1h]) for detecting net goroutine increase over time"},
+      {"id": "51.3", "text": "Notes that goroutine count should correlate with load — growing independently means leak"}
+    ]
+  },
+  {
+    "id": 52,
+    "name": "continuous-profiling-tools",
+    "description": "Tests knowledge of continuous profiling tools and their tradeoffs",
+    "prompt": "We want to detect performance regressions across deployments in production. Our Go service runs on Kubernetes. We need historical profiling data to compare flamegraphs between versions. What tools should we evaluate?",
+    "trap": "The skill lists specific continuous profiling tools with overhead and best-for guidance: Grafana Pyroscope (push/pull, 2-5%), Parca (eBPF, <1%), Datadog, GCP Profiler. Without it, the model may only suggest pprof endpoints without mentioning continuous profiling platforms.",
+    "assertions": [
+      {"id": "52.1", "text": "Recommends Grafana Pyroscope, Parca, or similar continuous profiling platform"},
+      {"id": "52.2", "text": "Mentions overhead estimates (1-5% range)"},
+      {"id": "52.3", "text": "Describes push vs pull collection modes"},
+      {"id": "52.4", "text": "Mentions historical flamegraph comparison as a key feature"},
+      {"id": "52.5", "text": "Suggests feeding profiles into PGO for build optimization"}
+    ]
+  },
+  {
+    "id": 53,
+    "name": "gogc-high-vs-low-tradeoff",
+    "description": "Tests whether the model avoids recommending GOGC=200 for a latency-sensitive service when tuning GC",
+    "prompt": "A colleague reviewed our Go API service (p99 latency target: 5ms, running in a pod with 4GB memory limit) and recommended setting GOGC=200 to reduce GC overhead. The service currently uses ~800MB heap. Should we follow this advice?\n\nThe service currently runs with the default GOGC=100.",
+    "trap": "GOGC=200 doubles the heap growth allowed before the next GC cycle. For a throughput-oriented batch processor this is correct advice. But for a latency-sensitive API it is the wrong direction: larger heap means GC pauses take longer when they do occur, which hurts p99 latency. The skill explicitly teaches GOGC=50 for latency-sensitive services (more frequent but shorter pauses) and GOGC=200 for throughput-oriented batch processors. Without the skill the model may blindly agree with the suggestion since 'reducing GC overhead' sounds universally good.",
+    "assertions": [
+      {"id": "53.1", "text": "Recommends AGAINST GOGC=200 for this latency-sensitive API"},
+      {"id": "53.2", "text": "Explains that higher GOGC allows a larger heap, which means longer GC pauses when they occur — hurting p99 latency"},
+      {"id": "53.3", "text": "Suggests GOGC=50 (or lower than default) for latency-sensitive services: more frequent but shorter pauses"},
+      {"id": "53.4", "text": "Notes that GOGC=200 is appropriate for throughput-oriented batch processors, not latency-sensitive APIs"}
+    ]
+  },
+  {
+    "id": 54,
+    "name": "godebug-gctrace",
+    "description": "Tests knowledge of GODEBUG=gctrace=1 output interpretation",
+    "prompt": "We ran our Go service with GODEBUG=gctrace=1 and got this output:\n\n```\ngc 142 @23.456s 12%: 0.015+89+1.2 ms clock, 0.3+72/150/30+24 ms cpu, 180->340->200 MB, 400 MB goal, 8 P\n```\n\nInterpret this GC trace line. What does it tell us about the service's health?",
+    "trap": "The skill provides a field-by-field breakdown of gctrace output. The 12% GC CPU share, the 89ms concurrent mark phase (the stop-the-world pauses are only 0.015ms and 1.2ms), and the 180->340->200 MB heap growth are concerning. Without the skill, the model may not correctly parse all fields or identify the implications.",
+    "assertions": [
+      {"id": "54.1", "text": "Correctly identifies gc 142 as the 142nd GC cycle"},
+      {"id": "54.2", "text": "Identifies 12% as the percentage of time spent in GC since program start (which is high)"},
+      {"id": "54.3", "text": "Interprets 180->340->200 MB as heap size at GC start, heap size at GC end, and live heap after collection"},
+      {"id": "54.4", "text": "Identifies 400 MB goal as the target heap size based on GOGC/GOMEMLIMIT"},
+      {"id": "54.5", "text": "Notes that 12% GC CPU is concerning and suggests reducing allocation rate or tuning GOGC"}
+    ]
+  },
+  {
+    "id": 55,
+    "name": "unsafe-without-benchmark-proof",
+    "description": "Tests that the model warns against premature unsafe usage",
+    "prompt": "A colleague proposed using the unsafe package to avoid the string-to-byte-slice copy in our Go HTTP handler:\n\n```go\nfunc unsafeStringToBytes(s string) []byte {\n    return unsafe.Slice(unsafe.StringData(s), len(s))\n}\n\nfunc HandleRequest(w http.ResponseWriter, r *http.Request) {\n    body := unsafeStringToBytes(requestBody)\n    w.Write(body)\n}\n```\n\nIs this a good optimization? The handler processes about 100 requests per second.",
+    "trap": "The skill states unsafe is 'Only justified when profiling shows >10% improvement in a verified hot path.' At 100 req/s, this is not a hot path and the copy cost is negligible. Without the skill, the model may accept the optimization or only warn about safety without the benchmark threshold.",
+    "assertions": [
+      {"id": "55.1", "text": "Recommends against using unsafe here"},
+      {"id": "55.2", "text": "Notes that 100 req/s is not a hot path where this optimization is justified"},
+      {"id": "55.3", "text": "States that unsafe requires benchmark proof showing >10% improvement"},
+      {"id": "55.4", "text": "Mentions safety risks of unsafe (mutating string backing store, GC interaction)"}
+    ]
+  },
+  {
+    "id": 56,
+    "name": "precomputed-lookup-table",
+    "description": "Tests knowledge of precomputed lookup tables for pure functions with small input space",
+    "prompt": "Optimize this Go hex encoding function that's called billions of times in our data pipeline:\n\n```go\nfunc byteToHex(b byte) (byte, byte) {\n    high := b >> 4\n    low := b & 0x0f\n    var h, l byte\n    if high < 10 {\n        h = '0' + high\n    } else {\n        h = 'a' + high - 10\n    }\n    if low < 10 {\n        l = '0' + low\n    } else {\n        l = 'a' + low - 10\n    }\n    return h, l\n}\n```",
+    "trap": "The function is pure with a 16-element input space per nibble. The skill teaches precomputed lookup tables for this exact pattern. Two array lookups are faster than conditional branches. Without the skill, the model may suggest bitwise tricks rather than the lookup table approach.",
+    "assertions": [
+      {"id": "56.1", "text": "Suggests a precomputed lookup table (e.g., var hexDigit = [16]byte{...})"},
+      {"id": "56.2", "text": "Shows the table lookup: hexDigit[b>>4], hexDigit[b&0x0f]"},
+      {"id": "56.3", "text": "Explains that the lookup table fits in L1 cache and is faster than branching"}
+    ]
+  },
+  {
+    "id": 57,
+    "name": "json-performance-alternatives",
+    "description": "Tests knowledge of JSON performance alternatives beyond encoding/json",
+    "prompt": "Our Go API server spends 40% of CPU time in encoding/json according to pprof. We serialize thousands of Response structs per second. What options do we have to speed up JSON encoding?\n\n```go\ntype Response struct {\n    ID        int       `json:\"id\"`\n    Name      string    `json:\"name\"`\n    Values    []float64 `json:\"values\"`\n    Metadata  map[string]string `json:\"metadata\"`\n    CreatedAt time.Time `json:\"created_at\"`\n}\n```",
+    "trap": "The skill lists specific alternatives: custom MarshalJSON methods, code-gen libraries (easyjson, ffjson), drop-in replacements (goccy/go-json, json-iterator, bytedance/sonic), and experimental encoding/json/v2. Without it, the model may only suggest one approach.",
+    "assertions": [
+      {"id": "57.1", "text": "Mentions custom MarshalJSON/UnmarshalJSON methods as an option"},
+      {"id": "57.2", "text": "Mentions code-generation libraries (easyjson, ffjson)"},
+      {"id": "57.3", "text": "Mentions drop-in replacement libraries (goccy/go-json, json-iterator, or bytedance/sonic)"},
+      {"id": "57.4", "text": "Explains that encoding/json uses reflection which causes CPU and allocation overhead"},
+      {"id": "57.5", "text": "Quantifies expected improvement (2-5x or similar)"}
+    ]
+  },
+  {
+    "id": 58,
+    "name": "channel-batch-processing",
+    "description": "Tests knowledge of batch processing from channels with timeout flush",
+    "prompt": "Our Go service receives events from a channel and needs to batch them for bulk database insert. Events arrive at varying rates. We need to flush either when the batch is full (1000 items) OR after a timeout (100ms), whichever comes first.\n\nDesign the batch processor function.",
+    "trap": "The skill shows the exact pattern: select on channel + ticker, with batch accumulation, flush on size threshold or timer. The key detail is reusing batch via batch[:0] and handling the channel close. Without the skill, the model may miss the ticker-based timeout or the clean shutdown.",
+    "assertions": [
+      {"id": "58.1", "text": "Uses select with both channel receive and ticker/timer for timeout"},
+      {"id": "58.2", "text": "Flushes on batch size threshold"},
+      {"id": "58.3", "text": "Flushes on timeout (ticker)"},
+      {"id": "58.4", "text": "Handles channel close (flushes remaining items)"},
+      {"id": "58.5", "text": "Reuses the batch slice (batch[:0] or similar) to reduce allocations"}
+    ]
+  },
+  {
+    "id": 59,
+    "name": "allocation-reduction-vs-gc-tuning",
+    "description": "Tests knowledge that reducing allocations is better than tuning GOGC",
+    "prompt": "Our Go service has high GC overhead (8% CPU). Should we increase GOGC to reduce GC frequency, or is there a better approach?",
+    "trap": "The skill explicitly states: 'Reducing allocations helps more than tuning GOGC — it addresses the root cause instead of managing the symptom.' Without it, the model may recommend GOGC tuning as the primary solution.",
+    "assertions": [
+      {"id": "59.1", "text": "Recommends reducing allocations as the primary approach over GOGC tuning"},
+      {"id": "59.2", "text": "Explains that GOGC tuning manages the symptom while allocation reduction addresses the root cause"},
+      {"id": "59.3", "text": "Suggests specific allocation reduction strategies (value types, sync.Pool, preallocation, avoid interface boxing)"},
+      {"id": "59.4", "text": "Acknowledges GOGC tuning as a secondary measure after allocation reduction"}
+    ]
+  },
+  {
+    "id": 60,
+    "name": "gctrace-key-fields",
+    "description": "Tests knowledge of what to monitor in GC traces (frequency, pause times, CPU%)",
+    "prompt": "We're monitoring our Go service with GODEBUG=gctrace=1. What specific patterns in the output should alarm us?",
+    "trap": "The skill lists three key signals: GC frequency (too often = too many allocations), pause times (high = large heap or many pointers), CPU% (high = tune GOGC or reduce allocations). Without it, the model may give generic advice.",
+    "assertions": [
+      {"id": "60.1", "text": "Mentions high GC frequency as a signal of too many allocations"},
+      {"id": "60.2", "text": "Mentions high pause times as a signal of large heap or many pointers"},
+      {"id": "60.3", "text": "Mentions high GC CPU% (>5%) as concerning"},
+      {"id": "60.4", "text": "Provides the GODEBUG=gctrace=1 command or assumes it's already running"}
+    ]
+  },
+  {
+    "id": 61,
+    "name": "non-go-memory-leak-detection",
+    "description": "Tests knowledge of detecting non-Go memory leaks via Prometheus",
+    "prompt": "Our Go service uses cgo to call a C image processing library. process_resident_memory_bytes keeps growing but go_memstats_alloc_bytes is stable. What PromQL query helps diagnose this?",
+    "trap": "The skill provides the specific PromQL: process_resident_memory_bytes - go_memstats_sys_bytes. A growing gap indicates non-Go memory (cgo, mmap). Without it, the model may suggest Go heap tools that won't find the C memory leak.",
+    "assertions": [
+      {"id": "61.1", "text": "Suggests process_resident_memory_bytes - go_memstats_sys_bytes to isolate non-Go memory"},
+      {"id": "61.2", "text": "Identifies this as a likely C/cgo memory leak (not a Go leak)"},
+      {"id": "61.3", "text": "Explains that growing gap between RSS and Go sys bytes indicates non-Go memory growth"},
+      {"id": "61.4", "text": "Suggests C-level memory profiling tools (valgrind, AddressSanitizer) for further diagnosis"}
+    ]
+  },
+  {
+    "id": 62,
+    "name": "cpu-saturation-prometheus",
+    "description": "Tests knowledge of detecting CPU saturation via Prometheus",
+    "prompt": "How do we detect if our Go service is CPU-saturated in production using Prometheus metrics?",
+    "trap": "The skill provides specific PromQL: rate(process_cpu_seconds_total[5m]) / GOMAXPROCS. A ratio >0.8 sustained means CPU-saturated. Without the skill, the model may suggest system-level metrics rather than Go-specific ones.",
+    "assertions": [
+      {"id": "62.1", "text": "Provides rate(process_cpu_seconds_total[5m]) for CPU cores consumed"},
+      {"id": "62.2", "text": "Divides by GOMAXPROCS to get utilization ratio"},
+      {"id": "62.3", "text": "States that >0.8 sustained indicates CPU saturation"}
+    ]
+  },
+  {
+    "id": 63,
+    "name": "document-optimizations",
+    "description": "Tests whether the model recommends documenting optimizations with comments",
+    "prompt": "I optimized a Go function from using reflect.DeepEqual to a hand-written comparison, and from column-first to row-first matrix traversal. Should I add comments explaining why?",
+    "trap": "The skill's core philosophy #3 states: 'Document optimizations — add code comments explaining why a pattern is faster, with benchmark numbers when available. Future readers need context to avoid reverting an unnecessary optimization.' Without it, the model may skip this guidance.",
+    "assertions": [
+      {"id": "63.1", "text": "Strongly recommends adding comments explaining WHY the optimization was made"},
+      {"id": "63.2", "text": "Suggests including benchmark numbers in the comments"},
+      {"id": "63.3", "text": "Explains that future readers may revert optimizations they don't understand"}
+    ]
+  },
+  {
+    "id": 64,
+    "name": "lru-cache-freelru",
+    "description": "Tests knowledge of high-performance LRU cache alternatives",
+    "prompt": "We need a bounded LRU cache in Go for our hot path. We considered using container/list from the standard library. Is that the best option for performance?",
+    "trap": "The skill mentions that container/list has poor cache locality (each node is a separate heap allocation). It recommends elastic/go-freelru (37x faster, contiguous memory) or hashicorp/golang-lru. Without it, the model may recommend container/list as sufficient.",
+    "assertions": [
+      {"id": "64.1", "text": "Notes that container/list has poor cache locality (separate heap allocation per node)"},
+      {"id": "64.2", "text": "Recommends elastic/go-freelru or hashicorp/golang-lru as alternatives"},
+      {"id": "64.3", "text": "Mentions the performance advantage of contiguous memory layouts for LRU"}
+    ]
+  },
+  {
+    "id": 65,
+    "name": "simd-when-not-worth",
+    "description": "Tests whether the model avoids recommending SIMD when the real bottleneck is allocations, not CPU arithmetic",
+    "prompt": "Our Go image thumbnail service resizes JPEG images. pprof shows:\n- 55% of CPU time in runtime.mallocgc and runtime.gcBgMarkWorker\n- 20% in image/jpeg.Decode\n- 15% in image.(*NRGBA).At (per-pixel reads using the image.Image interface)\n- 10% other\n\nA performance consultant recommended implementing SIMD pixel-processing routines to speed up the resize operation. Should we pursue SIMD as our first optimization?",
+    "trap": "55% of CPU time is in GC — the bottleneck is heap allocations, not arithmetic throughput. SIMD accelerates CPU-bound numeric operations; it has zero effect on allocation rate or GC pauses. Additionally, the 15% in image.Image interface calls is an interface boxing / reflection overhead issue, not a SIMD opportunity. The skill explicitly states 'If your bottleneck is allocations or I/O, SIMD won't help.' Without the skill the model may agree with the consultant since image processing 'sounds like' a SIMD use case.",
+    "assertions": [
+      {"id": "65.1", "text": "Recommends against SIMD as the first optimization"},
+      {"id": "65.2", "text": "Identifies the primary bottleneck as heap allocations and GC (55% of CPU in mallocgc/GC)"},
+      {"id": "65.3", "text": "States that SIMD only helps CPU-bound numeric inner loops, not allocation or GC overhead"},
+      {"id": "65.4", "text": "Suggests reducing allocations (e.g., reusing pixel buffers, avoiding per-pixel interface calls) as the correct first step"}
+    ]
+  },
+  {
+    "id": 66,
+    "name": "set-map-struct-zero-size",
+    "description": "Tests whether the model uses struct{} (not bool) for set map values when writing a new deduplication function",
+    "prompt": "Write a Go function that deduplicates a large slice of string event IDs. The slice may contain up to 5 million entries. Return a new slice with duplicates removed, preserving order.\n\n```go\nfunc Deduplicate(ids []string) []string {\n    // TODO\n}\n```\n\nImplement this function.",
+    "trap": "The natural instinct when building a 'seen' set in Go is to use map[string]bool, since bool reads naturally: 'if seen[id] { ... }'. This is what most Go developers write and what most deduplication examples show. The skill specifically teaches using map[string]struct{} instead: struct{} occupies 0 bytes vs bool at 1 byte per entry, saving ~5MB for 5M entries. Without the skill the model writes map[string]bool.",
+    "assertions": [
+      {"id": "66.1", "text": "Uses map[string]struct{} (not map[string]bool) as the seen-set type"},
+      {"id": "66.2", "text": "Does NOT use map[string]bool for the membership tracking map"},
+      {"id": "66.3", "text": "Explains that struct{} occupies 0 bytes vs bool at 1 byte, saving memory at large scale"}
+    ]
+  },
+  {
+    "id": 67,
+    "name": "regression-detection-prometheus",
+    "description": "Tests knowledge of PromQL queries for deployment regression detection",
+    "prompt": "We just deployed a new version of our Go service. How can we use Prometheus to detect if this deployment introduced a performance regression?",
+    "trap": "The skill provides specific regression detection PromQL: rate(go_memstats_alloc_bytes_total[5m]) for allocation rate comparison and histogram_quantile(0.99, ...) for p99 latency. Without it, the model may suggest generic monitoring.",
+    "assertions": [
+      {"id": "67.1", "text": "Suggests comparing rate(go_memstats_alloc_bytes_total[5m]) before and after deploy"},
+      {"id": "67.2", "text": "Suggests monitoring p99 latency histogram_quantile for increase after deploy"},
+      {"id": "67.3", "text": "Mentions comparing metrics between old and new deployment versions"}
+    ]
+  },
+  {
+    "id": 68,
+    "name": "statsviz-development-profiling",
+    "description": "Tests knowledge of real-time development visualization tools",
+    "prompt": "I'm developing a Go service locally and want to see real-time GC behavior, heap usage, and goroutine count in a browser dashboard without setting up Prometheus/Grafana. What tool can I use?",
+    "trap": "The skill specifically mentions statsviz (github.com/arl/statsviz) for real-time browser dashboard during local development. Without it, the model may suggest full monitoring stacks or pprof web UI which doesn't provide real-time visualization.",
+    "assertions": [
+      {"id": "68.1", "text": "Recommends statsviz (github.com/arl/statsviz) for real-time browser visualization"},
+      {"id": "68.2", "text": "Mentions the /debug/statsviz endpoint or statsviz.Register pattern"},
+      {"id": "68.3", "text": "Notes that it shows heap, GC pauses, goroutines, and scheduler in real-time"}
+    ]
+  }
+]
diff --git a/.agents/skills/golang-performance/references/caching.md b/.agents/skills/golang-performance/references/caching.md
new file mode 100644
index 0000000..e91b15a
--- /dev/null
+++ b/.agents/skills/golang-performance/references/caching.md
@@ -0,0 +1,183 @@
+# Caching Patterns
+
+The fastest code is code that doesn't run. Caching pre-computed results, deduplicating concurrent requests, and avoiding unnecessary work are often the highest-leverage performance improvements.
+
+## Compiled Pattern Caching
+
+**Diagnose:** 1- `go tool pprof` (CPU profile) — look for `regexp.Compile`, `regexp.MustCompile`, or `template.Parse` appearing in hot paths; their presence means patterns are being recompiled per call instead of once 2- `go test -bench -benchmem` — benchmark per-call compilation vs cached version; expect 10-12x improvement and allocs/op dropping to zero for the compilation step
+
+### Regexp at package level
+
+`regexp.Compile` parses a pattern into a state machine — ~5,700ns per compilation. Match operations on a compiled regexp cost ~450ns. Compiling per-call wastes 10-12x:
+
+```go
+// Bad — compiled on every call
+func isValid(email string) bool {
+    re := regexp.MustCompile(`^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}$`)
+    return re.MatchString(email)
+}
+
+// Good — compiled once, safe for concurrent use
+var emailRegex = regexp.MustCompile(`^[a-z0-9._%+-]+@[a-z0-9.-]+\.[a-z]{2,}$`)
+
+func isValid(email string) bool { return emailRegex.MatchString(email) }
+```
+
+Note: `regexp.MustCompile` panics on invalid patterns — fine for package-level constants (caught at startup). Use `regexp.Compile` for user-provided patterns. Go's regexp uses linear-time matching (no backtracking).
+
+### Template caching
+
+`template.Parse` is equally expensive. Parse once at startup:
+
+```go
+var reportTmpl = template.Must(template.ParseFiles("templates/report.html"))
+```
+
+### Precomputed lookup tables
+
+When a computation is pure (same input → same output) and the input space is small, replace calculation with array lookup:
+
+```go
+var hexDigit = [16]byte{'0','1','2','3','4','5','6','7','8','9','a','b','c','d','e','f'}
+
+func byteToHex(b byte) (byte, byte) {
+    return hexDigit[b>>4], hexDigit[b&0x0f] // two array lookups vs branching logic
+}
+```
+
+If the table fits in L1/L2 cache, lookup is faster than even simple computation.
+
+## Request-Level Caching
+
+**Diagnose:** 1- `go tool pprof` (goroutine profile) — look for many goroutines blocked on the same external call (HTTP fetch, DB query); this signals a cache stampede where N goroutines all miss the cache simultaneously 2- `fgprof` — shows off-CPU wait time; look for the same fetch function dominating wall-clock time across many goroutines, confirming duplicated concurrent work 3- `go tool pprof -alloc_objects` — check if cache miss handling allocates heavily; high alloc counts on fetch functions confirm the stampede is also generating GC pressure
+
+### singleflight for cache stampede prevention
+
+When a cache entry expires, many goroutines may simultaneously discover the miss and all request the same expensive computation. `singleflight` ensures only one goroutine fetches while others wait:
+
+```go
+import "golang.org/x/sync/singleflight"
+
+var (
+    cache sync.Map
+    sf    singleflight.Group
+)
+
+func GetWeather(city string) (string, error) {
+    if val, ok := cache.Load(city); ok {
+        return val.(string), nil
+    }
+
+    // Only one goroutine fetches; others block on the same key
+    result, err, _ := sf.Do(city, func() (any, error) {
+        data, err := fetchFromAPI(city)
+        if err == nil { cache.Store(city, data) }
+        return data, err
+    })
+    return result.(string), err
+}
+```
+
+→ See `samber/cc-skills-golang@golang-concurrency` skill for `singleflight` API details and `sync.Map` vs `RWMutex` decision guidance. → **Generics alternative:** Use `github.com/samber/go-singleflightx` to avoid interface{} boxing overhead; expect 2-4x faster result retrieval compared to the standard library's `singleflight.Group`.
+
+### LRU caches
+
+For bounded caches with eviction, the standard library's `container/list` works but has poor cache locality (each node is a separate heap allocation). For high-performance LRU:
+
+- **`github.com/hashicorp/golang-lru`** — thread-safe, simple API
+- **`github.com/elastic/go-freelru`** — merges hashmap and ringbuffer into contiguous memory, ~37x faster than sharded implementations
+
+When using third-party cache libraries, refer to the library's official documentation for current API signatures.
+
+## Algorithmic Complexity
+
+**Diagnose:** 1- `go tool pprof` (CPU profile) — look for functions with high cumulative time that contain nested loops or repeated linear scans; these are algorithmic complexity bottlenecks 2- `go test -bench` — benchmark with different input sizes (100, 1K, 10K, 100K); if time grows quadratically (10x input → 100x time), the algorithm is O(n²) and needs replacement
+
+Before micro-optimizing, check that the algorithm itself isn't the bottleneck. A constant-factor improvement on an O(n²) algorithm loses to a naive O(n log n) implementation at scale.
+
+**Common complexity traps in Go:**
+
+| Pattern | Complexity | Fix | Fixed complexity |
+| --- | --- | --- | --- |
+| `slices.Contains` in a loop | O(n·m) | Build `map[T]struct{}` first, then lookup | O(n+m) |
+| Nested loops for matching | O(n²) | Index with a map, sort+binary search, or `slices.BinarySearch` | O(n log n) or O(n) |
+| Repeated `append` without prealloc | O(n) amortized, but O(log n) reallocations + copies and extra GC pressure | `make([]T, 0, n)` | O(n), single allocation |
+| String concatenation with `+=` | O(n²) total copies | `strings.Builder` | O(n) |
+| Linear scan for min/max/dedup | O(n) per query | Sort once, query many times | O(n log n) + O(log n) per query |
+
+**Think in Big-O first, then optimize constants.** A 10x constant-factor improvement matters; switching from O(n²) to O(n) matters more.
+
+## Work Avoidance
+
+**Diagnose:** 1- `go tool pprof` (CPU profile) — look for linear scan functions (`slices.Contains`, `slices.Index`) or iterator chains (`Filter`, `Map`) consuming CPU in hot paths 2- `go test -bench` — benchmark the current approach vs a map-based or early-return version; expect O(n) → O(1) for membership tests, significant improvement for short-circuit loops
+
+### Map lookups over slice scanning
+
+`Contains(slice, element)` is O(n). Map lookups are O(1). When doing multiple membership tests against the same collection, build a map once:
+
+```go
+// Bad — O(n*m), checking Contains per element
+for _, item := range subset {
+    if !Contains(collection, item) { return false } // O(n) per check
+}
+
+// Good — O(n+m), build map once, O(1) lookups
+seen := make(map[T]struct{}, len(collection))
+for _, item := range collection { seen[item] = struct{}{} }
+for _, item := range subset {
+    if _, ok := seen[item]; !ok { return false }
+}
+```
+
+Use `struct{}` (0 bytes) instead of `bool` (1 byte) for set maps.
+
+### Early returns and short-circuit loops
+
+Return immediately when the answer is known. Finding the target on iteration 3 of 1000 saves 997 iterations:
+
+```go
+// Bad — always iterates full collection
+found := false
+for _, item := range collection {
+    if item == target { found = true }
+}
+return found
+
+// Good — returns on first match
+for i := range collection {
+    if collection[i] == target { return true }
+}
+return false
+```
+
+### Avoid iterator chains
+
+Chaining iterator operations (`Filter → Map → First`) creates closures and intermediate machinery. A direct loop is simpler and faster:
+
+```go
+// Bad — creates 2 iterators with closures
+result, ok := First(Filter(collection, predicate))
+
+// Good — single pass, early return, no closures
+for i := range collection {
+    if predicate(collection[i]) { return collection[i], true }
+}
+```
+
+### Replace indirect function calls with direct loops
+
+When a function wraps another function (e.g., `FromSlicePtr` calling `Map` with a closure), the closure indirection prevents inlining. Replace with a direct loop:
+
+```go
+// Bad — Map() with closure, per-element function call overhead
+func FromSlicePtr(items []*T) []T {
+    return Map(items, func(p *T) T { return *p })
+}
+
+// Good — direct loop, inlineable, -13% to -17% time
+func FromSlicePtr(items []*T) []T {
+    result := make([]T, len(items))
+    for i := range items { result[i] = *items[i] }
+    return result
+}
+```
diff --git a/.agents/skills/golang-performance/references/cpu.md b/.agents/skills/golang-performance/references/cpu.md
new file mode 100644
index 0000000..fcebfe5
--- /dev/null
+++ b/.agents/skills/golang-performance/references/cpu.md
@@ -0,0 +1,375 @@
+# CPU Optimization
+
+CPU-bound bottlenecks show up as functions dominating the CPU profile. The patterns below target the most common causes: missed inlining opportunities, poor cache utilization, and unnecessary computation.
+
+## Function Inlining
+
+**Diagnose:** 1- `go tool pprof` (CPU profile) — look for hot functions with high cumulative CPU time; if a small helper dominates the profile, it's likely not being inlined 2- `go build -gcflags="-m"` — grep for `"cannot inline"` on your hot-path functions; the reason (e.g., `"function too complex"`, `"unhandled op"`) tells you what to simplify
+
+The Go compiler inlines small functions, eliminating call overhead. Functions that are too complex (loops, many statements, or calls to non-inlineable functions) won't be inlined — this matters in tight loops called millions of times.
+
+```go
+// Bad — log call prevents inlining
+func abs(x int) int {
+    if x < 0 {
+        log.Printf("negative: %d", x) // blocks inlining
+        return -x
+    }
+    return x
+}
+
+// Good — simple enough to inline
+func abs(x int) int {
+    if x < 0 { return -x }
+    return x
+}
+```
+
+**Check inlining decisions:**
+
+```bash
+go build -gcflags="-m" ./... 2>&1 | grep "can inline"
+go build -gcflags="-m" ./... 2>&1 | grep "inlining call"
+```
+
+Move side effects (logging, metrics) outside hot-path functions or guard them with conditional checks.
+
+### Value receivers enable inlining
+
+Value receivers allow the compiler to fully inline fluent method chains. Pointer receivers add indirection that blocks inlining:
+
+```go
+// Pointer receiver — indirection prevents inlining, constant overhead per call
+func (c *config) WithTimeout(d time.Duration) *config { c.timeout = d; return c }
+
+// Value receiver — fully inlined, -80% time in fluent chains
+func (c config) WithTimeout(d time.Duration) config { c.timeout = d; return c }
+```
+
+## Cache Locality
+
+**Diagnose:** 1- `go tool pprof` (CPU profile) — look for loops over slices/matrices consuming disproportionate CPU; cache-miss-heavy code shows high `runtime.memmove` or flat time in simple index operations 2- `go test -bench` — benchmark row-first vs column-first traversal; expect 10-50x difference on large matrices purely from cache effects
+
+Modern CPUs fetch data in 64-byte cache lines. Sequential memory access is dramatically faster than random access because the prefetcher can load the next cache line before you need it.
+
+### Row-major traversal
+
+Go stores 2D arrays in row-major order. Column-first traversal jumps across memory, causing cache misses:
+
+```go
+// Bad — column-first, jumps across memory (~1M cache misses, up to one per element)
+for col := 0; col < 1024; col++ {
+    for row := 0; row < 1024; row++ {
+        sum += matrix[row][col]
+    }
+}
+
+// Good — row-first, sequential access (~125K cache misses)
+for row := 0; row < 1024; row++ {
+    for col := 0; col < 1024; col++ {
+        sum += matrix[row][col]
+    }
+}
+```
+
+Performance difference: 10-50x purely from cache effects.
+
+### Contiguous 2D allocation
+
+Allocating each row separately scatters data across the heap:
+
+```go
+// Bad — N separate allocations, poor cache locality
+matrix := make([][]float64, rows)
+for i := range matrix { matrix[i] = make([]float64, cols) }
+
+// Good — single contiguous allocation, cache-friendly
+data := make([]float64, rows*cols)
+matrix := make([][]float64, rows)
+for i := range matrix { matrix[i] = data[i*cols : (i+1)*cols] }
+```
+
+### Struct of Arrays (SoA) vs Array of Structs (AoS)
+
+When iterating over a single field of a struct, AoS wastes cache space loading unused fields:
+
+```go
+// AoS — loading each Point (24 bytes) to read only x (8 bytes) = 66% cache waste
+type Point struct { x, y, z float64 }
+points := make([]Point, n)
+for i := range points { sum += points[i].x }
+
+// SoA — all x values contiguous, 100% cache utilization
+type Points struct { xs, ys, zs []float64 }
+for i := range ps.xs { sum += ps.xs[i] }
+```
+
+Use SoA when iterating over a subset of fields (physics, graphics, analytics). AoS is fine when accessing all fields together or for small structs.
+
+### Pointer-heavy vs value-heavy data
+
+Index-based data structures (nodes stored in a contiguous array, referenced by index) beat pointer-based structures for cache locality:
+
+```go
+// Pointer-based tree — each node scattered in heap, random cache misses
+type Node struct { value int; left, right *Node }
+
+// Index-based tree — nodes in contiguous array, cache-friendly
+type Tree struct { nodes []Node }
+type Node struct { value int; left, right int } // indices into nodes
+```
+
+## False Sharing
+
+**Diagnose:** 1- `go tool pprof` (CPU profile + mutex profile) — look for atomic operations or counter updates consuming unexpectedly high CPU; in the mutex profile, look for contention on variables that shouldn't need locking 2- `go test -bench` — benchmark concurrent counter increments; if adding goroutines makes it _slower_ instead of faster, false sharing is likely
+
+When goroutines update variables that share the same 64-byte CPU cache line, each write invalidates the other core's cache, causing severe degradation:
+
+```go
+// Bad — a and b on same cache line, cores fight for it
+type Counters struct { a, b int64 }
+
+// Good — separate cache lines, no interference
+type Counters struct {
+    a int64    // 8 bytes
+    _ [56]byte // 64 - 8 = 56 bytes padding
+    b int64    // 8 bytes
+}
+```
+
+Only apply cache-line padding when profiling confirms contention on concurrent counters/flags.
+
+## Instruction-Level Parallelism
+
+**Diagnose:** 1- `go tool pprof` (CPU profile) — look for tight arithmetic loops (sum, dot product) where the loop body itself dominates CPU; these are candidates for multi-accumulator optimization 2- `go test -bench` — benchmark single vs multi-accumulator versions; expect 2-4x improvement when the loop is truly CPU-bound with a dependency chain
+
+Modern CPUs execute multiple independent instructions simultaneously. A single accumulator creates a dependency chain — each addition waits for the previous one:
+
+```go
+// Bad — sequential dependency, CPU pipeline stalls
+var total int64
+for _, v := range data { total += v }
+
+// Good — 4 independent accumulators, CPU pipelines all 4 in parallel
+var s0, s1, s2, s3 int64
+limit := len(data) - len(data)%4
+for i := 0; i < limit; i += 4 {
+    s0 += data[i]; s1 += data[i+1]; s2 += data[i+2]; s3 += data[i+3]
+}
+for i := limit; i < len(data); i++ { s0 += data[i] }
+total := s0 + s1 + s2 + s3
+```
+
+Expect 2-4x improvement for tight arithmetic loops. Only use when profiling shows the loop is a bottleneck.
+
+## SIMD (Single Instruction, Multiple Data)
+
+**Diagnose:** 1- `go tool pprof` (CPU profile) — confirm a numeric inner loop consumes >20% of CPU; SIMD only helps CPU-bound numeric work, not allocation or I/O bottlenecks 2- `go test -bench` — measure the loop's baseline ns/op; provides the reference point to validate SIMD gains 3- `go build -gcflags="-d=ssa/prove/debug=2"` — check if the compiler already auto-vectorized the loop; look for `"Proved"` bounds-check eliminations that enable vectorization 4- `GOSSAFUNC=MyFunc go build` — generate SSA dump (`ssa.html`) to inspect whether the compiler produces vector instructions for the hot loop 5- `go tool objdump -s MyFunc ./binary` — verify the final assembly contains SIMD instructions (e.g., `VMOVAPD`, `VADDPD` on amd64) rather than scalar equivalents
+
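+Go 1.26+ includes an experimental `simd/archsimd` package (enabled with the `GOEXPERIMENT=simd` environment variable) providing low-level SIMD intrinsics for amd64 with 128/256/512-bit vectors. For broader portability, several strategies exist. Note that the standard `gc` compiler performs very little loop auto-vectorization, so always confirm in the generated assembly (`go tool objdump`) that vector instructions are actually emitted rather than assuming it.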
+
+**Options for explicit SIMD in Go:**
+
+- **Experimental `simd/archsimd` (Go 1.26+)** — Direct SIMD intrinsics via vector types (`Int8x16`, `Float64x8`, etc.) with CPU feature detection. Limited to AMD64. Use with caution: experimental API, not covered by Go 1 compatibility guarantees, and should never be exposed in public APIs.
+
+  ```go
+  // Requires: GOEXPERIMENT=simd go build
+  import "simd/archsimd"
+
+  v := archsimd.Int32x4{1, 2, 3, 4}
+  // Operations map directly to hardware instructions
+  ```
+
+- **Let the compiler do it** — write simple, idiomatic loops on `[]float64`/`[]int32` slices. Check auto-vectorization: `go build -gcflags="-d=ssa/prove/debug=2" ./...`
+- **`math/bits`** — operations like `OnesCount`, `LeadingZeros`, `RotateLeft` map directly to hardware instructions (POPCNT, CLZ, ROL)
+- **Hand-written assembly** — `.s` files with AVX2/NEON instructions for critical inner loops. Libraries like `klauspost/compress` and `minio/sha256-simd` use this approach
+- **Third-party vectorized libraries** — for common operations (hashing, compression, encoding), use libraries that already have optimized SIMD implementations rather than writing your own
+
+### Handling CPU-specific instruction sets
+
+Hand-written assembly unlocks higher performance but couples code to specific CPU features (AVX2, NEON, etc.). Three strategies exist:
+
+**1. Compile on a production-similar machine**
+
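+Build binaries on hardware matching your deployment target. Note that the Go compiler itself does not detect the build host's CPU — its instruction-set baseline is selected by `GOAMD64` (`v1`–`v4`), `GOARM64`, and similar environment variables — so this approach only benefits cgo or assembly build steps that probe the host; to raise the baseline for pure Go code, set those variables explicitly. Example of a build on production hardware: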
+
+```bash
+# Compiling on production hardware ensures optimal code generation
+# for that specific CPU architecture and generation
+ssh prod-server "cd /path && go build -o app ."
+```
+
+**Tradeoff:** Simplest approach, but requires access to production hardware and different binaries per CPU type (Intel vs AMD vs Apple Silicon). Breaks CI/CD portability.
+
+**2. Runtime CPU feature detection + multiple implementations**
+
+Implement the function multiple times — one for each CPU capability — and dispatch at runtime:
+
+```go
+// dispatch.go
+var sumImpl func([]int64) int64
+
+func init() {
+    if cpu.X86.HasAVX2 {
+        sumImpl = sumAVX2
+    } else {
+        sumImpl = sumGeneric
+    }
+}
+
+func Sum(data []int64) int64 {
+    return sumImpl(data)
+}
+
+// sum_generic.go
+func sumGeneric(data []int64) int64 {
+    var total int64
+    for _, v := range data { total += v }
+    return total
+}
+
+// sum_amd64.s
+TEXT ·sumAVX2(SB), NOSPLIT, $0-32
+    // AVX2 implementation
+    VMOVAPD (SI), Y0
+    // ...
+```
+
+**Tradeoff:** Single binary works everywhere; trades one indirect function call of dispatch overhead per invocation for full CPU feature utilization. Libraries like `crypto/sha256` (standard library) and `minio/sha256-simd` use this pattern.
+
+**3. Compile-time selection with `//go:build` tags**
+
+Use conditional compilation to generate different code at build time for each target:
+
+```go
+// sum_fast.go
+//go:build amd64 && !nosimd
+
+package mylib
+
+// AVX2 implementation lives in a companion .s file (Go has no inline assembly)
+func Sum(data []int64) int64 {
+    return sumAVX2(data) // or calls to .s file
+}
+
+// sum_generic.go
+//go:build !amd64 || nosimd
+
+package mylib
+
+func Sum(data []int64) int64 {
+    var total int64
+    for _, v := range data { total += v }
+    return total
+}
+```
+
+Build different binaries per target:
+
+```bash
+GOOS=linux GOARCH=amd64 go build -o app-avx2 .     # Uses sum_fast.go
+GOOS=darwin GOARCH=arm64 go build -o app-neon .    # Uses sum_generic.go
+go build -tags=nosimd -o app-safe .                # Fallback everywhere
+```
+
+**Tradeoff:** Zero runtime overhead; each binary is fully optimized for its target. Requires shipping multiple binaries and coordinating which binary runs where.
+
+**When SIMD is NOT worth pursuing:**
+
+- Outside the experimental, amd64-only `simd/archsimd` package, Go has no stable SIMD intrinsics, so explicit SIMD requires hand-written assembly — high maintenance burden, platform-specific, and harder to debug
+- Auto-vectorization covers the most common cases (simple numeric loops)
+- If your bottleneck is allocations or I/O, SIMD won't help
+
+**Recommendation:** Start with auto-vectorization. For Go 1.26+, evaluate `simd/archsimd` for AMD64-only workloads (remembering it's experimental). Move to runtime detection (option 2 above) if profiling shows a bottleneck and the code needs to run on heterogeneous hardware. Only use compile-time selection (option 3) if you control the deployment environment and can test each per-binary variant.
+
+Only invest in hand-written SIMD when profiling shows a numeric inner loop consuming >20% of CPU and the compiler isn't auto-vectorizing it.
+
+## Tight Loops and the Scheduler
+
+**Diagnose:** 1- `go tool pprof` (goroutine profile) — look for many goroutines stuck in `"runnable"` state (waiting for CPU) while one goroutine monopolizes execution 2- `go tool trace` — visualize goroutine scheduling over time; look for long uninterrupted execution spans on one goroutine while others show scheduling gaps 3- `GODEBUG=schedtrace=1000` — print scheduler state every second; look for unbalanced `runqueue` counts across P's indicating one P is starved 4- `runtime/metrics` (`/sched/latencies:seconds`) — measure how long goroutines wait before getting CPU; high p99 latencies confirm starvation 5- Prometheus `rate(process_cpu_seconds_total[2m])` — monitor if CPU usage hits GOMAXPROCS ceiling; if saturated while other goroutines are starved, a tight loop is monopolizing P's
+
+A goroutine running a CPU-intensive tight loop without function calls may not yield to the scheduler, starving other goroutines. Go 1.14+ added asynchronous preemption, but very tight loops with fully inlined operations can still cause issues:
+
+```go
+// Potential starvation — pure computation, no function calls
+for { x = x*a + b }
+
+// Safe — non-inlined call triggers preemption check
+for item := range work {
+    processBatch(item) // function call = preemption point
+}
+```
+
+**When to use non-inlined calls for scheduling:** Use non-inlined function calls when:
+
+- The loop runs for a long time (hundreds of milliseconds or more of uninterrupted computation)
+- Other goroutines are waiting to run (e.g., handling requests, I/O completion, channel operations)
+- The loop contains only arithmetic or memory operations with no function calls
+
+For short bursts of computation (< 10ms), preemption isn't critical and inlining for CPU efficiency takes priority.
+
+**Detecting scheduler starvation:** Use these tools to confirm goroutines are being starved:
+
+- **`go tool pprof` goroutine profile** — shows goroutines stuck in "runnable" state (waiting for CPU). If many goroutines are runnable while one dominates CPU, starvation is happening
+- **`go tool trace`** — visualizes goroutine scheduling over time. Look for gaps where goroutines aren't running because one goroutine monopolized the scheduler
+- **`runtime/metrics` (Go 1.19+)** — measure `/sched/latencies:seconds` to quantify how long goroutines wait for CPU
+- **Observable symptoms** — high response latency, requests timing out, uneven request distribution, goroutine counts climbing
+
+**Preventing inlining with `//go:noinline`:** If you have a function that's normally inlinable (small, hot) but you specifically want it to not inline to force scheduler preemption checks, use the `//go:noinline` compiler directive:
+
+```go
+//go:noinline
+func processBatch(item WorkItem) {
+    // CPU-intensive work here
+    // This call site will NOT be inlined, even if the function is small
+    // The function call itself becomes a preemption point for the scheduler
+}
+
+// In tight loop
+for item := range work {
+    processBatch(item) // Guaranteed preemption point
+}
+```
+
+**Trade-off:** Using `//go:noinline` prevents inlining, which:
+
+- **Pros:** Guarantees scheduler preemption checks; prevents goroutine starvation
+- **Cons:** Adds function call overhead (~10-30 CPU cycles); reduces instruction-level parallelism (ILP) in the caller
+
+Only use `//go:noinline` if profiling shows that scheduler preemption starvation is actually blocking other goroutines. Unnecessary `//go:noinline` directives penalize throughput and latency.
+
+## Reflection and Type Assertions
+
+**Diagnose:** 1- `go tool pprof` (CPU profile) — look for `reflect.Value.*`, `reflect.DeepEqual`, or `fmt.Sprintf` (which uses reflect internally) appearing in hot paths 2- `go test -bench` — compare reflection-based vs typed versions; expect 10-200x difference depending on the reflection operation
+
+- **`reflect` in hot paths** — 10-100x slower due to type introspection and boxing. Replace with generics or typed code
+- **`reflect.DeepEqual`** — 50-200x slower than typed comparisons. Use `slices.Equal`, `maps.Equal`, `bytes.Equal` (Go 1.21+)
+- **Type switch vs repeated assertions** — type switch dispatches in one evaluation:
+
+```go
+// Bad — evaluates interface multiple times
+if s, ok := v.(string); ok { return s }
+if i, ok := v.(int); ok { return strconv.Itoa(i) }
+
+// Good — single dispatch
+switch v := v.(type) {
+case string: return v
+case int:    return strconv.Itoa(v)
+}
+```
+
+## Monotonic Time
+
+**Diagnose:** 1- `go test -bench` — benchmark `time.Since(start)` vs `time.Now().Sub(start)`; expect a small but consistent improvement from monotonic clock avoiding wall-clock syscall
+
+`time.Since(start)` uses the monotonic clock, which is immune to wall-clock adjustments (NTP, DST) and slightly faster:
+
+```go
+var appStart = time.Now() // captures monotonic time + wall-clock on program start
+
+func myFunc() {
+    // Compare durations, not wall-clock times
+    elapsed := time.Since(appStart)
+    if elapsed > threshold { ... }
+}
+```
diff --git a/.agents/skills/golang-performance/references/io-networking.md b/.agents/skills/golang-performance/references/io-networking.md
new file mode 100644
index 0000000..79772c1
--- /dev/null
+++ b/.agents/skills/golang-performance/references/io-networking.md
@@ -0,0 +1,299 @@
+# I/O & Networking Optimization
+
+Network and I/O bottlenecks show up as goroutines blocked on syscalls or waiting for responses. The key levers are connection reuse, proper timeouts, and streaming instead of buffering.
+
+## HTTP Transport Configuration
+
+**Diagnose:** 1- `go tool pprof` (goroutine + block profile) — look for goroutines blocked on `net/http.(*Transport).dialConn` or `net/http.(*persistConn).readLoop`; many goroutines waiting here means connection pool exhaustion 2- `fgprof` — captures both on-CPU and off-CPU wait time; look for HTTP calls dominating wall-clock time even when CPU profile shows them as cheap 3- `go tool trace` — visualize goroutine lifecycles; look for long gaps where goroutines wait for network I/O instead of processing 4- Prometheus `go_goroutines` — monitor goroutine count in production; steadily rising under stable load suggests connection or goroutine leaks from misconfigured HTTP clients
+
+### Connection pooling
+
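+The default `http.Transport` has conservative pool settings — `MaxIdleConnsPerHost` defaults to 2. Under high concurrency against a single host, only 2 connections are kept for reuse; the rest are closed after each request and re-dialed for the next one, so requests keep paying TCP/TLS handshake costs instead of reusing pooled connections: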
+
+```go
+// Bad — default transport, only 2 idle connections per host
+client := &http.Client{}
+
+// Good — tuned for high-concurrency service-to-service calls
+var apiClient = &http.Client{
+    Timeout: 30 * time.Second,
+    Transport: &http.Transport{
+        MaxIdleConns:          100,             // total idle connections across all hosts
+        MaxIdleConnsPerHost:   20,              // per-host idle connections (default is 2!)
+        MaxConnsPerHost:       50,              // cap total connections per host (0 = unlimited)
+        IdleConnTimeout:       90 * time.Second,
+        TLSHandshakeTimeout:  5 * time.Second,
+        ResponseHeaderTimeout: 10 * time.Second,
+    },
+}
+```
+
+For web crawlers hitting many different hosts, disable keep-alive to avoid accumulating idle connections:
+
+```go
+crawlerClient := &http.Client{
+    Transport: &http.Transport{DisableKeepAlives: true},
+}
+```
+
+### Timeouts
+
+The zero-value `http.Client` and `http.Server` have NO timeouts. A slow or malicious peer holds connections open indefinitely, exhausting file descriptors and memory:
+
+```go
+// Server — always set timeouts to prevent Slowloris attacks
+server := &http.Server{
+    Addr:         ":8080",
+    Handler:      handler,
+    ReadTimeout:  5 * time.Second,
+    WriteTimeout: 10 * time.Second,
+    IdleTimeout:  120 * time.Second,
+}
+```
+
+### Drain response body for connection reuse
+
+Connections are only returned to the pool when the body is fully read and closed. Even if you don't need the body, drain it before closing:
+
+```go
+resp, err := client.Get(url)
+if err != nil { return err }
+defer resp.Body.Close()
+_, _ = io.Copy(io.Discard, resp.Body) // drain to enable connection reuse
+```
+
+## Streaming vs Buffering
+
+**Diagnose:** 1- `go tool pprof -inuse_space` — look for large single allocations (MB-sized) from `io.ReadAll`, `bytes.Buffer.Grow`, or `json.Unmarshal`; these indicate buffering entire payloads instead of streaming
+
+### Avoid io.ReadAll for large payloads
+
+`io.ReadAll` loads the entire stream into memory. For large files or HTTP responses, this causes massive memory spikes:
+
+```go
+// Bad — 2GB file = 2GB allocation
+data, _ := io.ReadAll(f)
+
+// Good — process line by line, O(1) memory
+scanner := bufio.NewScanner(f)
+for scanner.Scan() { processLine(scanner.Bytes()) }
+
+// Good — stream between reader and writer (32KB internal buffer)
+io.Copy(w, resp.Body)
+```
+
+`io.ReadAll` is fine for small, bounded payloads (< 1MB) where the size is known.
+
+### Streaming JSON
+
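+Use `json.NewDecoder` for large JSON payloads instead of `json.Unmarshal` (which buffers the entire body). The loop below reads a stream of concatenated JSON values (e.g., NDJSON); for a single top-level array, consume the opening `[` with `dec.Token()` before looping: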
+
+```go
+dec := json.NewDecoder(r)
+for dec.More() {
+    var item Item
+    if err := dec.Decode(&item); err != nil { return err }
+    process(item) // one item at a time
+}
+```
+
+## JSON Performance
+
+**Diagnose:** 1- `go tool pprof` (CPU profile) — look for `encoding/json.(*Decoder).Decode`, `reflect.Value.*`, or `encoding/json.Marshal` consuming significant CPU; these indicate reflection-based JSON is the bottleneck 2- `go test -bench -benchmem` — measure ns/op and allocs/op for marshal/unmarshal; expect high alloc counts from reflection; code-gen alternatives should show 2-5x fewer allocs
+
+The standard `encoding/json` package uses reflection to inspect struct fields at runtime. For high-throughput services, this creates significant CPU and allocation overhead.
+
+**Options for faster JSON:**
+
+- **Custom `MarshalJSON`/`UnmarshalJSON`** — hand-written methods for hot-path types eliminate reflection
+- **Code-generation libraries** — `easyjson`, `ffjson` generate marshal/unmarshal methods at build time, no reflection at runtime
+- **Drop-in replacements** — `github.com/goccy/go-json`, `github.com/json-iterator/go`, `github.com/bytedance/sonic` offer 2-5x better performance
+- **`encoding/json/v2`** (experimental) — improved performance over v1
+
+When using third-party JSON libraries, refer to the library's official documentation for up-to-date API signatures.
+
+## Cgo Overhead
+
+**Diagnose:** 1- `go tool pprof` (CPU profile + threadcreate profile) — look for `runtime.cgocall` or `runtime.asmcgocall` consuming CPU; high threadcreate count means cgo calls are pinning goroutines to OS threads 2- `go test -bench` — benchmark the cgo call loop vs a pure Go equivalent; expect ~50-100ns overhead per cgo crossing
+
+Each Go-to-C call via cgo costs ~50-100ns due to stack switching, signal mask manipulation, and scheduler coordination:
+
+```go
+// Bad — cgo overhead per element dominates for tight loops
+for i, v := range values {
+    values[i] = float64(C.sqrt(C.double(v))) // ~100ns overhead PER CALL
+}
+
+// Good — use pure Go stdlib (math.Sqrt is as fast as C and inlineable)
+for i, v := range values { values[i] = math.Sqrt(v) }
+
+// Good — batch when C code is unavoidable
+C.batch_sqrt((*C.double)(&values[0]), C.int(len(values))) // amortize overhead
+```
+
+Additional cgo costs: goroutine is pinned to an OS thread, C code cannot be preempted (may delay GC), and function inlining is blocked at the boundary.
+
+## Buffered I/O
+
+**Diagnose:** 1- `go test -bench` — benchmark buffered vs unbuffered I/O; expect 3-10x improvement from reducing syscall count 2- `go tool trace` — look for frequent short syscalls (`read`, `write`) in rapid succession; many tiny I/O operations indicate unbuffered access
+
+Unbuffered file reads/writes issue a syscall per operation. `bufio.Reader` and `bufio.Writer` batch small operations, reducing syscalls by 10x or more:
+
+```go
+// Bad — syscall per line
+for _, line := range lines { f.WriteString(line + "\n") }
+
+// Good — buffered, batches writes into larger chunks
+w := bufio.NewWriter(f)
+for _, line := range lines { w.WriteString(line + "\n") }
+w.Flush()
+```
+
+## Concurrent Multi-Stage Pipelines
+
+**Diagnose:** 1- `go tool trace` — visualize resource utilization across stages; look for sequential idle gaps where CPU, disk, or network sit unused while another resource is busy 2- `go tool pprof` (CPU + goroutine profile) — confirm each stage saturates a _different_ resource; if multiple stages compete for the same resource (e.g., both CPU-bound), concurrency won't help
+
+In rare scenarios where each pipeline stage saturates a _different_ resource (CPU, disk I/O, network), running stages concurrently instead of sequentially can improve throughput — even with batching between stages.
+
+### The unusual scenario
+
+Imagine processing records: Stage A compresses (CPU-bound), Stage B writes to disk (I/O-bound), Stage C uploads to network (network-bound). Sequential execution wastes resources:
+
+```
+Time:    0       10      20      30      40      50
+CPU:     AAAAAAAAAA|..........|..........|..........|
+Disk:    ..........|BBBBBBBBBB|..........|..........|
+Network: ..........|..........|CCCCCCCCCC|..........|
+```
+
+Concurrent stages let resources work in parallel:
+
+```
+Time:    0       10      20      30      40      50
+CPU:     AAAAAAAAAA|AA........|
+Disk:    ..........|BBBBBBBBBB|BB........|
+Network: ..........|..........|CCCCCCCCCC|CC........|
+```
+
+**Code pattern:**
+
+```go
+// Each stage runs in its own goroutine, bounded by channel buffers
+compressedCh := make(chan []byte, 100)    // A → B buffer
+uploadedCh := make(chan bool, 100)        // B → C buffer
+
+// Stage A: CPU-bound compression
+go func() {
+    for record := range inputCh {
+        compressed := compress(record)    // saturates CPU
+        compressedCh <- compressed
+    }
+    close(compressedCh)
+}()
+
+// Stage B: I/O-bound disk writes
+go func() {
+    for compressed := range compressedCh {
+        diskFile.Write(compressed)        // saturates disk I/O
+        uploadedCh <- true
+    }
+    close(uploadedCh)
+}()
+
+// Stage C: network-bound uploads
+go func() {
+    for <-uploadedCh {
+        client.Post(uploadURL, ...)       // saturates network
+    }
+}()
+```
+
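+With concurrent stages (and batching between them), steady-state throughput ≈ min(A_throughput, B_throughput, C_throughput) — the slowest stage sets the pace. Run sequentially, each record costs the sum of all stage times, so throughput = 1 / (t_A + t_B + t_C). **Concurrent stages only help when bottlenecks don't overlap.**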
+
+### When to use this (and when NOT to)
+
+**Use concurrent pipelines only when ALL of these are true:**
+
+1. **Resource saturation is predictable and non-overlapping** — You measured that A saturates one resource (e.g., CPU = 95%), B saturates another (disk I/O = 90%), C saturates a third (network = 85%). Overlapping saturation means concurrency adds no benefit.
+2. **Bottleneck shifts don't hurt latency** — Processing order doesn't matter, or records can flow out-of-order through stages.
+3. **Buffering overhead is acceptable** — Inter-stage channels consume memory. For large records, deep channel buffers can exhaust available memory.
+4. **You've benchmarked the alternative** — Profile both sequential and concurrent versions. Sequential + batching often wins because it is simpler and avoids context-switching overhead.
+
+**Avoid concurrent pipelines if:**
+
+- **Records must be ordered** — A single goroutine per stage preserves order, but as soon as a stage is scaled out to multiple workers, records may be reordered; if downstream expects order, you need synchronization that kills the speedup.
+- **Resources overlap** — If A and B both compete for CPU (e.g., both compress), concurrency causes context-switching overhead with no resource utilization gain.
+- **Latency matters more than throughput** — Each record still passes through all 3 stages one after another and additionally waits in the inter-stage channel buffers, increasing per-record latency.
+- **Memory is tight** — Each stage's channel buffer is a memory budget; deeply buffered channels can exhaust available RAM.
+
+→ See `samber/cc-skills-golang@golang-concurrency` skill for detailed channel patterns and when to use worker pools instead.
+
+## Batch Operations
+
+**Diagnose:** 1- `go test -bench` — benchmark single-item vs batched operations; expect N-fold improvement in throughput when amortizing per-operation overhead (syscalls, round-trips) 2- `go tool trace` — look for repeated short network/disk operations with idle gaps between them; these gaps represent wasted round-trip time that batching eliminates
+
+Batching amortizes per-operation overhead (syscalls, network round-trips, transaction costs) across many items. The pattern applies everywhere: I/O, database, network, and even in-memory processing.
+
+### Database: batch inserts over row-by-row
+
+Inserting 1,000 rows one at a time means 1,000 round-trips, 1,000 query parses, and 1,000 transaction commits. A single batch insert does it in one round-trip:
+
+```go
+// Bad — 1,000 round-trips, ~500ms
+for _, user := range users {
+    db.Exec("INSERT INTO users (name, email) VALUES ($1, $2)", user.Name, user.Email)
+}
+
+// Good — one bulk COPY per batch instead of one round-trip per row, ~5ms
+const batchSize = 1000
+for i := 0; i < len(users); i += batchSize {
+    end := min(i+batchSize, len(users))
+    batch := users[i:end]
+    // Build multi-row INSERT or use COPY protocol
+    tx, _ := db.Begin()
+    stmt, _ := tx.Prepare(pq.CopyIn("users", "name", "email"))
+    for _, u := range batch { stmt.Exec(u.Name, u.Email) }
+    stmt.Exec()
+    tx.Commit()
+}
+```
+
+→ See `samber/cc-skills-golang@golang-database` skill for detailed batch patterns and connection pool configuration.
+
+### HTTP: batch API calls
+
+Instead of N individual HTTP requests, send one request with N items when the API supports it:
+
+```go
+// Bad — 100 HTTP round-trips
+for _, id := range ids {
+    resp, _ := client.Get(fmt.Sprintf("/api/users/%s", id))
+    // ...
+}
+
+// Good — 1 HTTP request with all IDs
+resp, _ := client.Post("/api/users/batch", "application/json",
+    bytes.NewReader(marshalIDs(ids)))
+```
+
+### Channel: batch processing from a stream
+
+Accumulate items from a channel and process in bulk to reduce per-item overhead:
+
+```go
+func batchProcessor(in <-chan Item, batchSize int) {
+    batch := make([]Item, 0, batchSize)
+    ticker := time.NewTicker(100 * time.Millisecond) // flush on timeout too
+    defer ticker.Stop()
+    for {
+        select {
+        case item, ok := <-in:
+            if !ok { flush(batch); return }
+            batch = append(batch, item)
+            if len(batch) >= batchSize { flush(batch); batch = batch[:0] }
+        case <-ticker.C:
+            if len(batch) > 0 { flush(batch); batch = batch[:0] }
+        }
+    }
+}
+```
diff --git a/.agents/skills/golang-performance/references/memory.md b/.agents/skills/golang-performance/references/memory.md
new file mode 100644
index 0000000..5c5972f
--- /dev/null
+++ b/.agents/skills/golang-performance/references/memory.md
@@ -0,0 +1,233 @@
+# Memory Optimization
+
+Allocation reduction is the single highest-ROI optimization in most Go programs. Every allocation eventually requires garbage collection — reducing allocation count and size directly reduces GC pauses and CPU overhead.
+
+## Allocation Patterns
+
+**Diagnose:** 1- `go tool pprof -alloc_objects` — rank functions by number of heap allocations; expect hot-path functions (request handlers, serializers) near the top with thousands of alloc/op 2- `go build -gcflags="-m -m"` — verbose escape analysis showing _why_ variables escape; look for `"leaking param"`, `"too large for stack"`, or `"captured by closure"` on variables you expect to stay on the stack 3- `go test -bench -benchmem` — measure allocs/op and B/op per benchmark; expect the target function to show >0 allocs/op that can be eliminated
+
+### Reuse slices via append(s[:0], ...)
+
+Reslicing to zero length retains the backing array, turning what would be a new allocation into a no-op:
+
+```go
+// Bad — allocates new slice, old one becomes garbage
+mode = []T{item}
+
+// Good — reuses existing backing array (0 allocations)
+mode = append(mode[:0], item)
+```
+
+### Direct indexing vs append
+
+When the output size equals the input size, use `make([]T, len(input))` with direct assignment instead of `make([]T, 0, len(input))` with `append`. Direct assignment avoids `append`'s per-element capacity check and length update:
+
+```go
+// Slower — append overhead per element
+result := make([]T, 0, len(input))
+for i := range input { result = append(result, transform(input[i])) }
+
+// Faster — direct assignment
+result := make([]T, len(input))
+for i := range input { result[i] = transform(input[i]) }
+```
+
+Use append when the result might be smaller (filtering) or when early error return could discard partial results.
+
+### Eliminate redundant map lookups
+
+`for k := range m { use(m[k]) }` does two lookups per iteration. Capture the value from range:
+
+```go
+// Bad — two lookups per iteration
+for k := range in { result[k] = fn(in[k]) }
+
+// Good — single lookup
+for k, v := range in { result[k] = fn(v) }
+```
+
+### Map size hints
+
+`make(map[K]V)` starts with a small number of buckets and rehashes as it grows. Providing a size hint avoids rehashing:
+
+```go
+m := make(map[string]int, len(items)) // single allocation, no rehashing
+```
+
+### Sentinel errors vs fmt.Errorf
+
+`fmt.Errorf` allocates on every call. For predictable errors in hot paths, use preallocated sentinels:
+
+```go
+var ErrNegative = errors.New("value is negative") // allocated once
+
+func validate(x int) error {
+    if x < 0 { return ErrNegative } // zero allocation
+    return nil
+}
+```
+
+Only use `fmt.Errorf` when you need dynamic context (field names, values).
+
+### Interface boxing
+
+Passing concrete types through `any`/`interface{}` forces heap allocation for boxing. In hot paths, use typed parameters or generics:
+
+```go
+// Bad — boxes each int, allocates
+func sum(values []any) int { ... }
+
+// Good — no boxing, no allocation
+func sum(values []int) int { ... }
+
+// Good — generic, still no boxing
+func sum[T ~int | ~int64](values []T) T { ... }
+```
+
+## Backing Array Leaks
+
+**Diagnose:** 1- `go tool pprof -inuse_space` — show currently live heap memory by allocation site; look for unexpectedly large live objects (MB-sized) that should have been GC'd — a sign of backing array retention 2- `go tool pprof -alloc_space` — show cumulative bytes allocated over time; look for allocation sites producing far more bytes than the final data they hold (e.g., 100MB allocated for 16-byte results)
+
+### Slice reslicing retains the entire backing array
+
+A small reslice of a large slice keeps the entire original array in memory:
+
+```go
+// Bad — retains entire megabyte-sized backing array
+func getHeader(data []byte) []byte { return data[:16] }
+
+// Good — independent copy, original can be GC'd
+func getHeader(data []byte) []byte {
+    header := make([]byte, 16)
+    copy(header, data[:16])
+    return header
+}
+```
+
+### Substring memory leaks
+
+Substrings share the backing array of the original string:
+
+```go
+// Bad — keeps entire longMsg in memory
+func extractID(msg string) string { return msg[:8] }
+
+// Good — independent copy (Go 1.18+)
+func extractID(msg string) string { return strings.Clone(msg[:8]) }
+```
+
+### Map never shrinks
+
+Go maps grow but never release bucket memory when entries are deleted. A map that once held millions of entries retains its allocation forever:
+
+```go
+// Recreate periodically to reclaim memory
+func compact(old map[string]Data) map[string]Data {
+    m := make(map[string]Data, len(old))
+    for k, v := range old { m[k] = v }
+    return m // old map becomes eligible for GC
+}
+```
+
+## String and Byte Optimization
+
+**Diagnose:** 1- `go tool pprof -alloc_objects` — look for string/byte conversion functions (`runtime.stringtoslicebyte`, `runtime.slicebytetostring`) appearing as top allocators 2- `go test -bench -benchmem` — measure allocs/op; expect repeated conversions to show 1+ alloc/op per conversion that can be reduced to zero by caching
+
+**Cache string-to-byte conversions** — converting between `string` and `[]byte` allocates a copy each time. Convert once and reuse the result.
+
+**Use `bytes` package directly** — `bytes.Contains`, `bytes.HasPrefix`, `bytes.Split`, `bytes.ToUpper` etc. operate on `[]byte` without string conversion. The `bytes` package mirrors most of `strings`.
+
+## sync.Pool Hot-Path Patterns
+
+**Diagnose:** 1- `go tool pprof -alloc_objects` — identify hot allocation sites creating the same object type repeatedly (e.g., `[]byte` buffers, temp structs); expect one site with thousands of allocs/s that can be pooled
+
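+`sync.Pool` recycles objects between uses, reducing allocation pressure; idle pooled objects may still be reclaimed by the GC, so treat the pool as a cache, not long-term storage. Use it for frequently allocated, short-lived objects in hot paths (HTTP handlers, serialization, logging):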
+
+```go
+var bufPool = sync.Pool{
+    New: func() any {
+        buf := make([]byte, 0, 4096)
+        return &buf
+    },
+}
+
+func handleRequest(data []byte) []byte {
+    bp := bufPool.Get().(*[]byte)
+    buf := (*bp)[:0] // reset length, keep capacity
+    defer func() { *bp = buf; bufPool.Put(bp) }()
+
+    // ... process data into buf ...
+
+    result := make([]byte, len(buf))
+    copy(result, buf) // return a copy — buf goes back to pool
+    return result
+}
+```
+
+**Rules:**
+
+- Reset state before `Put()` — clear references to avoid retaining large object graphs across GC cycles
+- Return copies, not pooled buffers — callers must not hold references to pooled memory
+- Don't pool objects >32KB — allocations that large bypass the runtime allocator's size classes (`sync.Pool` itself has none) and GC already handles them efficiently
+- Don't pool infrequently used objects — pool overhead exceeds benefit when allocations are rare
+
+→ See `samber/cc-skills-golang@golang-concurrency` skill for `sync.Pool` API reference and basic usage patterns.
+
+## Memory Layout
+
+**Diagnose:** 1- `fieldalignment ./...` — detect structs with wasted padding bytes; expect warnings like `"struct of size 40 could be 24"` listing which structs benefit from reordering 2- `unsafe.Sizeof`/`Alignof`/`Offsetof` — measure exact byte sizes and field offsets; use to confirm savings before/after and document them in code comments
+
+### Struct field alignment
+
+Go adds padding between fields to satisfy alignment requirements. Reorder fields from largest to smallest:
+
+```go
+// Bad — 24 bytes (7 + 3 bytes padding)
+type Bad struct {
+    a bool    // 1 byte + 7 padding
+    b int64   // 8 bytes
+    c bool    // 1 byte + 3 padding
+    d int32   // 4 bytes
+}
+
+// Good — 16 bytes (2 bytes padding)
+type Good struct {
+    b int64   // 8 bytes
+    d int32   // 4 bytes
+    a bool    // 1 byte
+    c bool    // 1 byte + 2 padding
+}
+```
+
+**Alignment requirements (64-bit platforms):** `bool`/`byte` = 1, `int16` = 2, `int32`/`float32` = 4, `int64`/`float64`/`string`/`[]T`/`*T` = 8.
+
+**Inspect layout:** `unsafe.Sizeof(T{})`, `unsafe.Alignof(T{})`, `unsafe.Offsetof(T{}.field)`
+
+### Zero-size field at end of struct
+
+If the last field has zero size (`struct{}`), the compiler adds word-sized padding to prevent a pointer to that field from overlapping the next memory block:
+
+```go
+// Bad — 16 bytes (8 for Value + 8 padding for Flag)
+type Entry struct { Value int64; Flag struct{} }
+
+// Good — 8 bytes (0 for Flag + 8 for Value)
+type Entry struct { Flag struct{}; Value int64 }
+```
+
+Having a `struct{}` field in a struct is rare and almost useless.
+
+### Pointer receivers for large structs
+
+Value receivers copy the entire struct on every method call. Use pointer receivers for structs larger than ~128 bytes. If any method uses a pointer receiver, all methods should for consistency.
+
+### Map of pointers for large, frequently updated structs
+
+Map values are not addressable — you cannot modify a field in place. For large structs with frequent updates, `map[K]*V` avoids the copy-modify-reassign pattern:
+
+```go
+players := map[string]*Player{"alice": {Score: 100}}
+players["alice"].Score += 10 // direct modification, no copy
+```
+
+Trade-off: each pointer is a separate heap allocation, adding GC pressure. For small, mostly-read structs, `map[K]V` (value) is better.
diff --git a/.agents/skills/golang-performance/references/observability.md b/.agents/skills/golang-performance/references/observability.md
new file mode 100644
index 0000000..6b1bda7
--- /dev/null
+++ b/.agents/skills/golang-performance/references/observability.md
@@ -0,0 +1,101 @@
+# Production Observability for Performance
+
+Third-party monitoring tools complement local profiling (pprof, benchmarks) by providing continuous monitoring, historical trends, and regression detection in production.
+
+## Prometheus Metrics for Go
+
+**Setup:** `github.com/prometheus/client_golang` — expose `/metrics` endpoint with `promhttp.Handler()`. Default collectors automatically export Go runtime metrics (`go_goroutines`, `go_memstats_*`, `go_gc_duration_seconds`, `process_cpu_seconds_total`, etc.).
+
+→ See `samber/cc-skills-golang@golang-benchmark` skill (investigation-session.md) for the full runtime metrics table, investigation session setup (scrape interval tuning, env-var toggling), and cost warnings for profiling tools.
+
+### PromQL Queries for Performance Diagnosis
+
+#### GC pressure
+
+| PromQL | What to look for |
+| --- | --- |
+| `rate(go_gc_duration_seconds_count[5m])` | GC cycles/s — >2/s sustained suggests excessive allocation rate |
+| `rate(go_gc_duration_seconds_sum[5m]) / rate(go_gc_duration_seconds_count[5m])` | Average GC pause — increasing trend means heap is growing or has too many pointers |
+| `go_gc_duration_seconds{quantile="1"}` | Worst-case GC pause — spikes here cause tail latency |
+
+#### Memory leaks
+
+| PromQL | What to look for |
+| --- | --- |
+| `go_memstats_alloc_bytes` | Should be roughly stable under constant load; continuous increase = memory leak |
+| `rate(go_memstats_alloc_bytes_total[5m])` | Allocation rate (bytes/s) — drives GC frequency; compare before/after deploy for regressions |
+| `process_resident_memory_bytes - go_memstats_sys_bytes` | Gap = non-Go memory (cgo, mmap); growing gap = non-Go leak |
+
+#### Goroutine leaks
+
+| PromQL | What to look for |
+| --- | --- |
+| `go_goroutines` | Should correlate with load; growing independently of traffic = leak |
+| `delta(go_goroutines[1h])` | Net goroutine change over 1h; positive without load increase = leak |
+
+#### CPU saturation
+
+| PromQL | What to look for |
+| --- | --- |
+| `rate(process_cpu_seconds_total[5m])` | CPU cores consumed; compare to GOMAXPROCS to detect saturation |
+| `rate(process_cpu_seconds_total[5m]) / <GOMAXPROCS>` | CPU utilization ratio; >0.8 sustained = CPU-saturated |
+
+#### Regression detection (after deploy)
+
+| PromQL | What to look for |
+| --- | --- |
+| `rate(go_memstats_alloc_bytes_total[5m])` | Compare before/after deploy; significant increase = new allocation pattern introduced |
+| `histogram_quantile(0.99, rate(http_request_duration_seconds_bucket[5m]))` | p99 latency increase after deploy = regression (requires app-level histogram) |
+
+### Alerting rules (examples)
+
+[Example alerting rules](assets/prometheus-alerts.yml) — adjust thresholds to your application; a high-throughput data pipeline will have different baselines than a lightweight API server.
+
+→ See `samber/cc-skills@promql-cli` skill for interactively testing these PromQL expressions against your Prometheus instance from the CLI.
+
+### Grafana Dashboards
+
+→ See `samber/cc-skills-golang@golang-observability` skill for recommended community Grafana dashboards that visualize Go runtime metrics out of the box.
+
+## Continuous Profiling
+
+Continuous profiling collects low-overhead samples in production and stores them for historical comparison. Use it to detect regressions across deploys, compare flamegraphs over time, and feed PGO (see [Runtime Tuning](./runtime.md#profile-guided-optimization-pgo)).
+
+| Tool | Model | Overhead | Best for |
+| --- | --- | --- | --- |
+| **Grafana Pyroscope** | push SDK or pull (via Alloy) | ~2-5% | Grafana ecosystem, historical flamegraph comparison |
+| **Parca** (Polar Signals) | eBPF-based pull | <1% | Infrastructure-wide profiling, no code changes |
+| **Datadog Continuous Profiler** | push (agent) | ~1-2% | Existing Datadog users |
+| **Google Cloud Profiler** | push (agent) | ~1-2% | GCP-hosted Go services |
+
+### Pyroscope push mode
+
+```go
+import "github.com/grafana/pyroscope-go"
+
+pyroscope.Start(pyroscope.Config{
+    ApplicationName: "myapp",
+    ServerAddress:   "http://pyroscope:4040",
+    ProfileTypes: []pyroscope.ProfileType{
+        pyroscope.ProfileCPU,
+        pyroscope.ProfileAllocObjects,
+        pyroscope.ProfileAllocSpace,
+        pyroscope.ProfileInuseObjects,
+        pyroscope.ProfileInuseSpace,
+        pyroscope.ProfileGoroutines,
+    },
+})
+```
+
+### Pyroscope pull mode (via Grafana Alloy)
+
+No code changes required — Alloy scrapes `/debug/pprof/*` endpoints periodically. Configure Alloy to target your service's pprof endpoint.
+
+When using third-party profiling libraries, refer to the library's official documentation for current API signatures.
+
+## Real-Time Visualization (Development)
+
+| Tool | What it does |
+| --- | --- |
+| **statsviz** (`github.com/arl/statsviz`) | Real-time browser dashboard at `/debug/statsviz` — heap, GC pauses, goroutines, scheduler. Register with `statsviz.Register(mux)`. Great for local development |
+| **expvar** (stdlib `expvar`) | JSON metrics at `/debug/vars` — lightweight, no dependencies. Integrates with Netdata, Telegraf, or custom dashboards |
diff --git a/.agents/skills/golang-performance/references/runtime.md b/.agents/skills/golang-performance/references/runtime.md
new file mode 100644
index 0000000..28128ce
--- /dev/null
+++ b/.agents/skills/golang-performance/references/runtime.md
@@ -0,0 +1,222 @@
+# Runtime Tuning
+
+Runtime settings control garbage collection frequency, memory limits, CPU scheduling, and compiler optimizations. Tune them after profiling — the defaults are well-chosen for most workloads.
+
+## Garbage Collector Tuning
+
+**Diagnose:** 1- `GODEBUG=gctrace=1` — print one line per GC cycle; look for high GC frequency (cycles/s), high CPU% (>5% means GC is competing for CPU), or heap growing faster than expected 2- `runtime.ReadMemStats` — inspect `Alloc`, `TotalAlloc`, `NumGC`, `PauseNs`; compare `Alloc` vs `Sys` to see how much memory the GC is reclaiming vs how much the OS allocated 3- `go tool trace` — visualize GC stop-the-world pauses and GC assist stealing CPU from application goroutines; look for long STW bars or frequent assist marks 4- `debug.ReadGCStats` — get pause time percentiles (p50, p95, p99); high p99 pauses indicate large heap scans or too many pointers 5- `runtime/metrics` — programmatic access to GC stats for dashboards; monitor `/gc/cycles/total`, `/gc/heap/allocs`, `/gc/pauses` 6- `GODEBUG=gcpacertrace=1` — trace the GC pacer's decisions; useful to understand why GC triggers earlier or later than expected 7- Prometheus `rate(go_gc_duration_seconds_count[5m])` — monitor GC frequency in production; >2 cycles/s sustained suggests excessive allocation rate
+
+### GOGC (default: 100)
+
+Controls the heap growth ratio that triggers the next GC cycle. `GOGC=100` means GC runs when the heap doubles since the last collection. Higher values reduce GC frequency but use more memory:
+
+```bash
+GOGC=50  ./myapp  # memory-constrained: more frequent GC, smaller heap, more CPU spent in GC
+GOGC=200 ./myapp  # throughput-oriented: less frequent GC, more memory used
+GOGC=off ./myapp  # disable GC entirely (testing only!)
+```
+
+### GOMEMLIMIT (Go 1.19+)
+
+Soft memory limit — the runtime increases GC frequency to stay under this limit. Essential for containerized applications where exceeding the container limit triggers an OOM kill:
+
+```bash
+# Container with 512MB limit: leave headroom for memory the Go runtime does not manage (cgo allocations, OS buffers, the binary's own mappings)
+GOMEMLIMIT=450MiB ./myapp
+
+# Container with 1GB limit
+GOMEMLIMIT=900MiB ./myapp
+```
+
+The GC pacer adjusts collection timing based on both GOGC and GOMEMLIMIT. When the heap approaches the limit, the GC runs more aggressively regardless of GOGC.
+
+### Programmatic control
+
+```go
+import "runtime/debug"
+
+debug.SetGCPercent(200)                    // equivalent to GOGC=200
+debug.SetMemoryLimit(450 * 1024 * 1024)   // 450 MiB soft limit
+```
+
+Use programmatic control for dynamic tuning based on observed workload, or when environment variables cannot be set.
+
+### Ballast pattern (pre-Go 1.19)
+
+Before GOMEMLIMIT, teams allocated a large byte array at startup to inflate the live heap size, reducing GC frequency:
+
+```go
+var ballast [1 << 30]byte // 1 GB — obsolete pattern
+```
+
+**GOMEMLIMIT is strictly better** — it provides the same benefit (fewer GC cycles) without wasting physical memory. Use GOMEMLIMIT instead.
+
+## GC Profiling and Diagnostics
+
+### GODEBUG=gctrace=1
+
+Prints a line per GC cycle to stderr:
+
+```bash
+GODEBUG=gctrace=1 ./myapp 2>&1 | head -20
+```
+
+Sample output:
+
+```
+gc 5 @1.234s 2%: 0.012+12+0.9 ms clock, 0.25+8.9/20+18 ms cpu, 45->92->50 MB, 200 MB goal, 8 P
+```
+
+Key fields:
+
+- `gc 5` — 5th GC cycle
+- `@1.234s` — time since program start
+- `2%` — percentage of time spent in GC since program start
+- `45->92->50 MB` — heap size at GC start → heap size at GC end → live heap after the cycle
+- `200 MB goal` — target heap size (based on GOGC and GOMEMLIMIT)
+- `8 P` — number of processors
+
+Watch for: GC frequency (too often = too many allocations), pause times (high = large heap or many pointers), CPU% (high = tune GOGC or reduce allocations).
+
+### runtime.ReadMemStats
+
+Programmatic monitoring for dashboards and alerting:
+
+```go
+var m runtime.MemStats
+runtime.ReadMemStats(&m)
+
+fmt.Printf("Alloc: %d MB\n", m.Alloc/1024/1024)       // currently allocated
+fmt.Printf("TotalAlloc: %d MB\n", m.TotalAlloc/1024/1024) // cumulative
+fmt.Printf("Sys: %d MB\n", m.Sys/1024/1024)            // requested from OS
+fmt.Printf("NumGC: %d\n", m.NumGC)                      // completed collections
+fmt.Printf("LastPause: %d ms\n", m.PauseNs[(m.NumGC+255)%256]/1_000_000)
+```
+
+### GC pacing
+
+The GC pacer predicts when to start the next collection based on:
+
+1. **Live heap size** after the last collection
+2. **GOGC percentage** — how much growth to allow
+3. **GOMEMLIMIT** — soft ceiling (if set)
+4. **Current allocation rate** — how fast the heap is growing
+
+The pacer starts collection early enough to finish before hitting the target. Fast allocation rates cause earlier starts.
+
+## Allocation Rate Reduction
+
+**Diagnose:** 1- `go tool pprof -alloc_objects` — rank functions by allocation count; the top allocators are where allocation reduction will have the biggest GC impact 2- `GODEBUG=gctrace=1` — monitor GC frequency before and after reducing allocations; expect fewer GC cycles per second as allocation rate drops 3- Prometheus `rate(go_memstats_alloc_bytes_total[5m])` — track allocation rate trend in production; compare before/after deploy to detect regressions
+
+Reducing allocations helps more than tuning GOGC — it addresses the root cause instead of managing the symptom:
+
+- **Value types over pointer types** where possible — values can often stay on the stack (no GC work), while pointers are more likely to force the data they reference onto the heap
+- **Pool frequently allocated objects** with `sync.Pool` (see [memory.md](./memory.md))
+- **Preallocate slices and maps** — → See `samber/cc-skills-golang@golang-data-structures` skill
+- **Avoid interface boxing** in hot paths — use typed parameters or generics
+
+## GOMAXPROCS in Containers
+
+**Diagnose:** 1- `go tool pprof` (CPU profile) — look for high `runtime.schedule` or `runtime.findRunnable` overhead; this indicates too many P's competing for work or too few P's starving goroutines 2- `go tool trace` — check if goroutines are evenly distributed across P's; uneven distribution suggests GOMAXPROCS is misconfigured for the container 3- `GODEBUG=schedtrace=1000` — print scheduler state every second; look for `runqueue` imbalances or idle P's when work is available 4- `runtime.GOMAXPROCS(0)` — query the current value; if it returns the host CPU count (e.g., 64) instead of the container limit (e.g., 2), the runtime is over-scheduling 5- Prometheus `rate(process_cpu_seconds_total[5m])` — monitor CPU cores consumed in production; if consistently near GOMAXPROCS value, the app is CPU-saturated
+
+**Go 1.25+** automatically detects and respects container CPU limits (cgroup v1 and v2). The runtime sets `GOMAXPROCS` based on:
+
+- Logical CPUs on the machine
+- Process CPU affinity mask
+- cgroup CPU quota limits (on Linux)
+
+In a container with 2 CPU cores on a 64-core host running Go 1.25+, `GOMAXPROCS` is correctly set to 2 by default—no additional setup required.
+
+**For Go 1.24 and earlier**, use the `go.uber.org/automaxprocs` library to handle container CPU detection:
+
+```go
+// Pre-Go 1.25: explicit container-aware detection
+import _ "go.uber.org/automaxprocs"
+
+func main() {
+    // GOMAXPROCS is now correctly set to container CPU limit
+    startServer()
+}
+```
+
+**Manual override** (if needed):
+
+```bash
+GOMAXPROCS=2 ./myapp
+GODEBUG=updatemaxprocs=0 ./myapp  # disable dynamic updates (Go 1.25+)
+```
+
+**Known limitations (Go 1.25)**: cgroup v1 on certain systems (Oracle OCPUs) may not properly detect Kubernetes CPU limits. Manually set `GOMAXPROCS` as a workaround in these cases.
+
+## Profile-Guided Optimization (PGO)
+
+**Diagnose:** 1- `go tool pprof` (CPU profile) — collect a representative production profile (30+ seconds); look for hot interface method calls and deep call chains that PGO can optimize via devirtualization and inlining 2- `go test -bench` — benchmark before and after placing `default.pgo`; expect 2-7% improvement on interface-heavy code, less on already-optimized paths
+
+Go 1.21+ supports PGO — the compiler uses a production CPU profile to make better inlining and devirtualization decisions. Expected improvement: 2-7% for minimal effort.
+
+**Workflow:**
+
+1. Collect a production CPU profile (30+ seconds of representative load):
+
+   ```bash
+   curl http://localhost:6060/debug/pprof/profile?seconds=60 > cpu.pprof
+   ```
+
+2. Place as `default.pgo` in the main package directory:
+
+   ```bash
+   cp cpu.pprof ./cmd/myapp/default.pgo
+   ```
+
+3. Build — `go build` auto-detects `default.pgo`:
+
+   ```bash
+   go build ./cmd/myapp
+   ```
+
+**What the compiler optimizes:**
+
+- **Inlining** — hot function calls are inlined more aggressively
+- **Devirtualization** — interface method calls with high probability of targeting specific types become direct calls
+
+**When it helps most:** code with many interface calls, hot inlining opportunities, deep call stacks. **When it helps least:** already-optimized code, memory-bound workloads.
+
+Rebuild profiles after significant code changes — stale profiles can mislead the compiler.
+
+## Logging Overhead in Hot Paths
+
+**Diagnose:** 1- `go tool pprof` (CPU profile) — look for `fmt.Sprintf`, `log.Printf`, or `slog.(*Logger).log` appearing in hot paths; these indicate log formatting consuming CPU even when the log level filters the message 2- `go build -gcflags="-m"` — check if log arguments escape to the heap; expect `"moved to heap"` for arguments boxed into `any` interface by logging functions 3- `go test -bench=. -benchmem` — benchmark with logging enabled vs disabled; if allocs/op doesn't change, the logger is allocating even when the level is off
+
+Log formatting allocates memory and consumes CPU even when the message is discarded because it's below the configured level:
+
+```go
+// Bad — fmt.Sprintf runs BEFORE the logger checks the level
+logger.Debug(fmt.Sprintf("processing item %d with data %v", item.ID, item.Data))
+
+// Good — slog defers formatting until level check passes (Go 1.21+)
+slog.Debug("processing item", slog.Int("id", item.ID), slog.Any("data", item.Data))
+
+// Best — LogAttrs: zero allocations when level is disabled
+slog.LogAttrs(ctx, slog.LevelDebug, "processing item",
+    slog.Int("id", item.ID))
+```
+
+In hot paths, even `slog.Any` can allocate. Prefer typed attributes: `slog.Int`, `slog.String`, `slog.Bool`.
+
+## Panic/Recover Cost
+
+**Diagnose:** 1- `go tool pprof` (CPU profile) — look for `runtime.gopanic` or `runtime.gorecover` in the profile; their presence in hot paths means panic/recover is being used for control flow 2- `go test -bench` — benchmark panic/recover vs error-return versions; expect 10-100x overhead from stack unwinding and defer execution
+
+`panic` triggers stack unwinding, running all deferred functions up the call stack. `recover` catches the panic but the unwinding itself is expensive. Never use panic/recover for control flow:
+
+```go
+// Bad — panic overhead for a normal condition
+defer func() { recover() }()
+v := mustAtoi(s) // hypothetical helper that panics on invalid input
+
+// Good — explicit error check, no panic overhead
+v, err := strconv.Atoi(s)
+if err != nil { continue }
+```
+
+Panic is appropriate only for truly unrecoverable situations (programmer errors, corrupted state). Always convert panics to errors at package boundaries.
diff --git a/.agents/skills/golang-pro/SKILL.md b/.agents/skills/golang-pro/SKILL.md
new file mode 100644
index 0000000..5b040ca
--- /dev/null
+++ b/.agents/skills/golang-pro/SKILL.md
@@ -0,0 +1,124 @@
+---
+name: golang-pro
+description: Implements concurrent Go patterns using goroutines and channels, designs and builds microservices with gRPC or REST, optimizes Go application performance with pprof, and enforces idiomatic Go with generics, interfaces, and robust error handling. Use when building Go applications requiring concurrent programming, microservices architecture, or high-performance systems. Invoke for goroutines, channels, Go generics, gRPC integration, CLI tools, benchmarks, or table-driven testing.
+license: MIT
+metadata:
+  author: https://github.com/Jeffallan
+  version: "1.1.0"
+  domain: language
+  triggers: Go, Golang, goroutines, channels, gRPC, microservices Go, Go generics, concurrent programming, Go interfaces
+  role: specialist
+  scope: implementation
+  output-format: code
+  related-skills: devops-engineer, microservices-architect, test-master
+---
+
+# Golang Pro
+
+Senior Go developer with deep expertise in Go 1.21+, concurrent programming, and cloud-native microservices. Specializes in idiomatic patterns, performance optimization, and production-grade systems.
+
+## Core Workflow
+
+1. **Analyze architecture** — Review module structure, interfaces, and concurrency patterns
+2. **Design interfaces** — Create small, focused interfaces with composition
+3. **Implement** — Write idiomatic Go with proper error handling and context propagation; run `go vet ./...` before proceeding
+4. **Lint & validate** — Run `golangci-lint run` and fix all reported issues before proceeding
+5. **Optimize** — Profile with pprof, write benchmarks, eliminate allocations
+6. **Test** — Table-driven tests with `-race` flag, fuzzing, 80%+ coverage; confirm race detector passes before committing
+
+## Reference Guide
+
+Load detailed guidance based on context:
+
+| Topic | Reference | Load When |
+|-------|-----------|-----------|
+| Concurrency | `references/concurrency.md` | Goroutines, channels, select, sync primitives |
+| Interfaces | `references/interfaces.md` | Interface design, io.Reader/Writer, composition |
+| Generics | `references/generics.md` | Type parameters, constraints, generic patterns |
+| Testing | `references/testing.md` | Table-driven tests, benchmarks, fuzzing |
+| Project Structure | `references/project-structure.md` | Module layout, internal packages, go.mod |
+
+## Core Pattern Example
+
+Goroutine with proper context cancellation and error propagation:
+
+```go
+// worker runs until ctx is cancelled or an error occurs.
+// Errors are returned via the errCh channel; the caller must drain it.
+func worker(ctx context.Context, jobs <-chan Job, errCh chan<- error) {
+    for {
+        select {
+        case <-ctx.Done():
+            errCh <- fmt.Errorf("worker cancelled: %w", ctx.Err())
+            return
+        case job, ok := <-jobs:
+            if !ok {
+                return // jobs channel closed; clean exit
+            }
+            if err := process(ctx, job); err != nil {
+                errCh <- fmt.Errorf("process job %v: %w", job.ID, err)
+                return
+            }
+        }
+    }
+}
+
+func runPipeline(ctx context.Context, jobs []Job) error {
+    ctx, cancel := context.WithTimeout(ctx, 30*time.Second)
+    defer cancel()
+
+    jobCh := make(chan Job, len(jobs))
+    errCh := make(chan error, 1)
+
+    go func() { defer close(errCh); worker(ctx, jobCh, errCh) }() // closing errCh lets a clean finish return nil
+
+    for _, j := range jobs {
+        jobCh <- j
+    }
+    close(jobCh)
+
+    select {
+    case err := <-errCh:
+        return err
+    case <-ctx.Done():
+        return fmt.Errorf("pipeline timed out: %w", ctx.Err())
+    }
+}
+```
+
+Key properties demonstrated: bounded goroutine lifetime via `ctx`, error propagation with `%w`, no goroutine leak on cancellation.
+
+## Constraints
+
+### MUST DO
+- Use gofmt and golangci-lint on all code
+- Add context.Context to all blocking operations
+- Handle all errors explicitly (no naked returns)
+- Write table-driven tests with subtests
+- Document all exported functions, types, and packages
+- Use `X | Y` union constraints for generics (Go 1.18+)
+- Propagate errors with fmt.Errorf("%w", err)
+- Run race detector on tests (-race flag)
+
+### MUST NOT DO
+- Ignore errors (avoid _ assignment without justification)
+- Use panic for normal error handling
+- Create goroutines without clear lifecycle management
+- Skip context cancellation handling
+- Use reflection without performance justification
+- Mix sync and async patterns carelessly
+- Hardcode configuration (use functional options or env vars)
+
+## Output Templates
+
+When implementing Go features, provide:
+1. Interface definitions (contracts first)
+2. Implementation files with proper package structure
+3. Test file with table-driven tests
+4. Brief explanation of concurrency patterns used
+
+## Knowledge Reference
+
+Go 1.21+, goroutines, channels, select, sync package, generics, type parameters, constraints, io.Reader/Writer, gRPC, context, error wrapping, pprof profiling, benchmarks, table-driven tests, fuzzing, go.mod, internal packages, functional options
+
+[Documentation](https://jeffallan.github.io/claude-skills/skills/language/golang-pro/)
diff --git a/.agents/skills/golang-pro/references/concurrency.md b/.agents/skills/golang-pro/references/concurrency.md
new file mode 100644
index 0000000..e204b3e
--- /dev/null
+++ b/.agents/skills/golang-pro/references/concurrency.md
@@ -0,0 +1,329 @@
+# Concurrency Patterns
+
+## Goroutine Lifecycle Management
+
+```go
+package main
+
+import (
+    "context"
+    "fmt"
+    "sync"
+    "time"
+)
+
+// Worker pool with bounded concurrency
+type WorkerPool struct {
+    workers int
+    tasks   chan func()
+    wg      sync.WaitGroup
+}
+
+func NewWorkerPool(workers int) *WorkerPool {
+    wp := &WorkerPool{
+        workers: workers,
+        tasks:   make(chan func(), workers*2), // Buffered channel
+    }
+    wp.start()
+    return wp
+}
+
+func (wp *WorkerPool) start() {
+    for i := 0; i < wp.workers; i++ {
+        wp.wg.Add(1)
+        go func() {
+            defer wp.wg.Done()
+            for task := range wp.tasks {
+                task()
+            }
+        }()
+    }
+}
+
+func (wp *WorkerPool) Submit(task func()) {
+    wp.tasks <- task
+}
+
+func (wp *WorkerPool) Shutdown() {
+    close(wp.tasks)
+    wp.wg.Wait()
+}
+```
+
+## Channel Patterns
+
+```go
+// Generator pattern
+func generateNumbers(ctx context.Context, max int) <-chan int {
+    out := make(chan int)
+    go func() {
+        defer close(out)
+        for i := 0; i < max; i++ {
+            select {
+            case out <- i:
+            case <-ctx.Done():
+                return
+            }
+        }
+    }()
+    return out
+}
+
+// Fan-out, fan-in pattern
+func fanOut(ctx context.Context, input <-chan int, workers int) []<-chan int {
+    channels := make([]<-chan int, workers)
+    for i := 0; i < workers; i++ {
+        channels[i] = process(ctx, input)
+    }
+    return channels
+}
+
+func process(ctx context.Context, input <-chan int) <-chan int {
+    out := make(chan int)
+    go func() {
+        defer close(out)
+        for val := range input {
+            select {
+            case out <- val * 2:
+            case <-ctx.Done():
+                return
+            }
+        }
+    }()
+    return out
+}
+
+func fanIn(ctx context.Context, channels ...<-chan int) <-chan int {
+    out := make(chan int)
+    var wg sync.WaitGroup
+
+    for _, ch := range channels {
+        wg.Add(1)
+        go func(c <-chan int) {
+            defer wg.Done()
+            for val := range c {
+                select {
+                case out <- val:
+                case <-ctx.Done():
+                    return
+                }
+            }
+        }(ch)
+    }
+
+    go func() {
+        wg.Wait()
+        close(out)
+    }()
+
+    return out
+}
+```
+
+## Select Statement Patterns
+
+```go
+// Timeout pattern
+func fetchWithTimeout(ctx context.Context, url string) (string, error) {
+    result := make(chan string, 1)
+    errCh := make(chan error, 1)
+
+    go func() {
+        // Simulate network call
+        time.Sleep(100 * time.Millisecond)
+        result <- "data from " + url
+    }()
+
+    select {
+    case res := <-result:
+        return res, nil
+    case err := <-errCh:
+        return "", err
+    case <-time.After(50 * time.Millisecond):
+        return "", fmt.Errorf("timeout")
+    case <-ctx.Done():
+        return "", ctx.Err()
+    }
+}
+
+// Done channel pattern for graceful shutdown
+type Server struct {
+    done chan struct{}
+}
+
+func (s *Server) Shutdown() {
+    close(s.done)
+}
+
+func (s *Server) Run(ctx context.Context) {
+    ticker := time.NewTicker(1 * time.Second)
+    defer ticker.Stop()
+
+    for {
+        select {
+        case <-ticker.C:
+            fmt.Println("tick")
+        case <-s.done:
+            fmt.Println("shutting down")
+            return
+        case <-ctx.Done():
+            fmt.Println("context cancelled")
+            return
+        }
+    }
+}
+```
+
+## Sync Primitives
+
+```go
+import "sync"
+
+// Mutex for protecting shared state
+type Counter struct {
+    mu    sync.Mutex
+    count int
+}
+
+func (c *Counter) Increment() {
+    c.mu.Lock()
+    defer c.mu.Unlock()
+    c.count++
+}
+
+func (c *Counter) Value() int {
+    c.mu.Lock()
+    defer c.mu.Unlock()
+    return c.count
+}
+
+// RWMutex for read-heavy workloads
+type Cache struct {
+    mu    sync.RWMutex
+    items map[string]string
+}
+
+func (c *Cache) Get(key string) (string, bool) {
+    c.mu.RLock()
+    defer c.mu.RUnlock()
+    val, ok := c.items[key]
+    return val, ok
+}
+
+func (c *Cache) Set(key, value string) {
+    c.mu.Lock()
+    defer c.mu.Unlock()
+    c.items[key] = value
+}
+
+// sync.Once for initialization
+type Service struct {
+    once   sync.Once
+    config *Config
+}
+
+func (s *Service) getConfig() *Config {
+    s.once.Do(func() {
+        s.config = loadConfig() // Only called once
+    })
+    return s.config
+}
+```
+
+## Rate Limiting and Backpressure
+
+```go
+import "golang.org/x/time/rate"
+
+// Token bucket rate limiter
+type RateLimiter struct {
+    limiter *rate.Limiter
+}
+
+func NewRateLimiter(rps int) *RateLimiter {
+    return &RateLimiter{
+        limiter: rate.NewLimiter(rate.Limit(rps), rps),
+    }
+}
+
+func (rl *RateLimiter) Process(ctx context.Context, item string) error {
+    if err := rl.limiter.Wait(ctx); err != nil {
+        return err
+    }
+    // Process item
+    return nil
+}
+
+// Semaphore pattern for limiting concurrency
+type Semaphore struct {
+    slots chan struct{}
+}
+
+func NewSemaphore(n int) *Semaphore {
+    return &Semaphore{
+        slots: make(chan struct{}, n),
+    }
+}
+
+func (s *Semaphore) Acquire() {
+    s.slots <- struct{}{}
+}
+
+func (s *Semaphore) Release() {
+    <-s.slots
+}
+
+func (s *Semaphore) Do(fn func()) {
+    s.Acquire()
+    defer s.Release()
+    fn()
+}
+```
+
+## Pipeline Pattern
+
+```go
+// Stage-based processing pipeline
+func pipeline(ctx context.Context, input <-chan int) <-chan int {
+    // Stage 1: Square numbers
+    stage1 := make(chan int)
+    go func() {
+        defer close(stage1)
+        for num := range input {
+            select {
+            case stage1 <- num * num:
+            case <-ctx.Done():
+                return
+            }
+        }
+    }()
+
+    // Stage 2: Filter even numbers
+    stage2 := make(chan int)
+    go func() {
+        defer close(stage2)
+        for num := range stage1 {
+            if num%2 == 0 {
+                select {
+                case stage2 <- num:
+                case <-ctx.Done():
+                    return
+                }
+            }
+        }
+    }()
+
+    return stage2
+}
+```
+
+## Quick Reference
+
+| Pattern | Use Case | Key Points |
+|---------|----------|------------|
+| Worker Pool | Bounded concurrency | Limit goroutines, reuse workers |
+| Fan-out/Fan-in | Parallel processing | Distribute work, merge results |
+| Pipeline | Stream processing | Chain transformations |
+| Rate Limiter | API throttling | Control request rate |
+| Semaphore | Resource limits | Cap concurrent operations |
+| Done Channel | Graceful shutdown | Signal completion |
diff --git a/.agents/skills/golang-pro/references/generics.md b/.agents/skills/golang-pro/references/generics.md
new file mode 100644
index 0000000..45e7003
--- /dev/null
+++ b/.agents/skills/golang-pro/references/generics.md
@@ -0,0 +1,442 @@
+# Generics and Type Parameters
+
+## Basic Type Parameters
+
+```go
+package main
+
+// Generic function with type parameter
+func Max[T constraints.Ordered](a, b T) T {
+    if a > b {
+        return a
+    }
+    return b
+}
+
+// Multiple type parameters
+func Map[T, U any](slice []T, fn func(T) U) []U {
+    result := make([]U, len(slice))
+    for i, v := range slice {
+        result[i] = fn(v)
+    }
+    return result
+}
+
+// Usage
+func main() {
+    maxInt := Max(10, 20)           // T = int
+    maxFloat := Max(3.14, 2.71)     // T = float64
+    maxString := Max("abc", "xyz")  // T = string
+
+    nums := []int{1, 2, 3}
+    doubled := Map(nums, func(n int) int { return n * 2 })
+    strings := Map(nums, func(n int) string { return fmt.Sprintf("%d", n) })
+}
+```
+
+## Type Constraints
+
+```go
+import "golang.org/x/exp/constraints"
+
+// Built-in constraints
+type Number interface {
+    constraints.Integer | constraints.Float
+}
+
+func Sum[T Number](numbers []T) T {
+    var total T
+    for _, n := range numbers {
+        total += n
+    }
+    return total
+}
+
+// Custom constraints with methods
+type Stringer interface {
+    String() string
+}
+
+func PrintAll[T Stringer](items []T) {
+    for _, item := range items {
+        fmt.Println(item.String())
+    }
+}
+
+// Approximate constraint using ~
+type Integer interface {
+    ~int | ~int8 | ~int16 | ~int32 | ~int64
+}
+
+type MyInt int
+
+func Double[T Integer](n T) T {
+    return n * 2
+}
+
+// Works with both int and MyInt
+func main() {
+    fmt.Println(Double(5))          // int
+    fmt.Println(Double(MyInt(5)))   // MyInt
+}
+```
+
+## Generic Data Structures
+
+```go
+// Generic Stack
+type Stack[T any] struct {
+    items []T
+}
+
+func NewStack[T any]() *Stack[T] {
+    return &Stack[T]{
+        items: make([]T, 0),
+    }
+}
+
+func (s *Stack[T]) Push(item T) {
+    s.items = append(s.items, item)
+}
+
+func (s *Stack[T]) Pop() (T, bool) {
+    if len(s.items) == 0 {
+        var zero T
+        return zero, false
+    }
+    item := s.items[len(s.items)-1]
+    s.items = s.items[:len(s.items)-1]
+    return item, true
+}
+
+func (s *Stack[T]) IsEmpty() bool {
+    return len(s.items) == 0
+}
+
+// Usage
+intStack := NewStack[int]()
+intStack.Push(1)
+intStack.Push(2)
+
+stringStack := NewStack[string]()
+stringStack.Push("hello")
+stringStack.Push("world")
+```
+
+## Generic Map Operations
+
+```go
+// Filter with generics
+func Filter[T any](slice []T, predicate func(T) bool) []T {
+    result := make([]T, 0, len(slice))
+    for _, v := range slice {
+        if predicate(v) {
+            result = append(result, v)
+        }
+    }
+    return result
+}
+
+// Reduce/Fold
+func Reduce[T, U any](slice []T, initial U, fn func(U, T) U) U {
+    acc := initial
+    for _, v := range slice {
+        acc = fn(acc, v)
+    }
+    return acc
+}
+
+// Keys from map
+func Keys[K comparable, V any](m map[K]V) []K {
+    keys := make([]K, 0, len(m))
+    for k := range m {
+        keys = append(keys, k)
+    }
+    return keys
+}
+
+// Values from map
+func Values[K comparable, V any](m map[K]V) []V {
+    values := make([]V, 0, len(m))
+    for _, v := range m {
+        values = append(values, v)
+    }
+    return values
+}
+
+// Usage
+numbers := []int{1, 2, 3, 4, 5, 6}
+evens := Filter(numbers, func(n int) bool { return n%2 == 0 })
+
+sum := Reduce(numbers, 0, func(acc, n int) int { return acc + n })
+
+m := map[string]int{"a": 1, "b": 2}
+keys := Keys(m)     // []string{"a", "b"} (order not guaranteed)
+values := Values(m) // []int{1, 2} (order not guaranteed)
+```
+
+## Generic Pairs and Tuples
+
+```go
+// Generic Pair
+type Pair[T, U any] struct {
+    First  T
+    Second U
+}
+
+func NewPair[T, U any](first T, second U) Pair[T, U] {
+    return Pair[T, U]{First: first, Second: second}
+}
+
+func (p Pair[T, U]) Swap() Pair[U, T] {
+    return Pair[U, T]{First: p.Second, Second: p.First}
+}
+
+// Usage
+pair := NewPair("name", 42)
+swapped := pair.Swap() // Pair[int, string]
+
+// Generic Result type (like Rust's Result<T, E>)
+type Result[T any] struct {
+    value T
+    err   error
+}
+
+func Ok[T any](value T) Result[T] {
+    return Result[T]{value: value}
+}
+
+func Err[T any](err error) Result[T] {
+    return Result[T]{err: err}
+}
+
+func (r Result[T]) IsOk() bool {
+    return r.err == nil
+}
+
+func (r Result[T]) Unwrap() (T, error) {
+    return r.value, r.err
+}
+
+func (r Result[T]) UnwrapOr(defaultValue T) T {
+    if r.err != nil {
+        return defaultValue
+    }
+    return r.value
+}
+```
+
+## Comparable Constraint
+
+```go
+// Find using comparable
+func Find[T comparable](slice []T, target T) (int, bool) {
+    for i, v := range slice {
+        if v == target {
+            return i, true
+        }
+    }
+    return -1, false
+}
+
+// Contains
+func Contains[T comparable](slice []T, target T) bool {
+    _, found := Find(slice, target)
+    return found
+}
+
+// Unique elements
+func Unique[T comparable](slice []T) []T {
+    seen := make(map[T]struct{})
+    result := make([]T, 0, len(slice))
+
+    for _, v := range slice {
+        if _, exists := seen[v]; !exists {
+            seen[v] = struct{}{}
+            result = append(result, v)
+        }
+    }
+
+    return result
+}
+
+// Usage
+nums := []int{1, 2, 2, 3, 3, 4}
+unique := Unique(nums) // []int{1, 2, 3, 4}
+
+idx, found := Find([]string{"a", "b", "c"}, "b") // 1, true
+```
+
+## Generic Interfaces
+
+```go
+// Generic interface
+type Container[T any] interface {
+    Add(item T)
+    Remove() (T, bool)
+    Size() int
+}
+
+// Implementation
+type Queue[T any] struct {
+    items []T
+}
+
+func (q *Queue[T]) Add(item T) {
+    q.items = append(q.items, item)
+}
+
+func (q *Queue[T]) Remove() (T, bool) {
+    if len(q.items) == 0 {
+        var zero T
+        return zero, false
+    }
+    item := q.items[0]
+    q.items = q.items[1:]
+    return item, true
+}
+
+func (q *Queue[T]) Size() int {
+    return len(q.items)
+}
+
+// Function accepting generic interface
+func ProcessContainer[T any](c Container[T], item T) {
+    c.Add(item)
+    fmt.Printf("Container size: %d\n", c.Size())
+}
+```
+
+## Type Inference
+
+```go
+// Type inference works in most cases
+func Identity[T any](x T) T {
+    return x
+}
+
+// No need to specify type
+result := Identity(42)          // T inferred as int
+str := Identity("hello")        // T inferred as string
+
+// Type inference with constraints
+func Min[T constraints.Ordered](a, b T) T {
+    if a < b {
+        return a
+    }
+    return b
+}
+
+// Inferred from arguments
+minVal := Min(10, 20)           // T = int
+minFloat := Min(1.5, 2.5)       // T = float64
+
+// Explicit type when needed
+result := Map[int, string]([]int{1, 2}, func(n int) string {
+    return fmt.Sprintf("%d", n)
+})
+```
+
+## Generic Channels
+
+```go
+// Generic channel operations
+func Merge[T any](channels ...<-chan T) <-chan T {
+    out := make(chan T)
+    var wg sync.WaitGroup
+
+    for _, ch := range channels {
+        wg.Add(1)
+        go func(c <-chan T) {
+            defer wg.Done()
+            for v := range c {
+                out <- v
+            }
+        }(ch)
+    }
+
+    go func() {
+        wg.Wait()
+        close(out)
+    }()
+
+    return out
+}
+
+// Generic pipeline stage
+func Stage[T, U any](in <-chan T, fn func(T) U) <-chan U {
+    out := make(chan U)
+    go func() {
+        defer close(out)
+        for v := range in {
+            out <- fn(v)
+        }
+    }()
+    return out
+}
+
+// Usage
+ch1 := make(chan int)
+ch2 := make(chan int)
+
+merged := Merge(ch1, ch2)
+
+numbers := make(chan int)
+doubled := Stage(numbers, func(n int) int { return n * 2 })
+strings := Stage(doubled, func(n int) string { return fmt.Sprintf("%d", n) })
+```
+
+## Union Constraints
+
+```go
+// Union of types
+type StringOrInt interface {
+    string | int
+}
+
+func Process[T StringOrInt](val T) string {
+    return fmt.Sprintf("%v", val)
+}
+
+// More complex unions
+type Numeric interface {
+    int | int8 | int16 | int32 | int64 |
+    uint | uint8 | uint16 | uint32 | uint64 |
+    float32 | float64
+}
+
+func Abs[T Numeric](n T) T {
+    if n < 0 {
+        return -n
+    }
+    return n
+}
+
+// Union handled with a type switch
+type Serializable interface {
+    string | []byte
+}
+
+func Serialize[T Serializable](data T) []byte {
+    switch v := any(data).(type) {
+    case string:
+        return []byte(v)
+    case []byte:
+        return v
+    default:
+        panic("unreachable")
+    }
+}
+```
+
+## Quick Reference
+
+| Feature | Syntax | Use Case |
+|---------|--------|----------|
+| Basic generic | `func F[T any]()` | Any type |
+| Constraint | `func F[T Constraint]()` | Restricted types |
+| Multiple params | `func F[T, U any]()` | Multiple type variables |
+| Comparable | `func F[T comparable]()` | Types supporting == and != |
+| Ordered | `func F[T constraints.Ordered]()` | Types supporting <, >, <=, >= |
+| Union | `T interface{int \| string}` | Either type |
+| Approximate | `~int` | Include every type whose underlying type is `int` |
diff --git a/.agents/skills/golang-pro/references/interfaces.md b/.agents/skills/golang-pro/references/interfaces.md
new file mode 100644
index 0000000..41856f9
--- /dev/null
+++ b/.agents/skills/golang-pro/references/interfaces.md
@@ -0,0 +1,432 @@
+# Interface Design and Composition
+
+## Small, Focused Interfaces
+
+```go
+// Single-method interfaces (idiomatic Go)
+type Reader interface {
+    Read(p []byte) (n int, err error)
+}
+
+type Writer interface {
+    Write(p []byte) (n int, err error)
+}
+
+type Closer interface {
+    Close() error
+}
+
+// Interface composition
+type ReadCloser interface {
+    Reader
+    Closer
+}
+
+type WriteCloser interface {
+    Writer
+    Closer
+}
+
+type ReadWriteCloser interface {
+    Reader
+    Writer
+    Closer
+}
+```
+
+## Accept Interfaces, Return Structs
+
+```go
+package storage
+
+import "io"
+
+// Storage is the concrete type (struct)
+type Storage struct {
+    baseDir string
+}
+
+// NewStorage returns a concrete type
+func NewStorage(baseDir string) *Storage {
+    return &Storage{baseDir: baseDir}
+}
+
+// SaveFile accepts an interface for flexibility
+func (s *Storage) SaveFile(filename string, data io.Reader) error {
+    // Implementation can work with any Reader
+    // (file, network, buffer, etc.)
+    return nil
+}
+
+// Usage allows dependency injection
+type Uploader interface {
+    SaveFile(filename string, data io.Reader) error
+}
+
+type Service struct {
+    uploader Uploader // Accept interface
+}
+
+// NewService accepts interface for testing flexibility
+func NewService(uploader Uploader) *Service {
+    return &Service{uploader: uploader}
+}
+```
+
+## io.Reader and io.Writer Patterns
+
+```go
+import (
+    "io"
+    "strings"
+)
+
+// Chain readers with io.MultiReader
+func combineReaders() io.Reader {
+    r1 := strings.NewReader("Hello ")
+    r2 := strings.NewReader("World")
+    return io.MultiReader(r1, r2)
+}
+
+// Tee reader for duplicating reads
+func duplicateRead(r io.Reader, w io.Writer) io.Reader {
+    return io.TeeReader(r, w) // Writes to w while reading from r
+}
+
+// Limit reader to prevent reading too much
+func limitedRead(r io.Reader, n int64) io.Reader {
+    return io.LimitReader(r, n)
+}
+
+// Custom Reader implementation
+type UppercaseReader struct {
+    src io.Reader
+}
+
+func (u *UppercaseReader) Read(p []byte) (n int, err error) {
+    n, err = u.src.Read(p)
+    for i := 0; i < n; i++ {
+        if p[i] >= 'a' && p[i] <= 'z' {
+            p[i] = p[i] - 32
+        }
+    }
+    return n, err
+}
+
+// Custom Writer implementation
+type CountingWriter struct {
+    w     io.Writer
+    count int64
+}
+
+func (cw *CountingWriter) Write(p []byte) (n int, err error) {
+    n, err = cw.w.Write(p)
+    cw.count += int64(n)
+    return n, err
+}
+
+func (cw *CountingWriter) BytesWritten() int64 {
+    return cw.count
+}
+```
+
+## Embedding for Composition
+
+```go
+import "sync"
+
+// Pair a mutex with the data it guards (a named field here; embedding sync.Mutex would promote Lock/Unlock)
+type SafeCounter struct {
+    mu sync.Mutex
+    m  map[string]int
+}
+
+func (sc *SafeCounter) Inc(key string) {
+    sc.mu.Lock()
+    defer sc.mu.Unlock()
+    sc.m[key]++
+}
+
+// Embed interface to add default behavior
+type Logger interface {
+    Log(msg string)
+}
+
+type NoOpLogger struct{}
+
+func (NoOpLogger) Log(msg string) {}
+
+type Service struct {
+    Logger // Embedded interface (default implementation can be provided)
+}
+
+func NewService(logger Logger) *Service {
+    if logger == nil {
+        logger = NoOpLogger{} // Provide default
+    }
+    return &Service{Logger: logger}
+}
+
+// Now Service.Log() is available
+```
+
+## Interface Satisfaction Verification
+
+```go
+import "io"
+
+// Compile-time interface verification
+var _ io.Reader = (*MyReader)(nil)
+var _ io.Writer = (*MyWriter)(nil)
+var _ io.Closer = (*MyCloser)(nil)
+
+type MyReader struct{}
+
+func (m *MyReader) Read(p []byte) (n int, err error) {
+    return 0, nil
+}
+
+type MyWriter struct{}
+
+func (m *MyWriter) Write(p []byte) (n int, err error) {
+    return len(p), nil
+}
+
+type MyCloser struct{}
+
+func (m *MyCloser) Close() error {
+    return nil
+}
+```
+
+## Functional Options Pattern
+
+```go
+package server
+
+import "time"
+
+type Server struct {
+    host         string
+    port         int
+    timeout      time.Duration
+    maxConns     int
+    enableLogger bool
+}
+
+// Option is a functional option for configuring Server
+type Option func(*Server)
+
+func WithHost(host string) Option {
+    return func(s *Server) {
+        s.host = host
+    }
+}
+
+func WithPort(port int) Option {
+    return func(s *Server) {
+        s.port = port
+    }
+}
+
+func WithTimeout(timeout time.Duration) Option {
+    return func(s *Server) {
+        s.timeout = timeout
+    }
+}
+
+func WithMaxConnections(max int) Option {
+    return func(s *Server) {
+        s.maxConns = max
+    }
+}
+
+func WithLogger(enabled bool) Option {
+    return func(s *Server) {
+        s.enableLogger = enabled
+    }
+}
+
+// NewServer creates a server with functional options
+func NewServer(opts ...Option) *Server {
+    // Defaults
+    s := &Server{
+        host:     "localhost",
+        port:     8080,
+        timeout:  30 * time.Second,
+        maxConns: 100,
+    }
+
+    // Apply options
+    for _, opt := range opts {
+        opt(s)
+    }
+
+    return s
+}
+
+// Usage:
+// server := NewServer(
+//     WithHost("0.0.0.0"),
+//     WithPort(9000),
+//     WithTimeout(60 * time.Second),
+//     WithLogger(true),
+// )
+```
+
+## Interface Segregation
+
+```go
+// Bad: Fat interface
+type BadRepository interface {
+    Create(item Item) error
+    Read(id string) (Item, error)
+    Update(item Item) error
+    Delete(id string) error
+    List() ([]Item, error)
+    Search(query string) ([]Item, error)
+    Count() (int, error)
+}
+
+// Good: Segregated interfaces
+type Creator interface {
+    Create(item Item) error
+}
+
+type Reader interface {
+    Read(id string) (Item, error)
+}
+
+type Updater interface {
+    Update(item Item) error
+}
+
+type Deleter interface {
+    Delete(id string) error
+}
+
+type Lister interface {
+    List() ([]Item, error)
+}
+
+// Compose only what you need
+type ReadWriter interface {
+    Reader
+    Creator
+}
+
+type FullRepository interface {
+    Creator
+    Reader
+    Updater
+    Deleter
+    Lister
+}
+```
+
+## Type Assertions and Type Switches
+
+```go
+import (
+    "fmt"
+    "io"
+)
+
+// Safe type assertion
+func processValue(v interface{}) {
+    // Two-value assertion (safe)
+    if str, ok := v.(string); ok {
+        fmt.Println("String:", str)
+        return
+    }
+
+    // Type switch
+    switch val := v.(type) {
+    case int:
+        fmt.Println("Int:", val)
+    case string:
+        fmt.Println("String:", val)
+    case bool:
+        fmt.Println("Bool:", val)
+    default:
+        fmt.Println("Unknown type")
+    }
+}
+
+// Check for optional interface methods
+type Flusher interface {
+    Flush() error
+}
+
+func writeAndFlush(w io.Writer, data []byte) error {
+    if _, err := w.Write(data); err != nil {
+        return err
+    }
+
+    // Check if Writer also implements Flusher
+    if flusher, ok := w.(Flusher); ok {
+        return flusher.Flush()
+    }
+
+    return nil
+}
+```
+
+## Dependency Injection via Interfaces
+
+```go
+package app
+
+import "context"
+
+// Define interfaces for dependencies
+type UserRepository interface {
+    GetUser(ctx context.Context, id string) (*User, error)
+    SaveUser(ctx context.Context, user *User) error
+}
+
+type EmailSender interface {
+    SendEmail(ctx context.Context, to, subject, body string) error
+}
+
+// Service depends on interfaces
+type UserService struct {
+    repo   UserRepository
+    mailer EmailSender
+}
+
+func NewUserService(repo UserRepository, mailer EmailSender) *UserService {
+    return &UserService{
+        repo:   repo,
+        mailer: mailer,
+    }
+}
+
+func (s *UserService) RegisterUser(ctx context.Context, email string) error {
+    user := &User{Email: email}
+    if err := s.repo.SaveUser(ctx, user); err != nil {
+        return err
+    }
+    return s.mailer.SendEmail(ctx, email, "Welcome", "Thanks for registering!")
+}
+
+// Easy to mock in tests
+type MockUserRepository struct{}
+
+func (m *MockUserRepository) GetUser(ctx context.Context, id string) (*User, error) {
+    return &User{ID: id}, nil
+}
+
+func (m *MockUserRepository) SaveUser(ctx context.Context, user *User) error {
+    return nil
+}
+```
+
+## Quick Reference
+
+| Pattern | Use Case | Key Principle |
+|---------|----------|---------------|
+| Small interfaces | Flexibility | Single-method interfaces |
+| Accept interfaces | Testability | Depend on abstractions |
+| Return structs | Clarity | Concrete return types |
+| io.Reader/Writer | I/O operations | Standard library integration |
+| Embedding | Composition | Extend behavior without inheritance |
+| Functional options | Configuration | Flexible constructors |
+| Type assertions | Runtime checks | Safe downcasting |
diff --git a/.agents/skills/golang-pro/references/project-structure.md b/.agents/skills/golang-pro/references/project-structure.md
new file mode 100644
index 0000000..eb599b9
--- /dev/null
+++ b/.agents/skills/golang-pro/references/project-structure.md
@@ -0,0 +1,477 @@
+# Project Structure and Module Management
+
+## Standard Project Layout
+
+```
+myproject/
+├── cmd/                    # Main applications
+│   ├── server/
+│   │   └── main.go        # Entry point for server
+│   └── cli/
+│       └── main.go        # Entry point for CLI tool
+├── internal/              # Private application code
+│   ├── api/              # API handlers
+│   ├── service/          # Business logic
+│   └── repository/       # Data access layer
+├── pkg/                   # Public library code
+│   └── models/           # Shared models
+├── api/                   # API definitions
+│   ├── openapi.yaml      # OpenAPI spec
+│   └── proto/            # Protocol buffers
+├── web/                   # Web assets
+│   ├── static/
+│   └── templates/
+├── scripts/               # Build and install scripts
+├── configs/              # Configuration files
+├── deployments/          # Docker, K8s configs
+├── test/                 # Additional test data
+├── docs/                 # Documentation
+├── go.mod               # Module definition
+├── go.sum               # Dependency checksums
+├── Makefile             # Build automation
+└── README.md
+```
+
+## go.mod Basics
+
+```go
+// Initialize module
+// go mod init github.com/user/project
+
+module github.com/user/myproject
+
+go 1.21
+
+require (
+    github.com/gin-gonic/gin v1.9.1
+    github.com/lib/pq v1.10.9
+    go.uber.org/zap v1.26.0
+)
+
+require (
+    // Indirect dependencies (automatically managed)
+    github.com/bytedance/sonic v1.9.1 // indirect
+    github.com/chenzhuoyu/base64x v0.0.0-20221115062448-fe3a3abad311 // indirect
+)
+
+// Replace directive for local development
+replace github.com/user/mylib => ../mylib
+
+// Retract directive to mark bad versions
+retract v1.0.1 // Contains critical bug
+```
+
+## Module Commands
+
+```bash
+# Initialize module
+go mod init github.com/user/project
+
+# Add missing dependencies
+go mod tidy
+
+# Download dependencies
+go mod download
+
+# Verify dependencies
+go mod verify
+
+# Show module graph
+go mod graph
+
+# Show why package is needed
+go mod why github.com/user/package
+
+# Vendor dependencies (copy to vendor/)
+go mod vendor
+
+# Update dependency
+go get -u github.com/user/package
+
+# Update to specific version
+go get github.com/user/package@v1.2.3
+
+# Update all dependencies
+go get -u ./...
+
+# Remove unused dependencies
+go mod tidy
+```
+
+## Internal Packages
+
+```go
+// internal/ packages can only be imported by code inside the directory tree rooted at the parent of internal/
+
+myproject/
+├── internal/
+│   ├── auth/           # Can only be imported by myproject
+│   │   └── jwt.go
+│   └── database/
+│       └── postgres.go
+└── pkg/
+    └── models/         # Can be imported by anyone
+        └── user.go
+
+// This works (same project):
+import "github.com/user/myproject/internal/auth"
+
+// This fails (different project):
+import "github.com/other/project/internal/auth" // Error!
+
+// Internal subdirectories
+myproject/
+└── api/
+    └── internal/       # Can only be imported by code in api/
+        └── helpers.go
+```
+
+## Package Organization
+
+```go
+// user/user.go - Domain package
+package user
+
+import (
+    "context"
+    "time"
+)
+
+// User represents a user entity
+type User struct {
+    ID        string
+    Email     string
+    CreatedAt time.Time
+}
+
+// Repository defines data access interface
+type Repository interface {
+    Create(ctx context.Context, user *User) error
+    GetByID(ctx context.Context, id string) (*User, error)
+    Update(ctx context.Context, user *User) error
+    Delete(ctx context.Context, id string) error
+}
+
+// Service handles business logic
+type Service struct {
+    repo Repository
+}
+
+// NewService creates a new user service
+func NewService(repo Repository) *Service {
+    return &Service{repo: repo}
+}
+
+func (s *Service) RegisterUser(ctx context.Context, email string) (*User, error) {
+    user := &User{
+        ID:        generateID(),
+        Email:     email,
+        CreatedAt: time.Now(),
+    }
+    return user, s.repo.Create(ctx, user)
+}
+```
+
+## Multi-Module Repository (Monorepo)
+
+```
+monorepo/
+├── go.work              # Workspace file
+├── services/
+│   ├── api/
+│   │   ├── go.mod
+│   │   └── main.go
+│   └── worker/
+│       ├── go.mod
+│       └── main.go
+└── shared/
+    └── models/
+        ├── go.mod
+        └── user.go
+
+// go.work
+go 1.21
+
+use (
+    ./services/api
+    ./services/worker
+    ./shared/models
+)
+
+// Commands:
+// go work init ./services/api ./services/worker
+// go work use ./shared/models
+// go work sync
+```
+
+## Build Tags and Constraints
+
+```go
+//go:build integration
+// integration_test.go
+
+package myapp
+
+import "testing"
+
+func TestIntegration(t *testing.T) {
+    // Integration test code
+}
+
+// Build: go test -tags=integration
+
+// File-level build constraints (Go 1.17+)
+//go:build linux && amd64
+
+package myapp
+
+// Multiple constraints: a file may contain only one //go:build line,
+// so combine terms with &&, || and parentheses
+//go:build (linux || darwin) && amd64
+
+// Negation
+//go:build !windows
+
+// Common tags:
+// linux, darwin, windows, freebsd
+// amd64, arm64, 386, arm
+// cgo, !cgo
+```
+
+## Makefile Example
+
+```makefile
+# Makefile
+# Every target below is a command, not a file; declaring all of them phony
+# keeps a stray file named e.g. "fmt" or "help" from disabling the target.
+.PHONY: build test test-coverage lint fmt run clean deps build-all run-race generate docker-build help
+
+# Variables
+BINARY_NAME=myapp
+BUILD_DIR=bin
+GO=go
+GOFLAGS=-v
+
+# Build the application
+build:
+	$(GO) build $(GOFLAGS) -o $(BUILD_DIR)/$(BINARY_NAME) ./cmd/server
+
+# Run tests
+test:
+	$(GO) test -v -race -coverprofile=coverage.out ./...
+
+# Run tests with coverage report
+test-coverage: test
+	$(GO) tool cover -html=coverage.out
+
+# Run linters
+lint:
+	golangci-lint run ./...
+
+# Format code
+fmt:
+	$(GO) fmt ./...
+	goimports -w .
+
+# Run the application
+run:
+	$(GO) run ./cmd/server
+
+# Clean build artifacts
+clean:
+	rm -rf $(BUILD_DIR)
+	rm -f coverage.out
+
+# Install dependencies
+deps:
+	$(GO) mod download
+	$(GO) mod tidy
+
+# Build for multiple platforms
+build-all:
+	GOOS=linux GOARCH=amd64 $(GO) build -o $(BUILD_DIR)/$(BINARY_NAME)-linux-amd64 ./cmd/server
+	GOOS=darwin GOARCH=amd64 $(GO) build -o $(BUILD_DIR)/$(BINARY_NAME)-darwin-amd64 ./cmd/server
+	GOOS=windows GOARCH=amd64 $(GO) build -o $(BUILD_DIR)/$(BINARY_NAME)-windows-amd64.exe ./cmd/server
+
+# Run with race detector
+run-race:
+	$(GO) run -race ./cmd/server
+
+# Generate code
+generate:
+	$(GO) generate ./...
+
+# Docker build
+docker-build:
+	docker build -t $(BINARY_NAME):latest .
+
+# Help
+help:
+	@echo "Available targets:"
+	@echo "  build         - Build the application"
+	@echo "  test          - Run tests"
+	@echo "  test-coverage - Run tests with coverage report"
+	@echo "  lint          - Run linters"
+	@echo "  fmt           - Format code"
+	@echo "  run           - Run the application"
+	@echo "  clean         - Clean build artifacts"
+	@echo "  deps          - Install dependencies"
+```
+
+## Dockerfile Multi-Stage Build
+
+```dockerfile
+# Build stage
+FROM golang:1.21-alpine AS builder
+
+WORKDIR /app
+
+# Copy go mod files
+COPY go.mod go.sum ./
+RUN go mod download
+
+# Copy source code
+COPY . .
+
+# Build binary
+RUN CGO_ENABLED=0 GOOS=linux go build -a -installsuffix cgo -o server ./cmd/server
+
+# Final stage
+FROM alpine:latest
+
+RUN apk --no-cache add ca-certificates
+
+WORKDIR /root/
+
+# Copy binary from builder
+COPY --from=builder /app/server .
+
+# Copy config files if needed
+COPY --from=builder /app/configs ./configs
+
+EXPOSE 8080
+
+CMD ["./server"]
+```
+
+## Version Information
+
+```go
+// version/version.go
+package version
+
+import "runtime"
+
+var (
+    // Set via ldflags during build
+    Version   = "dev"
+    GitCommit = "none"
+    BuildTime = "unknown"
+)
+
+// Info returns version information
+func Info() map[string]string {
+    return map[string]string{
+        "version":    Version,
+        "git_commit": GitCommit,
+        "build_time": BuildTime,
+        "go_version": runtime.Version(),
+        "os":         runtime.GOOS,
+        "arch":       runtime.GOARCH,
+    }
+}
+
+// Build with version info:
+// go build -ldflags "-X github.com/user/project/version.Version=1.0.0 \
+//   -X github.com/user/project/version.GitCommit=$(git rev-parse HEAD) \
+//   -X github.com/user/project/version.BuildTime=$(date -u +%Y-%m-%dT%H:%M:%SZ)"
+```
+
+## Go Generate
+
+```go
+// models/user.go
+//go:generate mockgen -source=user.go -destination=../mocks/user_mock.go -package=mocks
+
+package models
+
+type UserRepository interface {
+    GetUser(id string) (*User, error)
+    SaveUser(user *User) error
+}
+
+// tools.go - Track tool dependencies
+//go:build tools
+
+package tools
+
+import (
+    _ "github.com/golang/mock/mockgen"
+    _ "golang.org/x/tools/cmd/stringer"
+)
+
+// Install tools:
+// go install github.com/golang/mock/mockgen@latest
+
+// Run generate:
+// go generate ./...
+```
+
+## Configuration Management
+
+```go
+// config/config.go
+package config
+
+import (
+    "os"
+    "time"
+
+    "github.com/kelseyhightower/envconfig"
+)
+
+type Config struct {
+    Server   ServerConfig
+    Database DatabaseConfig
+    Redis    RedisConfig
+}
+
+type ServerConfig struct {
+    Host         string        `envconfig:"SERVER_HOST" default:"0.0.0.0"`
+    Port         int           `envconfig:"SERVER_PORT" default:"8080"`
+    ReadTimeout  time.Duration `envconfig:"SERVER_READ_TIMEOUT" default:"10s"`
+    WriteTimeout time.Duration `envconfig:"SERVER_WRITE_TIMEOUT" default:"10s"`
+}
+
+type DatabaseConfig struct {
+    URL          string `envconfig:"DATABASE_URL" required:"true"`
+    MaxOpenConns int    `envconfig:"DB_MAX_OPEN_CONNS" default:"25"`
+    MaxIdleConns int    `envconfig:"DB_MAX_IDLE_CONNS" default:"5"`
+}
+
+type RedisConfig struct {
+    Addr     string `envconfig:"REDIS_ADDR" default:"localhost:6379"`
+    Password string `envconfig:"REDIS_PASSWORD"`
+    DB       int    `envconfig:"REDIS_DB" default:"0"`
+}
+
+// Load loads configuration from environment
+func Load() (*Config, error) {
+    var cfg Config
+    if err := envconfig.Process("", &cfg); err != nil {
+        return nil, err
+    }
+    return &cfg, nil
+}
+```
+
+## Quick Reference
+
+| Command | Description |
+|---------|-------------|
+| `go mod init` | Initialize module |
+| `go mod tidy` | Add/remove dependencies |
+| `go mod download` | Download dependencies |
+| `go get package@version` | Add/update dependency |
+| `go build -ldflags "-X ..."` | Set version info |
+| `go generate ./...` | Run code generation |
+| `GOOS=linux go build` | Cross-compile |
+| `go work init` | Initialize workspace |
diff --git a/.agents/skills/golang-pro/references/testing.md b/.agents/skills/golang-pro/references/testing.md
new file mode 100644
index 0000000..3696a2c
--- /dev/null
+++ b/.agents/skills/golang-pro/references/testing.md
@@ -0,0 +1,451 @@
+# Testing and Benchmarking
+
+## Table-Driven Tests
+
+```go
+package math
+
+import "testing"
+
+func Add(a, b int) int {
+    return a + b
+}
+
+func TestAdd(t *testing.T) {
+    tests := []struct {
+        name     string
+        a, b     int
+        expected int
+    }{
+        {"positive numbers", 2, 3, 5},
+        {"negative numbers", -2, -3, -5},
+        {"mixed signs", -2, 3, 1},
+        {"zeros", 0, 0, 0},
+        {"large numbers", 1000000, 2000000, 3000000},
+    }
+
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            result := Add(tt.a, tt.b)
+            if result != tt.expected {
+                t.Errorf("Add(%d, %d) = %d; want %d", tt.a, tt.b, result, tt.expected)
+            }
+        })
+    }
+}
+```
+
+## Subtests and Parallel Execution
+
+```go
+func TestParallel(t *testing.T) {
+    tests := []struct {
+        name  string
+        input string
+        want  string
+    }{
+        {"lowercase", "hello", "HELLO"},
+        {"uppercase", "WORLD", "WORLD"},
+        {"mixed", "HeLLo", "HELLO"},
+    }
+
+    for _, tt := range tests {
+        tt := tt // Per-iteration copy for the closure below; needed before Go 1.22, where all iterations share one loop variable
+        t.Run(tt.name, func(t *testing.T) {
+            t.Parallel() // Mark this subtest as runnable in parallel with the other subtests
+
+            result := strings.ToUpper(tt.input)
+            if result != tt.want {
+                t.Errorf("got %q, want %q", result, tt.want)
+            }
+        })
+    }
+}
+```
+
+## Test Helpers and Setup/Teardown
+
+```go
+func TestWithSetup(t *testing.T) {
+    // Setup
+    db := setupTestDB(t)
+    defer cleanupTestDB(t, db)
+
+    tests := []struct {
+        name string
+        user User
+    }{
+        {"valid user", User{Name: "John", Email: "john@example.com"}},
+        {"empty name", User{Name: "", Email: "test@example.com"}},
+    }
+
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            err := db.SaveUser(tt.user)
+            if err != nil {
+                t.Fatalf("SaveUser failed: %v", err)
+            }
+        })
+    }
+}
+
+// Helper function: t.Helper() makes failures report the caller's file and line instead of the helper's
+func setupTestDB(t *testing.T) *DB {
+    t.Helper()
+
+    db, err := NewDB(":memory:")
+    if err != nil {
+        t.Fatalf("failed to create test DB: %v", err)
+    }
+    return db
+}
+
+func cleanupTestDB(t *testing.T, db *DB) {
+    t.Helper()
+
+    if err := db.Close(); err != nil {
+        t.Errorf("failed to close DB: %v", err)
+    }
+}
+```
+
+## Mocking with Interfaces
+
+```go
+// Interface to mock
+type EmailSender interface {
+    Send(to, subject, body string) error
+}
+
+// Mock implementation
+type MockEmailSender struct {
+    SentEmails []Email
+    ShouldFail bool
+}
+
+type Email struct {
+    To, Subject, Body string
+}
+
+func (m *MockEmailSender) Send(to, subject, body string) error {
+    if m.ShouldFail {
+        return fmt.Errorf("failed to send email")
+    }
+    m.SentEmails = append(m.SentEmails, Email{to, subject, body})
+    return nil
+}
+
+// Test using mock
+// TestUserService_Register checks that Register succeeds and that exactly
+// one email, addressed to the registered user, was recorded by the mock
+// sender passed to NewUserService.
+func TestUserService_Register(t *testing.T) {
+    mockSender := &MockEmailSender{}
+    service := NewUserService(mockSender)
+
+    err := service.Register("user@example.com")
+    if err != nil {
+        t.Fatalf("Register failed: %v", err)
+    }
+
+    // Fatalf rather than Errorf: the index expression below would panic
+    // with "index out of range" if no email had been recorded.
+    if len(mockSender.SentEmails) != 1 {
+        t.Fatalf("expected 1 email sent, got %d", len(mockSender.SentEmails))
+    }
+
+    email := mockSender.SentEmails[0]
+    if email.To != "user@example.com" {
+        t.Errorf("expected email to user@example.com, got %s", email.To)
+    }
+}
+```
+
+## Benchmarking
+
+```go
+func BenchmarkAdd(b *testing.B) {
+    for i := 0; i < b.N; i++ {
+        Add(100, 200)
+    }
+}
+
+// Benchmark with subtests
+func BenchmarkStringOperations(b *testing.B) {
+    benchmarks := []struct {
+        name  string
+        input string
+    }{
+        {"short", "hello"},
+        {"medium", strings.Repeat("hello", 10)},
+        {"long", strings.Repeat("hello", 100)},
+    }
+
+    for _, bm := range benchmarks {
+        b.Run(bm.name, func(b *testing.B) {
+            for i := 0; i < b.N; i++ {
+                _ = strings.ToUpper(bm.input)
+            }
+        })
+    }
+}
+
+// Benchmark with setup
+func BenchmarkMapOperations(b *testing.B) {
+    m := make(map[string]int)
+    for i := 0; i < 1000; i++ {
+        m[fmt.Sprintf("key%d", i)] = i
+    }
+
+    b.ResetTimer() // Don't count setup time
+
+    for i := 0; i < b.N; i++ {
+        _ = m["key500"]
+    }
+}
+
+// Parallel benchmark
+func BenchmarkConcurrentAccess(b *testing.B) {
+    var counter int64
+
+    b.RunParallel(func(pb *testing.PB) {
+        for pb.Next() {
+            atomic.AddInt64(&counter, 1)
+        }
+    })
+}
+
+// Memory allocation benchmark
+func BenchmarkAllocation(b *testing.B) {
+    b.ReportAllocs() // Report allocations
+
+    for i := 0; i < b.N; i++ {
+        s := make([]int, 1000)
+        _ = s
+    }
+}
+```
+
+## Fuzzing (Go 1.18+)
+
+```go
+func FuzzReverse(f *testing.F) {
+    // Seed corpus
+    testcases := []string{"hello", "world", "123", ""}
+    for _, tc := range testcases {
+        f.Add(tc)
+    }
+
+    f.Fuzz(func(t *testing.T, input string) {
+        reversed := Reverse(input)
+        doubleReversed := Reverse(reversed)
+
+        if input != doubleReversed {
+            t.Errorf("Reverse(Reverse(%q)) = %q, want %q", input, doubleReversed, input)
+        }
+    })
+}
+
+// Fuzz with multiple parameters
+// FuzzAdd feeds fuzzer-generated operand pairs to Add and checks properties
+// that hold for every pair of ints, including pairs whose sum overflows.
+func FuzzAdd(f *testing.F) {
+    // Seed corpus: each f.Add call supplies one value per fuzz argument,
+    // in the same order and with the same types as the target below.
+    f.Add(1, 2)
+    f.Add(0, 0)
+    f.Add(-1, 1)
+
+    f.Fuzz(func(t *testing.T, a, b int) {
+        result := Add(a, b)
+
+        // Ordering properties such as "result >= a when b >= 0" are NOT
+        // safe here: signed addition wraps on overflow in Go, so the
+        // fuzzer quickly finds a counterexample (a = MaxInt, b = 1).
+
+        // Commutativity: operand order must not matter.
+        if swapped := Add(b, a); result != swapped {
+            t.Errorf("Add(%d, %d) = %d; Add(%d, %d) = %d; want equal", a, b, result, b, a, swapped)
+        }
+
+        // Invertibility: subtracting b recovers a, even after wraparound.
+        if got := result - b; got != a {
+            t.Errorf("Add(%d, %d) - %d = %d; want %d", a, b, b, got, a)
+        }
+    })
+}
+```
+
+## Test Coverage
+
+```go
+// Run tests with coverage:
+// go test -cover
+// go test -coverprofile=coverage.out
+// go tool cover -html=coverage.out
+
+func TestCalculate(t *testing.T) {
+    tests := []struct {
+        name     string
+        input    int
+        expected int
+    }{
+        {"zero", 0, 0},
+        {"positive", 5, 25},
+        {"negative", -3, 9},
+    }
+
+    for _, tt := range tests {
+        t.Run(tt.name, func(t *testing.T) {
+            result := Calculate(tt.input)
+            if result != tt.expected {
+                t.Errorf("Calculate(%d) = %d; want %d", tt.input, result, tt.expected)
+            }
+        })
+    }
+}
+```
+
+## Race Detector
+
+```go
+// Run with: go test -race
+
+func TestConcurrentAccess(t *testing.T) {
+    var counter int
+    var wg sync.WaitGroup
+
+    // This will fail with -race if not synchronized
+    for i := 0; i < 10; i++ {
+        wg.Add(1)
+        go func() {
+            defer wg.Done()
+            counter++ // Data race!
+        }()
+    }
+
+    wg.Wait()
+}
+
+// Fixed version with mutex
+func TestConcurrentAccessSafe(t *testing.T) {
+    var counter int
+    var mu sync.Mutex
+    var wg sync.WaitGroup
+
+    for i := 0; i < 10; i++ {
+        wg.Add(1)
+        go func() {
+            defer wg.Done()
+            mu.Lock()
+            counter++
+            mu.Unlock()
+        }()
+    }
+
+    wg.Wait()
+
+    if counter != 10 {
+        t.Errorf("expected 10, got %d", counter)
+    }
+}
+```
+
+## Golden Files
+
+```go
+import (
+    "flag"
+    "os"
+    "path/filepath"
+    "testing"
+)
+
+// update switches the test from comparing against the golden file to
+// rewriting it first. Enable with: go test -update
+var update = flag.Bool("update", false, "update golden files")
+
+// TestRenderHTML renders a fixed Data value and compares the output with
+// the golden file testdata/expected.html.
+func TestRenderHTML(t *testing.T) {
+    data := Data{Title: "Test", Content: "Hello"}
+    result := RenderHTML(data)
+
+    goldenFile := filepath.Join("testdata", "expected.html")
+
+    if *update {
+        // Update golden file: go test -update
+        // A failed write must abort the test; otherwise the comparison
+        // below would run against a stale or missing golden file.
+        if err := os.WriteFile(goldenFile, []byte(result), 0644); err != nil {
+            t.Fatalf("failed to update golden file: %v", err)
+        }
+    }
+
+    expected, err := os.ReadFile(goldenFile)
+    if err != nil {
+        t.Fatalf("failed to read golden file: %v", err)
+    }
+
+    if result != string(expected) {
+        t.Errorf("output doesn't match golden file\ngot:\n%s\nwant:\n%s", result, expected)
+    }
+}
+```
+
+## Integration Tests
+
+```go
+// integration_test.go
+//go:build integration
+
+package myapp
+
+import (
+    "testing"
+    "time"
+)
+
+func TestIntegration(t *testing.T) {
+    if testing.Short() {
+        t.Skip("skipping integration test in short mode")
+    }
+
+    // Long-running integration test
+    server := startTestServer(t)
+    defer server.Stop()
+
+    time.Sleep(100 * time.Millisecond) // Wait for server
+
+    client := NewClient(server.URL)
+    resp, err := client.Get("/health")
+    if err != nil {
+        t.Fatalf("health check failed: %v", err)
+    }
+
+    if resp.Status != "ok" {
+        t.Errorf("expected status ok, got %s", resp.Status)
+    }
+}
+
+// Run: go test -tags=integration
+// Run short tests only: go test -short
+```
+
+## Testable Examples
+
+```go
+// Example tests that appear in godoc
+func ExampleAdd() {
+    result := Add(2, 3)
+    fmt.Println(result)
+    // Output: 5
+}
+
+func ExampleAdd_negative() {
+    result := Add(-2, -3)
+    fmt.Println(result)
+    // Output: -5
+}
+
+// Unordered output
+func ExampleKeys() {
+    m := map[string]int{"a": 1, "b": 2, "c": 3}
+    keys := Keys(m)
+    for _, k := range keys {
+        fmt.Println(k)
+    }
+    // Unordered output:
+    // a
+    // b
+    // c
+}
+```
+
+## Quick Reference
+
+| Command | Description |
+|---------|-------------|
+| `go test` | Run tests |
+| `go test -v` | Verbose output |
+| `go test -run TestName` | Run specific test |
+| `go test -bench .` | Run benchmarks |
+| `go test -cover` | Show coverage |
+| `go test -race` | Run race detector |
+| `go test -short` | Skip long tests |
+| `go test -fuzz FuzzName` | Run fuzzing |
+| `go test -cpuprofile cpu.prof` | CPU profiling |
+| `go test -memprofile mem.prof` | Memory profiling |
diff --git a/.gitignore b/.gitignore
index eaebdac..7957e2b 100644
--- a/.gitignore
+++ b/.gitignore
@@ -2,6 +2,16 @@
 bin
 .DS_Store
 
+# Binaries generated by make
+vcr
+vcr_linux_amd64
+vcr_linux_arm64
+vcr_windows_amd64.exe
+vcr_macos_amd64
+vcr_macos_arm64
+tests/integration/bin/vcr-cli
+tests/integration/bin/mockserver
+
 # Lefthook local overrides
 lefthook-local.yml
 
diff --git a/.release-please-manifest.json b/.release-please-manifest.json
index 9965a34..a549f59 100644
--- a/.release-please-manifest.json
+++ b/.release-please-manifest.json
@@ -1,3 +1,3 @@
 {
-  ".": "2.3.0"
+  ".": "2.4.0"
 }
diff --git a/AGENTS.md b/AGENTS.md
new file mode 100644
index 0000000..efad1b8
--- /dev/null
+++ b/AGENTS.md
@@ -0,0 +1,86 @@
+# AGENTS.md
+
+Go CLI for the Vonage Cloud Runtime platform. Built with Cobra + resty. Node.js is present **only** for commitlint — this is not a Node project.
+
+## Commands
+
+```sh
+make build          # go build -o vcr -v
+make test           # go test -v ./...
+make test-unit      # alias for test
+make test-integration  # requires Docker; runs docker compose in tests/integration/
+make test-all       # unit + integration
+golangci-lint run   # lint (v2.6.2 — match CI version)
+go generate ./...   # regenerate mocks (see Codegen below)
+```
+
+Run a single package test:
+```sh
+go test -v ./vcr/app/list/...
+go test -v -run TestMyFunc ./pkg/cmdutil/...
+```
+
+## Architecture
+
+- `main.go` — wires `cmdutil.NewDefaultFactory(...)`, creates root command, executes.
+- `pkg/cmdutil/factory.go` — central `Factory` interface; all commands receive it.
+- `vcr/<command>/` — command implementations mirror the CLI path (`vcr/app/list/` = `vcr app list`).
+- `pkg/api/` — API clients (Asset, Deployment, Release, Datastore, Websocket, GraphQL).
+- `pkg/config/` — INI (`~/.vcr-cli`) and YAML (`vcr.yaml`) parsing.
+- `pkg/format/` — output formatting, tables, error printing.
+- `testutil/mocks/factory.go` — generated `Factory` mock used by all command tests.
+
+## Codegen
+
+Mocks are generated from `pkg/cmdutil/factory.go`:
+```sh
+go generate ./...
+```
+Output: `testutil/mocks/factory.go`. Regenerate whenever `Factory` interface changes.
+
+## Build Flags
+
+Five `ldflags` variables are injected at build time in CI (`main.apiVersion`, `main.version`, `main.buildDate`, `main.commit`, `main.releaseURL`). A plain `make build` omits them — that is fine for local dev.
+
+## Testing Conventions
+
+- Table-driven tests with `gomock` + `testify/assert`.
+- Use `testutil.NewTestIOStreams()` for capturing stdout/stderr in command tests.
+- Use `testutil/mocks.NewMockFactory(ctrl)` as the command factory in tests.
+- HTTP calls are mocked with `httpmock` (no real network in unit tests).
+- Integration tests require Docker. A Linux amd64 `vcr-cli` binary and a `mockserver` binary are built into `tests/integration/bin/` before Docker Compose runs. Use `make test-integration-build`, then `make test-integration`.
+
+## Linting
+
+Config: `.golangci.yml`, golangci-lint **v2.6.2** (same version as CI — mismatches cause false failures).
+
+Notable rules:
+- `gocyclo` / `cyclop` max complexity: 25.
+- Test files are exempt from `funlen`, `goconst`, `dupl`, `errcheck`, and the cyclomatic complexity checks.
+- Unchecked `Close()` and `fmt.Fprint*` errors are excluded globally.
+
+## Git Hooks (Lefthook)
+
+`lefthook.yml` defines:
+- `commit-msg`: `npx commitlint` — requires Node + `npm install` in repo root.
+- `pre-commit`: `gofmt -l -w` + `go vet ./...`.
+- `pre-push`: `go test ./...` + `golangci-lint run`.
+
+Install hooks: `lefthook install` (after `npm install`).
+
+## Commit Convention
+
+Conventional Commits enforced. Allowed types: `feat`, `fix`, `docs`, `style`, `refactor`, `perf`, `test`, `build`, `ci`, `chore`, `revert`. Breaking changes: `feat!:` or `BREAKING CHANGE:` footer.
+
+## Configuration Precedence (Runtime)
+
+Flags > `vcr.yaml` (project manifest) > `~/.vcr-cli` (INI, stores `api_key`, `api_secret`, `default_region`, `graphql_endpoint`).
+
+## Release
+
+- Automated via Release Please on merge to `main`.
+- macOS binaries are code-signed ("Developer ID Application: Nexmo Inc.") and notarized — this only runs in GitHub Actions with org secrets. Local builds do not require signing.
+
+## Reference
+
+- `.github/copilot-instructions.md` — detailed patterns for commands, tests, error handling, API clients, output formatting.
diff --git a/CHANGELOG.md b/CHANGELOG.md
index f697851..eec7b69 100644
--- a/CHANGELOG.md
+++ b/CHANGELOG.md
@@ -1,5 +1,12 @@
 # Changelog
 
+## [2.4.0](https://github.com/Vonage/cloud-runtime-cli/compare/v2.3.0...v2.4.0) (2026-04-15)
+
+
+### 🚀 Features
+
+* **debug:** add prune-sessions command ([#76](https://github.com/Vonage/cloud-runtime-cli/issues/76)) ([b0cfbb0](https://github.com/Vonage/cloud-runtime-cli/commit/b0cfbb0f327d607cfb33b1410b8f3064a584064d))
+
 ## [2.3.0](https://github.com/Vonage/cloud-runtime-cli/compare/v2.2.1...v2.3.0) (2026-02-26)
 
 
diff --git a/docs/vcr_instance_log.md b/docs/vcr_instance_log.md
index d8e55e0..e7ccd80 100644
--- a/docs/vcr_instance_log.md
+++ b/docs/vcr_instance_log.md
@@ -1,6 +1,9 @@
 ## vcr instance log
 
-This command will output the log of an instance.
+Fetch logs from a deployed VCR instance.
+
+By default, the command retrieves the last N log entries (controlled by `--history`) and exits.
+Use the `--follow` (`-f`) flag to continuously stream new log entries until you press Ctrl+C.
 
 ```
 vcr instance log --project-name <project-name> --instance-name <instance-name> [flags]
@@ -9,23 +12,41 @@ vcr instance log --project-name <project-name> --instance-name <instance-name> [
 ### Examples
 
 ```
-# Output instance log by instance id:
+# Print the last logs by instance id (default, no follow):
 $ vcr instance log --id <instance-id>
 
-# Output instance log by project and instance name:
+# Print the last logs by project and instance name:
 $ vcr instance log --project-name <project-name> --instance-name <instance-name>
 
+# Continuously stream new logs (follow mode):
+$ vcr instance log --project-name <project-name> --instance-name <instance-name> --follow
+
+# Follow logs using the short flag:
+$ vcr instance log -p <project-name> -n <instance-name> -f
+
+# Print the last 500 log entries and exit:
+$ vcr instance log --id <instance-id> --history 500
+
+# Filter to show only errors and above:
+$ vcr instance log -p <project-name> -n <instance-name> --log-level error
+
+# Show only application logs (exclude provider logs):
+$ vcr instance log -p <project-name> -n <instance-name> --source-type application
+
+# Combine filters with follow:
+$ vcr instance log -p <project-name> -n <instance-name> -l warn -s application -f
 ```
 
 ### Options
 
 ```
-      --history int            Prints the last N number of records (default 300)
-  -i, --id string              Instance ID
-  -n, --instance-name string   Instance name (must be used with project-name flag)
-  -l, --log-level string       Filter for log level, e.g.trace, debug, info, warn, error, fatal
-  -p, --project-name string    Project name (must be used with instance-name flag)
-  -s, --source-type string     Filter for source type e.g. application, provider
+  -f, --follow                 Continuously stream new log entries (press Ctrl+C to stop)
+      --history int            Number of historical log entries to fetch initially (default 300)
+  -i, --id string              Instance UUID (alternative to project-name + instance-name)
+  -n, --instance-name string   Instance name (requires --project-name)
+  -l, --log-level string       Minimum log level: trace, debug, info, warn, error, fatal
+  -p, --project-name string    Project name (requires --instance-name)
+  -s, --source-type string     Filter by source: application, provider
 ```
 
 ### Options inherited from parent commands
@@ -44,4 +65,4 @@ $ vcr instance log --project-name <project-name> --instance-name <instance-name>
 
 * [vcr instance](vcr_instance.md)	 - Used for instance management
 
-###### Auto generated by spf13/cobra on 26-Nov-2024
+###### Auto generated by spf13/cobra on 13-Apr-2026
diff --git a/pkg/api/deployment.go b/pkg/api/deployment.go
index 90f792b..ebbe169 100644
--- a/pkg/api/deployment.go
+++ b/pkg/api/deployment.go
@@ -156,6 +156,19 @@ func (c *DeploymentClient) DeleteDebugService(ctx context.Context, serviceName s
 	return nil
 }
 
+func (c *DeploymentClient) PruneDebugSessions(ctx context.Context) error {
+	resp, err := c.httpClient.R().
+		SetContext(ctx).
+		Delete(c.baseURL + "/debug/services")
+	if err != nil {
+		return fmt.Errorf("%w: trace_id = %s", err, traceIDFromHTTPResponse(resp))
+	}
+	if resp.IsError() {
+		return NewErrorFromHTTPResponse(resp)
+	}
+	return nil
+}
+
 type statusResponse struct {
 	Ready bool `json:"ready"`
 }
@@ -403,6 +416,25 @@ func (c *DeploymentClient) RemoveSecret(ctx context.Context, name string) error
 	return nil
 }
 
+type listSecretsResponse struct {
+	Secrets []string `json:"secrets"`
+}
+
+func (c *DeploymentClient) ListSecrets(ctx context.Context) ([]string, error) {
+	var result listSecretsResponse
+	resp, err := c.httpClient.R().
+		SetContext(ctx).
+		SetResult(&result).
+		Get(c.baseURL + "/secrets")
+	if err != nil {
+		return nil, fmt.Errorf("%w: trace_id = %s", err, traceIDFromHTTPResponse(resp))
+	}
+	if resp.IsError() {
+		return nil, NewErrorFromHTTPResponse(resp)
+	}
+	return result.Secrets, nil
+}
+
 type pluginsRequest struct {
 	Plugin  string                 `json:"plugin"`
 	Version string                 `json:"version"`
diff --git a/pkg/api/deployment_test.go b/pkg/api/deployment_test.go
index f75568a..a6d8467 100644
--- a/pkg/api/deployment_test.go
+++ b/pkg/api/deployment_test.go
@@ -403,6 +403,80 @@ func TestDeleteDebugService(t *testing.T) {
 	}
 }
 
+func TestPruneDebugSessions(t *testing.T) {
+	client := resty.New()
+	httpmock.ActivateNonDefault(client.GetClient())
+	defer httpmock.DeactivateAndReset()
+
+	type mock struct {
+		mockResponse string
+		status       int
+	}
+
+	type want struct {
+		err error
+	}
+
+	tests := []struct {
+		name string
+		mock mock
+		want want
+	}{
+		{
+			name: "204-happy-path",
+			mock: mock{
+				mockResponse: "",
+				status:       http.StatusNoContent,
+			},
+			want: want{
+				err: nil,
+			},
+		},
+		{
+			name: "404-error",
+			mock: mock{
+				mockResponse: `{"error": {"code": 2003, "message": "not found", "traceId": "n/a", "containerLogs": ""}}`,
+				status:       http.StatusNotFound,
+			},
+			want: want{
+				err: errors.New("API Error Encountered: ( HTTP status: 404 Error code: 2003 Detailed message: not found Trace ID: n/a )"),
+			},
+		},
+		{
+			name: "500-error",
+			mock: mock{
+				mockResponse: `{"error": {"code": 1001, "message": "internal server error", "traceId": "n/a", "containerLogs": ""}}`,
+				status:       http.StatusInternalServerError,
+			},
+			want: want{
+				err: errors.New("API Error Encountered: ( HTTP status: 500 Error code: 1001 Detailed message: internal server error Trace ID: n/a )"),
+			},
+		},
+	}
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+
+			httpmock.RegisterResponder("DELETE", "https://example.com/v0.3/debug/services",
+				func(_ *http.Request) (*http.Response, error) {
+					resp := httpmock.NewStringResponse(tt.mock.status, tt.mock.mockResponse)
+					resp.Header.Set("Content-Type", "application/json")
+					return resp, nil
+				})
+
+			deploymentClient := NewDeploymentClient("https://example.com", "v0.3", client, nil)
+
+			err := deploymentClient.PruneDebugSessions(t.Context())
+			if tt.want.err != nil {
+				require.EqualError(t, err, tt.want.err.Error())
+				httpmock.Reset()
+				return
+			}
+			require.NoError(t, err)
+			httpmock.Reset()
+		})
+	}
+}
+
 func TestGetServiceReadyStatus(t *testing.T) {
 	client := resty.New()
 	httpmock.ActivateNonDefault(client.GetClient())
@@ -1573,6 +1647,86 @@ func TestRemoveSecret(t *testing.T) {
 	}
 }
 
+func TestListSecrets(t *testing.T) {
+	client := resty.New()
+	httpmock.ActivateNonDefault(client.GetClient())
+	defer httpmock.DeactivateAndReset()
+
+	type mock struct {
+		mockResponse string
+		status       int
+	}
+
+	type want struct {
+		output []string
+		err    error
+	}
+
+	tests := []struct {
+		name string
+		mock mock
+		want want
+	}{
+		{
+			name: "200-happy-path",
+			mock: mock{
+				mockResponse: `{"secrets":["MY_API_KEY","DATABASE_PASSWORD"]}`,
+				status:       http.StatusOK,
+			},
+			want: want{
+				output: []string{"MY_API_KEY", "DATABASE_PASSWORD"},
+				err:    nil,
+			},
+		},
+		{
+			name: "200-empty-list",
+			mock: mock{
+				mockResponse: `{"secrets":[]}`,
+				status:       http.StatusOK,
+			},
+			want: want{
+				output: []string{},
+				err:    nil,
+			},
+		},
+		{
+			name: "500-error",
+			mock: mock{
+				mockResponse: `{"error": {"code": 1001, "message": "internal server error", "traceId": "n/a", "containerLogs": ""}}`,
+				status:       http.StatusInternalServerError,
+			},
+			want: want{
+				output: nil,
+				err:    errors.New("API Error Encountered: ( HTTP status: 500 Error code: 1001 Detailed message: internal server error Trace ID: n/a )"),
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+
+			httpmock.RegisterResponder("GET", "https://example.com/v0.3/secrets",
+				func(_ *http.Request) (*http.Response, error) {
+					resp := httpmock.NewStringResponse(tt.mock.status, tt.mock.mockResponse)
+					resp.Header.Set("Content-Type", "application/json")
+					return resp, nil
+				})
+
+			deploymentClient := NewDeploymentClient("https://example.com", "v0.3", client, nil)
+
+			output, err := deploymentClient.ListSecrets(t.Context())
+			if tt.want.err != nil {
+				require.EqualError(t, err, tt.want.err.Error())
+				httpmock.Reset()
+				return
+			}
+			require.NoError(t, err)
+			require.Equal(t, tt.want.output, output)
+			httpmock.Reset()
+		})
+	}
+}
+
 func TestDeploymentClient_WatchDeployment(t *testing.T) {
 	type mock struct {
 		message     []byte
diff --git a/pkg/api/error.go b/pkg/api/error.go
index 6885b0e..85b3877 100644
--- a/pkg/api/error.go
+++ b/pkg/api/error.go
@@ -13,6 +13,7 @@ import (
 )
 
 const traceIDHeaderName = "X-Neru-Traceid"
+const traceIDNotAvailable = "n/a"
 
 var (
 	ErrNotFound      = errors.New("not found")
@@ -116,13 +117,16 @@ func (e Error) Error() string {
 }
 
 func traceIDFromHTTPResponse(resp *resty.Response) string {
+	if resp == nil {
+		return traceIDNotAvailable
+	}
 	if t := resp.Header().Get(traceIDHeaderName); t != "" {
 		return t
 	}
 	if t := resp.Request.Header.Get(traceIDHeaderName); t != "" {
 		return t
 	}
-	return "n/a"
+	return traceIDNotAvailable
 }
 func traceIDFromWebsocketResponse(resp *http.Response) string {
 	if t := resp.Header.Get(traceIDHeaderName); t != "" {
@@ -131,5 +135,5 @@ func traceIDFromWebsocketResponse(resp *http.Response) string {
 	if t := resp.Request.Header.Get(traceIDHeaderName); t != "" {
 		return t
 	}
-	return "n/a"
+	return traceIDNotAvailable
 }
diff --git a/pkg/cmdutil/factory.go b/pkg/cmdutil/factory.go
index 43c6ec7..309997a 100644
--- a/pkg/cmdutil/factory.go
+++ b/pkg/cmdutil/factory.go
@@ -43,6 +43,7 @@ type DeploymentInterface interface {
 	DeployDebugService(ctx context.Context, region, applicationID, name string, caps api.Capabilities) (api.DeployResponse, error)
 	GetServiceReadyStatus(ctx context.Context, serviceName string) (bool, error)
 	DeleteDebugService(ctx context.Context, serviceName string, preserveData bool) error
+	PruneDebugSessions(ctx context.Context) error
 	CreatePackage(ctx context.Context, createPackageArgs api.CreatePackageArgs) (api.CreatePackageResponse, error)
 	CreateProject(ctx context.Context, projectName string) (api.CreateProjectResponse, error)
 	DeployInstance(ctx context.Context, deployInstanceArgs api.DeployInstanceArgs) (api.DeployInstanceResponse, error)
@@ -52,6 +53,7 @@ type DeploymentInterface interface {
 	CreateSecret(ctx context.Context, s config.Secret) error
 	UpdateSecret(ctx context.Context, s config.Secret) error
 	RemoveSecret(ctx context.Context, name string) error
+	ListSecrets(ctx context.Context) ([]string, error)
 	CreateMongoDatabase(ctx context.Context, version string) (api.MongoInfoResponse, error)
 	DeleteMongoDatabase(ctx context.Context, version, database string) error
 	GetMongoDatabase(ctx context.Context, version, database string) (api.MongoInfoResponse, error)
diff --git a/testutil/mocks/factory.go b/testutil/mocks/factory.go
index c7dfdbc..63c097b 100644
--- a/testutil/mocks/factory.go
+++ b/testutil/mocks/factory.go
@@ -456,6 +456,21 @@ func (mr *MockDeploymentInterfaceMockRecorder) ListMongoDatabases(ctx, version i
 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListMongoDatabases", reflect.TypeOf((*MockDeploymentInterface)(nil).ListMongoDatabases), ctx, version)
 }
 
+// ListSecrets mocks base method.
+func (m *MockDeploymentInterface) ListSecrets(ctx context.Context) ([]string, error) {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "ListSecrets", ctx)
+	ret0, _ := ret[0].([]string)
+	ret1, _ := ret[1].(error)
+	return ret0, ret1
+}
+
+// ListSecrets indicates an expected call of ListSecrets.
+func (mr *MockDeploymentInterfaceMockRecorder) ListSecrets(ctx interface{}) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListSecrets", reflect.TypeOf((*MockDeploymentInterface)(nil).ListSecrets), ctx)
+}
+
 // ListVonageApplications mocks base method.
 func (m *MockDeploymentInterface) ListVonageApplications(ctx context.Context, filter string) (api.ListVonageApplicationsOutput, error) {
 	m.ctrl.T.Helper()
@@ -471,6 +486,20 @@ func (mr *MockDeploymentInterfaceMockRecorder) ListVonageApplications(ctx, filte
 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ListVonageApplications", reflect.TypeOf((*MockDeploymentInterface)(nil).ListVonageApplications), ctx, filter)
 }
 
+// PruneDebugSessions mocks base method.
+func (m *MockDeploymentInterface) PruneDebugSessions(ctx context.Context) error {
+	m.ctrl.T.Helper()
+	ret := m.ctrl.Call(m, "PruneDebugSessions", ctx)
+	ret0, _ := ret[0].(error)
+	return ret0
+}
+
+// PruneDebugSessions indicates an expected call of PruneDebugSessions.
+func (mr *MockDeploymentInterfaceMockRecorder) PruneDebugSessions(ctx interface{}) *gomock.Call {
+	mr.mock.ctrl.T.Helper()
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "PruneDebugSessions", reflect.TypeOf((*MockDeploymentInterface)(nil).PruneDebugSessions), ctx)
+}
+
 // RemoveSecret mocks base method.
 func (m *MockDeploymentInterface) RemoveSecret(ctx context.Context, name string) error {
 	m.ctrl.T.Helper()
@@ -499,34 +528,34 @@ func (mr *MockDeploymentInterfaceMockRecorder) UpdateSecret(ctx, s interface{})
 	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UpdateSecret", reflect.TypeOf((*MockDeploymentInterface)(nil).UpdateSecret), ctx, s)
 }
 
-// ValidateDeployment mocks base method.
-func (m *MockDeploymentInterface) ValidateDeployment(ctx context.Context, req api.ValidateDeploymentRequest) (api.ValidateDeploymentResponse, error) {
+// UploadTgz mocks base method.
+func (m *MockDeploymentInterface) UploadTgz(ctx context.Context, fileBytes []byte) (api.UploadResponse, error) {
 	m.ctrl.T.Helper()
-	ret := m.ctrl.Call(m, "ValidateDeployment", ctx, req)
-	ret0, _ := ret[0].(api.ValidateDeploymentResponse)
+	ret := m.ctrl.Call(m, "UploadTgz", ctx, fileBytes)
+	ret0, _ := ret[0].(api.UploadResponse)
 	ret1, _ := ret[1].(error)
 	return ret0, ret1
 }
 
-// ValidateDeployment indicates an expected call of ValidateDeployment.
-func (mr *MockDeploymentInterfaceMockRecorder) ValidateDeployment(ctx, req interface{}) *gomock.Call {
+// UploadTgz indicates an expected call of UploadTgz.
+func (mr *MockDeploymentInterfaceMockRecorder) UploadTgz(ctx, fileBytes interface{}) *gomock.Call {
 	mr.mock.ctrl.T.Helper()
-	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ValidateDeployment", reflect.TypeOf((*MockDeploymentInterface)(nil).ValidateDeployment), ctx, req)
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UploadTgz", reflect.TypeOf((*MockDeploymentInterface)(nil).UploadTgz), ctx, fileBytes)
 }
 
-// UploadTgz mocks base method.
-func (m *MockDeploymentInterface) UploadTgz(ctx context.Context, fileBytes []byte) (api.UploadResponse, error) {
+// ValidateDeployment mocks base method.
+func (m *MockDeploymentInterface) ValidateDeployment(ctx context.Context, req api.ValidateDeploymentRequest) (api.ValidateDeploymentResponse, error) {
 	m.ctrl.T.Helper()
-	ret := m.ctrl.Call(m, "UploadTgz", ctx, fileBytes)
-	ret0, _ := ret[0].(api.UploadResponse)
+	ret := m.ctrl.Call(m, "ValidateDeployment", ctx, req)
+	ret0, _ := ret[0].(api.ValidateDeploymentResponse)
 	ret1, _ := ret[1].(error)
 	return ret0, ret1
 }
 
-// UploadTgz indicates an expected call of UploadTgz.
-func (mr *MockDeploymentInterfaceMockRecorder) UploadTgz(ctx, fileBytes interface{}) *gomock.Call {
+// ValidateDeployment indicates an expected call of ValidateDeployment.
+func (mr *MockDeploymentInterfaceMockRecorder) ValidateDeployment(ctx, req interface{}) *gomock.Call {
 	mr.mock.ctrl.T.Helper()
-	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "UploadTgz", reflect.TypeOf((*MockDeploymentInterface)(nil).UploadTgz), ctx, fileBytes)
+	return mr.mock.ctrl.RecordCallWithMethodType(mr.mock, "ValidateDeployment", reflect.TypeOf((*MockDeploymentInterface)(nil).ValidateDeployment), ctx, req)
 }
 
 // WatchDeployment mocks base method.
diff --git a/vcr/debug/debug.go b/vcr/debug/debug.go
index 3b7d24d..cc3e3ea 100644
--- a/vcr/debug/debug.go
+++ b/vcr/debug/debug.go
@@ -157,6 +157,9 @@ func NewCmdDebug(f cmdutil.Factory) *cobra.Command {
 	cmd.Flags().IntVarP(&opts.DebuggerPort, "debugger-port", "d", defaultDebuggerPort, "Local port for debugger proxy server (default: 3001)")
 	cmd.Flags().BoolVarP(&opts.PreserveData, "preserve-data", "", false, "Keep debug session data after stopping (useful for debugging state issues)")
 	cmd.Flags().StringVarP(&opts.ManifestFile, "filename", "f", "", "Path to VCR manifest file (default: vcr.yml in project directory)")
+
+	cmd.AddCommand(NewCmdPruneSessions(f))
+
 	return cmd
 }
 
diff --git a/vcr/debug/prune_sessions.go b/vcr/debug/prune_sessions.go
new file mode 100644
index 0000000..d76aa50
--- /dev/null
+++ b/vcr/debug/prune_sessions.go
@@ -0,0 +1,46 @@
+package debug
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/spf13/cobra"
+
+	"vonage-cloud-runtime-cli/pkg/cmdutil"
+)
+
+type PruneSessionsOptions struct {
+	cmdutil.Factory
+}
+
+func NewCmdPruneSessions(f cmdutil.Factory) *cobra.Command {
+	opts := &PruneSessionsOptions{Factory: f}
+
+	cmd := &cobra.Command{
+		Use:   "prune-sessions",
+		Short: "Remove all active debug sessions",
+		Long:  "Remove all active debug sessions for the configured API key.",
+		RunE: func(_ *cobra.Command, _ []string) error {
+			ctx, cancel := context.WithDeadline(context.Background(), opts.Deadline())
+			defer cancel()
+			return runPruneSessions(ctx, opts)
+		},
+	}
+
+	return cmd
+}
+
+func runPruneSessions(ctx context.Context, opts *PruneSessionsOptions) error {
+	io := opts.IOStreams()
+	c := io.ColorScheme()
+
+	spinner := cmdutil.DisplaySpinnerMessageWithHandle(" Pruning debug sessions...")
+	err := opts.DeploymentClient().PruneDebugSessions(ctx)
+	spinner.Stop()
+	if err != nil {
+		return fmt.Errorf("failed to prune debug sessions: %w", err)
+	}
+
+	fmt.Fprintf(io.Out, "%s Debug sessions successfully pruned\n", c.SuccessIcon())
+	return nil
+}
diff --git a/vcr/debug/prune_sessions_test.go b/vcr/debug/prune_sessions_test.go
new file mode 100644
index 0000000..c72f452
--- /dev/null
+++ b/vcr/debug/prune_sessions_test.go
@@ -0,0 +1,86 @@
+package debug
+
+import (
+	"bytes"
+	"errors"
+	"io"
+	"testing"
+
+	"github.com/cli/cli/v2/pkg/iostreams"
+	"github.com/golang/mock/gomock"
+	"github.com/stretchr/testify/require"
+
+	"vonage-cloud-runtime-cli/testutil"
+	"vonage-cloud-runtime-cli/testutil/mocks"
+)
+
+func TestPruneSessions(t *testing.T) {
+	type mock struct {
+		pruneDebugSessionsTimes     int
+		pruneDebugSessionsReturnErr error
+	}
+
+	type want struct {
+		errMsg string
+		stdout string
+	}
+
+	tests := []struct {
+		name string
+		mock mock
+		want want
+	}{
+		{
+			name: "happy-path",
+			mock: mock{
+				pruneDebugSessionsTimes:     1,
+				pruneDebugSessionsReturnErr: nil,
+			},
+			want: want{
+				stdout: "✓ Debug sessions successfully pruned\n",
+			},
+		},
+		{
+			name: "api-error",
+			mock: mock{
+				pruneDebugSessionsTimes:     1,
+				pruneDebugSessionsReturnErr: errors.New("api error"),
+			},
+			want: want{
+				errMsg: "failed to prune debug sessions: api error",
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			ctrl := gomock.NewController(t)
+			deploymentMock := mocks.NewMockDeploymentInterface(ctrl)
+
+			deploymentMock.EXPECT().
+				PruneDebugSessions(gomock.Any()).
+				Times(tt.mock.pruneDebugSessionsTimes).
+				Return(tt.mock.pruneDebugSessionsReturnErr)
+
+			ios, _, stdout, _ := iostreams.Test()
+
+			f := testutil.DefaultFactoryMock(t, ios, nil, nil, nil, deploymentMock, nil, nil)
+
+			cmd := NewCmdPruneSessions(f)
+			cmd.SetArgs([]string{})
+			cmd.SetIn(&bytes.Buffer{})
+			cmd.SetOut(io.Discard)
+			cmd.SetErr(io.Discard)
+
+			err := cmd.Execute()
+			if tt.want.errMsg != "" {
+				require.Error(t, err)
+				require.Equal(t, tt.want.errMsg, err.Error())
+				return
+			}
+
+			require.NoError(t, err)
+			require.Equal(t, tt.want.stdout, stdout.String())
+		})
+	}
+}
diff --git a/vcr/instance/log/log.go b/vcr/instance/log/log.go
index 0b70a32..b829a8b 100644
--- a/vcr/instance/log/log.go
+++ b/vcr/instance/log/log.go
@@ -52,6 +52,7 @@ type Options struct {
 	LogLevel     string
 	SourceType   string
 	Limit        int
+	Follow       bool
 }
 
 func NewCmdInstanceLog(f cmdutil.Factory) *cobra.Command {
@@ -62,11 +63,12 @@ func NewCmdInstanceLog(f cmdutil.Factory) *cobra.Command {
 	cmd := &cobra.Command{
 		Use:     "log",
 		Aliases: []string{"logs"},
-		Short:   "Stream real-time logs from a deployed VCR instance",
-		Long: heredoc.Doc(`Stream real-time logs from a deployed VCR instance.
+		Short:   "Fetch logs from a deployed VCR instance",
+		Long: heredoc.Doc(`Fetch logs from a deployed VCR instance.
 
-			This command connects to a running instance and streams its logs in real-time
-			to your terminal. Logs are continuously fetched until you press Ctrl+C.
+			By default, the command retrieves the last N log entries (controlled by --history)
+			and exits. Use --follow (-f) to continuously stream new log entries until you
+			press Ctrl+C.
 
 			IDENTIFYING THE INSTANCE
 			  You can identify the instance using either:
@@ -93,27 +95,29 @@ func NewCmdInstanceLog(f cmdutil.Factory) *cobra.Command {
 		`),
 		Args: cobra.MaximumNArgs(0),
 		Example: heredoc.Doc(`
-			# Stream logs by project and instance name
+			# Print the last logs by project and instance name (default, exits after output)
 			$ vcr instance log --project-name my-app --instance-name dev
 			2024-01-15T10:30:00Z [application] Server started on port 3000
 			2024-01-15T10:30:01Z [application] Connected to database
-			^C
-			Interrupt received, stopping...
 
-			# Stream logs by instance ID
+			# Print the last logs by instance ID
 			$ vcr instance log --id 12345678-1234-1234-1234-123456789abc
 
+			# Continuously stream new logs (press Ctrl+C to stop)
+			$ vcr instance log -p my-app -n dev --follow
+			$ vcr instance log -p my-app -n dev -f
+
+			# Print the last 500 log entries and exit
+			$ vcr instance log -p my-app -n dev --history 500
+
 			# Filter to show only errors and above
 			$ vcr instance log -p my-app -n dev --log-level error
 
 			# Show only application logs (exclude provider logs)
 			$ vcr instance log -p my-app -n dev --source-type application
 
-			# Increase history to last 500 log entries
-			$ vcr instance log -p my-app -n dev --history 500
-
-			# Combine filters
-			$ vcr instance log -p my-app -n dev -l warn -s application
+			# Combine filters with follow
+			$ vcr instance log -p my-app -n dev -l warn -s application -f
 		`),
 		RunE: func(_ *cobra.Command, _ []string) error {
 			ctx, cancel := context.WithDeadline(context.Background(), opts.Deadline())
@@ -129,6 +133,7 @@ func NewCmdInstanceLog(f cmdutil.Factory) *cobra.Command {
 	cmd.Flags().StringVarP(&opts.InstanceName, "instance-name", "n", "", "Instance name (requires --project-name)")
 	cmd.Flags().StringVarP(&opts.LogLevel, "log-level", "l", "", "Minimum log level: trace, debug, info, warn, error, fatal")
 	cmd.Flags().StringVarP(&opts.SourceType, "source-type", "s", "", "Filter by source: application, provider")
+	cmd.Flags().BoolVarP(&opts.Follow, "follow", "f", false, "Continuously stream new log entries (press Ctrl+C to stop)")
 
 	return cmd
 }
@@ -146,6 +151,12 @@ func runLog(ctx context.Context, opts *Options) error {
 
 	opts.InstanceID = inst.ID
 
+	// Without --follow just print the historical logs and exit.
+	if !opts.Follow {
+		fetchLogs(io, opts, time.Time{})
+		return nil
+	}
+
 	ticker := time.NewTicker(TickerInterval)
 	defer ticker.Stop()
 	lastTimestamp := time.Time{}
diff --git a/vcr/instance/log/log_test.go b/vcr/instance/log/log_test.go
index d174767..9575b0d 100644
--- a/vcr/instance/log/log_test.go
+++ b/vcr/instance/log/log_test.go
@@ -4,6 +4,7 @@ import (
 	"bytes"
 	"errors"
 	"io"
+	"os"
 	"testing"
 	"time"
 
@@ -21,8 +22,10 @@ func TestLog(t *testing.T) {
 	type mock struct {
 		LogListLogsByInstanceIDTimes         int
 		LogGetInstByProjAndInstNameTimes     int
+		LogGetInstanceByIDTimes              int
 		LogListLogsByInstanceIDReturnErr     error
 		LogGetInstByProjAndInstNameReturnErr error
+		LogGetInstanceByIDReturnErr          error
 		LogReturnLogs                        []api.Log
 		LogReturnInstance                    api.Instance
 		LogProjectName                       string
@@ -69,6 +72,41 @@ func TestLog(t *testing.T) {
 				errMsg: "failed to validate flags: must provide either 'id' flag or 'project-name' and 'instance-name' flags",
 			},
 		},
+		{
+			name: "default-no-follow-fetches-once-by-instance-id",
+			cli:  "--id=abc-123",
+			mock: mock{
+				LogListLogsByInstanceIDTimes:         1,
+				LogGetInstByProjAndInstNameTimes:     0,
+				LogGetInstanceByIDTimes:              1,
+				LogReturnInstance:                    api.Instance{ID: "abc-123"},
+				LogInstanceID:                        "abc-123",
+				LogReturnLogs:                        []api.Log{{Timestamp: time.Now(), SourceType: "application", Message: "hello"}},
+				LogListLogsByInstanceIDReturnErr:     nil,
+				LogGetInstByProjAndInstNameReturnErr: nil,
+				LogGetInstanceByIDReturnErr:          nil,
+			},
+			want: want{
+				stdout: "[application] hello",
+			},
+		},
+		{
+			name: "default-no-follow-get-instance-error",
+			cli:  "--id=bad-id",
+			mock: mock{
+				LogListLogsByInstanceIDTimes:         0,
+				LogGetInstByProjAndInstNameTimes:     0,
+				LogGetInstanceByIDTimes:              1,
+				LogReturnInstance:                    api.Instance{},
+				LogInstanceID:                        "bad-id",
+				LogListLogsByInstanceIDReturnErr:     nil,
+				LogGetInstByProjAndInstNameReturnErr: nil,
+				LogGetInstanceByIDReturnErr:          errors.New("datastore error"),
+			},
+			want: want{
+				errMsg: "failed to get instance",
+			},
+		},
 	}
 	for _, tt := range tests {
 		t.Run(tt.name, func(t *testing.T) {
@@ -83,6 +121,10 @@ func TestLog(t *testing.T) {
 				GetInstanceByProjectAndInstanceName(gomock.Any(), tt.mock.LogProjectName, tt.mock.LogInstanceName).
 				Times(tt.mock.LogGetInstByProjAndInstNameTimes).
 				Return(tt.mock.LogReturnInstance, tt.mock.LogGetInstByProjAndInstNameReturnErr)
+			datastoreMock.EXPECT().
+				GetInstanceByID(gomock.Any(), tt.mock.LogInstanceID).
+				Times(tt.mock.LogGetInstanceByIDTimes).
+				Return(tt.mock.LogReturnInstance, tt.mock.LogGetInstanceByIDReturnErr)
 			datastoreMock.EXPECT().ListLogsByInstanceID(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).
 				Times(tt.mock.LogListLogsByInstanceIDTimes).
 				Return(tt.mock.LogReturnLogs, tt.mock.LogListLogsByInstanceIDReturnErr)
@@ -104,7 +146,7 @@ func TestLog(t *testing.T) {
 
 			if _, err := cmd.ExecuteC(); err != nil && tt.want.errMsg != "" {
 				require.Error(t, err, "should throw error")
-				require.Equal(t, tt.want.errMsg, err.Error())
+				require.Contains(t, err.Error(), tt.want.errMsg)
 				return
 			}
 			cmdOut := &testutil.CmdOut{
@@ -116,7 +158,11 @@ func TestLog(t *testing.T) {
 				return
 			}
 			require.NoError(t, err, "should not throw error")
-			require.Equal(t, tt.want.stdout, cmdOut.String())
+			if tt.want.stdout != "" {
+				require.Contains(t, cmdOut.String(), tt.want.stdout)
+			} else {
+				require.Equal(t, tt.want.stdout, cmdOut.String())
+			}
 		})
 	}
 }
@@ -249,3 +295,50 @@ func Test_printLogs(t *testing.T) {
 		})
 	}
 }
+
+func TestLog_Follow(t *testing.T) {
+	ctrl := gomock.NewController(t)
+
+	datastoreMock := mocks.NewMockDatastoreInterface(ctrl)
+	deploymentMock := mocks.NewMockDeploymentInterface(ctrl)
+
+	datastoreMock.EXPECT().
+		GetInstanceByID(gomock.Any(), "abc-123").
+		Times(1).
+		Return(api.Instance{ID: "abc-123"}, nil)
+
+	// Count the ListLogsByInstanceID calls and, from the second call onwards,
+	// send os.Interrupt (SIGINT, not SIGTERM) to this process to end the follow loop.
+	callCount := 0
+	datastoreMock.EXPECT().
+		ListLogsByInstanceID(gomock.Any(), gomock.Any(), gomock.Any(), gomock.Any()).
+		MinTimes(2).
+		DoAndReturn(func(_ interface{}, _ interface{}, _ interface{}, _ interface{}) ([]api.Log, error) {
+			callCount++
+			if callCount >= 2 {
+				// Signal our own process so that runLog's signal handling (not shown
+				// here) can stop the follow loop; FindProcess/Signal errors are discarded.
+				p, _ := os.FindProcess(os.Getpid())
+				_ = p.Signal(os.Interrupt)
+			}
+			return []api.Log{{Timestamp: time.Now(), SourceType: "application", Message: "streaming"}}, nil
+		})
+
+	ios, _, stdout, _ := iostreams.Test() // only the stdout buffer is inspected below
+
+	argv, err := shlex.Split("--id=abc-123 --follow")
+	require.NoError(t, err)
+
+	f := testutil.DefaultFactoryMock(t, ios, nil, nil, datastoreMock, deploymentMock, nil, nil)
+
+	cmd := NewCmdInstanceLog(f)
+	cmd.SetArgs(argv)
+	cmd.SetIn(&bytes.Buffer{})
+	cmd.SetOut(io.Discard)
+	cmd.SetErr(io.Discard)
+
+	_, err = cmd.ExecuteC()
+	require.NoError(t, err, "follow should exit cleanly on interrupt")
+	require.GreaterOrEqual(t, callCount, 2, "logs should have been fetched at least twice")
+	require.Contains(t, stdout.String(), "[application] streaming")
+}
diff --git a/vcr/secret/list/list.go b/vcr/secret/list/list.go
new file mode 100644
index 0000000..6beb5d0
--- /dev/null
+++ b/vcr/secret/list/list.go
@@ -0,0 +1,84 @@
+package list
+
+import (
+	"context"
+	"fmt"
+
+	"github.com/MakeNowJust/heredoc"
+	"github.com/spf13/cobra"
+
+	"vonage-cloud-runtime-cli/pkg/cmdutil"
+)
+
+type Options struct {
+	cmdutil.Factory // embedded: gives opts the IOStreams, DeploymentClient and Deadline accessors used by this command
+}
+
+// NewCmdSecretList builds the "list" subcommand (alias "ls"), which accepts no
+// positional arguments. When run, it derives a context bounded by the
+// factory's deadline and hands off to runList to fetch and print the secret
+// names.
+func NewCmdSecretList(f cmdutil.Factory) *cobra.Command {
+	opts := &Options{Factory: f}
+
+	return &cobra.Command{
+		Use:     "list",
+		Aliases: []string{"ls"},
+		Args:    cobra.MaximumNArgs(0),
+		Short:   "List all secrets",
+		Long: heredoc.Doc(`List all secrets stored in your VCR account.
+
+			This command displays the names of all secrets. Secret values are never
+			shown for security reasons.
+
+			Use this to verify which secrets are available before referencing them
+			in your vcr.yml manifest.
+		`),
+		Example: heredoc.Doc(`
+			# List all secrets
+			$ vcr secret list
+			✓ Found 3 secret(s):
+			  ℹ MY_API_KEY
+			  ℹ DATABASE_PASSWORD
+			  ℹ SSL_CERT
+
+			# No secrets configured
+			$ vcr secret list
+			! No secrets found
+
+			# Using the 'ls' alias
+			$ vcr secret ls
+		`),
+
+		RunE: func(_ *cobra.Command, _ []string) error {
+			deadlineCtx, release := context.WithDeadline(context.Background(), opts.Deadline())
+			defer release()
+
+			return runList(deadlineCtx, opts)
+		},
+	}
+}
+
+// runList fetches the secret names through the deployment client and prints
+// them to the command's output stream, one name per line.
+func runList(ctx context.Context, opts *Options) error {
+	streams := opts.IOStreams()
+	colors := streams.ColorScheme()
+
+	progress := cmdutil.DisplaySpinnerMessageWithHandle(" Fetching secrets...")
+	names, err := opts.DeploymentClient().ListSecrets(ctx)
+	progress.Stop() // stop the spinner before any result or error is reported
+	switch {
+	case err != nil:
+		return fmt.Errorf("failed to list secrets: %w", err)
+	case len(names) == 0:
+		fmt.Fprintf(streams.Out, "%s No secrets found\n", colors.WarningIcon())
+		return nil
+	}
+
+	fmt.Fprintf(streams.Out, "%s Found %d secret(s):\n", colors.SuccessIcon(), len(names))
+	for _, secretName := range names {
+		fmt.Fprintf(streams.Out, "  %s %s\n", colors.Blue(cmdutil.InfoIcon), secretName)
+	}
+	return nil
+}
diff --git a/vcr/secret/list/list_test.go b/vcr/secret/list/list_test.go
new file mode 100644
index 0000000..94a7c24
--- /dev/null
+++ b/vcr/secret/list/list_test.go
@@ -0,0 +1,111 @@
+package list
+
+import (
+	"bytes"
+	"errors"
+	"io"
+	"testing"
+
+	"github.com/cli/cli/v2/pkg/iostreams"
+	"github.com/golang/mock/gomock"
+	"github.com/google/shlex"
+	"github.com/stretchr/testify/require"
+
+	"vonage-cloud-runtime-cli/testutil"
+	"vonage-cloud-runtime-cli/testutil/mocks"
+)
+
+// TestSecretList runs the list command against a mocked deployment client and
+// checks, per scenario, either the returned error message or the printed output.
+func TestSecretList(t *testing.T) {
+	type mock struct {
+		ListTimes     int
+		ListReturn    []string
+		ListReturnErr error
+	}
+	type want struct {
+		errMsg string
+		stdout string
+	}
+
+	tests := []struct {
+		name string
+		cli  string
+		mock mock
+		want want
+	}{
+		{
+			name: "happy-path",
+			cli:  "",
+			mock: mock{
+				ListTimes:  1,
+				ListReturn: []string{"MY_API_KEY", "DATABASE_PASSWORD"},
+			},
+			want: want{
+				stdout: "Found 2 secret(s):\n",
+			},
+		},
+		{
+			name: "no-secrets",
+			cli:  "",
+			mock: mock{
+				ListTimes:  1,
+				ListReturn: []string{},
+			},
+			want: want{
+				stdout: "No secrets found\n",
+			},
+		},
+		{
+			name: "api-error",
+			cli:  "",
+			mock: mock{
+				ListTimes:     1,
+				ListReturnErr: errors.New("api error"),
+			},
+			want: want{
+				errMsg: "failed to list secrets: api error",
+			},
+		},
+	}
+
+	for _, tt := range tests {
+		t.Run(tt.name, func(t *testing.T) {
+			ctrl := gomock.NewController(t)
+
+			deploymentMock := mocks.NewMockDeploymentInterface(ctrl)
+			deploymentMock.EXPECT().
+				ListSecrets(gomock.Any()).
+				Times(tt.mock.ListTimes).
+				Return(tt.mock.ListReturn, tt.mock.ListReturnErr)
+
+			ios, _, stdout, stderr := iostreams.Test()
+
+			argv, err := shlex.Split(tt.cli)
+			require.NoError(t, err)
+
+			f := testutil.DefaultFactoryMock(t, ios, nil, nil, nil, deploymentMock, nil, nil)
+
+			cmd := NewCmdSecretList(f)
+			cmd.SetArgs(argv)
+			cmd.SetIn(&bytes.Buffer{})
+			cmd.SetOut(io.Discard)
+			cmd.SetErr(io.Discard)
+
+			_, execErr := cmd.ExecuteC()
+			if tt.want.errMsg != "" {
+				// EqualError also fails on a nil error, so an expected failure
+				// that never happens can no longer pass unnoticed.
+				require.EqualError(t, execErr, tt.want.errMsg)
+				return
+			}
+			require.NoError(t, execErr, "should not throw error")
+
+			cmdOut := &testutil.CmdOut{
+				OutBuf: stdout,
+				ErrBuf: stderr,
+			}
+			require.Contains(t, cmdOut.String(), tt.want.stdout)
+		})
+	}
+}
diff --git a/vcr/secret/secret.go b/vcr/secret/secret.go
index fcbea7c..64ad675 100644
--- a/vcr/secret/secret.go
+++ b/vcr/secret/secret.go
@@ -6,6 +6,7 @@ import (
 
 	"vonage-cloud-runtime-cli/pkg/cmdutil"
 	"vonage-cloud-runtime-cli/vcr/secret/create"
+	"vonage-cloud-runtime-cli/vcr/secret/list"
 	"vonage-cloud-runtime-cli/vcr/secret/remove"
 	"vonage-cloud-runtime-cli/vcr/secret/update"
 )
@@ -35,6 +36,7 @@ func NewCmdSecret(f cmdutil.Factory) *cobra.Command {
 
 			AVAILABLE COMMANDS
 			  create (add)   Create a new secret
+			  list (ls)      List all secrets
 			  update         Update an existing secret's value
 			  remove (rm)    Delete a secret
 
@@ -60,6 +62,7 @@ func NewCmdSecret(f cmdutil.Factory) *cobra.Command {
 	}
 
 	cmd.AddCommand(create.NewCmdSecretCreate(f))
+	cmd.AddCommand(list.NewCmdSecretList(f))
 	cmd.AddCommand(remove.NewCmdSecretRemove(f))
 	cmd.AddCommand(update.NewCmdSecretUpdate(f))
 	return cmd