From 0d4c86214d06eebb80b82728ae03efa7553ba8a4 Mon Sep 17 00:00:00 2001
From: Cody <cody@copperstateit.com>
Date: Sat, 2 May 2026 21:33:21 -0700
Subject: [PATCH 1/2] Add local AI readiness profile to AppLens-Tune

---
 AppLens-Tune.py                               | 277 +++++++++++++++++-
 README.md                                     |   2 +-
 docs/AppLens-Tune-LLM-Profile.md              | 101 +++++++
 docs/AppLens-Tune-Product-Outline.md          |   3 +
 src/AppLens.Backend/AuditService.cs           |  15 +-
 src/AppLens.Backend/LocalAiProfileBuilder.cs  | 129 ++++++++
 src/AppLens.Backend/Models.cs                 |  36 +++
 .../ReadinessSummaryBuilder.cs                |   5 +
 src/AppLens.Backend/ReportWriter.cs           |  38 +++
 src/AppLens.Backend/TuneCollector.cs          |   7 +
 src/AppLens.Backend/TunePlanBuilder.cs        |  28 ++
 .../LocalAiProfileBuilderTests.cs             |  91 ++++++
 .../ReadinessSummaryBuilderTests.cs           |  22 ++
 .../ReportWriterTests.cs                      |  17 +-
 .../TunePlanBuilderTests.cs                   |  28 ++
 15 files changed, 793 insertions(+), 6 deletions(-)
 create mode 100644 docs/AppLens-Tune-LLM-Profile.md
 create mode 100644 src/AppLens.Backend/LocalAiProfileBuilder.cs
 create mode 100644 tests/AppLens.Backend.Tests/LocalAiProfileBuilderTests.cs

diff --git a/AppLens-Tune.py b/AppLens-Tune.py
index 9b573f9..bc41c23 100644
--- a/AppLens-Tune.py
+++ b/AppLens-Tune.py
@@ -9,6 +9,8 @@
 from __future__ import annotations
 
 import getpass
+import csv
+import json
 import os
 import platform
 import shutil
@@ -222,6 +224,259 @@ def key_services() -> list[dict[str, str]]:
     return rows
 
 
+def first_version_line(lines: list[str]) -> str:
+    """Return the most useful line of version output, truncated to 120 characters."""
+    # First choice: the earliest non-empty line that does not start with "warning".
+    clean = (text for text in lines if text and not text.lower().startswith("warning"))
+    # Second choice: any line mentioning "version"; last resort: the first line, or "".
+    mentions = (text for text in lines if "version" in text.lower())
+    fallback = lines[0] if lines else ""
+    return (next(clean, None) or next(mentions, fallback))[:120]
+
+
+def local_llm_tools() -> list[dict[str, str]]:  # One Tool/Status/Detail row per probed command.
+    checks = (  # Each entry: (report label, executable looked up on PATH, version arguments).
+        ("git", "git", ["--version"]),
+        ("python3", "python3", ["--version"]),
+        ("pip3", "pip3", ["--version"]),
+        ("uv", "uv", ["--version"]),
+        ("cmake", "cmake", ["--version"]),
+        ("make", "make", ["--version"]),
+        ("gcc", "gcc", ["--version"]),
+        ("g++", "g++", ["--version"]),
+        ("docker", "docker", ["--version"]),
+        ("ollama", "ollama", ["--version"]),
+        ("nvidia-smi", "nvidia-smi", ["--version"]),
+        ("nvcc", "nvcc", ["--version"]),
+    )
+    rows: list[dict[str, str]] = []
+    for label, command, args in checks:
+        if not shutil.which(command):  # Not on PATH: record Missing and skip running it.
+            rows.append({"Tool": label, "Status": "Missing", "Detail": ""})
+            continue
+        lines = run_command([command, *args], timeout=8)  # run_command is defined outside this hunk.
+        rows.append({"Tool": label, "Status": "Present", "Detail": first_version_line(lines)})  # Present == found on PATH.
+    return rows
+
+
+def llama_cpp_builds() -> list[dict[str, str]]:  # Build/Status/Path rows for the llama.cpp checkout.
+    root = Path.home() / "local-llm/src/llama.cpp"  # Only this fixed location is inspected.
+    rows: list[dict[str, str]] = []
+    if not root.exists():
+        return [{"Build": "llama.cpp source", "Status": "Missing", "Path": str(root)}]
+
+    commit = ""
+    if (root / ".git").exists():  # Only ask git for a commit when the checkout has a .git entry.
+        lines = run_command(["git", "-C", str(root), "rev-parse", "--short", "HEAD"], timeout=5)
+        commit = lines[0] if lines else ""
+
+    rows.append({"Build": "llama.cpp source", "Status": f"Present {commit}".strip(), "Path": str(root)})
+    for build_name in ("build-cpu", "build-cuda", "build-cuda-mmq", "build-vulkan"):
+        build_dir = root / build_name
+        bin_dir = build_dir / "bin"
+        built = [  # Binaries that exist under <build>/bin; existence is the only check made.
+            name
+            for name in ("llama-cli", "llama-server", "llama-bench")
+            if (bin_dir / name).exists()
+        ]
+        status = "Built: " + ", ".join(built) if built else "Missing"  # Other functions in this patch look for "Built:".
+        rows.append({"Build": build_name, "Status": status, "Path": str(build_dir)})
+    return rows
+
+
+def ollama_cached_models() -> list[dict[str, str]]:  # Model/Size/Manifest rows read from manifest files on disk.
+    manifests = Path.home() / ".ollama/models/manifests"
+    if not manifests.exists():
+        return [{"Model": "(none)", "Size": "", "Manifest": str(manifests)}]  # "(none)" is the placeholder row.
+
+    rows: list[dict[str, str]] = []
+    for manifest in sorted(manifests.rglob("*")):  # Sorted so the row order is stable.
+        if not manifest.is_file():
+            continue
+        try:
+            rel = manifest.relative_to(manifests)
+            parts = rel.parts
+            model = "/".join(parts[:-1]) + ":" + parts[-1] if len(parts) >= 2 else rel.as_posix()  # <parent dirs>:<file name>
+            payload = json.loads(manifest.read_text(encoding="utf-8", errors="replace"))
+            size = sum(int(layer.get("size", 0)) for layer in payload.get("layers", []))  # Sum of the listed layer sizes.
+            rows.append({"Model": model, "Size": format_size(size), "Manifest": str(manifest)})
+        except Exception:  # Best effort: an unreadable or malformed manifest still gets a row, without a size.
+            rows.append({"Model": manifest.name, "Size": "", "Manifest": str(manifest)})
+    return rows or [{"Model": "(none)", "Size": "", "Manifest": str(manifests)}]
+
+
+def nvidia_gpus() -> list[dict[str, str]]:  # One row per six-column line from nvidia-smi; [] without nvidia-smi.
+    if not shutil.which("nvidia-smi"):
+        return []
+
+    lines = run_command(
+        [
+            "nvidia-smi",
+            "--query-gpu=name,driver_version,memory.total,memory.used,compute_cap,power.limit",
+            "--format=csv,noheader,nounits",
+        ],
+        timeout=10,
+    )
+    rows: list[dict[str, str]] = []
+    for fields in csv.reader(lines):  # Field order follows the --query-gpu list above.
+        if len(fields) < 6:  # Skip lines that are not a full six-column record.
+            continue
+        rows.append(
+            {
+                "Name": fields[0].strip(),
+                "Driver": fields[1].strip(),
+                "VRAM_MB": fields[2].strip(),
+                "Used_MB": fields[3].strip(),
+                "Compute": fields[4].strip(),
+                "Power_W": fields[5].strip(),
+            }
+        )
+    return rows
+
+
+def parse_int(value: str) -> int | None:  # Parse numeric text such as "6144" or "6144.0"; None when unparseable.
+    try:
+        return int(float(value.strip()))  # float() first so decimal strings truncate instead of failing.
+    except (AttributeError, OverflowError, TypeError, ValueError):  # non-str input, "inf", or non-numeric text
+        return None
+
+
+def pytorch_probe() -> list[dict[str, str]]:  # Always returns a single Component/Status/Detail row.
+    python = shutil.which("python3") or shutil.which("python")
+    if not python:
+        return [{"Component": "PyTorch", "Status": "Missing", "Detail": "Python 3 not found"}]
+
+    script = """
+import json
+try:
+    import torch
+    payload = {
+        "installed": True,
+        "version": getattr(torch, "__version__", ""),
+        "cuda_available": bool(torch.cuda.is_available()),
+        "cuda_version": getattr(torch.version, "cuda", None),
+        "device": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "",
+        "vram": torch.cuda.get_device_properties(0).total_memory if torch.cuda.is_available() else 0,
+    }
+except Exception as exc:
+    payload = {"installed": False, "error": f"{type(exc).__name__}: {exc}"}
+print(json.dumps(payload, sort_keys=True))
+""".strip()
+
+    lines = run_command([python, "-c", script], timeout=20)  # The probe runs in a child interpreter.
+    try:
+        payload = json.loads(lines[-1])  # The script prints its JSON last, so only the final line is parsed.
+    except Exception:  # Covers empty output (IndexError) as well as a non-JSON last line.
+        return [{"Component": "PyTorch", "Status": "Error", "Detail": "; ".join(lines)[:160]}]
+
+    if not payload.get("installed"):  # The script reports import failures as installed=False plus an error string.
+        return [{"Component": "PyTorch", "Status": "Missing", "Detail": str(payload.get("error", ""))[:160]}]
+
+    status = "CUDA ready" if payload.get("cuda_available") else "Installed, CUDA unavailable"
+    detail = f"{payload.get('version', '')}; CUDA {payload.get('cuda_version') or 'n/a'}"
+    if payload.get("device"):  # Device name and VRAM are only filled in when CUDA is available.
+        detail += f"; {payload['device']} ({format_size(int(payload.get('vram') or 0))})"
+    return [{"Component": "PyTorch", "Status": status, "Detail": detail[:160]}]
+
+
+def max_vram_mb(gpu_rows: list[dict[str, str]]) -> int:
+    """Return the largest parseable VRAM_MB value across gpu_rows, or 0 when there is none."""
+    return max((mb for mb in map(parse_int, (row["VRAM_MB"] for row in gpu_rows)) if mb is not None), default=0)
+
+
+def local_llm_profile(  # Returns (profile rows, review findings, optional findings).
+    gpu_rows: list[dict[str, str]],
+    tool_rows: list[dict[str, str]],
+    torch_rows: list[dict[str, str]],
+    service_rows: list[dict[str, str]],
+    llama_rows: list[dict[str, str]],
+) -> tuple[list[dict[str, str]], list[str], list[str]]:
+    tool_status = {row["Tool"]: row["Status"] for row in tool_rows}
+    service_status = {row["Name"]: row for row in service_rows}
+    vram_mb = max_vram_mb(gpu_rows)  # 0 when there are no GPU rows or no parseable VRAM value.
+    llama_status = {row["Build"]: row["Status"] for row in llama_rows}
+    review: list[str] = []
+    optional: list[str] = []
+
+    if vram_mb <= 0:  # Tier is chosen from the largest VRAM value alone.
+        gpu_tier = "CPU or non-NVIDIA profile"
+        backend = "CPU llama.cpp/Ollama inference; avoid GPU training assumptions."
+        model_target = "1B-4B quantized models for interactive work."
+        context_target = "2k-8k context unless benchmarks prove more headroom."
+        training_target = "CPU-only dataset prep, evals, and tiny smoke tests."
+    elif vram_mb < 8 * 1024:
+        gpu_tier = "Small NVIDIA GPU profile (under 8 GB VRAM)"
+        backend = "GGUF inference through Ollama, Jan, or llama.cpp; PyTorch only after CUDA smoke passes."
+        model_target = "3B-8B Q4/IQ4-class models; avoid 27B-31B interactive local-agent loops here."
+        context_target = "4k-16k for inference; 256-512 tokens for training/autoresearch experiments."
+        training_target = "Tiny from-scratch models, classifiers, eval sweeps, and very small LoRA tests."
+        review.append("- NVIDIA VRAM is under 8 GB; tune for small-model workloads, not large local fine-tunes.")
+    elif vram_mb < 16 * 1024:
+        gpu_tier = "Mid NVIDIA GPU profile"
+        backend = "GGUF inference plus selective PyTorch fine-tune experiments."
+        model_target = "7B-14B quantized inference; small LoRA experiments after benchmarking."
+        context_target = "8k-32k for inference; benchmark before larger context."
+        training_target = "Small LoRA/QLoRA experiments with conservative batch and sequence length."
+    else:
+        gpu_tier = "Large local-GPU profile"
+        backend = "llama.cpp/Ollama/Jan for inference and PyTorch for broader fine-tune experiments."
+        model_target = "14B+ quantized inference and larger LoRA experiments, subject to benchmarks."
+        context_target = "16k-64k after prompt-eval and memory tests."
+        training_target = "LoRA/QLoRA and longer autoresearch sweeps with checkpointing."
+
+    torch_status = torch_rows[0]["Status"] if torch_rows else "Missing"  # Only the first probe row is consulted.
+    if "CUDA ready" not in torch_status:
+        review.append("- PyTorch CUDA is not ready; training experiments should wait for a CUDA smoke test.")
+    if tool_status.get("uv") == "Missing":
+        optional.append("- uv is missing; Python ML environments will be slower to create and reproduce.")
+    if tool_status.get("cmake") == "Missing":
+        optional.append("- cmake is missing; local llama.cpp builds will fail until it is installed.")
+    if tool_status.get("nvcc") == "Missing":
+        optional.append("- nvcc is missing; CUDA extension builds are not available, but prebuilt PyTorch wheels can still work.")
+    gpu_builds = ("build-cuda", "build-cuda-mmq", "build-vulkan")
+    if all("Built:" not in llama_status.get(build_name, "") for build_name in gpu_builds):  # Also true when no rows exist.
+        optional.append("- llama.cpp GPU build is missing; current llama.cpp binaries are CPU-only.")
+
+    ollama = service_status.get("ollama", {})
+    if ollama.get("Installed") == "Yes" and ollama.get("Running") != "Yes":
+        optional.append("- Ollama is installed but not running; start it before runtime benchmarks.")
+
+    rows = [  # The last two rows are fixed text and do not depend on the inputs.
+        {"Signal": "GPU tier", "Recommendation": gpu_tier},
+        {"Signal": "Backend", "Recommendation": backend},
+        {"Signal": "Model target", "Recommendation": model_target},
+        {"Signal": "Context target", "Recommendation": context_target},
+        {"Signal": "Training target", "Recommendation": training_target},
+        {"Signal": "Safe overnight jobs", "Recommendation": "read-only scans, llama.cpp/Ollama benchmarks, eval sweeps, dataset prep"},
+        {"Signal": "Manual-gated jobs", "Recommendation": "driver/CUDA changes, service changes, firmware/RF/Wi-Fi actions, large downloads"},
+    ]
+    return rows, review, optional
+
+
+def autoresearch_queue(
+    llama_rows: list[dict[str, str]],
+    ollama_model_rows: list[dict[str, str]],
+    torch_rows: list[dict[str, str]],
+) -> list[dict[str, str]]:
+    """Build the Queue/State/Boundary rows for the auto-research section of the report."""
+    builds = {entry["Build"]: entry["Status"] for entry in llama_rows}
+    if "Built:" in builds.get("build-cuda-mmq", ""):  # An MMQ build outranks a plain CUDA build.
+        runtime = "llama.cpp CUDA-MMQ"
+    elif "Built:" in builds.get("build-cuda", ""):
+        runtime = "llama.cpp CUDA"
+    else:
+        runtime = "llama.cpp CPU/Ollama"
+    model = next((entry["Model"] for entry in ollama_model_rows if entry.get("Model") and entry["Model"] != "(none)"), "no cached model detected")
+    training_gate = "manual approval required" if torch_rows and "CUDA ready" in torch_rows[0]["Status"] else "closed"
+    return [
+        {"Queue": "Runtime", "State": runtime, "Boundary": "read-only inference and benchmarks"},
+        {"Queue": "Seed model", "State": model, "Boundary": "use cached models unless a user approves downloads"},
+        {"Queue": "Unattended OK", "State": "AppLens scans, llama.cpp benchmarks, eval sweeps, dataset prep", "Boundary": "no service/system changes"},
+        {"Queue": "Training", "State": training_gate, "Boundary": "wait for PyTorch CUDA smoke test and user approval"},
+        {"Queue": "Stop conditions", "State": "capture metrics, cap run time, keep logs", "Boundary": "abort on OOM, thermal issues, or failed smoke tests"},
+    ]
+
+
 def storage_hotspots() -> list[dict[str, object]]:
     home = Path.home()
     candidates = [
@@ -323,10 +578,12 @@ def build_findings(
     service_rows: list[dict[str, str]],
     storage_rows: list[dict[str, object]],
     repo_rows: list[dict[str, object]],
+    llm_review: list[str],
+    llm_optional: list[str],
 ) -> tuple[list[str], list[str], list[str]]:
     stable = ["- Audit mode only; no changes were made."]
-    review: list[str] = []
-    optional: list[str] = []
+    review: list[str] = [*llm_review]
+    optional: list[str] = [*llm_optional]
 
     running = {row["Name"] for row in service_rows if row.get("Running") == "Yes"}
     if {"docker", "colima", "podman"} & running:
@@ -371,7 +628,14 @@ def build_report() -> str:
     service_rows = key_services()
     storage_rows = storage_hotspots()
     repo_rows = repo_placement()
-    stable, review, optional = build_findings(startup_rows, service_rows, storage_rows, repo_rows)
+    llm_tool_rows = local_llm_tools()
+    llama_rows = llama_cpp_builds()
+    ollama_model_rows = ollama_cached_models()
+    gpu_rows = nvidia_gpus()
+    torch_rows = pytorch_probe()
+    llm_rows, llm_review, llm_optional = local_llm_profile(gpu_rows, llm_tool_rows, torch_rows, service_rows, llama_rows)
+    autoresearch_rows = autoresearch_queue(llama_rows, ollama_model_rows, torch_rows)
+    stable, review, optional = build_findings(startup_rows, service_rows, storage_rows, repo_rows, llm_review, llm_optional)
 
     lines: list[str] = []
     lines.append("=== AppLens-Tune Audit Results ===")
@@ -387,6 +651,13 @@ def build_report() -> str:
     lines.extend(section("--- Stability Checks ---", stable))
     lines.extend(section("--- Review Items ---", review))
     lines.extend(section("--- Optional Improvements ---", optional))
+    lines.extend(section("--- Local LLM Profile ---", table(llm_rows, ["Signal", "Recommendation"])))
+    lines.extend(section("--- Auto-Research Queue ---", table(autoresearch_rows, ["Queue", "State", "Boundary"])))
+    lines.extend(section("--- NVIDIA GPU Profile ---", table(gpu_rows, ["Name", "Driver", "VRAM_MB", "Used_MB", "Compute", "Power_W"])))
+    lines.extend(section("--- PyTorch CUDA Probe ---", table(torch_rows, ["Component", "Status", "Detail"])))
+    lines.extend(section("--- Local LLM Toolchain ---", table(llm_tool_rows, ["Tool", "Status", "Detail"])))
+    lines.extend(section("--- llama.cpp Builds ---", table(llama_rows, ["Build", "Status", "Path"])))
+    lines.extend(section("--- Ollama Cached Models ---", table(ollama_model_rows, ["Model", "Size", "Manifest"])))
     lines.extend(section("--- Top Memory Processes ---", table(top_processes(), ["Name", "PID", "RSS_MB", "CPU_%"])))
     lines.extend(section("--- Startup Entries ---", table(startup_rows, ["Name", "State", "Source"])))
     lines.extend(section("--- Key Services/Processes ---", table(service_rows, ["Name", "Installed", "Running"])))
diff --git a/README.md b/README.md
index 1b4dbde..c1bda40 100644
--- a/README.md
+++ b/README.md
@@ -21,7 +21,7 @@ AppLens is a local-first audit tool for understanding what is installed, running
 The repository currently includes three surfaces:
 
 - **AppLens**: cross-platform installed-app inventory scripts for Windows, macOS, and Linux.
-- **AppLens-Tune**: read-only workstation diagnostics and tune-plan guidance for startup load, services, local dev tooling, storage hotspots, and repo placement.
+- **AppLens-Tune**: read-only workstation diagnostics and tune-plan guidance for startup load, services, local dev tooling, local AI readiness, storage hotspots, and repo placement.
 - **AppLens-desktop**: a CSI-branded Windows desktop app built with WinUI 3, .NET, and Windows App SDK for eventual Microsoft Store packaging.
 
 ## Safety Model
diff --git a/docs/AppLens-Tune-LLM-Profile.md b/docs/AppLens-Tune-LLM-Profile.md
new file mode 100644
index 0000000..4415320
--- /dev/null
+++ b/docs/AppLens-Tune-LLM-Profile.md
@@ -0,0 +1,101 @@
+# AppLens-Tune Local LLM Profile
+
+## Purpose
+
+AppLens-Tune should turn machine evidence into a local-LLM runtime profile. It should not choose a model by hype or parameter count. It should measure the host, identify the safe workload class, and recommend runtime settings that can be verified with benchmarks.
+
+## Current Prototype
+
+`AppLens-Tune.py` now emits read-only sections for:
+
+- NVIDIA GPU profile: driver, VRAM, current VRAM use, compute capability, power limit.
+- PyTorch CUDA probe: installed state, CUDA availability, version, device, and VRAM.
+- Local LLM toolchain: Git, Python, pip, uv, cmake, make, compilers, Docker, Ollama, `nvidia-smi`, and `nvcc`.
+- llama.cpp builds: source checkout plus CPU, CUDA, CUDA-MMQ, and Vulkan build folders.
+- Ollama cached models: offline manifest detection even when the Ollama daemon is stopped.
+- Local LLM profile: backend, model target, context target, training target, safe overnight jobs, and manual-gated jobs.
+- Auto-research queue: runtime, seed model, unattended-safe jobs, training gates, and stop-condition guidance.
+
+The .NET backend now mirrors this direction with a `LocalAiProfile` contract, a read-only profile builder, exported Markdown/HTML sections, readiness highlights, and a tune-plan item that keeps training manual-gated.
+
+## Dogfood Finding
+
+The gaming PC is a small-GPU node:
+
+- Ryzen 5 5600X, 32 GB RAM.
+- GTX 1660 SUPER, 6 GB VRAM, compute capability 7.5.
+- NVIDIA driver is present.
+- Ollama is installed but not running.
+- PyTorch is not installed.
+- `uv` is missing; `cmake`, `ninja`, `nvcc`, Vulkan tools, and Docker are present.
+- Local llama.cpp CPU, CUDA, and CUDA-MMQ builds exist under `/home/cody/local-llm/src/llama.cpp`.
+- `qwen2.5:7b` is cached in Ollama as a GGUF blob and can be used directly by llama.cpp.
+- CPU-only llama.cpp is too slow for quick interactive use on `qwen2.5:7b`.
+- CUDA full offload works. The best measured build for this GTX 1660 SUPER is the CUDA-MMQ build.
+
+This should be treated as a small-model/autoresearch worker, not a large fine-tune host.
+
+## llama.cpp Benchmark
+
+Model: `qwen2.5:7b`, Q4_K_M GGUF, 7.6B params, 4.36 GB model blob.
+
+| Build | GPU layers | Prompt eval | Generation |
+| --- | ---: | ---: | ---: |
+| CUDA | 0 | 104.50 tok/s | 6.64 tok/s |
+| CUDA | 20 | 137.64 tok/s | 17.39 tok/s |
+| CUDA | 99 | 161.19 tok/s | 50.02 tok/s |
+| CUDA-MMQ | 0 | 193.28 tok/s | 6.63 tok/s |
+| CUDA-MMQ | 20 | 333.91 tok/s | 16.53 tok/s |
+| CUDA-MMQ | 99 | 558.29 tok/s | 49.96 tok/s |
+
+Recommended llama.cpp runtime for this host:
+
+```bash
+~/local-llm/src/llama.cpp/build-cuda-mmq/bin/llama-cli \
+  -m ~/local-llm/models/qwen2.5-7b-ollama.gguf \
+  -ngl 99 \
+  -t 8
+```
+
+For service experiments, start from the same binary family:
+
+```bash
+~/local-llm/src/llama.cpp/build-cuda-mmq/bin/llama-server \
+  -m ~/local-llm/models/qwen2.5-7b-ollama.gguf \
+  -ngl 99 \
+  -t 8 \
+  --host 127.0.0.1 \
+  --port 8080
+```
+
+## Recommended Profile
+
+- Backend: GGUF inference through Ollama, Jan, or llama.cpp first.
+- Model target: 3B-8B Q4/IQ4-class models.
+- Inference context: start around 4k-16k and benchmark.
+- Training/autoresearch context: start around 256-512 tokens.
+- llama.cpp acceleration: prefer the CUDA-MMQ build with full offload (`-ngl 99`) on this GTX 1660 SUPER.
+- Good jobs: read-only scans, llama.cpp/Ollama benchmarks, eval sweeps, dataset prep, tiny classifier training.
+- Gated jobs: driver/CUDA changes, service changes, firmware/RF/Wi-Fi actions, and large model downloads.
+
+## Product Boundary
+
+Keep the separation explicit:
+
+- AppLens measures installed apps, tools, hardware, services, storage, and runtime state.
+- AppLens-Tune recommends and later applies user-approved configuration.
+- LLM Tune learns from benchmark results and proposes runtime profiles.
+
+The first metric set should be tokens/sec, time to first token, prompt eval speed, VRAM/RAM headroom, load time, crash rate, and quality tradeoff.
+
+## Backend Contract
+
+`TuneSummary.LocalAiProfile` captures the local AI posture without starting a model or changing the machine:
+
+- `Readiness`: unknown, limited, inference-ready, or training-ready.
+- `WorkloadClass`: plain-language machine role.
+- `RecommendedRuntime`: current best runtime family.
+- `TrainingReady` and `TrainingGate`: explicit training boundary.
+- `Signals`: GPU, CUDA compiler, llama.cpp, Ollama, PyTorch CUDA, and model-cache evidence.
+
+This gives AppLens-Tune and its future extensions a stable place to attach benchmark results, run manifests, and user-approved training state.
diff --git a/docs/AppLens-Tune-Product-Outline.md b/docs/AppLens-Tune-Product-Outline.md
index 9fdbbc7..b7cd695 100644
--- a/docs/AppLens-Tune-Product-Outline.md
+++ b/docs/AppLens-Tune-Product-Outline.md
@@ -247,6 +247,8 @@ Implemented in the current backend:
 - JSON, Markdown, and HTML report output for tune plans.
 - AppLens-desktop tune plan list.
 - Unit coverage for startup, service/admin, privacy, and report contract behavior.
+- Local AI/autoresearch readiness profile in the backend contract.
+- Read-only local AI signals in exports and tune-plan guidance.
 
 Still intentionally not implemented:
 
@@ -256,4 +258,5 @@ Still intentionally not implemented:
 - cache deletion
 - admin elevation
 - rollback execution
+- unattended training or model downloads
 
diff --git a/src/AppLens.Backend/AuditService.cs b/src/AppLens.Backend/AuditService.cs
index 35a7c9d..925736b 100644
--- a/src/AppLens.Backend/AuditService.cs
+++ b/src/AppLens.Backend/AuditService.cs
@@ -8,6 +8,7 @@ public sealed class AuditService
     private readonly RulesEngine _rulesEngine;
     private readonly TunePlanBuilder _tunePlanBuilder;
     private readonly ReadinessSummaryBuilder _readinessSummaryBuilder;
+    private readonly LocalAiProfileBuilder _localAiProfileBuilder;
 
     public AuditService()
     {
@@ -17,6 +18,7 @@ public AuditService()
         _rulesEngine = new RulesEngine();
         _tunePlanBuilder = new TunePlanBuilder();
         _readinessSummaryBuilder = new ReadinessSummaryBuilder();
+        _localAiProfileBuilder = new LocalAiProfileBuilder();
     }
 
     public async Task<AuditSnapshot> RunAsync(CancellationToken cancellationToken = default)
@@ -42,12 +44,23 @@ public async Task<AuditSnapshot> RunAsync(CancellationToken cancellationToken =
             new TuneSummary(),
             timeout.Token).ConfigureAwait(false);
 
+        var tuneWithLocalAiProfile = new TuneSummary
+        {
+            TopProcesses = tune.TopProcesses,
+            StartupEntries = tune.StartupEntries,
+            Services = tune.Services,
+            StorageHotspots = tune.StorageHotspots,
+            RepoPlacements = tune.RepoPlacements,
+            ToolProbes = tune.ToolProbes,
+            LocalAiProfile = _localAiProfileBuilder.Build(tune)
+        };
+
         var snapshot = new AuditSnapshot
         {
             GeneratedAt = DateTimeOffset.Now,
             Machine = machine,
             Inventory = inventory,
-            Tune = tune,
+            Tune = tuneWithLocalAiProfile,
             ProbeStatuses = _probeRunner.Statuses.ToList()
         };
 
diff --git a/src/AppLens.Backend/LocalAiProfileBuilder.cs b/src/AppLens.Backend/LocalAiProfileBuilder.cs
new file mode 100644
index 0000000..3bdc24c
--- /dev/null
+++ b/src/AppLens.Backend/LocalAiProfileBuilder.cs
@@ -0,0 +1,129 @@
+namespace AppLens.Backend;
+
+public sealed class LocalAiProfileBuilder // Derives a LocalAiProfile from probe output already collected in a TuneSummary.
+{
+    public LocalAiProfile Build(TuneSummary tune) // Reads tune.ToolProbes and tune.StorageHotspots only.
+    {
+        var signals = new List<LocalAiSignal>
+        {
+            Signal("NVIDIA GPU", HasSucceededProbe(tune, "NVIDIA GPU"), Detail(tune, "NVIDIA GPU")),
+            Signal("CUDA compiler", HasSucceededProbe(tune, "CUDA Compiler"), Detail(tune, "CUDA Compiler")),
+            Signal("llama.cpp", HasSucceededProbe(tune, "llama.cpp"), Detail(tune, "llama.cpp")),
+            Signal("Ollama", HasSucceededProbe(tune, "Ollama Summary"), Detail(tune, "Ollama Summary")),
+            Signal("PyTorch CUDA", HasPyTorchCuda(tune), Detail(tune, "PyTorch CUDA")),
+            Signal("Model cache", HasModelCache(tune), ModelCacheDetail(tune))
+        };
+
+        var hasGpu = signals.Any(signal => signal.Name == "NVIDIA GPU" && signal.Status == LocalAiSignalStatus.Present);
+        var hasLlamaCpp = signals.Any(signal => signal.Name == "llama.cpp" && signal.Status == LocalAiSignalStatus.Present);
+        var hasOllama = signals.Any(signal => signal.Name == "Ollama" && signal.Status == LocalAiSignalStatus.Present);
+        var hasTorch = signals.Any(signal => signal.Name == "PyTorch CUDA" && signal.Status == LocalAiSignalStatus.Present);
+        var hasCache = signals.Any(signal => signal.Name == "Model cache" && signal.Status == LocalAiSignalStatus.Present);
+
+        var trainingReady = hasGpu && hasTorch; // The CUDA compiler signal is reported but does not gate readiness.
+        var readiness = trainingReady
+            ? LocalAiReadiness.TrainingReady
+            : hasGpu && (hasLlamaCpp || hasOllama || hasCache)
+                ? LocalAiReadiness.InferenceReady
+                : hasLlamaCpp || hasOllama || hasCache
+                    ? LocalAiReadiness.Limited
+                    : LocalAiReadiness.Unknown;
+
+        return new LocalAiProfile
+        {
+            Readiness = readiness,
+            WorkloadClass = WorkloadClass(hasGpu, signals),
+            RecommendedRuntime = RecommendedRuntime(hasGpu, hasLlamaCpp, hasOllama),
+            TrainingReady = trainingReady,
+            TrainingGate = trainingReady
+                ? "PyTorch CUDA appears available; still require explicit user approval before training."
+                : "Training remains gated until PyTorch CUDA passes a smoke test and the user approves a run.",
+            Signals = signals
+        };
+    }
+
+    private static LocalAiSignal Signal(string name, bool present, string detail) => // Only Present or Missing is produced.
+        new()
+        {
+            Name = name,
+            Status = present ? LocalAiSignalStatus.Present : LocalAiSignalStatus.Missing,
+            Detail = detail
+        };
+
+    private static bool HasSucceededProbe(TuneSummary tune, string probeName) => // True if any matching probe succeeded cleanly.
+        tune.ToolProbes.Any(probe =>
+            probe.Name.Contains(probeName, StringComparison.OrdinalIgnoreCase) &&
+            probe.Status.Equals(ProbeState.Succeeded.ToString(), StringComparison.OrdinalIgnoreCase) &&
+            !LooksLikeError(probe.Output));
+
+    private static string Detail(TuneSummary tune, string probeName) => // Output of the first name match, succeeded or not; "" if none.
+        tune.ToolProbes
+            .FirstOrDefault(probe => probe.Name.Contains(probeName, StringComparison.OrdinalIgnoreCase))
+            ?.Output ?? "";
+
+    private static bool HasPyTorchCuda(TuneSummary tune) // Requires a clean probe whose output contains "True" and not "no cuda".
+    {
+        var output = Detail(tune, "PyTorch CUDA");
+        return HasSucceededProbe(tune, "PyTorch CUDA") &&
+               output.Contains("True", StringComparison.OrdinalIgnoreCase) &&
+               !output.Contains("no cuda", StringComparison.OrdinalIgnoreCase);
+    }
+
+    private static bool LooksLikeError(string output) => // Substring heuristic; any output containing "error" counts as a failure.
+        output.Contains("No module named", StringComparison.OrdinalIgnoreCase) ||
+        output.Contains("not found", StringComparison.OrdinalIgnoreCase) ||
+        output.Contains("not recognized", StringComparison.OrdinalIgnoreCase) ||
+        output.Contains("could not connect", StringComparison.OrdinalIgnoreCase) ||
+        output.Contains("timed out", StringComparison.OrdinalIgnoreCase) ||
+        output.Contains("error", StringComparison.OrdinalIgnoreCase);
+
+    private static bool HasModelCache(TuneSummary tune) => // A non-empty storage hotspot whose location mentions ".ollama".
+        tune.StorageHotspots.Any(hotspot =>
+            hotspot.Location.Contains(".ollama", StringComparison.OrdinalIgnoreCase) &&
+            hotspot.Bytes > 0);
+
+    private static string ModelCacheDetail(TuneSummary tune) // "<location>: <size>" for the first ".ollama" hotspot, else "".
+    {
+        var cache = tune.StorageHotspots.FirstOrDefault(hotspot =>
+            hotspot.Location.Contains(".ollama", StringComparison.OrdinalIgnoreCase));
+        return cache is null ? "" : $"{cache.Location}: {Formatting.Size(cache.Bytes)}";
+    }
+
+    private static string WorkloadClass(bool hasGpu, List<LocalAiSignal> signals) // Plain-language role for the machine.
+    {
+        if (!hasGpu)
+        {
+            return "CPU/local-service only; use small models or remote endpoints for heavier work.";
+        }
+
+        var gpuDetail = signals.First(signal => signal.Name == "NVIDIA GPU").Detail;
+        // Digit boundaries keep "16 GB" or "16144" from being read as a 6 GB card.
+        if (System.Text.RegularExpressions.Regex.IsMatch(gpuDetail, @"(?<!\d)(6144|6 GB|1660)(?!\d)",
+                System.Text.RegularExpressions.RegexOptions.IgnoreCase | System.Text.RegularExpressions.RegexOptions.CultureInvariant))
+        {
+            return "Small-model/autoresearch worker: 3B-8B quantized inference, eval sweeps, and dataset prep.";
+        }
+
+        return "GPU local-AI workstation; benchmark model size, context, and training jobs before unattended use.";
+    }
+
+    private static string RecommendedRuntime(bool hasGpu, bool hasLlamaCpp, bool hasOllama) // First matching rule wins.
+    {
+        if (hasGpu && hasLlamaCpp)
+        {
+            return "llama.cpp CUDA/MMQ with full offload when VRAM allows.";
+        }
+
+        if (hasGpu && hasOllama)
+        {
+            return "Ollama or llama.cpp GPU inference after runtime benchmark.";
+        }
+
+        if (hasLlamaCpp || hasOllama)
+        {
+            return "CPU llama.cpp/Ollama for light local tasks; prefer remote or larger GPU hosts for heavy work.";
+        }
+
+        return "Install or connect a local model runtime before autoresearch.";
+    }
+}
diff --git a/src/AppLens.Backend/Models.cs b/src/AppLens.Backend/Models.cs
index c9778a6..e140fdf 100644
--- a/src/AppLens.Backend/Models.cs
+++ b/src/AppLens.Backend/Models.cs
@@ -67,6 +67,7 @@ public sealed class TuneSummary
     public List<StorageHotspot> StorageHotspots { get; init; } = [];
     public List<RepoPlacement> RepoPlacements { get; init; } = [];
     public List<ToolProbe> ToolProbes { get; init; } = [];
+    public LocalAiProfile LocalAiProfile { get; init; } = new();
 }
 
 public sealed class ProcessSnapshot
@@ -115,6 +116,41 @@ public sealed class ToolProbe
     public string Output { get; init; } = "";
 }
 
+public sealed class LocalAiProfile // Local-AI readiness result carried on TuneSummary.LocalAiProfile.
+{
+    public LocalAiReadiness Readiness { get; init; } = LocalAiReadiness.Unknown; // Unknown suppresses the readiness highlight and the tune-plan item.
+    public string WorkloadClass { get; init; } = ""; // Shown in reports, the readiness highlight, and the plan item's Evidence.
+    public string RecommendedRuntime { get; init; } = ""; // Shown in reports; also the leading text of the plan item's Guidance.
+    public bool TrainingReady { get; init; } // Rendered as Yes/No; also selects the plan item's category and risk.
+    public string TrainingGate { get; init; } = ""; // Shown in reports; reused as the plan item's VerificationStep.
+    public List<LocalAiSignal> Signals { get; init; } = []; // One report table row per signal.
+}
+
+public sealed class LocalAiSignal // One named signal, rendered as a Signal/Status/Detail row in the report tables.
+{
+    public string Name { get; init; } = ""; // Used as a lookup key, e.g. WorkloadClass searches for "NVIDIA GPU".
+    public LocalAiSignalStatus Status { get; init; } = LocalAiSignalStatus.Unknown; // Unknown unless explicitly set.
+    public string Detail { get; init; } = ""; // Free text; WorkloadClass scans the GPU signal's detail for "6144", "6 GB", or "1660".
+}
+
+[JsonConverter(typeof(JsonStringEnumConverter<LocalAiReadiness>))] // JSON uses JsonStringEnumConverter for this enum.
+public enum LocalAiReadiness // Overall readiness level stored on LocalAiProfile.Readiness.
+{
+    Unknown, // Default on LocalAiProfile; the readiness highlight and tune-plan item are skipped at this value.
+    Limited, // Expected by the builder tests when the NVIDIA GPU probe is skipped.
+    InferenceReady, // Expected by the builder tests for GPU + CUDA compiler + llama.cpp with PyTorch skipped.
+    TrainingReady // NOTE(review): no fixture in this patch section produces this value; confirm the rule in LocalAiProfileBuilder.
+}
+
+[JsonConverter(typeof(JsonStringEnumConverter<LocalAiSignalStatus>))] // JSON uses JsonStringEnumConverter for this enum.
+public enum LocalAiSignalStatus // Per-signal state stored on LocalAiSignal.Status.
+{
+    Unknown, // Default on LocalAiSignal.
+    Present, // Expected by the builder tests for the llama.cpp signal when its probe succeeds.
+    Missing, // Expected by the builder tests for PyTorch CUDA on error output or when CUDA is unavailable.
+    Review // NOTE(review): not exercised by the tests visible here; confirm its meaning in LocalAiProfileBuilder.
+}
+
 public sealed class Finding
 {
     public FindingSeverity Severity { get; init; }
diff --git a/src/AppLens.Backend/ReadinessSummaryBuilder.cs b/src/AppLens.Backend/ReadinessSummaryBuilder.cs
index 7b6e1b9..9755959 100644
--- a/src/AppLens.Backend/ReadinessSummaryBuilder.cs
+++ b/src/AppLens.Backend/ReadinessSummaryBuilder.cs
@@ -84,6 +84,11 @@ private static List<string> BuildHighlights(
             highlights.Add($"{snapshot.Tune.StorageHotspots.Count} storage hotspot(s) were measured for review.");
         }
 
+        if (snapshot.Tune.LocalAiProfile.Readiness != LocalAiReadiness.Unknown)
+        {
+            highlights.Add($"Local AI profile: {snapshot.Tune.LocalAiProfile.Readiness}; {snapshot.Tune.LocalAiProfile.WorkloadClass}");
+        }
+
         return highlights;
     }
 
diff --git a/src/AppLens.Backend/ReportWriter.cs b/src/AppLens.Backend/ReportWriter.cs
index 71ce954..860e98b 100644
--- a/src/AppLens.Backend/ReportWriter.cs
+++ b/src/AppLens.Backend/ReportWriter.cs
@@ -44,6 +44,7 @@ public string WriteMarkdown(AuditSnapshot snapshot, bool includeRawDetails = fal
         AppendReadiness(builder, snapshot);
         AppendFindings(builder, snapshot);
         AppendTunePlan(builder, snapshot);
+        AppendLocalAiProfile(builder, snapshot.Tune.LocalAiProfile);
         AppendInventory(builder, snapshot);
         AppendTune(builder, snapshot);
         AppendProbeStatuses(builder, snapshot);
@@ -128,6 +129,7 @@ public string WriteHtml(AuditSnapshot snapshot, bool includeRawDetails = false)
                 </tbody></table>
 
                 {{HtmlTable("Tune Plan", ["Category", "Risk", "Item", "Guidance", "Future Action"], snapshot.TunePlan.Select(item => new[] { item.Category.ToString(), item.Risk.ToString(), item.Title, item.Guidance, item.ProposedAction.Description }))}}
+                {{HtmlLocalAiProfile(snapshot.Tune.LocalAiProfile)}}
                 {{HtmlTable("Desktop Applications", ["Name", "Version", "Publisher", "Source"], snapshot.Inventory.DesktopApplications.Select(app => new[] { app.Name, app.Version, app.Publisher, app.Source }))}}
                 {{HtmlTable("Store Applications", ["Name", "Version", "Publisher", "Source"], snapshot.Inventory.StoreApplications.Select(app => new[] { app.Name, app.Version, app.Publisher, app.Source }))}}
                 {{HtmlTable("Top Processes", ["Name", "PID", "Memory", "CPU Seconds"], snapshot.Tune.TopProcesses.Select(process => new[] { process.Name, process.Id.ToString(), Formatting.Size(process.WorkingSetBytes), process.CpuSeconds.ToString("N1") }))}}
@@ -169,6 +171,26 @@ private static void AppendReadiness(StringBuilder builder, AuditSnapshot snapsho
         builder.AppendLine();
     }
 
+    // Writes the "Local AI Readiness" Markdown section: five summary lines, then the signal table.
+    private static void AppendLocalAiProfile(StringBuilder builder, LocalAiProfile profile)
+    {
+        builder
+            .AppendLine("## Local AI Readiness")
+            .AppendLine()
+            .AppendLine($"Readiness: {profile.Readiness}")
+            .AppendLine($"Workload class: {Formatting.MarkdownEscape(profile.WorkloadClass)}")
+            .AppendLine($"Recommended runtime: {Formatting.MarkdownEscape(profile.RecommendedRuntime)}")
+            .AppendLine($"Training ready: {(profile.TrainingReady ? "Yes" : "No")}")
+            .AppendLine($"Training gate: {Formatting.MarkdownEscape(profile.TrainingGate)}")
+            .AppendLine()
+            .AppendLine("| Signal | Status | Detail |")
+            .AppendLine("| --- | --- | --- |");
+
+        profile.Signals.ForEach(row => builder.AppendLine($"| {Formatting.MarkdownEscape(row.Name)} | {row.Status} | {Formatting.MarkdownEscape(row.Detail)} |"));
+
+        builder.AppendLine();
+    }
+
     private static void AppendFindings(StringBuilder builder, AuditSnapshot snapshot)
     {
         builder.AppendLine("## Findings");
@@ -272,4 +294,20 @@ private static string HtmlTable(string title, string[] columns, IEnumerable<stri
             </table>
             """;
     }
+
+    // Renders the profile as two HTML tables: a Metric/Value summary followed by the per-signal rows.
+    private static string HtmlLocalAiProfile(LocalAiProfile profile)
+    {
+        string[][] summary =
+        [
+            ["Readiness", profile.Readiness.ToString()],
+            ["Workload class", profile.WorkloadClass],
+            ["Recommended runtime", profile.RecommendedRuntime],
+            ["Training ready", profile.TrainingReady ? "Yes" : "No"],
+            ["Training gate", profile.TrainingGate]
+        ];
+        var signals = from entry in profile.Signals select new[] { entry.Name, entry.Status.ToString(), entry.Detail };
+
+        return string.Concat(HtmlTable("Local AI Readiness", ["Metric", "Value"], summary), HtmlTable("Local AI Signals", ["Signal", "Status", "Detail"], signals));
+    }
 }
diff --git a/src/AppLens.Backend/TuneCollector.cs b/src/AppLens.Backend/TuneCollector.cs
index d181802..63f0bb5 100644
--- a/src/AppLens.Backend/TuneCollector.cs
+++ b/src/AppLens.Backend/TuneCollector.cs
@@ -377,6 +377,13 @@ private static RepoPlacement CountRepos(string root, CancellationToken cancellat
 
     private List<ToolProbe> GetToolProbes() =>
     [
+        _probeRunner.RunTool("Git", "git", "--version", TimeSpan.FromSeconds(5)),
+        _probeRunner.RunTool("Python", "python", "--version", TimeSpan.FromSeconds(5)),
+        _probeRunner.RunTool("CMake", "cmake", "--version", TimeSpan.FromSeconds(5)),
+        _probeRunner.RunTool("NVIDIA GPU", "nvidia-smi", "--query-gpu=name,memory.total,driver_version,compute_cap --format=csv,noheader,nounits", TimeSpan.FromSeconds(8)),
+        _probeRunner.RunTool("CUDA Compiler", "nvcc", "--version", TimeSpan.FromSeconds(8)),
+        _probeRunner.RunTool("llama.cpp", "llama-cli", "--version", TimeSpan.FromSeconds(5)),
+        _probeRunner.RunTool("PyTorch CUDA", "python", "-c \"import torch; print(torch.__version__); print(torch.cuda.is_available()); print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'no cuda')\"", TimeSpan.FromSeconds(12)),
         _probeRunner.RunTool("WSL Status", "wsl.exe", "--status", TimeSpan.FromSeconds(8)),
         _probeRunner.RunTool("WSL Distros", "wsl.exe", "-l -v", TimeSpan.FromSeconds(8)),
         _probeRunner.RunTool("Docker Summary", "docker", "system df", TimeSpan.FromSeconds(8)),
diff --git a/src/AppLens.Backend/TunePlanBuilder.cs b/src/AppLens.Backend/TunePlanBuilder.cs
index 383ece3..7c4773e 100644
--- a/src/AppLens.Backend/TunePlanBuilder.cs
+++ b/src/AppLens.Backend/TunePlanBuilder.cs
@@ -20,6 +20,7 @@ public List<TunePlanItem> Build(AuditSnapshot snapshot)
 
         AddStartupPlanItems(snapshot, items);
         AddServicePlanItems(snapshot, items);
+        AddLocalAiPlanItem(snapshot, items);
 
         return items
             .GroupBy(item => item.Id, StringComparer.OrdinalIgnoreCase)
@@ -238,6 +239,33 @@ private static void AddServicePlanItems(AuditSnapshot snapshot, List<TunePlanIte
         }
     }
 
+    // Adds one read-only "Local autoresearch profile" plan item unless the profile's readiness is Unknown.
+    private static void AddLocalAiPlanItem(AuditSnapshot snapshot, List<TunePlanItem> items)
+    {
+        var ai = snapshot.Tune.LocalAiProfile;
+        if (ai.Readiness is LocalAiReadiness.Unknown)
+        {
+            return;
+        }
+
+        var (category, risk) = ai.TrainingReady
+            ? (TunePlanCategory.Optional, TunePlanRisk.Medium)
+            : (TunePlanCategory.Review, TunePlanRisk.Low);
+
+        items.Add(new TunePlanItem
+        {
+            Id = "local-ai-autoresearch-profile",
+            Category = category,
+            Risk = risk,
+            Title = "Local autoresearch profile",
+            Evidence = $"{ai.Readiness}: {ai.WorkloadClass}",
+            Guidance = $"{ai.RecommendedRuntime} Keep training jobs manual-gated until the user approves scope, model, and stop conditions.",
+            BackupPlan = "Before future training, save the model path, benchmark output, dataset location, and run manifest.",
+            VerificationStep = ai.TrainingGate,
+            ProposedAction = new ProposedAction { Kind = ProposedActionKind.ManualReview, ExecutionState = TunePlanExecutionState.ReadOnlyOnly, Description = "Read-only guidance: benchmark and plan autoresearch jobs; do not start training automatically." }
+        });
+    }
+
     private static bool IsEnabled(StartupEntry entry) =>
         entry.State.Equals("Enabled", StringComparison.OrdinalIgnoreCase) ||
         entry.State.Equals("Unknown", StringComparison.OrdinalIgnoreCase);
diff --git a/tests/AppLens.Backend.Tests/LocalAiProfileBuilderTests.cs b/tests/AppLens.Backend.Tests/LocalAiProfileBuilderTests.cs
new file mode 100644
index 0000000..7fb22f8
--- /dev/null
+++ b/tests/AppLens.Backend.Tests/LocalAiProfileBuilderTests.cs
@@ -0,0 +1,91 @@
+namespace AppLens.Backend.Tests;
+
+public sealed class LocalAiProfileBuilderTests // Feeds hand-written TuneSummary fixtures to LocalAiProfileBuilder.Build and checks readiness, training gate, and signals.
+{
+    [Fact] // GPU, CUDA compiler, and llama.cpp probes succeed while PyTorch is skipped: expect InferenceReady with training still gated.
+    public void Cuda_llama_cpp_profile_is_inference_ready_but_training_gated()
+    {
+        var tune = new TuneSummary
+        {
+            ToolProbes =
+            [
+                new ToolProbe { Name = "NVIDIA GPU", Status = "Succeeded", Output = "NVIDIA GeForce GTX 1660 SUPER, 6144 MiB" },
+                new ToolProbe { Name = "CUDA Compiler", Status = "Succeeded", Output = "nvcc: NVIDIA (R) Cuda compiler driver" },
+                new ToolProbe { Name = "llama.cpp CUDA-MMQ", Status = "Succeeded", Output = "llama-cli llama-server llama-bench" },
+                new ToolProbe { Name = "PyTorch CUDA", Status = "Skipped", Output = "ModuleNotFoundError: No module named 'torch'" }
+            ],
+            StorageHotspots =
+            [
+                new StorageHotspot { Location = ".ollama", Bytes = 4L * 1024 * 1024 * 1024 }
+            ]
+        };
+
+        var profile = new LocalAiProfileBuilder().Build(tune);
+
+        Assert.Equal(LocalAiReadiness.InferenceReady, profile.Readiness);
+        Assert.False(profile.TrainingReady);
+        Assert.Contains("small-model", profile.WorkloadClass, StringComparison.OrdinalIgnoreCase);
+        Assert.Contains(profile.Signals, signal =>
+            signal.Name == "llama.cpp" && // NOTE(review): the probe is named "llama.cpp CUDA-MMQ"; presumably the builder maps it to "llama.cpp" — confirm.
+            signal.Status == LocalAiSignalStatus.Present);
+    }
+
+    [Fact] // Skipped GPU probe plus a succeeding Ollama probe: expect Limited readiness and a CPU runtime recommendation.
+    public void Missing_gpu_keeps_autoresearch_readiness_limited()
+    {
+        var tune = new TuneSummary
+        {
+            ToolProbes =
+            [
+                new ToolProbe { Name = "NVIDIA GPU", Status = "Skipped", Output = "nvidia-smi not found" },
+                new ToolProbe { Name = "Ollama Summary", Status = "Succeeded", Output = "NAME ID SIZE MODIFIED" }
+            ]
+        };
+
+        var profile = new LocalAiProfileBuilder().Build(tune);
+
+        Assert.Equal(LocalAiReadiness.Limited, profile.Readiness);
+        Assert.False(profile.TrainingReady);
+        Assert.Contains("CPU", profile.RecommendedRuntime, StringComparison.OrdinalIgnoreCase);
+    }
+
+    [Fact] // A probe marked "Succeeded" whose output is a ModuleNotFoundError must still yield a Missing PyTorch CUDA signal.
+    public void Probe_error_output_does_not_count_as_present_runtime()
+    {
+        var tune = new TuneSummary
+        {
+            ToolProbes =
+            [
+                new ToolProbe { Name = "PyTorch CUDA", Status = "Succeeded", Output = "ModuleNotFoundError: No module named 'torch'" }
+            ]
+        };
+
+        var profile = new LocalAiProfileBuilder().Build(tune);
+
+        Assert.Contains(profile.Signals, signal =>
+            signal.Name == "PyTorch CUDA" &&
+            signal.Status == LocalAiSignalStatus.Missing);
+        Assert.False(profile.TrainingReady);
+    }
+
+    [Fact] // PyTorch output containing "False no cuda" must yield a Missing signal and keep TrainingReady off, even with a GPU present.
+    public void PyTorch_without_cuda_does_not_open_training_gate()
+    {
+        var tune = new TuneSummary
+        {
+            ToolProbes =
+            [
+                new ToolProbe { Name = "NVIDIA GPU", Status = "Succeeded", Output = "NVIDIA GeForce RTX, 12288 MiB" },
+                new ToolProbe { Name = "PyTorch CUDA", Status = "Succeeded", Output = "2.9.0 False no cuda" }
+            ]
+        };
+
+        var profile = new LocalAiProfileBuilder().Build(tune);
+
+        Assert.Contains(profile.Signals, signal =>
+            signal.Name == "PyTorch CUDA" &&
+            signal.Status == LocalAiSignalStatus.Missing);
+        Assert.False(profile.TrainingReady);
+        Assert.NotEqual(LocalAiReadiness.TrainingReady, profile.Readiness);
+    }
+}
diff --git a/tests/AppLens.Backend.Tests/ReadinessSummaryBuilderTests.cs b/tests/AppLens.Backend.Tests/ReadinessSummaryBuilderTests.cs
index a510a11..02d77dd 100644
--- a/tests/AppLens.Backend.Tests/ReadinessSummaryBuilderTests.cs
+++ b/tests/AppLens.Backend.Tests/ReadinessSummaryBuilderTests.cs
@@ -51,4 +51,26 @@ public void Readiness_summary_counts_tune_plan_and_startup_state()
         Assert.Equal(10, summary.StorageHotspotBytes);
         Assert.InRange(summary.Score, 0, 99);
     }
+
+    [Fact] // A profile with non-Unknown readiness must produce a highlight containing "Local AI" and the readiness name.
+    public void Readiness_summary_highlights_local_ai_profile()
+    {
+        var snapshot = new AuditSnapshot
+        {
+            Tune = new TuneSummary
+            {
+                LocalAiProfile = new LocalAiProfile
+                {
+                    Readiness = LocalAiReadiness.InferenceReady,
+                    WorkloadClass = "Small-model/autoresearch worker"
+                }
+            }
+        };
+
+        var summary = new ReadinessSummaryBuilder().Build(snapshot);
+
+        Assert.Contains(summary.Highlights, highlight =>
+            highlight.Contains("Local AI", StringComparison.OrdinalIgnoreCase) &&
+            highlight.Contains("InferenceReady", StringComparison.OrdinalIgnoreCase));
+    }
 }
diff --git a/tests/AppLens.Backend.Tests/ReportWriterTests.cs b/tests/AppLens.Backend.Tests/ReportWriterTests.cs
index 77ee79c..dac0cb8 100644
--- a/tests/AppLens.Backend.Tests/ReportWriterTests.cs
+++ b/tests/AppLens.Backend.Tests/ReportWriterTests.cs
@@ -57,10 +57,13 @@ public void Markdown_and_html_exports_include_core_sections()
         Assert.Contains("## Readiness Summary", markdown);
         Assert.Contains("## Findings", markdown);
         Assert.Contains("## Tune Plan", markdown);
+        Assert.Contains("## Local AI Readiness", markdown);
+        Assert.Contains("InferenceReady", markdown);
         Assert.Contains("## App Inventory", markdown);
         Assert.Contains("## Workstation Diagnostics", markdown);
         Assert.Contains("<h2>Findings</h2>", html);
         Assert.Contains("<h2>Tune Plan</h2>", html);
+        Assert.Contains("<h2>Local AI Readiness</h2>", html);
         Assert.Contains("AppLens-desktop", html);
     }
 
@@ -111,7 +114,19 @@ private static AuditSnapshot FixtureSnapshot()
                 RepoPlacements =
                 [
                     new RepoPlacement { Root = Path.Combine(profile, "OneDrive", "Documents"), RepoCount = 1, Sample = Path.Combine(profile, "OneDrive", "Documents", "repo") }
-                ]
+                ],
+                LocalAiProfile = new LocalAiProfile
+                {
+                    Readiness = LocalAiReadiness.InferenceReady,
+                    WorkloadClass = "Small-model/autoresearch worker",
+                    RecommendedRuntime = "llama.cpp CUDA-MMQ with full offload.",
+                    TrainingReady = false,
+                    TrainingGate = "Training remains gated until PyTorch CUDA passes a smoke test.",
+                    Signals =
+                    [
+                        new LocalAiSignal { Name = "NVIDIA GPU", Status = LocalAiSignalStatus.Present, Detail = "GTX 1660 SUPER" }
+                    ]
+                }
             },
             Readiness = new ReadinessSummary
             {
diff --git a/tests/AppLens.Backend.Tests/TunePlanBuilderTests.cs b/tests/AppLens.Backend.Tests/TunePlanBuilderTests.cs
index c8ee1f7..536f861 100644
--- a/tests/AppLens.Backend.Tests/TunePlanBuilderTests.cs
+++ b/tests/AppLens.Backend.Tests/TunePlanBuilderTests.cs
@@ -81,4 +81,32 @@ public void Privacy_finding_keeps_v1_read_only()
         Assert.Equal(TunePlanExecutionState.ReadOnlyOnly, item.ProposedAction.ExecutionState);
         Assert.Equal(ProposedActionKind.None, item.ProposedAction.Kind);
     }
+
+    [Fact] // An InferenceReady, not-training-ready profile must yield exactly one Review/Low, read-only autoresearch plan item.
+    public void Local_ai_profile_adds_read_only_autoresearch_guidance()
+    {
+        var snapshot = new AuditSnapshot
+        {
+            Tune = new TuneSummary
+            {
+                LocalAiProfile = new LocalAiProfile
+                {
+                    Readiness = LocalAiReadiness.InferenceReady,
+                    WorkloadClass = "Small-model/autoresearch worker",
+                    RecommendedRuntime = "llama.cpp CUDA-MMQ with full offload.",
+                    TrainingReady = false,
+                    TrainingGate = "Training remains gated until PyTorch CUDA passes a smoke test."
+                }
+            }
+        };
+
+        var plan = new TunePlanBuilder().Build(snapshot);
+
+        var item = Assert.Single(plan, item => item.Title.Contains("autoresearch", StringComparison.OrdinalIgnoreCase)); // Assert.Single fails unless exactly one item matches.
+        Assert.Equal(TunePlanCategory.Review, item.Category);
+        Assert.Equal(TunePlanRisk.Low, item.Risk);
+        Assert.Equal(TunePlanExecutionState.ReadOnlyOnly, item.ProposedAction.ExecutionState);
+        Assert.Contains("llama.cpp", item.Guidance, StringComparison.OrdinalIgnoreCase); // Guidance starts with the profile's RecommendedRuntime.
+        Assert.Contains("Training remains gated", item.VerificationStep, StringComparison.OrdinalIgnoreCase); // VerificationStep is the profile's TrainingGate.
+    }
 }

From d94f4558d3e66b6ee059925de71e4fd128238b98 Mon Sep 17 00:00:00 2001
From: Cody <cody@copperstateit.com>
Date: Sat, 2 May 2026 22:21:00 -0700
Subject: [PATCH 2/2] Document AppLens-Tune local AI test run

---
 docs/AppLens-Tune-Test-Run-2026-05-03.md | 97 ++++++++++++++++++++++++
 1 file changed, 97 insertions(+)
 create mode 100644 docs/AppLens-Tune-Test-Run-2026-05-03.md

diff --git a/docs/AppLens-Tune-Test-Run-2026-05-03.md b/docs/AppLens-Tune-Test-Run-2026-05-03.md
new file mode 100644
index 0000000..269b6bf
--- /dev/null
+++ b/docs/AppLens-Tune-Test-Run-2026-05-03.md
@@ -0,0 +1,97 @@
+# AppLens-Tune Local AI Test Run - 2026-05-03
+
+## Scope
+
+Controlled local-AI readiness test for the gaming PC. This run tested inference, local API access, PyTorch CUDA availability, and a tiny AppLens-Tune eval. It did not start training.
+
+## Host
+
+- SSH target: `cody@192.168.68.57`
+- GPU: NVIDIA GeForce GTX 1660 SUPER, 6 GB VRAM
+- llama.cpp source: `/home/cody/local-llm/src/llama.cpp`
+- Runtime build: `/home/cody/local-llm/src/llama.cpp/build-cuda-mmq`
+- Model: `/home/cody/local-llm/models/qwen2.5-7b-ollama.gguf`
+
+## llama.cpp Server Smoke
+
+Representative server command:
+
+```bash
+~/local-llm/src/llama.cpp/build-cuda-mmq/bin/llama-server \
+  -m ~/local-llm/models/qwen2.5-7b-ollama.gguf \
+  -ngl 99 \
+  -t 8 \
+  -c 4096 \
+  --parallel 1 \
+  --host 127.0.0.1 \
+  --port 8080
+```
+
+Results:
+
+- Remote health check: `{"status":"ok"}`
+- Remote OpenAI-compatible chat response: `AppLens local llama is ready.`
+- Local SSH tunnel health check at `http://127.0.0.1:18080/health`: `{"status":"ok"}`
+- Local tunneled chat response: `AppLens tunnel ready.`
+- Local tunneled generation speed: about `51 tok/s`
+
+## PyTorch CUDA Smoke
+
+Isolated environment:
+
+```bash
+~/local-llm/envs/torch-cuda
+```
+
+Installed only into the venv:
+
+- `torch 2.11.0+cu128`
+- `numpy 2.4.4`
+
+Smoke result:
+
+```json
+{
+  "cuda_available": true,
+  "cuda_version": "12.8",
+  "device": "NVIDIA GeForce GTX 1660 SUPER",
+  "torch": "2.11.0+cu128"
+}
+```
+
+## AppLens-Tune Report
+
+When run from the isolated torch env, AppLens-Tune reports:
+
+- PyTorch CUDA probe: `CUDA ready`
+- Runtime: `llama.cpp CUDA-MMQ`
+- Seed model: cached `qwen2.5:7b`
+- Training: `manual approval required`
+
+Report path:
+
+```bash
+/home/cody/applens-cli-20260502/reports/latest-tune-torch-output.txt
+```
+
+## Tiny Eval
+
+Eval record path:
+
+```bash
+/home/cody/local-llm/evals/applens-tune-summary-eval.jsonl
+```
+
+The eval asked the local model to summarize the AppLens-Tune report into readiness, safe next test, and remaining gate.
+
+Measured result:
+
+- Prompt tokens: `1206`
+- Completion tokens: `83`
+- Prompt eval: about `588 tok/s`
+- Generation: about `47 tok/s`
+- Latency: about `3.9s`
+
+## Current Boundary
+
+The machine is ready for local inference tests, AppLens eval sweeps, and small dataset-prep jobs. Training should start only after defining a small, controlled target together with its run manifest, stop conditions, output folder, and expected duration.