From 0d4c86214d06eebb80b82728ae03efa7553ba8a4 Mon Sep 17 00:00:00 2001 From: Cody Date: Sat, 2 May 2026 21:33:21 -0700 Subject: [PATCH 1/2] Add local AI readiness profile to AppLens-Tune --- AppLens-Tune.py | 277 +++++++++++++++++- README.md | 2 +- docs/AppLens-Tune-LLM-Profile.md | 101 +++++++ docs/AppLens-Tune-Product-Outline.md | 3 + src/AppLens.Backend/AuditService.cs | 15 +- src/AppLens.Backend/LocalAiProfileBuilder.cs | 129 ++++++++ src/AppLens.Backend/Models.cs | 36 +++ .../ReadinessSummaryBuilder.cs | 5 + src/AppLens.Backend/ReportWriter.cs | 38 +++ src/AppLens.Backend/TuneCollector.cs | 7 + src/AppLens.Backend/TunePlanBuilder.cs | 28 ++ .../LocalAiProfileBuilderTests.cs | 91 ++++++ .../ReadinessSummaryBuilderTests.cs | 22 ++ .../ReportWriterTests.cs | 17 +- .../TunePlanBuilderTests.cs | 28 ++ 15 files changed, 793 insertions(+), 6 deletions(-) create mode 100644 docs/AppLens-Tune-LLM-Profile.md create mode 100644 src/AppLens.Backend/LocalAiProfileBuilder.cs create mode 100644 tests/AppLens.Backend.Tests/LocalAiProfileBuilderTests.cs diff --git a/AppLens-Tune.py b/AppLens-Tune.py index 9b573f9..bc41c23 100644 --- a/AppLens-Tune.py +++ b/AppLens-Tune.py @@ -9,6 +9,8 @@ from __future__ import annotations import getpass +import csv +import json import os import platform import shutil @@ -222,6 +224,259 @@ def key_services() -> list[dict[str, str]]: return rows +def first_version_line(lines: list[str]) -> str: + for line in lines: + if line and not line.lower().startswith("warning"): + return line[:120] + for line in lines: + if "version" in line.lower(): + return line[:120] + return lines[0][:120] if lines else "" + + +def local_llm_tools() -> list[dict[str, str]]: + checks = ( + ("git", "git", ["--version"]), + ("python3", "python3", ["--version"]), + ("pip3", "pip3", ["--version"]), + ("uv", "uv", ["--version"]), + ("cmake", "cmake", ["--version"]), + ("make", "make", ["--version"]), + ("gcc", "gcc", ["--version"]), + ("g++", "g++", ["--version"]), + 
("docker", "docker", ["--version"]), + ("ollama", "ollama", ["--version"]), + ("nvidia-smi", "nvidia-smi", ["--version"]), + ("nvcc", "nvcc", ["--version"]), + ) + rows: list[dict[str, str]] = [] + for label, command, args in checks: + if not shutil.which(command): + rows.append({"Tool": label, "Status": "Missing", "Detail": ""}) + continue + lines = run_command([command, *args], timeout=8) + rows.append({"Tool": label, "Status": "Present", "Detail": first_version_line(lines)}) + return rows + + +def llama_cpp_builds() -> list[dict[str, str]]: + root = Path.home() / "local-llm/src/llama.cpp" + rows: list[dict[str, str]] = [] + if not root.exists(): + return [{"Build": "llama.cpp source", "Status": "Missing", "Path": str(root)}] + + commit = "" + if (root / ".git").exists(): + lines = run_command(["git", "-C", str(root), "rev-parse", "--short", "HEAD"], timeout=5) + commit = lines[0] if lines else "" + + rows.append({"Build": "llama.cpp source", "Status": f"Present {commit}".strip(), "Path": str(root)}) + for build_name in ("build-cpu", "build-cuda", "build-cuda-mmq", "build-vulkan"): + build_dir = root / build_name + bin_dir = build_dir / "bin" + built = [ + name + for name in ("llama-cli", "llama-server", "llama-bench") + if (bin_dir / name).exists() + ] + status = "Built: " + ", ".join(built) if built else "Missing" + rows.append({"Build": build_name, "Status": status, "Path": str(build_dir)}) + return rows + + +def ollama_cached_models() -> list[dict[str, str]]: + manifests = Path.home() / ".ollama/models/manifests" + if not manifests.exists(): + return [{"Model": "(none)", "Size": "", "Manifest": str(manifests)}] + + rows: list[dict[str, str]] = [] + for manifest in sorted(manifests.rglob("*")): + if not manifest.is_file(): + continue + try: + rel = manifest.relative_to(manifests) + parts = rel.parts + model = "/".join(parts[:-1]) + ":" + parts[-1] if len(parts) >= 2 else rel.as_posix() + payload = json.loads(manifest.read_text(encoding="utf-8", 
errors="replace")) + size = sum(int(layer.get("size", 0)) for layer in payload.get("layers", [])) + rows.append({"Model": model, "Size": format_size(size), "Manifest": str(manifest)}) + except Exception: + rows.append({"Model": manifest.name, "Size": "", "Manifest": str(manifest)}) + return rows or [{"Model": "(none)", "Size": "", "Manifest": str(manifests)}] + + +def nvidia_gpus() -> list[dict[str, str]]: + if not shutil.which("nvidia-smi"): + return [] + + lines = run_command( + [ + "nvidia-smi", + "--query-gpu=name,driver_version,memory.total,memory.used,compute_cap,power.limit", + "--format=csv,noheader,nounits", + ], + timeout=10, + ) + rows: list[dict[str, str]] = [] + for fields in csv.reader(lines): + if len(fields) < 6: + continue + rows.append( + { + "Name": fields[0].strip(), + "Driver": fields[1].strip(), + "VRAM_MB": fields[2].strip(), + "Used_MB": fields[3].strip(), + "Compute": fields[4].strip(), + "Power_W": fields[5].strip(), + } + ) + return rows + + +def parse_int(value: str) -> int | None: + try: + return int(float(value.strip())) + except (AttributeError, OverflowError, TypeError, ValueError): + return None + + +def pytorch_probe() -> list[dict[str, str]]: + python = shutil.which("python3") or shutil.which("python") + if not python: + return [{"Component": "PyTorch", "Status": "Missing", "Detail": "Python 3 not found"}] + + script = """ +import json +try: + import torch + payload = { + "installed": True, + "version": getattr(torch, "__version__", ""), + "cuda_available": bool(torch.cuda.is_available()), + "cuda_version": getattr(torch.version, "cuda", None), + "device": torch.cuda.get_device_name(0) if torch.cuda.is_available() else "", + "vram": torch.cuda.get_device_properties(0).total_memory if torch.cuda.is_available() else 0, + } +except Exception as exc: + payload = {"installed": False, "error": f"{type(exc).__name__}: {exc}"} +print(json.dumps(payload, sort_keys=True)) +""".strip() + + lines = run_command([python, "-c", script], timeout=20) + try: + payload = 
json.loads(lines[-1]) + except Exception: + return [{"Component": "PyTorch", "Status": "Error", "Detail": "; ".join(lines)[:160]}] + + if not payload.get("installed"): + return [{"Component": "PyTorch", "Status": "Missing", "Detail": str(payload.get("error", ""))[:160]}] + + status = "CUDA ready" if payload.get("cuda_available") else "Installed, CUDA unavailable" + detail = f"{payload.get('version', '')}; CUDA {payload.get('cuda_version') or 'n/a'}" + if payload.get("device"): + detail += f"; {payload['device']} ({format_size(int(payload.get('vram') or 0))})" + return [{"Component": "PyTorch", "Status": status, "Detail": detail[:160]}] + + +def max_vram_mb(gpu_rows: list[dict[str, str]]) -> int: + values = [parse_int(row["VRAM_MB"]) for row in gpu_rows] + return max([value for value in values if value is not None], default=0) + + +def local_llm_profile( + gpu_rows: list[dict[str, str]], + tool_rows: list[dict[str, str]], + torch_rows: list[dict[str, str]], + service_rows: list[dict[str, str]], + llama_rows: list[dict[str, str]], +) -> tuple[list[dict[str, str]], list[str], list[str]]: + tool_status = {row["Tool"]: row["Status"] for row in tool_rows} + service_status = {row["Name"]: row for row in service_rows} + vram_mb = max_vram_mb(gpu_rows) + llama_status = {row["Build"]: row["Status"] for row in llama_rows} + review: list[str] = [] + optional: list[str] = [] + + if vram_mb <= 0: + gpu_tier = "CPU or non-NVIDIA profile" + backend = "CPU llama.cpp/Ollama inference; avoid GPU training assumptions." + model_target = "1B-4B quantized models for interactive work." + context_target = "2k-8k context unless benchmarks prove more headroom." + training_target = "CPU-only dataset prep, evals, and tiny smoke tests." + elif vram_mb < 8 * 1024: + gpu_tier = "Small NVIDIA GPU profile (under 8 GB VRAM)" + backend = "GGUF inference through Ollama, Jan, or llama.cpp; PyTorch only after CUDA smoke passes." 
+ model_target = "3B-8B Q4/IQ4-class models; avoid 27B-31B interactive local-agent loops here." + context_target = "4k-16k for inference; 256-512 tokens for training/autoresearch experiments." + training_target = "Tiny from-scratch models, classifiers, eval sweeps, and very small LoRA tests." + review.append("- NVIDIA VRAM is under 8 GB; tune for small-model workloads, not large local fine-tunes.") + elif vram_mb < 16 * 1024: + gpu_tier = "Mid NVIDIA GPU profile" + backend = "GGUF inference plus selective PyTorch fine-tune experiments." + model_target = "7B-14B quantized inference; small LoRA experiments after benchmarking." + context_target = "8k-32k for inference; benchmark before larger context." + training_target = "Small LoRA/QLoRA experiments with conservative batch and sequence length." + else: + gpu_tier = "Large local-GPU profile" + backend = "llama.cpp/Ollama/Jan for inference and PyTorch for broader fine-tune experiments." + model_target = "14B+ quantized inference and larger LoRA experiments, subject to benchmarks." + context_target = "16k-64k after prompt-eval and memory tests." + training_target = "LoRA/QLoRA and longer autoresearch sweeps with checkpointing." 
+ + torch_status = torch_rows[0]["Status"] if torch_rows else "Missing" + if "CUDA ready" not in torch_status: + review.append("- PyTorch CUDA is not ready; training experiments should wait for a CUDA smoke test.") + if tool_status.get("uv") == "Missing": + optional.append("- uv is missing; Python ML environments will be slower to create and reproduce.") + if tool_status.get("cmake") == "Missing": + optional.append("- cmake is missing; local llama.cpp builds will fail until it is installed.") + if tool_status.get("nvcc") == "Missing": + optional.append("- nvcc is missing; CUDA extension builds are not available, but prebuilt PyTorch wheels can still work.") + gpu_builds = ("build-cuda", "build-cuda-mmq", "build-vulkan") + if all("Built:" not in llama_status.get(build_name, "") for build_name in gpu_builds): + optional.append("- llama.cpp GPU build is missing; current llama.cpp binaries are CPU-only.") + + ollama = service_status.get("ollama", {}) + if ollama.get("Installed") == "Yes" and ollama.get("Running") != "Yes": + optional.append("- Ollama is installed but not running; start it before runtime benchmarks.") + + rows = [ + {"Signal": "GPU tier", "Recommendation": gpu_tier}, + {"Signal": "Backend", "Recommendation": backend}, + {"Signal": "Model target", "Recommendation": model_target}, + {"Signal": "Context target", "Recommendation": context_target}, + {"Signal": "Training target", "Recommendation": training_target}, + {"Signal": "Safe overnight jobs", "Recommendation": "read-only scans, llama.cpp/Ollama benchmarks, eval sweeps, dataset prep"}, + {"Signal": "Manual-gated jobs", "Recommendation": "driver/CUDA changes, service changes, firmware/RF/Wi-Fi actions, large downloads"}, + ] + return rows, review, optional + + +def autoresearch_queue( + llama_rows: list[dict[str, str]], + ollama_model_rows: list[dict[str, str]], + torch_rows: list[dict[str, str]], +) -> list[dict[str, str]]: + llama_status = {row["Build"]: row["Status"] for row in llama_rows} + has_mmq 
= "Built:" in llama_status.get("build-cuda-mmq", "") + has_cuda = "Built:" in llama_status.get("build-cuda", "") or has_mmq + models = [row["Model"] for row in ollama_model_rows if row.get("Model") and row.get("Model") != "(none)"] + torch_status = torch_rows[0]["Status"] if torch_rows else "Missing" + + runtime = "llama.cpp CUDA-MMQ" if has_mmq else "llama.cpp CUDA" if has_cuda else "llama.cpp CPU/Ollama" + model = models[0] if models else "no cached model detected" + training_gate = "closed" if "CUDA ready" not in torch_status else "manual approval required" + + return [ + {"Queue": "Runtime", "State": runtime, "Boundary": "read-only inference and benchmarks"}, + {"Queue": "Seed model", "State": model, "Boundary": "use cached models unless a user approves downloads"}, + {"Queue": "Unattended OK", "State": "AppLens scans, llama.cpp benchmarks, eval sweeps, dataset prep", "Boundary": "no service/system changes"}, + {"Queue": "Training", "State": training_gate, "Boundary": "wait for PyTorch CUDA smoke test and user approval"}, + {"Queue": "Stop conditions", "State": "capture metrics, cap run time, keep logs", "Boundary": "abort on OOM, thermal issues, or failed smoke tests"}, + ] + + def storage_hotspots() -> list[dict[str, object]]: home = Path.home() candidates = [ @@ -323,10 +578,12 @@ def build_findings( service_rows: list[dict[str, str]], storage_rows: list[dict[str, object]], repo_rows: list[dict[str, object]], + llm_review: list[str] | None = None, + llm_optional: list[str] | None = None, ) -> tuple[list[str], list[str], list[str]]: stable = ["- Audit mode only; no changes were made."] - review: list[str] = [] - optional: list[str] = [] + review: list[str] = [*(llm_review or [])] + optional: list[str] = [*(llm_optional or [])] running = {row["Name"] for row in service_rows if row.get("Running") == "Yes"} if {"docker", "colima", "podman"} & running: @@ -371,7 +628,14 @@ def build_report() -> str: service_rows = key_services() storage_rows = storage_hotspots() repo_rows = repo_placement() - stable, 
review, optional = build_findings(startup_rows, service_rows, storage_rows, repo_rows) + llm_tool_rows = local_llm_tools() + llama_rows = llama_cpp_builds() + ollama_model_rows = ollama_cached_models() + gpu_rows = nvidia_gpus() + torch_rows = pytorch_probe() + llm_rows, llm_review, llm_optional = local_llm_profile(gpu_rows, llm_tool_rows, torch_rows, service_rows, llama_rows) + autoresearch_rows = autoresearch_queue(llama_rows, ollama_model_rows, torch_rows) + stable, review, optional = build_findings(startup_rows, service_rows, storage_rows, repo_rows, llm_review, llm_optional) lines: list[str] = [] lines.append("=== AppLens-Tune Audit Results ===") @@ -387,6 +651,13 @@ def build_report() -> str: lines.extend(section("--- Stability Checks ---", stable)) lines.extend(section("--- Review Items ---", review)) lines.extend(section("--- Optional Improvements ---", optional)) + lines.extend(section("--- Local LLM Profile ---", table(llm_rows, ["Signal", "Recommendation"]))) + lines.extend(section("--- Auto-Research Queue ---", table(autoresearch_rows, ["Queue", "State", "Boundary"]))) + lines.extend(section("--- NVIDIA GPU Profile ---", table(gpu_rows, ["Name", "Driver", "VRAM_MB", "Used_MB", "Compute", "Power_W"]))) + lines.extend(section("--- PyTorch CUDA Probe ---", table(torch_rows, ["Component", "Status", "Detail"]))) + lines.extend(section("--- Local LLM Toolchain ---", table(llm_tool_rows, ["Tool", "Status", "Detail"]))) + lines.extend(section("--- llama.cpp Builds ---", table(llama_rows, ["Build", "Status", "Path"]))) + lines.extend(section("--- Ollama Cached Models ---", table(ollama_model_rows, ["Model", "Size", "Manifest"]))) lines.extend(section("--- Top Memory Processes ---", table(top_processes(), ["Name", "PID", "RSS_MB", "CPU_%"]))) lines.extend(section("--- Startup Entries ---", table(startup_rows, ["Name", "State", "Source"]))) lines.extend(section("--- Key Services/Processes ---", table(service_rows, ["Name", "Installed", "Running"]))) diff --git 
a/README.md b/README.md index 1b4dbde..c1bda40 100644 --- a/README.md +++ b/README.md @@ -21,7 +21,7 @@ AppLens is a local-first audit tool for understanding what is installed, running The repository currently includes three surfaces: - **AppLens**: cross-platform installed-app inventory scripts for Windows, macOS, and Linux. -- **AppLens-Tune**: read-only workstation diagnostics and tune-plan guidance for startup load, services, local dev tooling, storage hotspots, and repo placement. +- **AppLens-Tune**: read-only workstation diagnostics and tune-plan guidance for startup load, services, local dev tooling, local AI readiness, storage hotspots, and repo placement. - **AppLens-desktop**: a CSI-branded Windows desktop app built with WinUI 3, .NET, and Windows App SDK for eventual Microsoft Store packaging. ## Safety Model diff --git a/docs/AppLens-Tune-LLM-Profile.md b/docs/AppLens-Tune-LLM-Profile.md new file mode 100644 index 0000000..4415320 --- /dev/null +++ b/docs/AppLens-Tune-LLM-Profile.md @@ -0,0 +1,101 @@ +# AppLens-Tune Local LLM Profile + +## Purpose + +AppLens-Tune should turn machine evidence into a local-LLM runtime profile. It should not choose a model by hype or parameter count. It should measure the host, identify the safe workload class, and recommend runtime settings that can be verified with benchmarks. + +## Current Prototype + +`AppLens-Tune.py` now emits read-only sections for: + +- NVIDIA GPU profile: driver, VRAM, current VRAM use, compute capability, power limit. +- PyTorch CUDA probe: installed state, CUDA availability, version, device, and VRAM. +- Local LLM toolchain: Git, Python, pip, uv, cmake, make, compilers, Docker, Ollama, `nvidia-smi`, and `nvcc`. +- llama.cpp builds: source checkout plus CPU, CUDA, CUDA-MMQ, and Vulkan build folders. +- Ollama cached models: offline manifest detection even when the Ollama daemon is stopped. 
+- Local LLM profile: backend, model target, context target, training target, safe overnight jobs, and manual-gated jobs. +- Auto-research queue: runtime, seed model, unattended-safe jobs, training gates, and stop-condition guidance. + +The .NET backend now mirrors this direction with a `LocalAiProfile` contract, a read-only profile builder, exported Markdown/HTML sections, readiness highlights, and a tune-plan item that keeps training manual-gated. + +## Dogfood Finding + +The gaming PC is a small-GPU node: + +- Ryzen 5 5600X, 32 GB RAM. +- GTX 1660 SUPER, 6 GB VRAM, compute capability 7.5. +- NVIDIA driver is present. +- Ollama is installed but not running. +- PyTorch is not installed. +- `uv` is missing; `cmake`, `ninja`, `nvcc`, Vulkan tools, and Docker are present. +- Local llama.cpp CPU, CUDA, and CUDA-MMQ builds exist under `/home/cody/local-llm/src/llama.cpp`. +- `qwen2.5:7b` is cached in Ollama as a GGUF blob and can be used directly by llama.cpp. +- CPU-only llama.cpp is too slow for quick interactive use on `qwen2.5:7b`. +- CUDA full offload works. The best measured build for this GTX 1660 SUPER is the CUDA-MMQ build. + +This should be treated as a small-model/autoresearch worker, not a large fine-tune host. + +## llama.cpp Benchmark + +Model: `qwen2.5:7b`, Q4_K_M GGUF, 7.6B params, 4.36 GB model blob. 
+ +| Build | GPU layers | Prompt eval | Generation | +| --- | ---: | ---: | ---: | +| CUDA | 0 | 104.50 tok/s | 6.64 tok/s | +| CUDA | 20 | 137.64 tok/s | 17.39 tok/s | +| CUDA | 99 | 161.19 tok/s | 50.02 tok/s | +| CUDA-MMQ | 0 | 193.28 tok/s | 6.63 tok/s | +| CUDA-MMQ | 20 | 333.91 tok/s | 16.53 tok/s | +| CUDA-MMQ | 99 | 558.29 tok/s | 49.96 tok/s | + +Recommended llama.cpp runtime for this host: + +```bash +~/local-llm/src/llama.cpp/build-cuda-mmq/bin/llama-cli \ + -m ~/local-llm/models/qwen2.5-7b-ollama.gguf \ + -ngl 99 \ + -t 8 +``` + +For service experiments, start from the same binary family: + +```bash +~/local-llm/src/llama.cpp/build-cuda-mmq/bin/llama-server \ + -m ~/local-llm/models/qwen2.5-7b-ollama.gguf \ + -ngl 99 \ + -t 8 \ + --host 127.0.0.1 \ + --port 8080 +``` + +## Recommended Profile + +- Backend: GGUF inference through Ollama, Jan, or llama.cpp first. +- Model target: 3B-8B Q4/IQ4-class models. +- Inference context: start around 4k-16k and benchmark. +- Training/autoresearch context: start around 256-512 tokens. +- llama.cpp acceleration: prefer the CUDA-MMQ build with full offload (`-ngl 99`) on this GTX 1660 SUPER. +- Good jobs: read-only scans, llama.cpp/Ollama benchmarks, eval sweeps, dataset prep, tiny classifier training. +- Gated jobs: driver/CUDA changes, service changes, firmware/RF/Wi-Fi actions, and large model downloads. + +## Product Boundary + +Keep the separation explicit: + +- AppLens measures installed apps, tools, hardware, services, storage, and runtime state. +- AppLens-Tune recommends and later applies user-approved configuration. +- LLM Tune learns from benchmark results and proposes runtime profiles. + +The first metric set should be tokens/sec, time to first token, prompt eval speed, VRAM/RAM headroom, load time, crash rate, and quality tradeoff. 
+ +## Backend Contract + +`TuneSummary.LocalAiProfile` captures the local AI posture without starting a model or changing the machine: + +- `Readiness`: unknown, limited, inference-ready, or training-ready. +- `WorkloadClass`: plain-language machine role. +- `RecommendedRuntime`: current best runtime family. +- `TrainingReady` and `TrainingGate`: explicit training boundary. +- `Signals`: GPU, CUDA compiler, llama.cpp, Ollama, PyTorch CUDA, and model-cache evidence. + +This gives AppLens-Tune and its future extensions a stable place to attach benchmark results, run manifests, and user-approved training state. diff --git a/docs/AppLens-Tune-Product-Outline.md b/docs/AppLens-Tune-Product-Outline.md index 9fdbbc7..b7cd695 100644 --- a/docs/AppLens-Tune-Product-Outline.md +++ b/docs/AppLens-Tune-Product-Outline.md @@ -247,6 +247,8 @@ Implemented in the current backend: - JSON, Markdown, and HTML report output for tune plans. - AppLens-desktop tune plan list. - Unit coverage for startup, service/admin, privacy, and report contract behavior. +- Local AI/autoresearch readiness profile in the backend contract. +- Read-only local AI signals in exports and tune-plan guidance. 
Still intentionally not implemented: @@ -256,4 +258,5 @@ Still intentionally not implemented: - cache deletion - admin elevation - rollback execution +- unattended training or model downloads diff --git a/src/AppLens.Backend/AuditService.cs b/src/AppLens.Backend/AuditService.cs index 35a7c9d..925736b 100644 --- a/src/AppLens.Backend/AuditService.cs +++ b/src/AppLens.Backend/AuditService.cs @@ -8,6 +8,7 @@ public sealed class AuditService private readonly RulesEngine _rulesEngine; private readonly TunePlanBuilder _tunePlanBuilder; private readonly ReadinessSummaryBuilder _readinessSummaryBuilder; + private readonly LocalAiProfileBuilder _localAiProfileBuilder; public AuditService() { @@ -17,6 +18,7 @@ public AuditService() _rulesEngine = new RulesEngine(); _tunePlanBuilder = new TunePlanBuilder(); _readinessSummaryBuilder = new ReadinessSummaryBuilder(); + _localAiProfileBuilder = new LocalAiProfileBuilder(); } public async Task RunAsync(CancellationToken cancellationToken = default) @@ -42,12 +44,23 @@ public async Task RunAsync(CancellationToken cancellationToken = new TuneSummary(), timeout.Token).ConfigureAwait(false); + var tuneWithLocalAiProfile = new TuneSummary + { + TopProcesses = tune.TopProcesses, + StartupEntries = tune.StartupEntries, + Services = tune.Services, + StorageHotspots = tune.StorageHotspots, + RepoPlacements = tune.RepoPlacements, + ToolProbes = tune.ToolProbes, + LocalAiProfile = _localAiProfileBuilder.Build(tune) + }; + var snapshot = new AuditSnapshot { GeneratedAt = DateTimeOffset.Now, Machine = machine, Inventory = inventory, - Tune = tune, + Tune = tuneWithLocalAiProfile, ProbeStatuses = _probeRunner.Statuses.ToList() }; diff --git a/src/AppLens.Backend/LocalAiProfileBuilder.cs b/src/AppLens.Backend/LocalAiProfileBuilder.cs new file mode 100644 index 0000000..3bdc24c --- /dev/null +++ b/src/AppLens.Backend/LocalAiProfileBuilder.cs @@ -0,0 +1,129 @@ +namespace AppLens.Backend; + +public sealed class LocalAiProfileBuilder +{ + public 
LocalAiProfile Build(TuneSummary tune) + { + var signals = new List + { + Signal("NVIDIA GPU", HasSucceededProbe(tune, "NVIDIA GPU"), Detail(tune, "NVIDIA GPU")), + Signal("CUDA compiler", HasSucceededProbe(tune, "CUDA Compiler"), Detail(tune, "CUDA Compiler")), + Signal("llama.cpp", HasSucceededProbe(tune, "llama.cpp"), Detail(tune, "llama.cpp")), + Signal("Ollama", HasSucceededProbe(tune, "Ollama Summary"), Detail(tune, "Ollama Summary")), + Signal("PyTorch CUDA", HasPyTorchCuda(tune), Detail(tune, "PyTorch CUDA")), + Signal("Model cache", HasModelCache(tune), ModelCacheDetail(tune)) + }; + + var hasGpu = signals.Any(signal => signal.Name == "NVIDIA GPU" && signal.Status == LocalAiSignalStatus.Present); + var hasLlamaCpp = signals.Any(signal => signal.Name == "llama.cpp" && signal.Status == LocalAiSignalStatus.Present); + var hasOllama = signals.Any(signal => signal.Name == "Ollama" && signal.Status == LocalAiSignalStatus.Present); + var hasTorch = signals.Any(signal => signal.Name == "PyTorch CUDA" && signal.Status == LocalAiSignalStatus.Present); + var hasCache = signals.Any(signal => signal.Name == "Model cache" && signal.Status == LocalAiSignalStatus.Present); + + var trainingReady = hasGpu && hasTorch; + var readiness = trainingReady + ? LocalAiReadiness.TrainingReady + : hasGpu && (hasLlamaCpp || hasOllama || hasCache) + ? LocalAiReadiness.InferenceReady + : hasLlamaCpp || hasOllama || hasCache + ? LocalAiReadiness.Limited + : LocalAiReadiness.Unknown; + + return new LocalAiProfile + { + Readiness = readiness, + WorkloadClass = WorkloadClass(hasGpu, signals), + RecommendedRuntime = RecommendedRuntime(hasGpu, hasLlamaCpp, hasOllama), + TrainingReady = trainingReady, + TrainingGate = trainingReady + ? "PyTorch CUDA appears available; still require explicit user approval before training." 
+ : "Training remains gated until PyTorch CUDA passes a smoke test and the user approves a run.", + Signals = signals + }; + } + + private static LocalAiSignal Signal(string name, bool present, string detail) => + new() + { + Name = name, + Status = present ? LocalAiSignalStatus.Present : LocalAiSignalStatus.Missing, + Detail = detail + }; + + private static bool HasSucceededProbe(TuneSummary tune, string probeName) => + tune.ToolProbes.Any(probe => + probe.Name.Contains(probeName, StringComparison.OrdinalIgnoreCase) && + probe.Status.Equals(ProbeState.Succeeded.ToString(), StringComparison.OrdinalIgnoreCase) && + !LooksLikeError(probe.Output)); + + private static string Detail(TuneSummary tune, string probeName) => + tune.ToolProbes + .FirstOrDefault(probe => probe.Name.Contains(probeName, StringComparison.OrdinalIgnoreCase)) + ?.Output ?? ""; + + private static bool HasPyTorchCuda(TuneSummary tune) + { + var output = Detail(tune, "PyTorch CUDA"); + return HasSucceededProbe(tune, "PyTorch CUDA") && + output.Contains("True", StringComparison.OrdinalIgnoreCase) && + !output.Contains("no cuda", StringComparison.OrdinalIgnoreCase); + } + + private static bool LooksLikeError(string output) => + output.Contains("No module named", StringComparison.OrdinalIgnoreCase) || + output.Contains("not found", StringComparison.OrdinalIgnoreCase) || + output.Contains("not recognized", StringComparison.OrdinalIgnoreCase) || + output.Contains("could not connect", StringComparison.OrdinalIgnoreCase) || + output.Contains("timed out", StringComparison.OrdinalIgnoreCase) || + output.Contains("error", StringComparison.OrdinalIgnoreCase); + + private static bool HasModelCache(TuneSummary tune) => + tune.StorageHotspots.Any(hotspot => + hotspot.Location.Contains(".ollama", StringComparison.OrdinalIgnoreCase) && + hotspot.Bytes > 0); + + private static string ModelCacheDetail(TuneSummary tune) + { + var cache = tune.StorageHotspots.FirstOrDefault(hotspot => + 
hotspot.Location.Contains(".ollama", StringComparison.OrdinalIgnoreCase)); + return cache is null ? "" : $"{cache.Location}: {Formatting.Size(cache.Bytes)}"; + } + + private static string WorkloadClass(bool hasGpu, List signals) + { + if (!hasGpu) + { + return "CPU/local-service only; use small models or remote endpoints for heavier work."; + } + + var gpuDetail = signals.First(signal => signal.Name == "NVIDIA GPU").Detail; + if (gpuDetail.Contains("6144", StringComparison.OrdinalIgnoreCase) || + gpuDetail.Contains("6 GB", StringComparison.OrdinalIgnoreCase) || + gpuDetail.Contains("1660", StringComparison.OrdinalIgnoreCase)) + { + return "Small-model/autoresearch worker: 3B-8B quantized inference, eval sweeps, and dataset prep."; + } + + return "GPU local-AI workstation; benchmark model size, context, and training jobs before unattended use."; + } + + private static string RecommendedRuntime(bool hasGpu, bool hasLlamaCpp, bool hasOllama) + { + if (hasGpu && hasLlamaCpp) + { + return "llama.cpp CUDA/MMQ with full offload when VRAM allows."; + } + + if (hasGpu && hasOllama) + { + return "Ollama or llama.cpp GPU inference after runtime benchmark."; + } + + if (hasLlamaCpp || hasOllama) + { + return "CPU llama.cpp/Ollama for light local tasks; prefer remote or larger GPU hosts for heavy work."; + } + + return "Install or connect a local model runtime before autoresearch."; + } +} diff --git a/src/AppLens.Backend/Models.cs b/src/AppLens.Backend/Models.cs index c9778a6..e140fdf 100644 --- a/src/AppLens.Backend/Models.cs +++ b/src/AppLens.Backend/Models.cs @@ -67,6 +67,7 @@ public sealed class TuneSummary public List StorageHotspots { get; init; } = []; public List RepoPlacements { get; init; } = []; public List ToolProbes { get; init; } = []; + public LocalAiProfile LocalAiProfile { get; init; } = new(); } public sealed class ProcessSnapshot @@ -115,6 +116,41 @@ public sealed class ToolProbe public string Output { get; init; } = ""; } +public sealed class LocalAiProfile 
+{ + public LocalAiReadiness Readiness { get; init; } = LocalAiReadiness.Unknown; + public string WorkloadClass { get; init; } = ""; + public string RecommendedRuntime { get; init; } = ""; + public bool TrainingReady { get; init; } + public string TrainingGate { get; init; } = ""; + public List Signals { get; init; } = []; +} + +public sealed class LocalAiSignal +{ + public string Name { get; init; } = ""; + public LocalAiSignalStatus Status { get; init; } = LocalAiSignalStatus.Unknown; + public string Detail { get; init; } = ""; +} + +[JsonConverter(typeof(JsonStringEnumConverter))] +public enum LocalAiReadiness +{ + Unknown, + Limited, + InferenceReady, + TrainingReady +} + +[JsonConverter(typeof(JsonStringEnumConverter))] +public enum LocalAiSignalStatus +{ + Unknown, + Present, + Missing, + Review +} + public sealed class Finding { public FindingSeverity Severity { get; init; } diff --git a/src/AppLens.Backend/ReadinessSummaryBuilder.cs b/src/AppLens.Backend/ReadinessSummaryBuilder.cs index 7b6e1b9..9755959 100644 --- a/src/AppLens.Backend/ReadinessSummaryBuilder.cs +++ b/src/AppLens.Backend/ReadinessSummaryBuilder.cs @@ -84,6 +84,11 @@ private static List BuildHighlights( highlights.Add($"{snapshot.Tune.StorageHotspots.Count} storage hotspot(s) were measured for review."); } + if (snapshot.Tune.LocalAiProfile.Readiness != LocalAiReadiness.Unknown) + { + highlights.Add($"Local AI profile: {snapshot.Tune.LocalAiProfile.Readiness}; {snapshot.Tune.LocalAiProfile.WorkloadClass}"); + } + return highlights; } diff --git a/src/AppLens.Backend/ReportWriter.cs b/src/AppLens.Backend/ReportWriter.cs index 71ce954..860e98b 100644 --- a/src/AppLens.Backend/ReportWriter.cs +++ b/src/AppLens.Backend/ReportWriter.cs @@ -44,6 +44,7 @@ public string WriteMarkdown(AuditSnapshot snapshot, bool includeRawDetails = fal AppendReadiness(builder, snapshot); AppendFindings(builder, snapshot); AppendTunePlan(builder, snapshot); + AppendLocalAiProfile(builder, 
snapshot.Tune.LocalAiProfile); AppendInventory(builder, snapshot); AppendTune(builder, snapshot); AppendProbeStatuses(builder, snapshot); @@ -128,6 +129,7 @@ public string WriteHtml(AuditSnapshot snapshot, bool includeRawDetails = false) {{HtmlTable("Tune Plan", ["Category", "Risk", "Item", "Guidance", "Future Action"], snapshot.TunePlan.Select(item => new[] { item.Category.ToString(), item.Risk.ToString(), item.Title, item.Guidance, item.ProposedAction.Description }))}} + {{HtmlLocalAiProfile(snapshot.Tune.LocalAiProfile)}} {{HtmlTable("Desktop Applications", ["Name", "Version", "Publisher", "Source"], snapshot.Inventory.DesktopApplications.Select(app => new[] { app.Name, app.Version, app.Publisher, app.Source }))}} {{HtmlTable("Store Applications", ["Name", "Version", "Publisher", "Source"], snapshot.Inventory.StoreApplications.Select(app => new[] { app.Name, app.Version, app.Publisher, app.Source }))}} {{HtmlTable("Top Processes", ["Name", "PID", "Memory", "CPU Seconds"], snapshot.Tune.TopProcesses.Select(process => new[] { process.Name, process.Id.ToString(), Formatting.Size(process.WorkingSetBytes), process.CpuSeconds.ToString("N1") }))}} @@ -169,6 +171,26 @@ private static void AppendReadiness(StringBuilder builder, AuditSnapshot snapsho builder.AppendLine(); } + private static void AppendLocalAiProfile(StringBuilder builder, LocalAiProfile profile) + { + builder.AppendLine("## Local AI Readiness"); + builder.AppendLine(); + builder.AppendLine($"Readiness: {profile.Readiness}"); + builder.AppendLine($"Workload class: {Formatting.MarkdownEscape(profile.WorkloadClass)}"); + builder.AppendLine($"Recommended runtime: {Formatting.MarkdownEscape(profile.RecommendedRuntime)}"); + builder.AppendLine($"Training ready: {(profile.TrainingReady ? 
"Yes" : "No")}"); + builder.AppendLine($"Training gate: {Formatting.MarkdownEscape(profile.TrainingGate)}"); + builder.AppendLine(); + builder.AppendLine("| Signal | Status | Detail |"); + builder.AppendLine("| --- | --- | --- |"); + foreach (var signal in profile.Signals) + { + builder.AppendLine($"| {Formatting.MarkdownEscape(signal.Name)} | {signal.Status} | {Formatting.MarkdownEscape(signal.Detail)} |"); + } + + builder.AppendLine(); + } + private static void AppendFindings(StringBuilder builder, AuditSnapshot snapshot) { builder.AppendLine("## Findings"); @@ -272,4 +294,20 @@ private static string HtmlTable(string title, string[] columns, IEnumerable """; } + + private static string HtmlLocalAiProfile(LocalAiProfile profile) + { + var summaryRows = new[] + { + new[] { "Readiness", profile.Readiness.ToString() }, + new[] { "Workload class", profile.WorkloadClass }, + new[] { "Recommended runtime", profile.RecommendedRuntime }, + new[] { "Training ready", profile.TrainingReady ? "Yes" : "No" }, + new[] { "Training gate", profile.TrainingGate } + }; + + var signalRows = profile.Signals.Select(signal => new[] { signal.Name, signal.Status.ToString(), signal.Detail }); + return HtmlTable("Local AI Readiness", ["Metric", "Value"], summaryRows) + + HtmlTable("Local AI Signals", ["Signal", "Status", "Detail"], signalRows); + } } diff --git a/src/AppLens.Backend/TuneCollector.cs b/src/AppLens.Backend/TuneCollector.cs index d181802..63f0bb5 100644 --- a/src/AppLens.Backend/TuneCollector.cs +++ b/src/AppLens.Backend/TuneCollector.cs @@ -377,6 +377,13 @@ private static RepoPlacement CountRepos(string root, CancellationToken cancellat private List GetToolProbes() => [ + _probeRunner.RunTool("Git", "git", "--version", TimeSpan.FromSeconds(5)), + _probeRunner.RunTool("Python", "python", "--version", TimeSpan.FromSeconds(5)), + _probeRunner.RunTool("CMake", "cmake", "--version", TimeSpan.FromSeconds(5)), + _probeRunner.RunTool("NVIDIA GPU", "nvidia-smi", 
"--query-gpu=name,memory.total,driver_version,compute_cap --format=csv,noheader,nounits", TimeSpan.FromSeconds(8)), + _probeRunner.RunTool("CUDA Compiler", "nvcc", "--version", TimeSpan.FromSeconds(8)), + _probeRunner.RunTool("llama.cpp", "llama-cli", "--version", TimeSpan.FromSeconds(5)), + _probeRunner.RunTool("PyTorch CUDA", "python", "-c \"import torch; print(torch.__version__); print(torch.cuda.is_available()); print(torch.cuda.get_device_name(0) if torch.cuda.is_available() else 'no cuda')\"", TimeSpan.FromSeconds(12)), _probeRunner.RunTool("WSL Status", "wsl.exe", "--status", TimeSpan.FromSeconds(8)), _probeRunner.RunTool("WSL Distros", "wsl.exe", "-l -v", TimeSpan.FromSeconds(8)), _probeRunner.RunTool("Docker Summary", "docker", "system df", TimeSpan.FromSeconds(8)), diff --git a/src/AppLens.Backend/TunePlanBuilder.cs b/src/AppLens.Backend/TunePlanBuilder.cs index 383ece3..7c4773e 100644 --- a/src/AppLens.Backend/TunePlanBuilder.cs +++ b/src/AppLens.Backend/TunePlanBuilder.cs @@ -20,6 +20,7 @@ public List Build(AuditSnapshot snapshot) AddStartupPlanItems(snapshot, items); AddServicePlanItems(snapshot, items); + AddLocalAiPlanItem(snapshot, items); return items .GroupBy(item => item.Id, StringComparer.OrdinalIgnoreCase) @@ -238,6 +239,33 @@ private static void AddServicePlanItems(AuditSnapshot snapshot, List items) + { + var profile = snapshot.Tune.LocalAiProfile; + if (profile.Readiness == LocalAiReadiness.Unknown) + { + return; + } + + items.Add(new TunePlanItem + { + Id = "local-ai-autoresearch-profile", + Category = profile.TrainingReady ? TunePlanCategory.Optional : TunePlanCategory.Review, + Risk = profile.TrainingReady ? 
TunePlanRisk.Medium : TunePlanRisk.Low, + Title = "Local autoresearch profile", + Evidence = $"{profile.Readiness}: {profile.WorkloadClass}", + Guidance = $"{profile.RecommendedRuntime} Keep training jobs manual-gated until the user approves scope, model, and stop conditions.", + BackupPlan = "Before future training, save the model path, benchmark output, dataset location, and run manifest.", + VerificationStep = profile.TrainingGate, + ProposedAction = new ProposedAction + { + Kind = ProposedActionKind.ManualReview, + ExecutionState = TunePlanExecutionState.ReadOnlyOnly, + Description = "Read-only guidance: benchmark and plan autoresearch jobs; do not start training automatically." + } + }); + } + private static bool IsEnabled(StartupEntry entry) => entry.State.Equals("Enabled", StringComparison.OrdinalIgnoreCase) || entry.State.Equals("Unknown", StringComparison.OrdinalIgnoreCase); diff --git a/tests/AppLens.Backend.Tests/LocalAiProfileBuilderTests.cs b/tests/AppLens.Backend.Tests/LocalAiProfileBuilderTests.cs new file mode 100644 index 0000000..7fb22f8 --- /dev/null +++ b/tests/AppLens.Backend.Tests/LocalAiProfileBuilderTests.cs @@ -0,0 +1,91 @@ +namespace AppLens.Backend.Tests; + +public sealed class LocalAiProfileBuilderTests +{ + [Fact] + public void Cuda_llama_cpp_profile_is_inference_ready_but_training_gated() + { + var tune = new TuneSummary + { + ToolProbes = + [ + new ToolProbe { Name = "NVIDIA GPU", Status = "Succeeded", Output = "NVIDIA GeForce GTX 1660 SUPER, 6144 MiB" }, + new ToolProbe { Name = "CUDA Compiler", Status = "Succeeded", Output = "nvcc: NVIDIA (R) Cuda compiler driver" }, + new ToolProbe { Name = "llama.cpp CUDA-MMQ", Status = "Succeeded", Output = "llama-cli llama-server llama-bench" }, + new ToolProbe { Name = "PyTorch CUDA", Status = "Skipped", Output = "ModuleNotFoundError: No module named 'torch'" } + ], + StorageHotspots = + [ + new StorageHotspot { Location = ".ollama", Bytes = 4L * 1024 * 1024 * 1024 } + ] + }; + + var profile = 
new LocalAiProfileBuilder().Build(tune); + + Assert.Equal(LocalAiReadiness.InferenceReady, profile.Readiness); + Assert.False(profile.TrainingReady); + Assert.Contains("small-model", profile.WorkloadClass, StringComparison.OrdinalIgnoreCase); + Assert.Contains(profile.Signals, signal => + signal.Name == "llama.cpp" && + signal.Status == LocalAiSignalStatus.Present); + } + + [Fact] + public void Missing_gpu_keeps_autoresearch_readiness_limited() + { + var tune = new TuneSummary + { + ToolProbes = + [ + new ToolProbe { Name = "NVIDIA GPU", Status = "Skipped", Output = "nvidia-smi not found" }, + new ToolProbe { Name = "Ollama Summary", Status = "Succeeded", Output = "NAME ID SIZE MODIFIED" } + ] + }; + + var profile = new LocalAiProfileBuilder().Build(tune); + + Assert.Equal(LocalAiReadiness.Limited, profile.Readiness); + Assert.False(profile.TrainingReady); + Assert.Contains("CPU", profile.RecommendedRuntime, StringComparison.OrdinalIgnoreCase); + } + + [Fact] + public void Probe_error_output_does_not_count_as_present_runtime() + { + var tune = new TuneSummary + { + ToolProbes = + [ + new ToolProbe { Name = "PyTorch CUDA", Status = "Succeeded", Output = "ModuleNotFoundError: No module named 'torch'" } + ] + }; + + var profile = new LocalAiProfileBuilder().Build(tune); + + Assert.Contains(profile.Signals, signal => + signal.Name == "PyTorch CUDA" && + signal.Status == LocalAiSignalStatus.Missing); + Assert.False(profile.TrainingReady); + } + + [Fact] + public void PyTorch_without_cuda_does_not_open_training_gate() + { + var tune = new TuneSummary + { + ToolProbes = + [ + new ToolProbe { Name = "NVIDIA GPU", Status = "Succeeded", Output = "NVIDIA GeForce RTX, 12288 MiB" }, + new ToolProbe { Name = "PyTorch CUDA", Status = "Succeeded", Output = "2.9.0 False no cuda" } + ] + }; + + var profile = new LocalAiProfileBuilder().Build(tune); + + Assert.Contains(profile.Signals, signal => + signal.Name == "PyTorch CUDA" && + signal.Status == LocalAiSignalStatus.Missing); + 
Assert.False(profile.TrainingReady); + Assert.NotEqual(LocalAiReadiness.TrainingReady, profile.Readiness); + } +} diff --git a/tests/AppLens.Backend.Tests/ReadinessSummaryBuilderTests.cs b/tests/AppLens.Backend.Tests/ReadinessSummaryBuilderTests.cs index a510a11..02d77dd 100644 --- a/tests/AppLens.Backend.Tests/ReadinessSummaryBuilderTests.cs +++ b/tests/AppLens.Backend.Tests/ReadinessSummaryBuilderTests.cs @@ -51,4 +51,26 @@ public void Readiness_summary_counts_tune_plan_and_startup_state() Assert.Equal(10, summary.StorageHotspotBytes); Assert.InRange(summary.Score, 0, 99); } + + [Fact] + public void Readiness_summary_highlights_local_ai_profile() + { + var snapshot = new AuditSnapshot + { + Tune = new TuneSummary + { + LocalAiProfile = new LocalAiProfile + { + Readiness = LocalAiReadiness.InferenceReady, + WorkloadClass = "Small-model/autoresearch worker" + } + } + }; + + var summary = new ReadinessSummaryBuilder().Build(snapshot); + + Assert.Contains(summary.Highlights, highlight => + highlight.Contains("Local AI", StringComparison.OrdinalIgnoreCase) && + highlight.Contains("InferenceReady", StringComparison.OrdinalIgnoreCase)); + } } diff --git a/tests/AppLens.Backend.Tests/ReportWriterTests.cs b/tests/AppLens.Backend.Tests/ReportWriterTests.cs index 77ee79c..dac0cb8 100644 --- a/tests/AppLens.Backend.Tests/ReportWriterTests.cs +++ b/tests/AppLens.Backend.Tests/ReportWriterTests.cs @@ -57,10 +57,13 @@ public void Markdown_and_html_exports_include_core_sections() Assert.Contains("## Readiness Summary", markdown); Assert.Contains("## Findings", markdown); Assert.Contains("## Tune Plan", markdown); + Assert.Contains("## Local AI Readiness", markdown); + Assert.Contains("InferenceReady", markdown); Assert.Contains("## App Inventory", markdown); Assert.Contains("## Workstation Diagnostics", markdown); Assert.Contains("

Findings

", html); Assert.Contains("

Tune Plan

", html); + Assert.Contains("

Local AI Readiness

", html); Assert.Contains("AppLens-desktop", html); } @@ -111,7 +114,19 @@ private static AuditSnapshot FixtureSnapshot() RepoPlacements = [ new RepoPlacement { Root = Path.Combine(profile, "OneDrive", "Documents"), RepoCount = 1, Sample = Path.Combine(profile, "OneDrive", "Documents", "repo") } - ] + ], + LocalAiProfile = new LocalAiProfile + { + Readiness = LocalAiReadiness.InferenceReady, + WorkloadClass = "Small-model/autoresearch worker", + RecommendedRuntime = "llama.cpp CUDA-MMQ with full offload.", + TrainingReady = false, + TrainingGate = "Training remains gated until PyTorch CUDA passes a smoke test.", + Signals = + [ + new LocalAiSignal { Name = "NVIDIA GPU", Status = LocalAiSignalStatus.Present, Detail = "GTX 1660 SUPER" } + ] + } }, Readiness = new ReadinessSummary { diff --git a/tests/AppLens.Backend.Tests/TunePlanBuilderTests.cs b/tests/AppLens.Backend.Tests/TunePlanBuilderTests.cs index c8ee1f7..536f861 100644 --- a/tests/AppLens.Backend.Tests/TunePlanBuilderTests.cs +++ b/tests/AppLens.Backend.Tests/TunePlanBuilderTests.cs @@ -81,4 +81,32 @@ public void Privacy_finding_keeps_v1_read_only() Assert.Equal(TunePlanExecutionState.ReadOnlyOnly, item.ProposedAction.ExecutionState); Assert.Equal(ProposedActionKind.None, item.ProposedAction.Kind); } + + [Fact] + public void Local_ai_profile_adds_read_only_autoresearch_guidance() + { + var snapshot = new AuditSnapshot + { + Tune = new TuneSummary + { + LocalAiProfile = new LocalAiProfile + { + Readiness = LocalAiReadiness.InferenceReady, + WorkloadClass = "Small-model/autoresearch worker", + RecommendedRuntime = "llama.cpp CUDA-MMQ with full offload.", + TrainingReady = false, + TrainingGate = "Training remains gated until PyTorch CUDA passes a smoke test." 
+ } + } + }; + + var plan = new TunePlanBuilder().Build(snapshot); + + var item = Assert.Single(plan, item => item.Title.Contains("autoresearch", StringComparison.OrdinalIgnoreCase)); + Assert.Equal(TunePlanCategory.Review, item.Category); + Assert.Equal(TunePlanRisk.Low, item.Risk); + Assert.Equal(TunePlanExecutionState.ReadOnlyOnly, item.ProposedAction.ExecutionState); + Assert.Contains("llama.cpp", item.Guidance, StringComparison.OrdinalIgnoreCase); + Assert.Contains("Training remains gated", item.VerificationStep, StringComparison.OrdinalIgnoreCase); + } } From d94f4558d3e66b6ee059925de71e4fd128238b98 Mon Sep 17 00:00:00 2001 From: Cody Date: Sat, 2 May 2026 22:21:00 -0700 Subject: [PATCH 2/2] Document AppLens-Tune local AI test run --- docs/AppLens-Tune-Test-Run-2026-05-03.md | 97 ++++++++++++++++++++++++ 1 file changed, 97 insertions(+) create mode 100644 docs/AppLens-Tune-Test-Run-2026-05-03.md diff --git a/docs/AppLens-Tune-Test-Run-2026-05-03.md b/docs/AppLens-Tune-Test-Run-2026-05-03.md new file mode 100644 index 0000000..269b6bf --- /dev/null +++ b/docs/AppLens-Tune-Test-Run-2026-05-03.md @@ -0,0 +1,97 @@ +# AppLens-Tune Local AI Test Run - 2026-05-03 + +## Scope + +Controlled local-AI readiness test for the gaming PC. This run tested inference, local API access, PyTorch CUDA availability, and a tiny AppLens-Tune eval. It did not start training. 
+ +## Host + +- SSH target: `cody@192.168.68.57` +- GPU: NVIDIA GeForce GTX 1660 SUPER, 6 GB VRAM +- llama.cpp source: `/home/cody/local-llm/src/llama.cpp` +- Runtime build: `/home/cody/local-llm/src/llama.cpp/build-cuda-mmq` +- Model: `/home/cody/local-llm/models/qwen2.5-7b-ollama.gguf` + +## llama.cpp Server Smoke + +Server command family: + +```bash +~/local-llm/src/llama.cpp/build-cuda-mmq/bin/llama-server \ + -m ~/local-llm/models/qwen2.5-7b-ollama.gguf \ + -ngl 99 \ + -t 8 \ + -c 4096 \ + --parallel 1 \ + --host 127.0.0.1 \ + --port 8080 +``` + +Results: + +- Remote health check: `{"status":"ok"}` +- Remote OpenAI-compatible chat response: `AppLens local llama is ready.` +- Local SSH tunnel health check at `http://127.0.0.1:18080/health`: `{"status":"ok"}` +- Local tunneled chat response: `AppLens tunnel ready.` +- Local tunneled generation speed: about `51 tok/s` + +## PyTorch CUDA Smoke + +Isolated environment: + +```bash +~/local-llm/envs/torch-cuda +``` + +Installed only into the venv: + +- `torch 2.11.0+cu128` +- `numpy 2.4.4` + +Smoke result: + +```json +{ + "cuda_available": true, + "cuda_version": "12.8", + "device": "NVIDIA GeForce GTX 1660 SUPER", + "torch": "2.11.0+cu128" +} +``` + +## AppLens-Tune Report + +When run from the isolated torch env, AppLens-Tune reports: + +- PyTorch CUDA probe: `CUDA ready` +- Runtime: `llama.cpp CUDA-MMQ` +- Seed model: cached `qwen2.5:7b` +- Training: `manual approval required` + +Report path: + +```bash +/home/cody/applens-cli-20260502/reports/latest-tune-torch-output.txt +``` + +## Tiny Eval + +Eval record path: + +```bash +/home/cody/local-llm/evals/applens-tune-summary-eval.jsonl +``` + +The eval asked the local model to summarize the AppLens-Tune report into readiness, safe next test, and remaining gate. 
+ +Measured result: + +- Prompt tokens: `1206` +- Completion tokens: `83` +- Prompt eval: about `588 tok/s` +- Generation: about `47 tok/s` +- Latency: about `3.9s` + +## Current Boundary + +The machine is ready for local inference tests, AppLens eval sweeps, and small dataset-prep jobs. Training should start only after selecting a tiny controlled target and defining the run manifest, stop conditions, output folder, and expected duration.