diff --git a/docs/design/81-skill-tree.md b/docs/design/81-skill-tree.md index 041341f..80beb04 100644 --- a/docs/design/81-skill-tree.md +++ b/docs/design/81-skill-tree.md @@ -47,10 +47,14 @@ New helpers in `memory/models.py` (mirror `axis_tag`): `AREA_TAG_PREFIX`, critic-clean merge, a one-shot LLM "librarian" distills the merged diff + acceptance criteria into a `SkillCard(area, trigger, procedure, pitfalls, failing_signal, target, confidence)`. `harvest_skills_from_merge(...)` is pure - and injectable (the LLM call is a passed-in callable) so the wiring is unit - tested without network. It calls `record_procedural_skill` with the `area:` - tag and provenance carrying the merged SHA in `evidence_refs`. Emits - `skill_harvested`. + and injectable (both the diff fetch and the LLM call are passed-in callables) + so the wiring is unit tested without network. The diff is fetched in prod via + `gh_issues.pr_diff(pr_url, repo)` (forge-loop's githubkit client — the gh CLI + is deliberately unused and a loop `GH_TOKEN` would break it). It calls + `record_procedural_skill` with the `area:` tag and `pr:` provenance in + `evidence_refs`. Emits `skill_harvested`. (Commit-SHA-based age/expiry of + harvested cards is a follow-up; recipe freshness is held by supersession on + the skill-key.) 2. **Retrieve + inject** (`memory/skills.py` + `worker_brief.py`): `retrieve_skills_for(store, query, *, k)` ranks active procedural cards by area/title/tag token overlap with the ticket, walking most-specific-leaf → diff --git a/src/forge_loop/events.py b/src/forge_loop/events.py index ff9ea02..c719356 100644 --- a/src/forge_loop/events.py +++ b/src/forge_loop/events.py @@ -454,7 +454,7 @@ class SkillHarvestedEvent(EventBase): area: str = "" skill_key: str = "" memory_id: str = "" - sha: str = "" + pr: str = "" confidence: float = Field(ge=0.0, le=1.0, default=1.0) diff --git a/src/forge_loop/runner/learning.py b/src/forge_loop/runner/learning.py index 556907e..974388c 100644 --- a/src/forge_loop/runner/learning.py +++ b/src/forge_loop/runner/learning.py @@ -369,7 +369,7 @@ class HarvestedSkill: issue: int area: str skill_key: str - sha: str + pr_url: str confidence: float title: str @@ -378,38 +378,40 @@ def harvest_skills_from_merge( memory_store: MemoryStore, merged: Iterable[object], *, - fetch_diff: Callable[[int], tuple[str, str]], + fetch_diff: Callable[[str], str], call_llm: Callable[[str], str], now: datetime | None = None, ) -> tuple[HarvestedSkill, ...]: """Distil and record one PROCEDURAL skill card per merged outcome. - For each merged issue: fetch its diff + merged SHA via ``fetch_diff(issue)``, - distil a :class:`~forge_loop.skill_librarian.SkillCard` via the injected - ``call_llm`` librarian, and record it as a procedural skill tagged with its - ``area:`` path and stamped with ``commit:`` provenance. Outcomes whose - diff is empty, or whose distillation fails to parse, are skipped — only a - real, proven recipe is harvested. Idempotent per ``(issue, sha)``: the same - merge re-harvested upserts in place rather than duplicating. + For each merged issue: fetch its unified diff via ``fetch_diff(pr_url)`` + (matching ``gh_issues.pr_diff(pr_url, repo)`` — forge-loop's githubkit path; + the gh CLI is deliberately not used), distil a + :class:`~forge_loop.skill_librarian.SkillCard` via the injected ``call_llm`` + librarian, and record it as a procedural skill tagged with its ``area:`` path + and stamped with ``pr:`` provenance. Outcomes without a PR URL, with an + empty diff, or whose distillation fails to parse are skipped — only a real, + proven recipe is harvested. Idempotent per PR URL: the same merge re-harvested + upserts in place rather than duplicating. Both I/O concerns (the diff fetch and the model call) are injected, so the - harvest control flow is fully unit-testable without git or the SDK. Returns - one :class:`HarvestedSkill` per recorded card so the caller can emit events. + harvest control flow is fully unit-testable without the network. Returns one + :class:`HarvestedSkill` per recorded card so the caller can emit events. """ from forge_loop.skill_librarian import distill_skill_from_merge harvested: list[HarvestedSkill] = [] - seen: set[int] = set() + seen: set[str] = set() for record in merged: coerced = _coerce_issue(record) if coerced is None: continue - n, title, _pr = coerced - if n in seen: + n, title, pr_url = coerced + if not pr_url or pr_url in seen: continue - seen.add(n) + seen.add(pr_url) - diff, sha = fetch_diff(n) + diff = fetch_diff(pr_url) if not diff.strip(): continue card = distill_skill_from_merge( @@ -426,10 +428,10 @@ def harvest_skills_from_merge( target=card.target, title=card_title, body=card.to_body(), - source_key=f"harvest:{n}:{sha}", + source_key=f"harvest:{pr_url}", source_task_ref=f"issue:#{n}", extra_tags=(area_tag(card.area),) if card.area.strip() else (), - evidence_refs=(f"commit:{sha}",), + evidence_refs=(f"pr:{pr_url}",), confidence=card.confidence, now=now, ) @@ -439,7 +441,7 @@ def harvest_skills_from_merge( issue=n, area=card.area, skill_key=skill_key, - sha=sha, + pr_url=pr_url, confidence=card.confidence, title=card_title, ) diff --git a/src/forge_loop/runner/tick.py b/src/forge_loop/runner/tick.py index ef011e0..68f14f8 100644 --- a/src/forge_loop/runner/tick.py +++ b/src/forge_loop/runner/tick.py @@ -175,33 +175,22 @@ def _record_merged_memory( _harvest_merged_skills(cfg, merged) -def _merged_pr_diff(cfg: Config, issue_n: int) -> tuple[str, str]: - """Fetch a merged PR's diff + merge-commit SHA for an issue via ``gh``.""" - import subprocess - - diff_cmd = ["gh", "pr", "diff", str(issue_n)] - sha_cmd = ["gh", "pr", "view", str(issue_n), "--json", "mergeCommit", "-q", ".mergeCommit.oid"] - if cfg.github_repo: - diff_cmd += ["-R", cfg.github_repo] - sha_cmd += ["-R", cfg.github_repo] - diff = subprocess.run(diff_cmd, capture_output=True, text=True, timeout=60, cwd=cfg.repo) - sha = subprocess.run(sha_cmd, capture_output=True, text=True, timeout=30, cwd=cfg.repo) - return diff.stdout, sha.stdout.strip() - - def _harvest_merged_skills(cfg: Config, merged: list[WorkerOutcome]) -> None: """Best-effort: distil + record one procedural skill card per merged PR. Isolated from :func:`_record_merged_memory`'s episodic write and broadly - wrapped: a librarian/``gh`` failure emits ``skill_harvest_failed`` and never - breaks the tick. Gated by ``misc.skill_tree_harvest`` (env - ``FORGE_SKILL_TREE_HARVEST``); on by default. + wrapped: a librarian/diff-fetch failure emits ``skill_harvest_failed`` and + never breaks the tick. Gated by ``misc.skill_tree_harvest`` (env + ``FORGE_SKILL_TREE_HARVEST``); on by default. The diff is fetched through + forge-loop's githubkit client (``gh_issues.pr_diff``) — the gh CLI is + deliberately not used and a loop ``GH_TOKEN`` would break it. """ from forge_loop.settings import get_settings if not merged or not get_settings().misc.skill_tree_harvest: return try: + from forge_loop import gh_issues from forge_loop.control.boot import canonical_task_saga_path from forge_loop.events import SkillHarvestedEvent, emit from forge_loop.memory.store import SqliteMemoryStore @@ -212,7 +201,7 @@ def _harvest_merged_skills(cfg: Config, merged: list[WorkerOutcome]) -> None: harvested = harvest_skills_from_merge( store, merged, - fetch_diff=lambda issue_n: _merged_pr_diff(cfg, issue_n), + fetch_diff=lambda pr_url: gh_issues.pr_diff(pr_url, cfg.github_repo or ""), call_llm=default_librarian(cwd=cfg.repo), ) for skill in harvested: @@ -223,7 +212,7 @@ def _harvest_merged_skills(cfg: Config, merged: list[WorkerOutcome]) -> None: area=skill.area, skill_key=skill.skill_key, memory_id=skill.memory_id, - sha=skill.sha, + pr=skill.pr_url, confidence=skill.confidence, ), ) diff --git a/tests/test_skill_events.py b/tests/test_skill_events.py index 11de5b8..0d23eba 100644 --- a/tests/test_skill_events.py +++ b/tests/test_skill_events.py @@ -19,7 +19,7 @@ def test_skill_harvested_registered_and_roundtrips() -> None: area="pulsar-node/http-route", skill_key="k", memory_id="m", - sha="s", + pr="https://github.com/o/r/pull/9", confidence=0.9, ) rec = ev.to_record() @@ -31,7 +31,7 @@ def test_skill_harvested_registered_and_roundtrips() -> None: def test_skill_harvested_rejects_out_of_range_confidence() -> None: with pytest.raises(Exception): # noqa: B017 - pydantic ValidationError - SkillHarvestedEvent(issue=1, area="a", skill_key="k", memory_id="m", sha="s", confidence=5) + SkillHarvestedEvent(issue=1, area="a", skill_key="k", memory_id="m", pr="p", confidence=5) def test_skill_injected_carries_rank_and_kind() -> None: diff --git a/tests/test_skill_harvest.py b/tests/test_skill_harvest.py index b52ca2c..37a0b03 100644 --- a/tests/test_skill_harvest.py +++ b/tests/test_skill_harvest.py @@ -28,12 +28,14 @@ } """ +_PR = "https://github.com/o/r/pull/9" + def _store(tmp_path: Path) -> SqliteMemoryStore: return SqliteMemoryStore(tmp_path / "memory.db") -# --- record_procedural_skill: area tag + evidence (the SHA provenance) -------- +# --- record_procedural_skill: area tag + evidence (provenance) ---------------- def test_record_procedural_skill_persists_area_tag_and_evidence(tmp_path: Path) -> None: @@ -57,17 +59,22 @@ def test_record_procedural_skill_persists_area_tag_and_evidence(tmp_path: Path) # --- harvest_skills_from_merge ------------------------------------------------ +# fetch_diff is keyed by the PR URL (which the merged outcome carries) and +# returns the unified diff text — matching gh_issues.pr_diff(pr_url, repo), +# forge-loop's githubkit path (the gh CLI is not used and a loop GH_TOKEN +# breaks it). Provenance is the PR URL. -def test_harvest_records_one_card_per_merge_with_area_and_sha(tmp_path: Path) -> None: +def test_harvest_records_one_card_per_merge_with_area_and_pr(tmp_path: Path) -> None: store = _store(tmp_path) - def fetch_diff(issue: int) -> tuple[str, str]: - return ("diff --git a/ledger.rs b/ledger.rs\n+fn f(){}", "sha-deadbeef") + def fetch_diff(pr_url: str) -> str: + assert pr_url == _PR # the PR URL drives the diff fetch, not the issue + return "diff --git a/ledger.rs b/ledger.rs\n+fn f(){}" harvested = harvest_skills_from_merge( store, - [{"issue": 430, "title": "APP4: app-scoped token auth"}], + [{"issue": 430, "title": "APP4: app-scoped token auth", "pr_url": _PR}], fetch_diff=fetch_diff, call_llm=lambda _p: _VALID_CARD, ) @@ -75,21 +82,33 @@ def fetch_diff(issue: int) -> tuple[str, str]: assert len(harvested) == 1 assert harvested[0].issue == 430 assert harvested[0].area == "pulsar-node/http-route" - assert harvested[0].sha == "sha-deadbeef" + assert harvested[0].pr_url == _PR active = store.list_active(kind=MemoryKind.PROCEDURAL) assert len(active) == 1 assert area_from_tags(active[0].tags) == "pulsar-node/http-route" - assert "commit:sha-deadbeef" in active[0].provenance.evidence_refs + assert f"pr:{_PR}" in active[0].provenance.evidence_refs assert "cargo test" in active[0].body # the recipe survived +def test_harvest_skips_outcome_without_a_pr_url(tmp_path: Path) -> None: + store = _store(tmp_path) + harvested = harvest_skills_from_merge( + store, + [{"issue": 1, "title": "x"}], # no pr_url -> cannot fetch a diff + fetch_diff=lambda _u: "real diff", + call_llm=lambda _p: _VALID_CARD, + ) + assert harvested == () + assert store.list_active(kind=MemoryKind.PROCEDURAL) == () + + def test_harvest_skips_outcome_with_empty_diff(tmp_path: Path) -> None: store = _store(tmp_path) harvested = harvest_skills_from_merge( store, - [{"issue": 1, "title": "x"}], - fetch_diff=lambda _n: ("", "sha"), + [{"issue": 1, "title": "x", "pr_url": _PR}], + fetch_diff=lambda _u: "", call_llm=lambda _p: _VALID_CARD, ) assert harvested == () @@ -100,8 +119,8 @@ def test_harvest_skips_when_librarian_returns_garbage(tmp_path: Path) -> None: store = _store(tmp_path) harvested = harvest_skills_from_merge( store, - [{"issue": 1, "title": "x"}], - fetch_diff=lambda _n: ("real diff", "sha"), + [{"issue": 1, "title": "x", "pr_url": _PR}], + fetch_diff=lambda _u: "real diff", call_llm=lambda _p: "sorry, cannot help", ) assert harvested == () @@ -114,12 +133,12 @@ def test_harvest_is_idempotent_for_the_same_merge(tmp_path: Path) -> None: def run() -> None: harvest_skills_from_merge( store, - [{"issue": 430, "title": "APP4"}], - fetch_diff=lambda _n: ("diff", "sha-x"), + [{"issue": 430, "title": "APP4", "pr_url": _PR}], + fetch_diff=lambda _u: "diff", call_llm=lambda _p: _VALID_CARD, ) run() run() - # same (issue, sha) → same source_key → upsert in place, not a duplicate + # same PR URL -> same source_key -> upsert in place, not a duplicate assert len(store.list_active(kind=MemoryKind.PROCEDURAL)) == 1