Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
12 changes: 8 additions & 4 deletions docs/design/81-skill-tree.md
Original file line number Diff line number Diff line change
Expand Up @@ -47,10 +47,14 @@ New helpers in `memory/models.py` (mirror `axis_tag`): `AREA_TAG_PREFIX`,
critic-clean merge, a one-shot LLM "librarian" distills the merged diff +
acceptance criteria into a `SkillCard(area, trigger, procedure, pitfalls,
failing_signal, target, confidence)`. `harvest_skills_from_merge(...)` is pure
and injectable (the LLM call is a passed-in callable) so the wiring is unit
tested without network. It calls `record_procedural_skill` with the `area:`
tag and provenance carrying the merged SHA in `evidence_refs`. Emits
`skill_harvested`.
and injectable (both the diff fetch and the LLM call are passed-in callables)
so the wiring is unit tested without network. The diff is fetched in prod via
`gh_issues.pr_diff(pr_url, repo)` (forge-loop's githubkit client — the gh CLI
is deliberately unused and a loop `GH_TOKEN` would break it). It calls
`record_procedural_skill` with the `area:` tag and `pr:<url>` provenance in
`evidence_refs`. Emits `skill_harvested`. (Commit-SHA-based age/expiry of
harvested cards is a follow-up; recipe freshness is held by supersession on
the skill-key.)
2. **Retrieve + inject** (`memory/skills.py` + `worker_brief.py`):
`retrieve_skills_for(store, query, *, k)` ranks active procedural cards by
area/title/tag token overlap with the ticket, walking most-specific-leaf →
Expand Down
2 changes: 1 addition & 1 deletion src/forge_loop/events.py
Original file line number Diff line number Diff line change
Expand Up @@ -454,7 +454,7 @@ class SkillHarvestedEvent(EventBase):
area: str = ""
skill_key: str = ""
memory_id: str = ""
sha: str = ""
pr: str = ""
confidence: float = Field(ge=0.0, le=1.0, default=1.0)


Expand Down
40 changes: 21 additions & 19 deletions src/forge_loop/runner/learning.py
Original file line number Diff line number Diff line change
Expand Up @@ -369,7 +369,7 @@ class HarvestedSkill:
issue: int
area: str
skill_key: str
sha: str
pr_url: str
confidence: float
title: str

Expand All @@ -378,38 +378,40 @@ def harvest_skills_from_merge(
memory_store: MemoryStore,
merged: Iterable[object],
*,
fetch_diff: Callable[[int], tuple[str, str]],
fetch_diff: Callable[[str], str],
call_llm: Callable[[str], str],
now: datetime | None = None,
) -> tuple[HarvestedSkill, ...]:
"""Distil and record one PROCEDURAL skill card per merged outcome.

For each merged issue: fetch its diff + merged SHA via ``fetch_diff(issue)``,
distil a :class:`~forge_loop.skill_librarian.SkillCard` via the injected
``call_llm`` librarian, and record it as a procedural skill tagged with its
``area:`` path and stamped with ``commit:<sha>`` provenance. Outcomes whose
diff is empty, or whose distillation fails to parse, are skipped — only a
real, proven recipe is harvested. Idempotent per ``(issue, sha)``: the same
merge re-harvested upserts in place rather than duplicating.
For each merged issue: fetch its unified diff via ``fetch_diff(pr_url)``
(matching ``gh_issues.pr_diff(pr_url, repo)`` — forge-loop's githubkit path;
the gh CLI is deliberately not used), distil a
:class:`~forge_loop.skill_librarian.SkillCard` via the injected ``call_llm``
librarian, and record it as a procedural skill tagged with its ``area:`` path
and stamped with ``pr:<url>`` provenance. Outcomes without a PR URL, with an
empty diff, or whose distillation fails to parse are skipped — only a real,
proven recipe is harvested. Idempotent per PR URL: the same merge re-harvested
upserts in place rather than duplicating.

Both I/O concerns (the diff fetch and the model call) are injected, so the
harvest control flow is fully unit-testable without git or the SDK. Returns
one :class:`HarvestedSkill` per recorded card so the caller can emit events.
harvest control flow is fully unit-testable without the network. Returns one
:class:`HarvestedSkill` per recorded card so the caller can emit events.
"""
from forge_loop.skill_librarian import distill_skill_from_merge

harvested: list[HarvestedSkill] = []
seen: set[int] = set()
seen: set[str] = set()
for record in merged:
coerced = _coerce_issue(record)
if coerced is None:
continue
n, title, _pr = coerced
if n in seen:
n, title, pr_url = coerced
if not pr_url or pr_url in seen:
continue
seen.add(n)
seen.add(pr_url)

diff, sha = fetch_diff(n)
diff = fetch_diff(pr_url)
if not diff.strip():
continue
card = distill_skill_from_merge(
Expand All @@ -426,10 +428,10 @@ def harvest_skills_from_merge(
target=card.target,
title=card_title,
body=card.to_body(),
source_key=f"harvest:{n}:{sha}",
source_key=f"harvest:{pr_url}",
source_task_ref=f"issue:#{n}",
extra_tags=(area_tag(card.area),) if card.area.strip() else (),
evidence_refs=(f"commit:{sha}",),
evidence_refs=(f"pr:{pr_url}",),
confidence=card.confidence,
now=now,
)
Expand All @@ -439,7 +441,7 @@ def harvest_skills_from_merge(
issue=n,
area=card.area,
skill_key=skill_key,
sha=sha,
pr_url=pr_url,
confidence=card.confidence,
title=card_title,
)
Expand Down
27 changes: 8 additions & 19 deletions src/forge_loop/runner/tick.py
Original file line number Diff line number Diff line change
Expand Up @@ -175,33 +175,22 @@ def _record_merged_memory(
_harvest_merged_skills(cfg, merged)


def _merged_pr_diff(cfg: Config, issue_n: int) -> tuple[str, str]:
"""Fetch a merged PR's diff + merge-commit SHA for an issue via ``gh``."""
import subprocess

diff_cmd = ["gh", "pr", "diff", str(issue_n)]
sha_cmd = ["gh", "pr", "view", str(issue_n), "--json", "mergeCommit", "-q", ".mergeCommit.oid"]
if cfg.github_repo:
diff_cmd += ["-R", cfg.github_repo]
sha_cmd += ["-R", cfg.github_repo]
diff = subprocess.run(diff_cmd, capture_output=True, text=True, timeout=60, cwd=cfg.repo)
sha = subprocess.run(sha_cmd, capture_output=True, text=True, timeout=30, cwd=cfg.repo)
return diff.stdout, sha.stdout.strip()


def _harvest_merged_skills(cfg: Config, merged: list[WorkerOutcome]) -> None:
"""Best-effort: distil + record one procedural skill card per merged PR.

Isolated from :func:`_record_merged_memory`'s episodic write and broadly
wrapped: a librarian/``gh`` failure emits ``skill_harvest_failed`` and never
breaks the tick. Gated by ``misc.skill_tree_harvest`` (env
``FORGE_SKILL_TREE_HARVEST``); on by default.
wrapped: a librarian/diff-fetch failure emits ``skill_harvest_failed`` and
never breaks the tick. Gated by ``misc.skill_tree_harvest`` (env
``FORGE_SKILL_TREE_HARVEST``); on by default. The diff is fetched through
forge-loop's githubkit client (``gh_issues.pr_diff``) — the gh CLI is
deliberately not used and a loop ``GH_TOKEN`` would break it.
"""
from forge_loop.settings import get_settings

if not merged or not get_settings().misc.skill_tree_harvest:
return
try:
from forge_loop import gh_issues
from forge_loop.control.boot import canonical_task_saga_path
from forge_loop.events import SkillHarvestedEvent, emit
from forge_loop.memory.store import SqliteMemoryStore
Expand All @@ -212,7 +201,7 @@ def _harvest_merged_skills(cfg: Config, merged: list[WorkerOutcome]) -> None:
harvested = harvest_skills_from_merge(
store,
merged,
fetch_diff=lambda issue_n: _merged_pr_diff(cfg, issue_n),
fetch_diff=lambda pr_url: gh_issues.pr_diff(pr_url, cfg.github_repo or ""),
call_llm=default_librarian(cwd=cfg.repo),
)
for skill in harvested:
Expand All @@ -223,7 +212,7 @@ def _harvest_merged_skills(cfg: Config, merged: list[WorkerOutcome]) -> None:
area=skill.area,
skill_key=skill.skill_key,
memory_id=skill.memory_id,
sha=skill.sha,
pr=skill.pr_url,
confidence=skill.confidence,
),
)
Expand Down
4 changes: 2 additions & 2 deletions tests/test_skill_events.py
Original file line number Diff line number Diff line change
Expand Up @@ -19,7 +19,7 @@ def test_skill_harvested_registered_and_roundtrips() -> None:
area="pulsar-node/http-route",
skill_key="k",
memory_id="m",
sha="s",
pr="https://github.com/o/r/pull/9",
confidence=0.9,
)
rec = ev.to_record()
Expand All @@ -31,7 +31,7 @@ def test_skill_harvested_registered_and_roundtrips() -> None:

def test_skill_harvested_rejects_out_of_range_confidence() -> None:
with pytest.raises(Exception): # noqa: B017 - pydantic ValidationError
SkillHarvestedEvent(issue=1, area="a", skill_key="k", memory_id="m", sha="s", confidence=5)
SkillHarvestedEvent(issue=1, area="a", skill_key="k", memory_id="m", pr="p", confidence=5)


def test_skill_injected_carries_rank_and_kind() -> None:
Expand Down
47 changes: 33 additions & 14 deletions tests/test_skill_harvest.py
Original file line number Diff line number Diff line change
Expand Up @@ -28,12 +28,14 @@
}
"""

_PR = "https://github.com/o/r/pull/9"


def _store(tmp_path: Path) -> SqliteMemoryStore:
return SqliteMemoryStore(tmp_path / "memory.db")


# --- record_procedural_skill: area tag + evidence (the SHA provenance) --------
# --- record_procedural_skill: area tag + evidence (provenance) ----------------


def test_record_procedural_skill_persists_area_tag_and_evidence(tmp_path: Path) -> None:
Expand All @@ -57,39 +59,56 @@ def test_record_procedural_skill_persists_area_tag_and_evidence(tmp_path: Path)


# --- harvest_skills_from_merge ------------------------------------------------
# fetch_diff is keyed by the PR URL (which the merged outcome carries) and
# returns the unified diff text — matching gh_issues.pr_diff(pr_url, repo),
# forge-loop's githubkit path (the gh CLI is not used and a loop GH_TOKEN
# breaks it). Provenance is the PR URL.


def test_harvest_records_one_card_per_merge_with_area_and_sha(tmp_path: Path) -> None:
def test_harvest_records_one_card_per_merge_with_area_and_pr(tmp_path: Path) -> None:
store = _store(tmp_path)

def fetch_diff(issue: int) -> tuple[str, str]:
return ("diff --git a/ledger.rs b/ledger.rs\n+fn f(){}", "sha-deadbeef")
def fetch_diff(pr_url: str) -> str:
assert pr_url == _PR # the PR URL drives the diff fetch, not the issue
return "diff --git a/ledger.rs b/ledger.rs\n+fn f(){}"

harvested = harvest_skills_from_merge(
store,
[{"issue": 430, "title": "APP4: app-scoped token auth"}],
[{"issue": 430, "title": "APP4: app-scoped token auth", "pr_url": _PR}],
fetch_diff=fetch_diff,
call_llm=lambda _p: _VALID_CARD,
)

assert len(harvested) == 1
assert harvested[0].issue == 430
assert harvested[0].area == "pulsar-node/http-route"
assert harvested[0].sha == "sha-deadbeef"
assert harvested[0].pr_url == _PR

active = store.list_active(kind=MemoryKind.PROCEDURAL)
assert len(active) == 1
assert area_from_tags(active[0].tags) == "pulsar-node/http-route"
assert "commit:sha-deadbeef" in active[0].provenance.evidence_refs
assert f"pr:{_PR}" in active[0].provenance.evidence_refs
assert "cargo test" in active[0].body # the recipe survived


def test_harvest_skips_outcome_without_a_pr_url(tmp_path: Path) -> None:
store = _store(tmp_path)
harvested = harvest_skills_from_merge(
store,
[{"issue": 1, "title": "x"}], # no pr_url -> cannot fetch a diff
fetch_diff=lambda _u: "real diff",
call_llm=lambda _p: _VALID_CARD,
)
assert harvested == ()
assert store.list_active(kind=MemoryKind.PROCEDURAL) == ()


def test_harvest_skips_outcome_with_empty_diff(tmp_path: Path) -> None:
store = _store(tmp_path)
harvested = harvest_skills_from_merge(
store,
[{"issue": 1, "title": "x"}],
fetch_diff=lambda _n: ("", "sha"),
[{"issue": 1, "title": "x", "pr_url": _PR}],
fetch_diff=lambda _u: "",
call_llm=lambda _p: _VALID_CARD,
)
assert harvested == ()
Expand All @@ -100,8 +119,8 @@ def test_harvest_skips_when_librarian_returns_garbage(tmp_path: Path) -> None:
store = _store(tmp_path)
harvested = harvest_skills_from_merge(
store,
[{"issue": 1, "title": "x"}],
fetch_diff=lambda _n: ("real diff", "sha"),
[{"issue": 1, "title": "x", "pr_url": _PR}],
fetch_diff=lambda _u: "real diff",
call_llm=lambda _p: "sorry, cannot help",
)
assert harvested == ()
Expand All @@ -114,12 +133,12 @@ def test_harvest_is_idempotent_for_the_same_merge(tmp_path: Path) -> None:
def run() -> None:
harvest_skills_from_merge(
store,
[{"issue": 430, "title": "APP4"}],
fetch_diff=lambda _n: ("diff", "sha-x"),
[{"issue": 430, "title": "APP4", "pr_url": _PR}],
fetch_diff=lambda _u: "diff",
call_llm=lambda _p: _VALID_CARD,
)

run()
run()
# same (issue, sha) → same source_key upsert in place, not a duplicate
# same PR URL -> same source_key -> upsert in place, not a duplicate
assert len(store.list_active(kind=MemoryKind.PROCEDURAL)) == 1
Loading